From 6fce297e9ff9f495b10281f2c5c78e6e0c2d48ad Mon Sep 17 00:00:00 2001 From: Joe Date: Mon, 22 Feb 2010 12:19:15 -0800 Subject: merge attempt #1 --- ebin/.gitignore | 1 + ebin/dynomite.appup | 6 + include/chunk_size.hrl | 1 + include/common.hrl | 41 +++ include/config.hrl | 24 ++ include/dmerkle.hrl | 14 + include/profile.hrl | 9 + include/test.hrl | 13 + src/Makefile | 11 + src/bootstrap_manager.erl | 261 ++++++++++++++++ src/bootstrap_receiver.erl | 121 ++++++++ src/cluster_ops.erl | 282 +++++++++++++++++ src/configuration.erl | 99 ++++++ src/dynomite.erl | 23 ++ src/dynomite_app.erl | 145 +++++++++ src/dynomite_couch_api.erl | 140 +++++++++ src/dynomite_couch_storage.erl | 41 +++ src/dynomite_http.erl | 21 ++ src/dynomite_prof.erl | 164 ++++++++++ src/dynomite_sup.erl | 85 +++++ src/lib_misc.erl | 235 ++++++++++++++ src/mem_utils.erl | 129 ++++++++ src/membership2.erl | 686 +++++++++++++++++++++++++++++++++++++++++ src/node.erl | 39 +++ src/partitions.erl | 334 ++++++++++++++++++++ src/replication.erl | 165 ++++++++++ src/vector_clock.erl | 99 ++++++ test/Emakefile | 4 + test/Makefile | 12 + test/cluster_ops_test.erl | 83 +++++ test/mem2_code_change.erl | 12 + test/mem_utils_test.erl | 97 ++++++ test/membership2_test.erl | 126 ++++++++ test/mock.erl | 322 +++++++++++++++++++ test/mock_genserver.erl | 209 +++++++++++++ test/partitions_test.erl | 121 ++++++++ test/replication_test.erl | 89 ++++++ test/stub.erl | 168 ++++++++++ test/test_suite.erl | 10 + 39 files changed, 4442 insertions(+) create mode 100644 ebin/.gitignore create mode 100644 ebin/dynomite.appup create mode 100644 include/chunk_size.hrl create mode 100644 include/common.hrl create mode 100644 include/config.hrl create mode 100644 include/dmerkle.hrl create mode 100644 include/profile.hrl create mode 100644 include/test.hrl create mode 100644 src/Makefile create mode 100644 src/bootstrap_manager.erl create mode 100644 src/bootstrap_receiver.erl create mode 100644 src/cluster_ops.erl create mode 
100644 src/configuration.erl create mode 100644 src/dynomite.erl create mode 100644 src/dynomite_app.erl create mode 100644 src/dynomite_couch_api.erl create mode 100644 src/dynomite_couch_storage.erl create mode 100644 src/dynomite_http.erl create mode 100644 src/dynomite_prof.erl create mode 100644 src/dynomite_sup.erl create mode 100644 src/lib_misc.erl create mode 100644 src/mem_utils.erl create mode 100644 src/membership2.erl create mode 100644 src/node.erl create mode 100644 src/partitions.erl create mode 100644 src/replication.erl create mode 100644 src/vector_clock.erl create mode 100644 test/Emakefile create mode 100644 test/Makefile create mode 100644 test/cluster_ops_test.erl create mode 100644 test/mem2_code_change.erl create mode 100644 test/mem_utils_test.erl create mode 100644 test/membership2_test.erl create mode 100644 test/mock.erl create mode 100644 test/mock_genserver.erl create mode 100644 test/partitions_test.erl create mode 100644 test/replication_test.erl create mode 100644 test/stub.erl create mode 100644 test/test_suite.erl diff --git a/ebin/.gitignore b/ebin/.gitignore new file mode 100644 index 00000000..13d94f8b --- /dev/null +++ b/ebin/.gitignore @@ -0,0 +1 @@ +*.app diff --git a/ebin/dynomite.appup b/ebin/dynomite.appup new file mode 100644 index 00000000..d6d7726b --- /dev/null +++ b/ebin/dynomite.appup @@ -0,0 +1,6 @@ +{"0.9.0-cloudant", [{"0.9.0-cloudant", [ + {apply, {supervisor, terminate_child, [showroom_sup, dynomite_sup]}}, + {restart_application, dynomite}, + {apply, {supervisor, delete_child, [showroom_sup, dynomite_sup]}}, + {update, showroom_sup, supervisor} +]}],[{"0.9.0-cloudant",[]}]}. diff --git a/include/chunk_size.hrl b/include/chunk_size.hrl new file mode 100644 index 00000000..f9906b5f --- /dev/null +++ b/include/chunk_size.hrl @@ -0,0 +1 @@ +-define(CHUNK_SIZE, 5120). 
diff --git a/include/common.hrl b/include/common.hrl new file mode 100644 index 00000000..2299950d --- /dev/null +++ b/include/common.hrl @@ -0,0 +1,41 @@ + +-include_lib("eunit/include/eunit.hrl"). + +-define(fmt(Msg, Args), lists:flatten(io_lib:format(Msg, Args))). +-define(infoFmt(Msg, Args), error_logger:info_msg(Msg, Args)). +-define(infoMsg(Msg), error_logger:info_msg(Msg)). + + +%% from couch_db.hrl +-ifndef(LOG_DEBUG). +-define(LOG_DEBUG(Format, Args), + showroom_log:message(debug, Format, Args)). +-endif. + +-ifndef(LOG_INFO). +-define(LOG_INFO(Format, Args), + showroom_log:message(info, Format, Args)). +-endif. + +-ifndef(LOG_ERROR). +-define(LOG_ERROR(Format, Args), + showroom_log:message(error, Format, Args)). +-endif. + +%% -define(PMAP(F,L), lists:map(F,L)). +-define(PMAP(F,L), showroom_utils:pmap(F,L)). + + +%% +%% membership2 (in here for separate testing module) +%% + +-define(VERSION,2). + +-record(membership, {header=?VERSION, + node, + nodes, + partitions, + version, + fullmap + }). diff --git a/include/config.hrl b/include/config.hrl new file mode 100644 index 00000000..20983d26 --- /dev/null +++ b/include/config.hrl @@ -0,0 +1,24 @@ + +-ifndef(CONFIG_HRL). +-define(CONFIG_HRL, true). +%we don't want to turn protocol buffers on by default, since the library is not included +%it should be very easy for new users to start up an instance +-record(config, {n=3, + r=1, + w=1, + q=6, + directory, + web_port, + text_port=11222, + storage_mod=dets_storage, + blocksize=4096, + thrift_port=9200, + pb_port=undefined, + buffered_writes=undefined, + cache=undefined, + cache_size=1048576, + hash_module=partitions, + meta=[] + }). + +-endif. diff --git a/include/dmerkle.hrl b/include/dmerkle.hrl new file mode 100644 index 00000000..b4fe2a08 --- /dev/null +++ b/include/dmerkle.hrl @@ -0,0 +1,14 @@ +-define(DMERKLE_VERSION, 2). +-define(STATIC_HEADER, 93). + +-define(d_from_blocksize(BlockSize), trunc((BlockSize - 17)/16)). 
+-define(pointers_from_blocksize(BlockSize), (lib_misc:ceiling(math:log(BlockSize)/math:log(2)) - 3)). +-define(pointer_for_size(Size, BlockSize), (if Size =< 16 -> 1; Size =< BlockSize -> ?pointers_from_blocksize(Size); true -> last end)). +-define(size_for_pointer(N), (2 bsl (N+2))). +-define(headersize_from_blocksize(BlockSize), (?STATIC_HEADER + ?pointers_from_blocksize(BlockSize) * 8)). +-define(aligned(Ptr, HeaderSize, BlockSize), (((Ptr - (HeaderSize)) rem BlockSize) == 0)). +-define(block(Ptr, HeaderSize, BlockSize), ((Ptr - (HeaderSize)) div BlockSize)). + +-record(node, {m=0, keys=[], children=[], offset=eof}). +-record(leaf, {m=0, values=[], offset=eof}). +-record(free, {offset,size=0,pointer=0}). diff --git a/include/profile.hrl b/include/profile.hrl new file mode 100644 index 00000000..2ffd8009 --- /dev/null +++ b/include/profile.hrl @@ -0,0 +1,9 @@ +-ifdef(PROF). +-define(balance_prof, dynomite_prof:balance_prof()). +-define(prof(Label), dynomite_prof:start_prof(Label)). +-define(forp(Label), dynomite_prof:stop_prof(Label)). +-else. +-define(prof(Label), true). +-define(forp(Label), true). +-define(balance_prof, true). +-endif. diff --git a/include/test.hrl b/include/test.hrl new file mode 100644 index 00000000..38fb850f --- /dev/null +++ b/include/test.hrl @@ -0,0 +1,13 @@ +-define(TMP_DIR, "../../../tmp/lib"). + +-define(TMP_FILE, fun(File) -> + filename:join(?TMP_DIR, File) + end). + +%% priv_dir() -> +%% Dir = filename:join([t:config(priv_dir), "data", atom_to_list(?MODULE), pid_to_list(self())]), +%% filelib:ensure_dir(filename:join([Dir, atom_to_list(?MODULE)])), +%% Dir. + +%% priv_file(File) -> +%% filename:join(priv_dir(), File). 
diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 00000000..32aa1872 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,11 @@ +include ../support/include.mk + +all: $(EBIN_FILES_NO_DOCS) + +doc: $(EBIN_FILES) + +debug: + $(MAKE) DEBUG=-DDEBUG + +clean: + rm -rf $(EBIN_FILES) diff --git a/src/bootstrap_manager.erl b/src/bootstrap_manager.erl new file mode 100644 index 00000000..f1303223 --- /dev/null +++ b/src/bootstrap_manager.erl @@ -0,0 +1,261 @@ +%%%------------------------------------------------------------------- +%%% File: bootstrap_manager.erl +%%% @author Cliff Moon <> [] +%%% @copyright 2009 Cliff Moon +%%% @doc This is the bootstrap manager for a cluster. +%%% +%%% @end +%%% +%%% @since 2009-07-29 by Cliff Moon +%%%------------------------------------------------------------------- +-module(bootstrap_manager). +-author('cliff@powerset.com'). +-author('brad@cloudant.com'). + +-behaviour(gen_server). + +%% API +-export([start_bootstrap/3, end_bootstrap/1, + start_link/3, start/3, stop/0, + start_transfers/0, transfers/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {transfer_list, nodes, transfers, futurefullmap}). +-record(transfer, {partition, receivers, rate=0, status=starting}). + +-include("../include/config.hrl"). +-include("../include/common.hrl"). 
+ +%%==================================================================== +%% API +%%==================================================================== +%%-------------------------------------------------------------------- +%% @spec start_link() -> {ok,Pid} | ignore | {error,Error} +%% @doc Starts the server +%% @end +%%-------------------------------------------------------------------- +start_bootstrap(State=#membership{node=Node, nodes=Nodes}, + OldFullMap, NewFullMap) -> + case partitions:diff(OldFullMap, NewFullMap) of + [] -> + % no difference in pmaps + {NewFullMap, State#membership{fullmap=NewFullMap}}; + TransferList when is_list(TransferList) -> + ?LOG_DEBUG("~nBootstrap~nNode : ~p~nTransferList :~n~p~n", + [Node, partitions:pp_diff(TransferList)]), + case start_link(TransferList, Nodes, NewFullMap) of + {ok, _Pid} -> + start_transfers(); + Other -> throw(Other) + end, + + % bootstrap has some stuff to do (async), so just give the state + % passed in for now. end_bootstrap will be called with the resulting + % state when it completes + {OldFullMap, State}; + Other -> + % probably occurs b/c T (# of nodes) < N currently. + % more nodes joining should help avoid this error. + ?LOG_ERROR("no_bootstrap - Other: ~p", [Other]), + {NewFullMap, State#membership{fullmap=NewFullMap}} + end. + + +end_bootstrap(#state{futurefullmap=FutureFullMap}) -> + end_bootstrap(FutureFullMap); + +end_bootstrap(NewFullMap) -> + gen_server:call(membership, {newfullmap, NewFullMap}), + stop(). + + +start(TransferList, Nodes, FutureFullMap) -> + gen_server:start({global, bootstrap_manager}, ?MODULE, + [TransferList, Nodes, FutureFullMap], []). + + +start_link(TransferList, Nodes, FutureFullMap) -> + gen_server:start_link({global, bootstrap_manager}, ?MODULE, + [TransferList, Nodes, FutureFullMap], []). + + +stop() -> + gen_server:cast({global, bootstrap_manager}, stop). + + +start_transfers() -> + gen_server:cast({global, bootstrap_manager}, start_transfers). 
+ + +transfers() -> + gen_server:call({global, bootstrap_manager}, transfers). + + +%%==================================================================== +%% gen_server callbacks +%%==================================================================== + +%%-------------------------------------------------------------------- +%% @spec init(Args) -> {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%% @doc Initiates the server +%% @end +%%-------------------------------------------------------------------- +init([TransferList, Nodes, FutureFullMap]) -> + process_flag(trap_exit, true), + {ok, #state{transfer_list=TransferList,nodes=Nodes, + futurefullmap=FutureFullMap}}. + + +%%-------------------------------------------------------------------- +%% @spec +%% handle_call(Request, From, State) -> {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | +%% {stop, Reason, State} +%% @doc Handling call messages +%% @end +%%-------------------------------------------------------------------- +handle_call(average_transfer_rate, _From, + State=#state{transfers=Transfers}) -> + {Sum, Cardinality} = ets:foldl( + fun(#transfer{rate=Rate}, {Sum, Cardinality}) -> + {Sum+Rate,Cardinality+1} + end, {0, 0}, Transfers), + AverageRate = Sum / Cardinality, + {reply, AverageRate, State}; + +handle_call(aggregate_transfer_rate, _From, + State=#state{transfers=Transfers}) -> + Sum = ets:foldl(fun(#transfer{rate=Rate}, Sum) -> + Rate + Sum + end, 0, Transfers), + {reply, Sum, State}; + +handle_call(transfers, _From, + State=#state{transfers=Transfers}) -> + {reply, {ok, ets:tab2list(Transfers)}, State}; + +%% at least reply that this 'catch-all' was ignored +handle_call(_Request, _From, State) -> + {reply, ignored, State}. 
+ + +%%-------------------------------------------------------------------- +%% @spec handle_cast(Msg, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @doc Handling cast messages +%% @end +%%-------------------------------------------------------------------- +handle_cast(stop, State) -> + {stop, normal, State}; + +handle_cast(start_transfers, + State=#state{transfer_list=TransferList}) -> + Transfers = start_transfers(TransferList, State), + {noreply, State#state{transfers=Transfers}}; + +handle_cast(_Msg, State) -> + {noreply, State}. + + +%%-------------------------------------------------------------------- +%% @spec handle_info(Info, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @doc Handling all non call/cast messages +%% @end +%%-------------------------------------------------------------------- + +handle_info({receiver_done, FromNode, _ToNode, Partition, DbName, Receiver}, + State = #state{transfers=Transfers}) -> + %% TODO use bring_online & ToNode? instead of waiting until end & installing + %% NewFullMap into mem2 + + %% handle the old file + membership2:decommission_part(FromNode, Partition, DbName), + + %% remove from Transfers table + case ets:lookup(Transfers, Partition) of + [Transfer] = [#transfer{receivers=Receivers}] -> + NewReceivers = lists:delete(Receiver, Receivers), + if + length(NewReceivers) == 0 -> ets:delete(Transfers, Partition); + true -> ets:insert(Transfers, Transfer#transfer{receivers=NewReceivers}) + end; + _ -> ok + end, + case ets:first(Transfers) of + '$end_of_table' -> + end_bootstrap(State), + {noreply, State}; + _ -> {noreply, State} + end; + +handle_info(_Info, State) -> + {noreply, State}. + + +%%-------------------------------------------------------------------- +%% @spec terminate(Reason, State) -> void() +%% @doc This function is called by a gen_server when it is about to +%% terminate. 
It should be the opposite of Module:init/1 and do any necessary +%% cleaning up. When it returns, the gen_server terminates with Reason. +%% The return value is ignored. +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + + +%%-------------------------------------------------------------------- +%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} +%% @doc Convert process state when code is changed +%% @end +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- +start_transfers([], State) -> + no_transfers, % no diff in pmaps, so no transfers + end_bootstrap(State); + +start_transfers(Diff, State=#state{nodes=Nodes}) -> + case showroom_db:all_databases("") of + {ok, AllDbs} when length(AllDbs) > 0 -> + start_transfers(Diff, Nodes, configuration:get_config(), AllDbs, + ets:new(transfers, [public, set, {keypos, 2}])); + {ok, []} -> end_bootstrap(State); % no databases, so bootstrap not needed + Other -> throw(Other) % problem getting list of dbs + end. + + +start_transfers([], _, _, _, Transfers) -> + Transfers; + +start_transfers([{FromNode, ToNode, Partition} | Diff], Nodes, Config, + AllDbs, Transfers) -> + membership2:take_offline(FromNode, Partition), + Receivers = lists:map( + fun(DbName) -> + {ok, Receiver} = + bootstrap_receiver:start_link(FromNode, ToNode, Partition, + DbName, 10000, self()), + Receiver + end, AllDbs), + % NOTE: by using AllDbs, we are omitting .deleted.couch files + ets:insert(Transfers, #transfer{partition=Partition, + receivers=Receivers}), + start_transfers(Diff, Nodes, Config, AllDbs, Transfers). 
diff --git a/src/bootstrap_receiver.erl b/src/bootstrap_receiver.erl new file mode 100644 index 00000000..3b4907cb --- /dev/null +++ b/src/bootstrap_receiver.erl @@ -0,0 +1,121 @@ +%%%------------------------------------------------------------------- +%%% File: bootstrap_receiver.erl +%%% @author Brad Anderson +%%% @copyright 2009 Brad Anderson +%%% @doc +%%% +%%% @end +%%% +%%% @since 2009-09-22 by Brad Anderson +%%%------------------------------------------------------------------- +-module(bootstrap_receiver). +-author('brad@cloudant.com'). + +-include("../include/config.hrl"). +-include("../include/common.hrl"). + +%% API +-export([start_link/6, loop/6, fetch_shard/5]). + + +%%==================================================================== +%% API +%%==================================================================== +%%-------------------------------------------------------------------- +%% @spec +%% @doc +%% @end +%%-------------------------------------------------------------------- +start_link(FromNode, ToNode, Partition, DbName, Timeout, Manager) -> + Pid = proc_lib:spawn_link(ToNode, bootstrap_receiver, loop, + [FromNode, Partition, DbName, Timeout, Manager, + self()]), + sync_wait(Pid, Timeout). + + +loop(FromNode, Partition, DbName, Timeout, Manager, Parent) -> + proc_lib:init_ack(Parent, {ok, self()}), + fetch_shard(FromNode, Partition, DbName, Timeout, Manager). 
+ + +%% @doc run at "ToNode" via spawn_link +fetch_shard(FromNode, Partition, DbName, Timeout, Manager) -> + Directory = couch_config:get("couchdb", "database_dir"), + [_NodeName, Hostname] = string:tokens(atom_to_list(FromNode), "@"), + SrcFile = binary_to_list(partitions:shard_name(Partition, DbName)), + DestFile = showroom_utils:full_filename(Partition, DbName, Directory), + Authn = fetch_authn(), + Port = fetch_port(), + Url = lists:concat(["http://", Authn, Hostname, Port, "/", SrcFile, + ".couch"]), + Options = [{save_response_to_file, DestFile}, + {inactivity_timeout, Timeout}], + case filelib:ensure_dir(DestFile) of + ok -> ok; + {error, eexist} -> ok; % duh! + Other -> throw(Other) + end, + ?LOG_DEBUG("~n" + "Directory: ~p~n" + "Hostname : ~p~n" + "SrcFile : ~p~n" + "DestFile : ~p~n" + "Url : ~p~n" + "Options : ~p~n" + , [Directory, Hostname, SrcFile, DestFile, Url, Options]), + case ibrowse:send_req(Url, [], get, [], Options, infinity) of + {ok, "200", _Headers, Body} -> + ?LOG_DEBUG("~nBootstrap ibrowse req Body: ~p~n", [Body]), + Manager ! {receiver_done, FromNode, node(), Partition, DbName, + self()}; + Error -> + ?LOG_ERROR("~nBootstrap ibrowse req Error: ~p~n", [Error]), + throw(Error) + end. + + +%%==================================================================== +%% Internal functions +%%==================================================================== + + +%% from proc_lib.erl in otp r13b01 +sync_wait(Pid, Timeout) -> + receive + {ack, Pid, Return} -> + Return; + {'EXIT', Pid, Reason} -> + {error, Reason} + after Timeout -> + unlink(Pid), + exit(Pid, kill), + flush(Pid), + {error, timeout} + end. + + +flush(Pid) -> + receive + {'EXIT', Pid, _} -> + true + after 0 -> + true + end. + + +fetch_authn() -> + User = couch_config:get("shard_moving", "user", ""), + Pass = couch_config:get("shard_moving", "pass", ""), + if + length(User) > 0 andalso length(Pass) > 0 -> + lists:concat([User, ":", Pass, "@"]); + true -> "" + end. 
+ + +fetch_port() -> + Port = couch_config:get("shard_moving", "port", "8080"), + if + Port =:= "80" -> ""; + true -> lists:concat([":", Port]) + end. diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl new file mode 100644 index 00000000..bd2ad83d --- /dev/null +++ b/src/cluster_ops.erl @@ -0,0 +1,282 @@ +%%%------------------------------------------------------------------- +%%% File: cluster_ops.erl +%%% @author Brad Anderson [http://cloudant.com] +%%% @copyright 2009 Brad Anderson +%%% @doc +%%% +%%% @end +%%% +%%% @since 2009-07-21 by Brad Anderson +%%%------------------------------------------------------------------- +-module(cluster_ops). +-author('brad@cloudant.com'). + +%% API +-export([key_lookup/3, key_lookup/5, + all_parts/4, + some_parts/4, some_parts/5, + quorum_from_each_part/3]). + +-include("../include/common.hrl"). +-include("../include/config.hrl"). + +-include("../include/profile.hrl"). + + +%%==================================================================== +%% API +%%==================================================================== + +%% @doc Get to the proper shard on N nodes by key lookup +%% +%% This fun uses quorum constants from config +key_lookup(Key, {M,F,A}, Access) -> + {N,_R,_W} = Consts = unpack_config(configuration:get_config()), + Const = get_const(Access, Consts), + key_lookup(Key, {M,F,A}, Access, Const, N). 
+ + +%% @doc Get to the proper shard on N nodes by key lookup +%% +%% This fun uses a provided quorum constant, possibly from request, +%% possibly from config +key_lookup(Key, {M,F,A}, Access, Const, N) -> + NodeParts = membership2:nodeparts_for_key(Key), + {ResolveFun, NotFoundFun} = case Access of + r -> {fun resolve_read/1, fun resolve_not_found/2}; + w -> {fun resolve_write/1, fun(_,_) -> {false, notused, []} end} + end, + MapFun = fun({Node,Part}) -> + try + rpc:call(Node, M, F, [[Part | A]]) + catch Class:Exception -> + {error, Class, Exception} + end + end, + {GoodReplies, Bad} = pcall(MapFun, NodeParts, Const), + if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, + Good = lists:map(fun strip_ok/1, GoodReplies), + final_key_lookup(Good, Bad, N, Const, ResolveFun, NotFoundFun, Access). + + +%% @doc Do op on all shards (and maybe even replication partners) +all_parts({M,F,A}, Access, AndPartners, ResolveFun) -> + NodePartList = membership2:all_nodes_parts(AndPartners), + MapFun = fun({Node, Part}) -> + try + rpc:call(Node, M, F, [[Part | A]]) + catch Class:Exception -> + {error, Class, Exception} + end + end, + Replies = ?PMAP(MapFun, NodePartList), + {Good, Bad} = lists:partition(fun valid/1, Replies), + final_all_parts(Good, Bad, length(NodePartList), ResolveFun, Access). + + +%% @doc Do op on some shards, depending on list of keys sent in. +%% +%% This fun uses quorum constants from config +some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access) -> + Const = get_const(Access), + some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access, Const). + + +%% @doc Do op on some shards, depending on list of keys sent in. 
+%% +%% This fun uses a provided quorum constant, possibly from request, +%% possibly from config +some_parts(KeyFun, SeqsKVPairs, {M,F,A}, _Access, Const) -> + TaskFun = fun({{Node,Part}, Values}) -> + try + rpc:call(Node, M, F, [[Part | [Values | A]]]) + catch Class:Exception -> + {error, Class, Exception} + end + end, + + % get tasks per node that are part / values for that partition + DistTasks = get_dist_tasks(KeyFun, SeqsKVPairs), + + % With the distributed tasklist in hand, do the tasks per partition. + % For each partition, do the work on all nodes/parts. + TaskReplies = ?PMAP(TaskFun, DistTasks), + {GoodReplies, Bad} = lists:partition(fun valid/1, TaskReplies), + if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, + Good = lists:map(fun strip_ok/1, GoodReplies), + final_some_parts(Good, Bad, Const). + + +quorum_from_each_part({M,F,A}, Access, ResolveFun) -> + Const = get_const(Access), + {_, Parts} = lists:unzip(membership2:partitions()), + PartsMapFun = fun(Part) -> + Nodes = membership2:nodes_for_part(Part), + NodesMapFun = fun(Node) -> rpc:call(Node, M, F, [[Part | A]]) end, + {GoodReplies,BadReplies} = pcall(NodesMapFun, Nodes, Const), + Good1 = lists:map(fun strip_ok/1, GoodReplies), + Bad1 = case length(Good1) >= Const of + true -> []; + false -> BadReplies + end, + {Good1,Bad1} + end, + Results1 = ?PMAP(PartsMapFun, Parts), + {Good,Bad} = lists:foldl(fun({G,B}, {GAcc,BAcc}) -> + {lists:append(G,GAcc),lists:append(B,BAcc)} + end, {[],[]}, Results1), + if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, + final_quorum_from_each_part(Good, Bad, length(Parts), ResolveFun, Access). 
+ + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +final_key_lookup(Good, Bad, N, Const, ResolveFun, NotFoundFun, Access) -> + {NotFound, Return, Reasons} = NotFoundFun(Bad, Const), + if + length(Good) >= Const -> {ok, ResolveFun(Good)}; + NotFound -> {ok, Return, Reasons}; + true -> error_message(Good, Bad, N, Const, Access) + end. + + +final_all_parts(Good, Bad, Total, ResolveFun, Access) -> + case length(Good) =:= Total of + true -> {ok, ResolveFun(Good)}; + _ -> error_message(Good, Bad, Total, Total, Access) + end. + + +final_some_parts(Good, _Bad, Const) -> + Good1 = lists:flatten(Good), + {Seqs, _} = lists:unzip(Good1), + {ResG,ResB} = + lists:foldl( + fun(Seq, {AccG,AccB}) -> + Vals = proplists:get_all_values(Seq, Good1), + case length(Vals) >= Const of + true -> {[{Seq, Vals}|AccG],AccB}; + _ -> {AccG, [{Seq, Vals}|AccB]} + end + end, {[],[]}, lists:usort(Seqs)), + case length(ResB) of + 0 -> {ok, ResG}; + _ -> {error, ResB} + end. + + +final_quorum_from_each_part(Good, Bad, Total, ResolveFun, Access) -> + case length(Good) =:= Total of + true -> {ok, ResolveFun(Good)}; + _ -> error_message(Good, Bad, Total, Total, Access) + end. + + +resolve_read([First|Responses]) -> + case First of + not_found -> not_found; + _ -> lists:foldr(fun vector_clock:resolve/2, First, Responses) + end. + + +resolve_write([First|Responses]) -> + case First of + not_found -> not_found; + _ -> lists:foldr(fun vector_clock:resolve/2, First, Responses) + end. 
+ + +resolve_not_found(Bad, R) -> + {NotFoundCnt, DeletedCnt, OtherReasons} = + lists:foldl(fun({Error,Reason}, {NotFoundAcc, DeletedAcc, ReasonAcc}) -> + case {Error,Reason} of + {not_found, {_Clock, [missing|_Rest]}} -> + {NotFoundAcc+1, DeletedAcc, ReasonAcc}; + {not_found, {_Clock, [deleted|_Rest]}} -> + {NotFoundAcc, DeletedAcc+1, ReasonAcc}; + _ -> + {NotFoundAcc, DeletedAcc, [Reason|ReasonAcc]} + end + end, {0, 0, []}, Bad), + % TODO: is the comparison to R good here, or should it be N-R? + if + NotFoundCnt >= R -> {true, {not_found, missing}, OtherReasons}; + DeletedCnt >= R -> {true, {not_found, deleted}, OtherReasons}; + true -> {false, other, OtherReasons} + end. + + +error_message(Good, Bad, N, T, Access) -> + Msg = list_to_atom(lists:concat([atom_to_list(Access), "_quorum_not_met"])), + ?LOG_ERROR("~p~nSuccess on ~p of ~p servers. Needed ~p. Errors: ~w" + , [Msg, length(Good), N, T, Bad]), + [{error, Msg}, {good, Good}, {bad, Bad}]. + + +unpack_config(#config{n=N,r=R,w=W}) -> + {N, R, W}. + + +pcall(MapFun, Servers, Const) -> + Replies = lib_misc:pmap(MapFun, Servers, Const), + lists:partition(fun valid/1, Replies). + + +valid({ok, _}) -> true; +valid(ok) -> true; +valid(_) -> false. + + +strip_ok({ok, Val}) -> Val; +strip_ok(Val) -> Val. + + +%% @spec get_dist_tasks(KeyFun::function(), KVPairs::list()) -> +%% [{{Node::node(), Part::integer()}, SeqVals}] +%% Type - ordered | ?? +%% SeqVals - [{Seq, Val}] +%% @doc builds a distributed task list of nodes with a list of shard/values. +%% This looks like a dict structure +%% but is a list so we can use ?PMAP with the results +%% @end +get_dist_tasks(KeyFun, SeqsKVPairs) -> + %% loop thru SeqsKVPairs adding node/part to each + NPSV = lists:flatmap( + fun({Seq,KVPair}) -> + NodeParts = membership2:nodeparts_for_key(KeyFun(KVPair)), + lists:map( + fun(NodePart) -> + {NodePart, {Seq, KVPair}} + end, NodeParts) + end, SeqsKVPairs), + nodepart_values_list(NPSV). 
+ + +%% pile up the List by NodePart (like a dict) +nodepart_values_list(List) -> + DistTasks = + lists:foldl( + fun(NodePart, AccIn) -> + Values = proplists:get_all_values(NodePart, List), + case length(Values) of + 0 -> AccIn; + _ -> [{NodePart, Values} | AccIn] + end + end, [], membership2:all_nodes_parts(true)), + % ?LOG_DEBUG("~nDistTasks: ~p~n", [DistTasks]), + DistTasks. + + +get_const(Access) -> + get_const(Access, unpack_config(configuration:get_config())). + + +get_const(Access, {_N,R,W}) -> + case Access of + r -> R; + w -> W; + r1 -> 1; + Other -> throw({bad_access_term, Other}) + end. diff --git a/src/configuration.erl b/src/configuration.erl new file mode 100644 index 00000000..1caca5ec --- /dev/null +++ b/src/configuration.erl @@ -0,0 +1,99 @@ +%%% -*- erlang-indent-level:2 -*- +%%%------------------------------------------------------------------- +%%% File: configuration.erl +%%% @author Cliff Moon +%%% @author Brad Anderson +%%% @copyright 2008 Cliff Moon +%%% @doc +%%% This module keeps Dynomite source relatively unchanged, but +%%% reads from couchdb config stuffs +%%% @end +%%% +%%% @since 2008-07-18 by Cliff Moon +%%%------------------------------------------------------------------- +-module(configuration). +-author('cliff@powerset.com'). +-author('brad@cloudant.com'). + +%%-behaviour(gen_server). + +%% API +-export([start_link/1, get_config/1, get_config/0, set_config/1, stop/0]). + +-include_lib("eunit/include/eunit.hrl"). + +-include("../include/config.hrl"). +-include("../include/common.hrl"). + +-define(SERVER, couch_config). +-define(i2l(V), integer_to_list(V)). +-define(l2i(V), list_to_integer(V)). + + +%% ----------------------------------------------------------------- +%% API +%% ----------------------------------------------------------------- + +%% @doc starts couch_config gen_server if it's not already started +start_link(DynomiteConfig) -> + couch_config:start_link([]), + set_config(DynomiteConfig). 
+ + +%% @doc get the config for a remote node +get_config(Node) -> + ClusterConfig = rpc:call(Node, couch_config, get, ["cluster"]), + Directory = rpc:call(Node, couch_config, get, ["couchdb", "database_dir"]), + couch2dynomite_config(ClusterConfig, Directory). + + +%% @doc get the config for the local node +get_config() -> + get_config(node()). + + +%% @doc given a Dynomite config record, put the values into the Couch config +set_config(DynomiteConfig) -> + dynomite2couch_config(DynomiteConfig). + + +%% @doc stop the config server (nothing to do until after couch_config refactor) +stop() -> + couch_config:stop(). + + +%% ----------------------------------------------------------------- +%% Internal functions +%% ----------------------------------------------------------------- + +%% @doc turn a couch config proplist into a dynomite configuration record +couch2dynomite_config(ClusterConfig, Directory) -> + Q = ?l2i(proplists:get_value("q", ClusterConfig, "3")), + R = ?l2i(proplists:get_value("r", ClusterConfig, "2")), + W = ?l2i(proplists:get_value("w", ClusterConfig, "1")), + N = ?l2i(proplists:get_value("n", ClusterConfig, "4")), + %% use couch's database_dir here, to avoid /tmp/data not existing + Webport = ?l2i(proplists:get_value("webport", ClusterConfig, "8080")), + Meta = proplists:get_value("meta", ClusterConfig, []), + StorageMod = proplists:get_value("storage_mod", ClusterConfig, []), + #config{q=Q, r=R, w=W, n=N, directory=Directory, web_port=Webport, + meta=Meta, storage_mod=StorageMod}. 
+ + +%% @doc workhorse for set_config/1 above +dynomite2couch_config(DynomiteConfig) -> + couch_config:set("cluster", "q", ?i2l(DynomiteConfig#config.q), false), + couch_config:set("cluster", "r", ?i2l(DynomiteConfig#config.r), false), + couch_config:set("cluster", "w", ?i2l(DynomiteConfig#config.w), false), + couch_config:set("cluster", "n", ?i2l(DynomiteConfig#config.n), false), + couch_config:set("couchdb", "database_dir", DynomiteConfig#config.directory, + false), + couch_config:set("cluster", "webport", + case DynomiteConfig#config.web_port of + undefined -> "8080"; + _ -> ?i2l(DynomiteConfig#config.web_port) + end, false), + couch_config:set("cluster", "meta", DynomiteConfig#config.meta, false), + couch_config:set("cluster", "storage_mod", + DynomiteConfig#config.storage_mod, false), + ok. diff --git a/src/dynomite.erl b/src/dynomite.erl new file mode 100644 index 00000000..1b9798c0 --- /dev/null +++ b/src/dynomite.erl @@ -0,0 +1,23 @@ +%%% @author Brad Anderson +%%% @doc convenience start/stop functions for Dynomite +%%% +-module(dynomite). +-author('Brad Anderson '). + +-export([start/0, stop/0, restart/0]). + + +%% @doc start Dynomite app with no args, for -s at the command-line +start() -> + application:start(dynomite). + + +%% @doc stops the Dynomite application +stop() -> + application:stop(dynomite). + + +%% @doc restart Dynomite app, with no args +restart() -> + stop(), + start(). diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl new file mode 100644 index 00000000..6ee0b978 --- /dev/null +++ b/src/dynomite_app.erl @@ -0,0 +1,145 @@ +%%%------------------------------------------------------------------- +%%% File: dynomite.erl +%%% @author Cliff Moon [] +%%% @copyright 2008 Cliff Moon +%%% @doc +%%% +%%% @end +%%% +%%% @since 2008-06-27 by Cliff Moon +%%%------------------------------------------------------------------- +-module(dynomite_app). +-author('cliff@powerset.com'). +-author('brad@cloudant.com'). + +-behaviour(application). 
-include("../include/config.hrl").
-include("../../couch/src/couch_db.hrl").

%% Application callbacks
-export([start/2, stop/1]).

-define(APPS, [crypto,sasl,mochiweb]).
-define(DEFAULT_CLUSTER_URL, "http://localhost:5984/_cluster").

%%====================================================================
%% Application callbacks
%%====================================================================
%%--------------------------------------------------------------------
%% @spec start(Type, StartArgs) -> {ok, Pid} |
%%                                 {ok, Pid, State} |
%%                                 {error, Reason}
%% @doc application callback: start the dependent apps, join the distributed
%%      erlang cluster, then start the dynomite supervisor. The list of apps
%%      we started is carried as application state.
%% @end
%%--------------------------------------------------------------------
start(_Type, _StartArgs) ->
    %% start args may have been stashed in the process dictionary rather
    %% than coming from the .app file; erase/1 both reads and clears them
    PdStartArgs = case erase(startargs) of
                      undefined -> [];
                      Args -> Args
                  end,

    %% start required apps
    State = start_apps(),

    %% join the cluster, then start the dynomite supervisor
    ok = start_node(),
    case dynomite_sup:start_link(PdStartArgs) of
        {ok, Supervisor} ->
            {ok, Supervisor, State};
        Error ->
            Error
    end.


%%--------------------------------------------------------------------
%% @spec stop(State) -> void()
%% @doc application callback: log the shutdown and take down the supervisor
%%      that start/2 handed back. The return value is ignored.
%% @end
%%--------------------------------------------------------------------
stop({_, Sup}) ->
    showroom_log:message(alert, "dynomite application stopped", []),
    exit(Sup, normal),
    ok.
%%====================================================================
%% Internal functions
%%====================================================================

%% @doc start every application in ?APPS; returns the list of apps this call
%%      actually started (most recent first). Apps that were already running
%%      are left alone and omitted; any other start error aborts.
start_apps() ->
    lists:foldl(
      fun(App, Acc) ->
              case application:start(App) of
                  ok ->
                      [App | Acc];
                  {error, {already_started, App}} ->
                      Acc;
                  _Error ->
                      exit(app_start_fail)
              end
      end, [], ?APPS).


%% @spec start_node() -> ok | {error, Reason}
%% @doc start this node (join to dist. erlang cluster)
start_node() ->
    PingUrl = couch_config:get("cluster","ping", ?DEFAULT_CLUSTER_URL),
    ?LOG_DEBUG("PingUrl: ~p", [PingUrl]),
    Result =
        case get_pingnode(PingUrl, 1) of
            {ok, PingNode} ->
                join(PingNode);
            _ ->
                ?LOG_INFO("No pingnode found. Becoming single-node cluster", [])
        end,
    couch_api:create_db(<<"users">>, []), % all nodes have local 'users' db
    Result.


%% @spec get_pingnode(Url::string(), Retries::int()) -> node() |
%%       {error, Reason}
%% @doc make a http get call to Url to get cluster information
get_pingnode(Url, Retries) ->
    Req = #http_db{url=Url, retries=Retries},
    try couch_rep_httpc:request(Req) of
        {[{<<"ping_node">>, Node}]} ->
            {ok, list_to_atom(binary_to_list(Node))};
        _Other ->
            {error, pingnode_not_found}
    catch
        _:_ ->
            {error, pingnode_not_found}
    end.


%% @doc ping PingNode to join its cluster; a no-op when we ARE the ping node
join(PingNode) when PingNode =:= node() ->
    ok; % we must be brain, so we'll take over the world
join(PingNode) ->
    case net_adm:ping(PingNode) of
        pong ->
            % there is a cluster, we just joined it
            ?LOG_DEBUG("ping successful, we're in.", []),
            timer:sleep(1000); %% grr, what a hack, erlang. rly?
        pang ->
            ?LOG_ERROR("ping not successful.", []),
            throw({cluster_error, ?l2b("cluster ping not successful")})
    end,
    ok.
%% This is a Dynomite plugin for calling the CouchDB raw Erlang API
%%
%% Most calls will have come from any of the web endpoints to execute
%% these functions on the proper node for the key(s).

-module(dynomite_couch_api).
-author('brad@cloudant.com').

-export([create_db/1, delete_db/1, get/1, put/1,
         bulk_docs/1, missing_revs/1, get_db_info/1, get_view_group_info/1,
         ensure_full_commit/1
        ]).

-include("../../couch/src/couch_db.hrl").
-include("../include/common.hrl").


%%--------------------------------------------------------------------
%% @spec create_db([Part, DbName, Options]) -> ok | {error,Error}
%% Description: Creates the database shard for Part; the handle is closed
%% immediately (shards are reopened on demand).
%%--------------------------------------------------------------------
create_db([Part, DbName, Options]) ->
    ShardName = partitions:shard_name(Part, DbName),
    case couch_server:create(ShardName, Options) of
        {ok, Shard} ->
            couch_db:close(Shard),
            ok;
        Error ->
            Error
    end.


%%--------------------------------------------------------------------
%% @spec delete_db([Part, DbName, Options]) -> {ok,deleted} | {error,Error}
%% Description: Deletes the database shard.
%%--------------------------------------------------------------------
delete_db([Part, DbName, Options]) ->
    ShardName = partitions:shard_name(Part, DbName),
    couch_server:delete(ShardName, Options).


%% @doc fetch one document (honoring Revs/Options) from the shard for Part;
%%      errors from open_shard pass through unchanged
get([Part, Db, DocId, Revs, Options]) ->
    case showroom_db:open_shard(node(), Part, Db) of
        {ok, Shard} ->
            {Status, Doc} = couch_api:open_doc(Shard, DocId, Revs, Options),
            showroom_db:close_shard(Shard),
            {Status, {[], [Doc]}};
        Error ->
            Error
    end.
%% @doc write Doc into the shard for Part, returning {Status, {Clock, [NewRev]}}
%%      so the caller gets the doc's vector clock back with the new revision
put([Part, Db, Doc = #doc{clock=Clock}, Options]) ->
    with_shard(Part, Db, fun(Shard) ->
        {Status, NewRev} = couch_db:update_doc(Shard, Doc, Options),
        {Status, {Clock, [NewRev]}}
    end).


%% @doc bulk-update the given {Seq, Doc} pairs in the shard for Part; results
%%      are re-zipped with their sequence numbers. An empty doc list
%%      short-circuits to {ok, []} without opening the shard.
bulk_docs([Part, SeqsDocs, Db, Options, Type]) ->
    {Seqs, Docs} = lists:unzip(SeqsDocs),
    case Docs of
        [] -> {ok, []};
        _ ->
            with_shard(Part, Db, fun(Shard) ->
                {ok, Results1} = couch_db:update_docs(Shard, Docs, Options, Type),
                {ok, int_zip(Seqs, Results1)}
            end)
    end.


%% @doc look up missing revisions for the given {Seq, IdRevs} pairs; an empty
%%      list short-circuits to {ok, []} without opening the shard
missing_revs([Part, SeqsIdsRevs, Db]) ->
    {_Seqs, IdsRevs} = lists:unzip(SeqsIdsRevs),
    case IdsRevs of
        [] -> {ok, []};
        _ ->
            with_shard(Part, Db, fun(Shard) ->
                {ok, Results1} = couch_db:get_missing_revs(Shard, IdsRevs),
                {ok, Results1}
            end)
    end.


%% @doc database info proplist for the shard for Part
get_db_info([Part, Db]) ->
    with_shard(Part, Db, fun(Shard) ->
        {Status, Info} = couch_db:get_db_info(Shard),
        {Status, {[], Info}}
    end).


%% @doc view group info for DesignId within the shard for Part
get_view_group_info([Part, Db, DesignId]) ->
    with_shard(Part, Db, fun(Shard) ->
        {ok, EmptyGroup} = showroom_view:build_skeleton_view_group(Db, DesignId),
        %% shard names are prefixed with "S"; strip it for couch_view
        <<"S", ShardName/binary>> = Shard#db.name,
        {ok, Pid} = gen_server:call(couch_view, {get_group_server,
                                                ShardName, EmptyGroup}),
        {ok, Info} = couch_view_group:request_group_info(Pid),
        {ok, {[], Info}}
    end).


%% @doc force a full commit (fsync) of the shard for Part
ensure_full_commit([Part, Db]) ->
    with_shard(Part, Db, fun(Shard) ->
        {Status, Info} = couch_db:ensure_full_commit(Shard),
        {Status, {[], Info}}
    end).


%% @doc open the shard for Part/Db on this node, apply Fun to the handle, and
%%      guarantee the shard is closed afterwards even if Fun throws — the
%%      previous inlined copies of this pattern leaked the shard handle when
%%      the wrapped couch call crashed. Open errors are returned unchanged.
with_shard(Part, Db, Fun) ->
    case showroom_db:open_shard(node(), Part, Db) of
        {ok, Shard} ->
            try
                Fun(Shard)
            after
                showroom_db:close_shard(Shard)
            end;
        Error ->
            Error
    end.
+ + +%% ======================= +%% internal +%% ======================= + +int_zip(Seqs, Docs) when length(Seqs) == length(Docs) -> + lists:zip(Seqs, Docs); +int_zip(_Seqs, []) -> + []; +int_zip(Seqs, Docs) -> + ?debugFmt("~nWTF? int_zip~nSeqs: ~p~nDocs: ~p~n", [Seqs, Docs]), + []. diff --git a/src/dynomite_couch_storage.erl b/src/dynomite_couch_storage.erl new file mode 100644 index 00000000..4fd21b80 --- /dev/null +++ b/src/dynomite_couch_storage.erl @@ -0,0 +1,41 @@ +%%%------------------------------------------------------------------- +%%% File: dynomite_couch_storage.erl +%%% @author Brad Anderson +%%% @copyright 2009 Brad Anderson +%%% @doc +%%% +%%% @end +%%% +%%% @since 2009-07-14 +%%%------------------------------------------------------------------- +-module(dynomite_couch_storage). +-author('brad@cloudant.com'). + +%% API +-export([name/1, open/2, close/1, create/2]). +%% , close/1, get/2, put/4, has_key/2, delete/2, fold/3 + +-include_lib("../include/common.hrl"). + +%% -record(row, {key, context, values}). + +%%==================================================================== +%% API +%%==================================================================== + +name(Boundary) -> + showroom_utils:int_to_hexstr(Boundary). + +open(Directory, Name) -> +%% ?debugFmt("~nDirectory: ~p~nName : ~p~n", [Directory,Name]), + {ok, {Directory, Name}}. + +close(_Table) -> ok. + +create(_Directory, _Name) -> + ok. 
+ + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/src/dynomite_http.erl b/src/dynomite_http.erl new file mode 100644 index 00000000..8b6f7fbb --- /dev/null +++ b/src/dynomite_http.erl @@ -0,0 +1,21 @@ +%%%------------------------------------------------------------------- +%%% File : dynomite_http.erl +%%% Author : Brad Anderson +%%% Description : +%%% +%%% Created : 10 Jan 2010 by Brad Anderson +%%%------------------------------------------------------------------- +-module(dynomite_http). +-author('Brad Anderson '). + +-include("../couch/src/couch_db.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-export([handle_cluster_info/1]). + + +%% GET /_cluster +handle_cluster_info(#httpd{method='GET', path_parts=[_]}=Req) -> + ClusterInfo = [{<<"ping_node">>, ?l2b(atom_to_list(node()))}], + showroom_log:message(info, "Cluster Info: ~p", [ClusterInfo]), + couch_httpd:send_json(Req, {ClusterInfo}). diff --git a/src/dynomite_prof.erl b/src/dynomite_prof.erl new file mode 100644 index 00000000..80c4b5b7 --- /dev/null +++ b/src/dynomite_prof.erl @@ -0,0 +1,164 @@ +%%%------------------------------------------------------------------- +%%% File: dynomite_prof.erl +%%% @author Cliff Moon <> [] +%%% @copyright 2009 Cliff Moon +%%% @doc +%%% +%%% @end +%%% +%%% @since 2009-02-15 by Cliff Moon +%%%------------------------------------------------------------------- +-module(dynomite_prof). +-author('cliff@powerset.com'). + +-behaviour(gen_server). + +%% API +-export([start_link/0, start_prof/1, stop_prof/1, stats/1, averages/0, balance_prof/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {ets,balance}). + +-record(profile, {name, count, sum}). 
%%====================================================================
%% API
%%====================================================================
%%--------------------------------------------------------------------
%% @spec start_link() -> {ok,Pid} | ignore | {error,Error}
%% @doc Starts the server, registered locally as dynomite_prof
%% @end
%%--------------------------------------------------------------------
start_link() ->
    gen_server:start_link({local, dynomite_prof}, ?MODULE, [], []).

%% @doc raw profile rows recorded for Id
stats(Id) ->
    gen_server:call(dynomite_prof, {stats, Id}).

%% @doc count a call against the calling pid (feeds the balance report)
balance_prof() ->
    gen_server:cast(dynomite_prof, {balance, self(), lib_misc:now_float()}).

%% @doc mark the start of a timed section named Id for the calling pid
start_prof(Id) ->
    gen_server:cast(dynomite_prof, {start, self(), Id, lib_misc:now_float()}).

%% @doc mark the end of a timed section named Id for the calling pid
stop_prof(Id) ->
    gen_server:cast(dynomite_prof, {stop, self(), Id, lib_misc:now_float()}).

%% @doc [Balances, Avgs]: each pid's call count relative to the busiest pid,
%%      plus the mean elapsed time per profiled Id
averages() ->
    gen_server:call(dynomite_prof, averages).

%%====================================================================
%% gen_server callbacks
%%====================================================================

%% @doc two private ets sets: profile rows keyed on #profile.name (keypos 2)
%%      and a per-pid balance table
init([]) ->
    ProfTid = ets:new(profiling, [set, {keypos, 2}]),
    BalTid = ets:new(balance, [set]),
    {ok, #state{ets=ProfTid, balance=BalTid}}.

%% @doc Handling call messages
handle_call({stats, Id}, _From, #state{ets=ProfTid} = State) ->
    {reply, ets:lookup(ProfTid, Id), State};

handle_call(table, _From, #state{ets=ProfTid} = State) ->
    {reply, ProfTid, State};

handle_call(averages, _From, #state{ets=ProfTid, balance=BalTid} = State) ->
    %% mean elapsed time per profiled id
    Avgs = ets:foldl(fun(#profile{name=Name, count=Count, sum=Sum}, Acc) ->
                             [{Name, Sum/Count} | Acc]
                     end, [], ProfTid),
    %% find the busiest pid, then express every pid's count relative to it
    {_BusiestPid, MaxCount} =
        ets:foldl(fun({Pid, Count}, {_, Best}) when Count > Best ->
                          {Pid, Count};
                     (_, Acc) ->
                          Acc
                  end, {pid, 0}, BalTid),
    Balances = ets:foldl(fun({Pid, Count}, Acc) ->
                                 [{Pid, Count / MaxCount} | Acc]
                         end, [], BalTid),
    {reply, [Balances, Avgs], State}.

%% @doc Handling cast messages
handle_cast({balance, Pid, _Time}, #state{balance=BalTid} = State) ->
    %% insert-or-increment the per-pid call counter
    case ets:lookup(BalTid, Pid) of
        [] -> ets:insert(BalTid, {Pid, 1});
        [{Pid, Count}] -> ets:insert(BalTid, {Pid, Count+1})
    end,
    {noreply, State};

handle_cast({start, Pid, Id, Time}, State) ->
    %% open timings live in this server's process dictionary, keyed {Pid, Id}
    put({Pid, Id}, Time),
    {noreply, State};

handle_cast({stop, Pid, Id, Time}, #state{ets=ProfTid} = State) ->
    case get({Pid, Id}) of
        undefined ->
            ok; % stop without a matching start: ignore
        StartTime ->
            erase({Pid, Id}),
            increment_time(ProfTid, Time - StartTime, Id)
    end,
    {noreply, State}.

%% @doc unexpected messages are dropped
handle_info(_Info, State) ->
    {noreply, State}.

%% @doc nothing to clean up; ets tables die with this process
terminate(_Reason, _State) ->
    ok.

%% @doc Convert process state when code is changed
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%--------------------------------------------------------------------
%%% Internal functions
%%--------------------------------------------------------------------

%% accumulate Delta seconds into the #profile{} row for Id (insert-or-update)
increment_time(Tid, Delta, Id) ->
    Row = case ets:lookup(Tid, Id) of
              [] ->
                  #profile{name=Id, count=1, sum=Delta};
              [#profile{count=Count, sum=Sum} = P] ->
                  P#profile{count=Count+1, sum=Sum+Delta}
          end,
    ets:insert(Tid, Row).
diff --git a/src/dynomite_sup.erl b/src/dynomite_sup.erl new file mode 100644 index 00000000..f8136934 --- /dev/null +++ b/src/dynomite_sup.erl @@ -0,0 +1,85 @@ +%%%------------------------------------------------------------------- +%%% File: dynomite_sup.erl +%%% @author Cliff Moon [] +%%% @copyright 2008 Cliff Moon +%%% @doc +%%% +%%% @end +%%% +%%% @since 2008-06-27 by Cliff Moon +%%%------------------------------------------------------------------- +-module(dynomite_sup). +-author('cliff@powerset.com'). + +-behaviour(supervisor). + +%% API +-export([start_link/1]). + +%% Supervisor callbacks +-export([init/1]). + +-include("../include/config.hrl"). + +-define(SERVER, ?MODULE). + +%%==================================================================== +%% API functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% @spec start_link() -> {ok,Pid} | ignore | {error,Error} +%% @doc Starts the supervisor +%% @end +%%-------------------------------------------------------------------- +start_link(Hints) -> + supervisor:start_link(?MODULE, [Hints]). + +%%==================================================================== +%% Supervisor callbacks +%%==================================================================== +%%-------------------------------------------------------------------- +%% @spec init(Args) -> {ok, {SupFlags, [ChildSpec]}} | +%% ignore | +%% {error, Reason} +%% @doc Whenever a supervisor is started using +%% supervisor:start_link/[2,3], this function is called by the new process +%% to find out about restart strategy, maximum restart frequency and child +%% specifications. 
+%% @end +%%-------------------------------------------------------------------- +init(Args) -> + Node = node(), + Nodes = running_nodes() ++ [node()], + Membership = {membership, + {membership2, start_link, [Node, Nodes, Args]}, + permanent, + 1000, + worker, + [membership2]}, + MemEventMgr = {mem_event_manager, + {gen_event, start_link, [{local, membership_events}]}, + permanent, + 1000, + worker, + []}, + {ok, {{one_for_one,10,1}, [Membership, MemEventMgr]}}. + + +%%==================================================================== +%% Internal functions +%%==================================================================== + +%% @doc get a list of running nodes visible to this local node +running_nodes() -> + [Node || Node <- nodes([this,visible]), running(Node)]. + +%% @doc monitor the membership server on Node from here +running(Node) -> + Ref = erlang:monitor(process, {membership, Node}), + R = receive + {'DOWN', Ref, _, _, _} -> false + after 1 -> + true + end, + erlang:demonitor(Ref), + R. diff --git a/src/lib_misc.erl b/src/lib_misc.erl new file mode 100644 index 00000000..f5449295 --- /dev/null +++ b/src/lib_misc.erl @@ -0,0 +1,235 @@ +-module(lib_misc). + +-define(OFFSET_BASIS, 2166136261). +-define(FNV_PRIME, 16777619). + +-export([rm_rf/1, pmap/3, succ/1, fast_acc/3, hash/1, hash/2, fnv/1, + nthdelete/2, zero_split/1, nthreplace/3, rand_str/1, position/2, + shuffle/1, floor/1, ceiling/1, time_to_epoch_int/1, + time_to_epoch_float/1, now_int/0, now_float/0, byte_size/1, listify/1, + reverse_bits/1]). + +-include("../include/config.hrl"). +-include("../include/profile.hrl"). + + +rm_rf(Name) when is_list(Name) -> + case filelib:is_dir(Name) of + false -> + file:delete(Name); + true -> + case file:list_dir(Name) of + {ok, Filenames} -> + lists:foreach(fun rm_rf/1, [ filename:join(Name, F) || F <- Filenames]), + file:del_dir(Name); + {error, Reason} -> error_logger:info_msg("rm_rf failed because ~p~n", [Reason]) + end + end. 
%% @doc split Bin at the first NUL (0) byte: returns {Prefix, RestStartingAtNul}
%%      via split_binary/2, or Bin unchanged when it contains no NUL byte.
zero_split(Bin) ->
  zero_split(0, Bin).

%% N >= byte_size: scanned the whole binary without finding a NUL
zero_split(N, Bin) when N >= erlang:byte_size(Bin) -> Bin;

zero_split(N, Bin) ->
  case Bin of
    <<_:N/binary, 0:8, _/binary>> -> split_binary(Bin, N);
    _ -> zero_split(N+1, Bin)
  end.

%% @doc N random lowercase characters a-z. Uses the `random` module, so it is
%%      NOT cryptographically strong and depends on the process seed.
rand_str(N) ->
  lists:map(fun(_I) ->
                random:uniform(26) + $a - 1
            end, lists:seq(1,N)).

%% @doc replace the Nth (1-based) element of List with E
nthreplace(N, E, List) ->
  lists:sublist(List, N-1) ++ [E] ++ lists:nthtail(N, List).

%% @doc delete the Nth (1-based) element of List; out-of-range N returns the
%%      list unchanged
nthdelete(N, List) ->
  nthdelete(N, List, []).

nthdelete(0, List, Ret) ->
  lists:reverse(Ret) ++ List;

nthdelete(_, [], Ret) ->
  lists:reverse(Ret);

nthdelete(1, [_E|L], Ret) ->
  nthdelete(0, L, Ret);

nthdelete(N, [E|L], Ret) ->
  nthdelete(N-1, L, [E|Ret]).

%% @doc largest integer =< X (trunc/1 rounds toward zero, so adjust negatives)
floor(X) ->
  T = erlang:trunc(X),
  case (X - T) of
    Neg when Neg < 0 -> T - 1;
    Pos when Pos > 0 -> T;
    _ -> T
  end.

%% @doc smallest integer >= X
ceiling(X) ->
  T = erlang:trunc(X),
  case (X - T) of
    Neg when Neg < 0 -> T;
    Pos when Pos > 0 -> T + 1;
    _ -> T
  end.

%% @doc successor of a string for key-range purposes: increment the last
%%      character, carrying ('z' wraps to 'a') into the preceding character.
%%      A carry out of the first character prepends an 'a', so "z" -> "aa".
succ([]) ->
  [];

succ(Str) ->
  succ_int(lists:reverse(Str), []).

%% BUGFIX: this base clause was missing, so succ/1 crashed with
%% function_clause on all-'z' input such as "z" or "zz". Carrying past the
%% most significant character now grows the string by one 'a'.
succ_int([], Acc) ->
  [$a|Acc];

succ_int([Char|Str], Acc) ->
  if
    Char >= $z -> succ_int(Str, [$a|Acc]);
    true -> lists:reverse(lists:reverse([Char+1|Acc]) ++ Str)
  end.

%% @doc apply Fun to Acc N times: Fun(Fun(...Fun(Acc)))
fast_acc(_, Acc, 0) -> Acc;

fast_acc(Fun, Acc, N) ->
  fast_acc(Fun, Fun(Acc), N-1).

%% @doc random permutation of List (decorate with random keys, sort, strip)
shuffle(List) when is_list(List) ->
  [ N || {_R,N} <- lists:keysort(1, [{random:uniform(),X} || X <- List]) ].

%% @doc run Fun over List in parallel and return the first ReturnNum results,
%%      ordered to match List. Error-shaped replies ({not_found,_},
%%      {badrpc,_}, {'EXIT',_}) are collected but do not count toward
%%      ReturnNum (see gather/4); leftover workers are killed at the end.
pmap(Fun, List, ReturnNum) ->
  N = if
    ReturnNum > length(List) -> length(List);
    true -> ReturnNum
  end,
  SuperParent = self(),
  SuperRef = erlang:make_ref(),
  Ref = erlang:make_ref(),
  %% we spawn an intermediary to collect the results
  %% this is so that there will be no leaked messages sitting in our mailbox
  Parent = spawn(fun() ->
      L = gather(N, length(List), Ref, []),
      SuperParent ! {SuperRef, pmap_sort(List, L)}
    end),
  Pids = [spawn(fun() ->
      Parent ! {Ref, {Elem, (catch Fun(Elem))}}
    end) || Elem <- List],
  Ret = receive
    {SuperRef, Ret1} -> Ret1
  end,
  % i think we need to cleanup here.
  lists:foreach(fun(P) -> exit(P, die) end, Pids),
  Ret.

%% @doc order Results ({Elem, Value} pairs) to match Original's element order;
%%      elements with no result are simply skipped
pmap_sort(Original, Results) ->
  pmap_sort([], Original, lists:reverse(Results)).

% pmap_sort(Sorted, [], _) -> lists:reverse(Sorted);
pmap_sort(Sorted, _, []) -> lists:reverse(Sorted);
pmap_sort(Sorted, [E|Original], Results) ->
  case lists:keytake(E, 1, Results) of
    {value, {E, Val}, Rest} -> pmap_sort([Val|Sorted], Original, Rest);
    false -> pmap_sort(Sorted, Original, Results)
  end.

%% collect worker replies. Error-shaped tuples are kept without decrementing
%% N, so we keep waiting for N successes (or Max total replies, whichever
%% comes first).
gather(_, Max, _, L) when length(L) == Max -> L;
gather(0, _, _, L) -> L;
gather(N, Max, Ref, L) ->
  receive
    {Ref, {Elem, {not_found, Ret}}} -> gather(N, Max, Ref, [{Elem, {not_found, Ret}}|L]);
    {Ref, {Elem, {badrpc, Ret}}} -> gather(N, Max, Ref, [{Elem, {badrpc, Ret}}|L]);
    {Ref, {Elem, {'EXIT', Ret}}} -> gather(N, Max, Ref, [{Elem, {'EXIT', Ret}}|L]);
    {Ref, Ret} -> gather(N-1, Max, Ref, [Ret|L])
  end.

%% the hash implementation is pluggable via the #config record
get_hash_module(#config{hash_module=HashModule}) ->
  HashModule.

%% @doc hash Term with the configured hash module (wrapped in profiling)
hash(Term) ->
  HashModule = get_hash_module(configuration:get_config()),
  ?prof(hash),
  R = HashModule:hash(Term),
  ?forp(hash),
  R.

%% @doc hash Term with an explicit Seed, using the configured hash module
hash(Term, Seed) ->
  HashModule = get_hash_module(configuration:get_config()),
  ?prof(hash),
  R = HashModule:hash(Term, Seed),
  ?forp(hash),
  R.

%32 bit fnv. magic numbers ahoy
fnv(Term) when is_binary(Term) ->
  fnv_int(?OFFSET_BASIS, 0, Term);

fnv(Term) ->
  fnv_int(?OFFSET_BASIS, 0, term_to_binary(Term)).

%% FNV-1 over each byte; (2 bsl 31) is 2^32, i.e. keep the low 32 bits
fnv_int(Hash, ByteOffset, Bin) when erlang:byte_size(Bin) == ByteOffset ->
  Hash;

fnv_int(Hash, ByteOffset, Bin) ->
  <<_:ByteOffset/binary, Octet:8, _/binary>> = Bin,
  Xord = Hash bxor Octet,
  fnv_int((Xord * ?FNV_PRIME) rem (2 bsl 31), ByteOffset+1, Bin).

%% @doc 1-based position of the first element matching Predicate (or equal to
%%      E); false when absent
position(Predicate, List) when is_function(Predicate) ->
  position(Predicate, List, 1);

position(E, List) ->
  position(E, List, 1).
+ +position(Predicate, [], _N) when is_function(Predicate) -> false; + +position(Predicate, [E|List], N) when is_function(Predicate) -> + case Predicate(E) of + true -> N; + false -> position(Predicate, List, N+1) + end; + +position(_, [], _) -> false; + +position(E, [E|_List], N) -> N; + +position(E, [_|List], N) -> position(E, List, N+1). + +now_int() -> + time_to_epoch_int(now()). + +now_float() -> + time_to_epoch_float(now()). + +time_to_epoch_int(Time) when is_integer(Time) or is_float(Time) -> + Time; + +time_to_epoch_int({Mega,Sec,_}) -> + Mega * 1000000 + Sec. + +time_to_epoch_float(Time) when is_integer(Time) or is_float(Time) -> + Time; + +time_to_epoch_float({Mega,Sec,Micro}) -> + Mega * 1000000 + Sec + Micro / 1000000. + +byte_size(List) when is_list(List) -> + lists:foldl(fun(El, Acc) -> Acc + lib_misc:byte_size(El) end, 0, List); + +byte_size(Term) -> + erlang:byte_size(Term). + +listify(List) when is_list(List) -> + List; + +listify(El) -> [El]. + +reverse_bits(V) when is_integer(V) -> + % swap odd and even bits + V1 = ((V bsr 1) band 16#55555555) bor (((V band 16#55555555) bsl 1) band 16#ffffffff), + % swap consecutive pairs + V2 = ((V1 bsr 2) band 16#33333333) bor (((V1 band 16#33333333) bsl 2) band 16#ffffffff), + % swap nibbles ... + V3 = ((V2 bsr 4) band 16#0F0F0F0F) bor (((V2 band 16#0F0F0F0F) bsl 4) band 16#ffffffff), + % swap bytes + V4 = ((V3 bsr 8) band 16#00FF00FF) bor (((V3 band 16#00FF00FF) bsl 8) band 16#ffffffff), + % swap 2-byte long pairs + ((V4 bsr 16) band 16#ffffffff) bor ((V4 bsl 16) band 16#ffffffff). diff --git a/src/mem_utils.erl b/src/mem_utils.erl new file mode 100644 index 00000000..ffefd5cb --- /dev/null +++ b/src/mem_utils.erl @@ -0,0 +1,129 @@ +-module(mem_utils). + +-export([fix_mappings/3, get_remote_fullmap/1, join_type/3, pmap_from_full/1, + nodeparts_up/1, remove_partition/3, use_persistent/2, + was_i_nodedown/2]). + +-include("../include/common.hrl"). 
%% @doc classify how Node is joining the cluster: 'new', 'rejoin' (it already
%%      appears in Fullmap), or {replace, OldNode} when the 'replace' option
%%      names the node being swapped out
join_type(Node, Fullmap, Options) ->
  case proplists:get_value(replace, Options) of
    undefined ->
      case [x || {N, _P, _T} <- Fullmap, N =:= Node] of
        [] -> new;
        _ -> rejoin
      end;
    OldNode when is_atom(OldNode) ->
      % not a particularly strong guard, but will have to do
      {replace, OldNode};
    _ -> new
  end.


%% @doc return a {PMap, Fullmap} tuple that has corrections for
%% down, rejoining, or replacing Node
fix_mappings(nodedown, Node, OldFullmap) ->
  fix_mappings_fold(
    fun({N, P, {nodedown, _} = Down}, AccIn) when N =:= Node ->
            % already marked as nodedown, so leave it
            [{N, P, Down} | AccIn];
       ({N, P, T}, AccIn) when N =:= Node ->
            % newly down: remember the old type inside the marker
            [{N, P, {nodedown, T}} | AccIn];
       (NPT, AccIn) ->
            [NPT | AccIn]
    end, [], OldFullmap);

fix_mappings(rejoin, Node, OldFullmap) ->
  fix_mappings_fold(
    fun({N, P, {nodedown, T}}, AccIn) when N =:= Node ->
            [{N, P, T} | AccIn];   % clear the down marker
       (NPT, AccIn) ->
            [NPT | AccIn]
    end, [], OldFullmap);

fix_mappings(replace, {OldNode, NewNode}, OldFullmap) ->
  fix_mappings_fold(
    fun({N, P, {nodedown, T1}}, AccIn) when N =:= OldNode ->
            [{NewNode, P, T1} | AccIn];
       ({N, P, T}, AccIn) when N =:= OldNode ->
            [{NewNode, P, T} | AccIn];
       (NPT, AccIn) ->
            [NPT | AccIn]
    end, [], OldFullmap).


%% fold the correction fun over the old fullmap and derive the matching pmap
fix_mappings_fold(Fun, Acc0, OldFullmap) ->
  NewFullmap = lists:foldl(Fun, Acc0, OldFullmap),
  NewPMap = pmap_from_full(NewFullmap),
  {NewPMap, NewFullmap}.


%% @doc create a PMap (primary nodes only) from provided Fullmap
%% If a primary node is down, a partner will be supplied
pmap_from_full(Fullmap) ->
  NodePartList = nodeparts_up(Fullmap),
  Choose =
    fun({N, P, primary}, AccIn) ->
            [{N, P} | AccIn];
       ({N, P, {nodedown, primary}}, AccIn) ->
            %% primary is down: promote the first live partner, if any
            Partners = lists:delete(N,
                         membership2:nodes_for_part(P, NodePartList)),
            NewNode = case Partners of
                        [First|_] -> First;
                        [] -> N % wtf, are all partners down too?
                      end,
            [{NewNode, P} | AccIn];
       (_, AccIn) ->
            AccIn
    end,
  lists:keysort(2, lists:foldl(Choose, [], Fullmap)).
+ + +nodeparts_up(Fullmap) -> + lists:foldl(fun({_N,_P,{nodedown,_}}, AccIn) -> AccIn; + ({N,P,_T}, AccIn) -> [{N,P} | AccIn] + end, [], Fullmap). + + + +%% @doc if Node is in the Fullmap as {nodedown,_} return true +was_i_nodedown(Node, Fullmap) -> + lists:member(yes, lists:map(fun({N,_P,{nodedown,_T}}) -> + case N of + Node -> yes; + _ -> no + end; + (_) -> no + end, Fullmap)). + + +remove_partition(FullMap, Node, Partition) -> + case lists:filter( + fun({N,P,_Type}) -> N =:= Node andalso P =:= Partition end, + FullMap) of + [Elem|_] -> + lists:delete(Elem, FullMap); + Other -> + ?LOG_ERROR("~nNo partition to remove: ~p~n" + "Node: ~p~nPartition: ~p~n", [Other, Node, Partition]), + FullMap + end. + + +use_persistent(_PartnersPlus, undefined) -> + false; + +use_persistent(PartnersPlus, _PersistentParts) -> + % get a fullmap from a partner + % this may need rework for network partitions, as you could get a bad + % fullmap from another node that was partitioned w/ this one :\ + RemoteFullmap = get_remote_fullmap(PartnersPlus), + % return opposite of was_i_nodedown + not mem_utils:was_i_nodedown(node(), RemoteFullmap). + + +get_remote_fullmap([]) -> + []; % no remote fullmap available, so return empty list + +get_remote_fullmap([Node|Rest]) -> + case gen_server:call({membership, Node}, fullmap) of + {ok, Fullmap} -> Fullmap; + _ -> get_remote_fullmap(Rest) + end. diff --git a/src/membership2.erl b/src/membership2.erl new file mode 100644 index 00000000..4c4780c3 --- /dev/null +++ b/src/membership2.erl @@ -0,0 +1,686 @@ +%%%------------------------------------------------------------------- +%%% File: membership2.erl +%%% @author Cliff Moon [] +%%% @copyright 2009 Cliff Moon +%%% @doc +%%% +%%% @end +%%% +%%% @since 2009-05-04 by Cliff Moon +%%%------------------------------------------------------------------- +-module(membership2). +-author('cliff@powerset.com'). +-author('brad@cloudant.com'). + +-behaviour(gen_server). 
+ +%% API +-export([start_link/2, start_link/3, stop/1, check_nodes/0, + partitions/0, partition_for_key/1, fullmap/0, + all_nodes_parts/1, clock/0, + nodes/0, nodeparts_for_key/1, nodes_for_part/1, nodes_for_part/2, + nodes_for_shard/1, nodes_down/0, + parts_for_node/1, + take_offline/2, bring_online/2, + decommission_part/3, pp_fullmap/0, snafu/1, snafu/3]). + + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% includes +-include("../include/config.hrl"). +-include("../include/common.hrl"). +-include("../include/profile.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%%==================================================================== +%% API +%%==================================================================== +%% @doc Starts the server +%% @end +%%-------------------------------------------------------------------- + +start_link(Node, Nodes) -> + start_link(Node, Nodes, []). + + +start_link(Node, Nodes, Args) -> + gen_server:start_link({local, membership}, ?MODULE, [Node, Nodes, Args], []). + + +stop(Server) -> + gen_server:cast(Server, stop). + + +%% @doc for when things have really gone south. Install a new state on all +%% nodes, given a filename, or node list, partition map, and fullmap. +%% @end +snafu(Filename) -> + NewState = case file:consult(Filename) of + {ok, [Terms]} -> + Terms; + Error -> + throw(Error) + end, + #membership{nodes=Nodes, partitions=PMap, fullmap=Fullmap} = NewState, + snafu(Nodes, PMap, Fullmap). + + +snafu(Nodes, PMap, Fullmap) -> + NewState = #membership{node=node(), nodes=Nodes, + partitions=PMap, fullmap=Fullmap, version=vector_clock:create(dbcore)}, + update_ets(ets_name(node()), NewState), + fire_gossip(node(), Nodes, NewState), + save(NewState). 
+ + +check_nodes() -> + ErlangNodes = lists:usort([node() | erlang:nodes()]), + {ok, MemNodeList} = membership2:nodes(), + MemNodes = lists:usort(MemNodeList), + {PMapNodeList, _PMapPartList} = lists:unzip(partitions()), + PMapNodes = lists:usort(PMapNodeList), + case ErlangNodes =:= MemNodes andalso + ErlangNodes =:= PMapNodes andalso + MemNodes =:= PMapNodes of + true -> true; + _ -> + Msg = "membership: Node Lists do not match.~n" + "Erlang Nodes : ~p~n" + "Membership Nodes : ~p~n" + "PMap Nodes : ~p~n", + Lst = [ErlangNodes, MemNodes, PMapNodes], + showroom_log:message(error, Msg, Lst), + io:format(Msg, Lst), + false + end. + + +%% @doc retrieve the primary partition map. This is a list of partitions and +%% their corresponding primary node, no replication partner nodes. +partitions() -> + ets_pmap(). + + +%% @doc retrieve the full partition map, like above, but including replication +%% partner nodes. List should number 2^Q * N +fullmap() -> + lists:keysort(2, ets_fullmap()). + + +%% @doc pretty-print the full partition map (sorted by node, then part) +pp_fullmap() -> + lists:foreach( + fun({N,P}) -> + io:format("~-60s ~s~n", [N, showroom_utils:int_to_hexstr(P)]) + end, + lists:sort(membership2:all_nodes_parts(true))). + + +%% @doc get the current vector clock from membership state +clock() -> + gen_server:call(membership, clock). + + +%% @doc get the list of cluster nodes (according to membership module) +%% This may differ from erlang:nodes() +nodes() -> + gen_server:call(membership, nodes). + + +%% @doc get all the responsible nodes for a given partition, including +%% replication partner nodes +nodes_for_part(Part) -> + nodes_for_part(Part, all_nodes_parts(true)). + + +nodes_for_part(Part, NodePartList) -> + Filtered = lists:filter(fun({_N, P}) -> P =:= Part end, NodePartList), + {Nodes, _Parts} = lists:unzip(Filtered), + lists:usort(Nodes). 
+
+
+%% @doc given a shard name (binary or string), extract the hex partition
+%%      suffix after the last '_' and return the nodes responsible for it.
+nodes_for_shard(ShardName) when is_binary(ShardName) ->
+  nodes_for_shard(binary_to_list(ShardName));
+
+nodes_for_shard(ShardName) when is_list(ShardName) ->
+  % rchr returns 0 when no '_' is present, so '1' means "whole name is hex"
+  HexPart = case string:rchr(ShardName, $_) + 1 of
+  1 -> ShardName;
+  Last -> string:substr(ShardName, Last)
+  end,
+  Int = showroom_utils:hexstr_to_int(HexPart),
+  {_, Parts} = lists:unzip(membership2:partitions()),
+  nodes_for_part(partitions:int_to_partition(Int, Parts)).
+
+
+%% @doc get all the responsible nodes and partitions for a given key,
+%%      including nodes/parts on replication partner nodes
+nodeparts_for_key(Key) ->
+  int_node_parts_for_key(Key).
+
+
+%% @doc get a list of all the nodes marked down in this node's fullmap
+nodes_down() ->
+  Downs = lists:foldl(fun({N,_P,{nodedown, _T}}, AccIn) -> [N|AccIn];
+                         (_, AccIn) -> AccIn end, [], fullmap()),
+  lists:usort(Downs).
+
+
+%% @doc return the partition responsible for the given Key
+partition_for_key(Key) ->
+  Config = configuration:get_config(),
+  Hash = lib_misc:hash(Key),
+  partitions:hash_to_partition(Hash, Config#config.q).
+
+
+%% @doc return the partitions that reside on a given node
+parts_for_node(Node) ->
+  lists:sort(lists:foldl(fun({N,P,_Type}, AccIn) ->
+      case N of
+      Node -> [P | AccIn];
+      _ -> AccIn
+      end
+  end, [], fullmap())).
+
+
+%% @doc get all the nodes and partitions in the cluster.  Depending on the
+%%      AllPartners param, you get only primary nodes or replication partner
+%%      nodes, as well.
+%%      No nodes/parts currently down are returned.
+all_nodes_parts(false) ->
+  ets_pmap();
+all_nodes_parts(true) ->
+  mem_utils:nodeparts_up(ets_fullmap()).
+
+
+%% @doc If a local storage server exists for this partition it will be taken
+%%      out of rotation until put back in.
+%% @end
+take_offline(Node, Partition) when Node =:= node() ->
+  gen_server:call(membership, {take_offline, Partition});
+
+take_offline(Node, Partition)->
+  % route the request to the membership server on the owning node
+  gen_server:call({membership, Node}, {take_offline, Partition}).
+
+
+%% @doc Brings a storage server that has been taken offline back online.
+%% @end
+bring_online(Node, Partition) ->
+  showroom_log:message(debug, "membership: bring_online Node: ~p Partition: ~p",
+                       [Node, Partition]),
+  gen_server:call({membership, Node}, {bring_online, Partition}).
+
+
+%% @doc cleans up the remaining .couch shard/partition file after it has been
+%%      moved to a new node.
+decommission_part(Node, Part, DbName) ->
+  gen_server:cast({membership, Node}, {decommission, Part, DbName}).
+
+
+%%====================================================================
+%% gen_server callbacks
+%%====================================================================
+
+%%--------------------------------------------------------------------
+%% @spec init(Args) -> {ok, State} |
+%%                     {ok, State, Timeout} |
+%%                     ignore               |
+%%                     {stop, Reason}
+%% @doc Initiates the server.  Loads any persisted state from disk and,
+%%      depending on mem_utils:use_persistent/2, either reuses it (and
+%%      gossips it) or joins the cluster through a partner to build a
+%%      fresh partition map.
+%% @end
+%%--------------------------------------------------------------------
+init([Node, Nodes, Args]) ->
+  process_flag(trap_exit,true),
+  showroom_log:message(info, "membership: membership server starting...", []),
+  Options = lists:flatten(Args),
+  showroom_log:message(info, "membership: options ~p", [Options]),
+  % subscribe to nodeup/nodedown messages (see handle_info/2)
+  net_kernel:monitor_nodes(true),
+  Config = configuration:get_config(),
+  PersistentState=#membership{partitions=PersistentParts} = load(Node),
+  PartnersPlus = replication:partners_plus(Node, Nodes),
+  State =
+    case mem_utils:use_persistent(PartnersPlus, PersistentParts) of
+    false ->
+      showroom_log:message(info, "membership: not using persisted state", []),
+      % didn't find persistent state on disk or this node was nodedown
+      % so we don't want to use persisted state
+      PartialNodes = lists:usort(Nodes),
+      {NewVersion, RemoteNodes, NewPMap1, NewFullMap1} =
+        join_to(Node, PartnersPlus, Options),
+      NewWorldNodes = lists:usort(PartialNodes ++ RemoteNodes),
+      % empty map from join_to means we are the first node up: create it
+      NewPMap = case NewPMap1 of
+      [] -> partitions:create_partitions(Config#config.q, Node,
+                                         NewWorldNodes);
+      _ -> NewPMap1
+      end,
+      NewFullMap = case NewFullMap1 of
+      [] -> make_all_nodes_parts(NewPMap);
+      _ -> NewFullMap1
+      end,
+      #membership{
+        node=Node,
+        nodes=NewWorldNodes,
+        partitions=lists:keysort(2,NewPMap),
+        % version=vector_clock:increment(dbcore, NewVersion),
+        version=NewVersion,
+        fullmap=NewFullMap};
+    _ ->
+      % found persistent state on disk
+      showroom_log:message(info, "membership: using persisted state", []),
+      case Options of
+      [] -> ok;
+      _ ->
+        showroom_log:message(info, "membership: options ~p ignored.", [Options])
+      end,
+      %% fire gossip even if state comes from disk
+      fire_gossip(Node, Nodes, PersistentState),
+      PersistentState
+    end,
+  save(State),
+  % ets table is an optimization for cluster_ops performance
+  Ets = ets:new(ets_name(Node), [public, set, named_table]),
+  update_ets(Ets, State),
+  {ok, State}.
+
+
+%%--------------------------------------------------------------------
+%% @spec
+%% handle_call(Request, From, State) -> {reply, Reply, State} |
+%%                                      {reply, Reply, State, Timeout} |
+%%                                      {noreply, State} |
+%%                                      {noreply, State, Timeout} |
+%%                                      {stop, Reason, Reply, State} |
+%%                                      {stop, Reason, State}
+%% @doc Handling call messages
+%% @end
+%%--------------------------------------------------------------------
+
+%% join: a remote node asks to join the cluster.  Computes the new
+%% partition/full maps (rejoin, replace or brand-new join), bumps the vector
+%% clock, persists and gossips the new state, and replies with the map the
+%% joining node should use.
+handle_call({join, JoiningNode, Options}, _From,
+            State = #membership{version=Version, node=Node, nodes=Nodes,
+                                partitions=Partitions, fullmap=OldFullMap}) ->
+  JoinType = mem_utils:join_type(JoiningNode, OldFullMap, Options),
+  showroom_log:message(alert, "membership: node ~p wants to join, type '~p'",
+                       [JoiningNode, JoinType]),
+  {PMap, NewFullmap} = case JoinType of
+  rejoin ->
+    mem_utils:fix_mappings(rejoin, JoiningNode, OldFullMap);
+  {replace, OldNode} ->
+    mem_utils:fix_mappings(replace, {OldNode, JoiningNode}, OldFullMap);
+  new ->
+    Hints = proplists:get_value(hints, Options),
+    PMap1 = case partitions:join(JoiningNode, Partitions, Hints) of
+    {ok, Table} -> Table;
+    {error, Error, _Table} -> throw({join_error, Error})
+    end,
+    Fullmap1 = make_all_nodes_parts(PMap1),
+    {PMap1, Fullmap1}
+  end,
+  WorldNodes = lists:usort(Nodes ++ [JoiningNode]),
+  NewVersion = vector_clock:increment(dbcore, Version),
+  NewState1 = State#membership{nodes=WorldNodes, partitions=PMap,
+                               version=NewVersion},
+  {Fullmap, NewState2} = case proplists:get_value(bootstrap, Options) of
+  true ->
+    % join not complete until bootstrap finishes,
+    % so this NewState isn't the final (i.e. NewState1 will be installed)
+    showroom_log:message(info, "membership: bootstrap process starting", []),
+    bootstrap_manager:start_bootstrap(NewState1, OldFullMap, NewFullmap);
+  _ ->
+    % no bootstrap, so install NewFullmap now
+    showroom_log:message(info, "membership: no bootstrap", []),
+    {NewFullmap, NewState1#membership{fullmap=NewFullmap}}
+  end,
+  save(NewState2),
+  update_ets(ets_name(node()), NewState2),
+  notify(node_join, [JoiningNode]),
+  fire_gossip(Node, WorldNodes, NewState2),
+  % If we're bootstrapping, then the join is not complete.
+  % So return FullMap for now.  bootstrap_manager:end_bootstrap will fix it
+  {reply, {ok, NewVersion, WorldNodes, PMap, Fullmap}, NewState2};
+
+%% clock
+handle_call(clock, _From, State = #membership{version=Version}) ->
+  {reply, Version, State};
+
+%% state
+handle_call(state, _From, State) ->
+  {reply, State, State};
+
+%% newfullmap: install a fullmap computed elsewhere (e.g. after bootstrap),
+%% bumping the clock and gossiping the result
+handle_call({newfullmap, NewFullMap}, _From,
+            State = #membership{node=Node, nodes=Nodes, version=Version}) ->
+  NewVersion = vector_clock:increment(dbcore, Version),
+  NewState = State#membership{version=NewVersion, fullmap=NewFullMap},
+  save(NewState),
+  update_ets(ets_name(node()), NewState),
+  fire_gossip(Node, Nodes, NewState),
+  {reply, installed, NewState};
+
+%% partitions
+handle_call(partitions, _From, State = #membership{partitions=Parts}) ->
+  {reply, {ok, Parts}, State};
+
+%% fullmap
+handle_call(fullmap, _From, State = #membership{fullmap=FullMap}) ->
+  {reply, {ok, FullMap}, State};
+
+%% nodes
+handle_call(nodes, _From, State = #membership{nodes=Nodes}) ->
+  {reply, {ok, Nodes}, State};
+
+%% take_offline: drop the partition from this node's fullmap and gossip the
+%% change.  Note: the vector clock is not incremented here.
+handle_call({take_offline, Partition}, _From,
+            State = #membership{node=Node, nodes=Nodes, fullmap=OldFullMap}) ->
+  showroom_log:message(info, "membership: take_offline Node: ~p Partition: ~p",
+                       [Node, Partition]),
+  NewFullMap = mem_utils:remove_partition(OldFullMap, Node, Partition),
+  NewState = State#membership{fullmap=NewFullMap},
+  fire_gossip(Node, Nodes, NewState),
+  update_ets(ets_name(node()), NewState),
+  {reply, {offline, Node, Partition}, NewState};
+
+%% at least reply that this 'catch-all' was ignored
+handle_call(_Request, _From, State) ->
+  {reply, ignored, State}.
+
+
+%%--------------------------------------------------------------------
+%% @spec handle_cast(Msg, State) -> {noreply, State} |
+%%                                  {noreply, State, Timeout} |
+%%                                  {stop, Reason, State}
+%% @doc Handling cast messages
+%% @end
+%%--------------------------------------------------------------------
+
+%% gossip: merge a remote node's membership state into ours.  If the merge
+%% changed anything, persist and cache the merged state; either way the
+%% merged state becomes the new server state.
+handle_cast({gossip, RemoteState = #membership{node=RemoteNode}},
+            LocalState = #membership{node=_Me}) ->
+  showroom_log:message(info, "membership: received gossip from ~p",
+                       [RemoteNode]),
+  {MergeType, MergedState = #membership{nodes=_MergedNodes}} =
+    merge_state(RemoteState, LocalState),
+  case MergeType of
+  equal -> {noreply, MergedState};
+  merged ->
+    showroom_log:message(info, "membership: merged new gossip", []),
+    % fire_gossip(Me, MergedNodes, MergedState),
+    update_ets(ets_name(node()), MergedState),
+    save(MergedState),
+    {noreply, MergedState}
+  end;
+
+%% stop: requested via stop/1.  Without this clause the 'stop' cast sent by
+%% stop/1 would crash the server with a function_clause error instead of
+%% terminating it cleanly.
+handle_cast(stop, State) ->
+  {stop, normal, State};
+
+% decommission
+% renaming for now, until case 1245 can be completed
+handle_cast({decommission, Part, DbName}, State) ->
+  {{Y,Mon,D}, {H,Min,S}} = calendar:universal_time(),
+  Directory = couch_config:get("couchdb", "database_dir"),
+  OrigFilename = showroom_utils:full_filename(Part, DbName, Directory),
+  Moved = lists:flatten(io_lib:format(".~w~2.10.0B~2.10.0B." ++
+      "~2.10.0B~2.10.0B~2.10.0B.moved.couch", [Y,Mon,D,H,Min,S])),
+  % Note: this MovedFilename bit below gives weird results:
+  % ["/Users/brad/dev/erlang/dbcore/tmp/lib/x800000/test_800000",
+  %  ".20091001.162640.moved.couch"] but list/string behavior handles it.
+  % "\\.couch" matches a literal ".couch"; the previous "\.couch" collapsed
+  % to ".couch", where the unescaped '.' matches any character.
+  MovedFilename = lists:map(fun(E) -> binary_to_list(E) end,
+                            re:replace(OrigFilename, "\\.couch", Moved, [])),
+  ok = file:rename(OrigFilename, MovedFilename),
+  {noreply, State}.
+
+
+%% @doc handle nodedown messages because we have
+%%      net_kernel:monitor_nodes(true)
+%%      If the downed node was a member, remove it from the node list, repair
+%%      the maps via mem_utils:fix_mappings/3, bump the clock and persist.
+handle_info({nodedown, Node},
+            State = #membership{nodes=OldNodes, fullmap=OldFullmap,
+                                version=OldVersion}) ->
+  showroom_log:message(alert, "membership: nodedown from ~p", [Node]),
+  case lists:member(Node, OldNodes) of
+  true ->
+    notify(nodedown, [Node]),
+    % clean up membership state
+    Nodes = lists:delete(Node, OldNodes),
+    {PMap, Fullmap} = mem_utils:fix_mappings(nodedown, Node, OldFullmap),
+    % Do we increment clock here?  w/o gossip?
+    % This is happening near simultaneously on the other nodes, too :\
+    % Only reason to increment is persisted clock on down node will be older
+    % when it returns
+    Version = vector_clock:increment(dbcore, OldVersion),
+    NewState = State#membership{nodes=Nodes, partitions=PMap, fullmap=Fullmap,
+                                version=Version},
+    update_ets(ets_name(node()), NewState),
+    save(NewState),
+    {noreply, NewState};
+  _ -> {noreply, State}
+  end;
+
+%% @doc handle nodeup messages because we have
+%%      net_kernel:monitor_nodes(true)
+%%      Log-only: the node is re-added via the join/gossip protocol, not here.
+handle_info({nodeup, Node}, State) ->
+  showroom_log:message(alert, "membership: nodeup Node: ~p", [Node]),
+  {noreply, State};
+
+%% catch-all: drain unexpected messages so the mailbox cannot grow
+handle_info(Info, State) ->
+  showroom_log:message(info, "membership: handle_info Info: ~p", [Info]),
+  {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% @spec terminate(Reason, State) -> void()
+%% @doc This function is called by a gen_server when it is about to
+%%      terminate.  It should be the opposite of Module:init/1 and do any
+%%      necessary cleaning up.  When it returns, the gen_server terminates
+%%      with Reason.  The return value is ignored.
+%% @end
+%%--------------------------------------------------------------------
+terminate(_Reason, _State) ->
+  ok.
+
+%% Each code_change clause matches the MD5-derived vsn integer of a specific
+%% old release of this module (the integer form of the beam's module vsn).
+
+%% 0.5.6 to 0.5.7
+code_change(184380560337424323902805568963460261434, State, _Extra) ->
+  backup_old_config_file(),
+  % update State to the new version: the old record had no 'fullmap' field,
+  % so rebuild it from the primary partition map
+  {membership, _Hdr, Node, Nodes, PMap, Version} = State,
+  NewState = #membership{
+    node = Node,
+    nodes = Nodes,
+    partitions = PMap,
+    version = Version,
+    fullmap = make_all_nodes_parts(PMap)
+  },
+  save(NewState),
+  % also create new ets table
+  Ets = ets:new(ets_name(Node), [public, set, named_table]),
+  update_ets(Ets, NewState),
+  {ok, NewState};
+
+%% 0.8.8 to 0.9.0
+code_change(239470595681156900105628017899543243419, State, _Extra) ->
+  net_kernel:monitor_nodes(true),
+  {ok, State};
+
+code_change(OldVsn, State, _Extra) ->
+  io:format("Unknown Old Version!~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]),
+  {ok, State}.
+
+%%--------------------------------------------------------------------
+%%% Internal functions
+%%--------------------------------------------------------------------
+
+%% @doc copy this node's persisted state file to a .bak before an upgrade
+%%      rewrites it; copy failures are ignored (best effort).
+backup_old_config_file() ->
+  Config = configuration:get_config(),
+  FileName = filename:join([Config#config.directory,
+                            lists:concat([node:name(node()), ".state"])]),
+  BackupName = filename:join([Config#config.directory,
+                              lists:concat([node:name(node()), ".state.bak"])]),
+  file:copy(FileName, BackupName).
+
+
+%% return State from membership file
+load(Node) ->
+  Config = configuration:get_config(),
+  case file:consult(filename:join([Config#config.directory,
+      lists:concat([node:name(Node), ".state"])])) of
+  {error, Reason} ->
+    % missing/corrupt file is not fatal: start with an empty node list
+    showroom_log:message(info, "membership: could not load state: ~p~n",
+                         [Reason]),
+    #membership{nodes=[]};
+  {ok, [Terms]} ->
+    Terms
+  end.
+
+
+%% save the State to a file
+%% written with ~w so file:consult/1 in load/1 can read it back
+save(State) ->
+  Config = configuration:get_config(),
+  Filename = filename:join([Config#config.directory,
+                            lists:concat([node:name(State#membership.node),
+                                          ".state"])]),
+  {ok, File} = file:open(Filename, [binary, write]),
+  io:format(File, "~w.~n", [State]),
+  file:close(File).
+
+
+%% joining is bi-directional, as opposed to gossip which is unidirectional
+%% we want to collect the list of known nodes to compute the partition map
+%% which isn't necessarily the same as the list of running nodes
+join_to(Node, Partners, Options) ->
+  join_to(Node, Partners,
+          {vector_clock:create(dbcore), [], [], []}, Options).
+
+
+%% @doc join this node to one of its partners (or PartnersPlus if no partners
+%%      are available).
+%%      Accumulator tuple is {Version, WorldNodes, PMap, FullMap}; returned
+%%      unchanged (initially all-empty) when every partner is unreachable.
+join_to(_, [], {Version, World, PMap, FullMap}, _Options) ->
+  {Version, World, PMap, FullMap};
+
+join_to(Node, [Partner|Rest], {Version, World, PMap, FullMap}, Options) ->
+  case call_join(Partner, Node, Options) of
+  {ok, RemoteVersion, NewNodes, NewPMap, NewFullMap} ->
+    % first successful partner wins; merge clocks and node lists
+    {vector_clock:merge(Version, RemoteVersion),
+     lists:usort(World ++ NewNodes),
+     NewPMap,
+     NewFullMap};
+  Other ->
+    % partner unreachable or errored ('catch' in call_join) - try the next one
+    showroom_log:message(info, "membership: join_to Other: ~p~n", [Other]),
+    join_to(Node, Rest, {Version, World, PMap, FullMap}, Options)
+  end.
+
+
+%% @doc make the join call to Remote node (usually a partner of Node)
+call_join(Remote, Node, Options) ->
+  showroom_log:message(info, "membership: call_join From: ~p To: ~p",
+                       [Node, Remote]),
+  catch gen_server:call({membership, node:name(Remote)},
+                        {join, Node, Options}).
+
+
+%% @doc merge a remote gossip state into the local state; returns
+%%      {equal, LocalState} when the vector clocks already agree, otherwise
+%%      {merged, NewLocalState} with each field resolved via vector_clock.
+merge_state(_RemoteState=#membership{version=RemoteVersion, nodes=RemoteNodes,
+                                     partitions=RemotePMap,
+                                     fullmap=RemoteFullMap},
+            LocalState=#membership{version=LocalVersion, nodes=LocalNodes,
+                                   partitions=LocalPMap,
+                                   fullmap=LocalFullMap}) ->
+  case vector_clock:equals(RemoteVersion, LocalVersion) of
+  true ->
+    {equal, LocalState};
+  false ->
+    % Note, we're matching MergedVersion from these funs.
+    % They should be the same.
+    {MergedVersion, MergedNodes} =
+      merge_nodes(RemoteVersion, RemoteNodes, LocalVersion, LocalNodes),
+    {MergedVersion, MergedPMap} =
+      merge_pmaps(RemoteVersion, RemotePMap, LocalVersion, LocalPMap),
+    {MergedVersion, MergedFullMap} =
+      merge_fullmaps(RemoteVersion, RemoteFullMap,
+                     LocalVersion, LocalFullMap),
+
+    % notify of arrivals & departures
+    Arrived = MergedNodes -- LocalNodes,
+    notify(node_join, Arrived),
+    % Departed = LocalNodes -- MergedNodes,
+    % notify(node_leave, Departed),
+
+    {merged, LocalState#membership{version=MergedVersion, nodes=MergedNodes,
+                                   partitions=MergedPMap,
+                                   fullmap=MergedFullMap}}
+  end.
+
+
+%% @doc resolve node lists via vector clock, de-duplicating the merge result
+merge_nodes(RemoteVersion, RemoteNodes, LocalVersion, LocalNodes) ->
+  {MergedVersion, Merged} = vector_clock:resolve({RemoteVersion, RemoteNodes},
+                                                 {LocalVersion, LocalNodes}),
+  {MergedVersion, lists:usort(Merged)}.
+
+
+%% @doc resolve partition maps via vector clock; unique by partition (elem 2)
+merge_pmaps(RemoteVersion, RemotePMap, LocalVersion, LocalPMap) ->
+  {MergedVersion, Merged} = vector_clock:resolve({RemoteVersion, RemotePMap},
+                                                 {LocalVersion, LocalPMap}),
+  {MergedVersion, lists:ukeysort(2, Merged)}.
+
+
+%% @doc resolve fullmaps via vector clock, de-duplicating the merge result
+merge_fullmaps(RemoteVersion, RemoteFullMap, LocalVersion, LocalFullMap) ->
+  {MergedVersion, Merged} = vector_clock:resolve({RemoteVersion, RemoteFullMap},
+                                                 {LocalVersion, LocalFullMap}),
+  {MergedVersion, lists:usort(Merged)}.
+
+
+%% @doc publish a membership event ({Type, Node}) for each node to any
+%%      subscribers of the membership_events gen_event manager
+notify(Type, Nodes) ->
+  lists:foreach(fun(Node) ->
+      gen_event:notify(membership_events, {Type, Node})
+  end, Nodes).
+
+
+%% @doc fires a gossip message (membership state) to partners nodes in the
+%%      cluster.
+%% @end
+fire_gossip(Me, WorldNodes, Gossip) ->
+  % GossipPartners = partners_plus(Me, WorldNodes),
+  % random experiment, gossip with all nodes, not just partners_plus
+  GossipPartners = lists:delete(Me, WorldNodes),
+  lists:foreach(fun(TargetNode) ->
+      showroom_log:message(info, "membership: firing gossip from ~p to ~p",
+                           [Me, TargetNode]),
+      gen_server:cast({membership, TargetNode}, {gossip, Gossip})
+  end, GossipPartners).
+
+
+%% @doc construct a table with all partitions, with the primary node and all
+%%      replication partner nodes as well.
+make_all_nodes_parts(PMap) ->
+  {Nodes, _Parts} = lists:unzip(PMap),
+  NodeParts = lists:flatmap(
+    fun({Node,Part}) ->
+        Partners = replication:partners(Node, lists:usort(Nodes)),
+        PartnerList = [{Partner, Part, partner} || Partner <- Partners],
+        [{Node, Part, primary} | PartnerList]
+    end, PMap),
+  NodeParts.
+
+
+%% @doc for the given key, return a list of {Node,Part} tuples.  Nodes are
+%%      both primary and replication partner nodes, and should number N.
+int_node_parts_for_key(Key) ->
+  Config = configuration:get_config(),
+  Hash = lib_misc:hash(Key),
+  Part = partitions:hash_to_partition(Hash, Config#config.q),
+  NodePartList = all_nodes_parts(true),
+  lists:filter(fun({_N,P}) -> P =:= Part end, NodePartList).
+
+
+%% ets table helper functions
+%% each node has its own named table, "mem_<nodename>"
+ets_name(Node) ->
+  list_to_atom(lists:concat(["mem_", atom_to_list(Node)])).
+
+
+%% @doc cache the pmap and fullmap in ets for lock-free reads by cluster_ops
+update_ets(Table, #membership{partitions=PMap, fullmap=FullMap}) ->
+  ets:insert(Table, {pmap, PMap}),
+  ets:insert(Table, {fullmap, FullMap}),
+  ok.
+
+
+ets_pmap() ->
+  [{pmap, PMap}] = ets:lookup(ets_name(node()), pmap),
+  PMap.
+
+
+ets_fullmap() ->
+  [{fullmap, FullMap}] = ets:lookup(ets_name(node()), fullmap),
+  FullMap.
diff --git a/src/node.erl b/src/node.erl
new file mode 100644
index 00000000..9a9c82c1
--- /dev/null
+++ b/src/node.erl
@@ -0,0 +1,39 @@
+%%%-------------------------------------------------------------------
+%%% File:      node.erl
+%%% @author    Cliff Moon <cliff@powerset.com> [http://www.powerset.com/]
+%%% @copyright 2009 Cliff Moon
+%%% @doc  Accessors for a dynomite node term, which may be a bare atom
+%%%       (the node name) or a tuple of {Name, Attributes}.
+%%% @end
+%%%
+%%% @since 2009-05-11 by Cliff Moon
+%%%-------------------------------------------------------------------
+-module(node).
+-author('cliff@powerset.com').
+
+%% API
+-export([name/1, attributes/1]).
+
+-include("../include/common.hrl").
+
+%% -ifdef(TEST).
+%% -include("../etest/node_test.erl").
+%% -endif.
+
+%%====================================================================
+%% API
+%%====================================================================
+
+%% @doc extract the node name: a bare atom is its own name; a tuple node's
+%%      name is the first element; anything else is passed through
+name(Name) when is_atom(Name) ->
+  Name;
+name(Node) when is_tuple(Node) ->
+  element(1, Node);
+name(Node) ->
+  Node.
+
+%% @doc extract node attributes; only tuple nodes carry any (second element)
+attributes(Name) when is_atom(Name) ->
+  [];
+attributes(Node) when is_tuple(Node) ->
+  element(2, Node);
+attributes(_) ->
+  [].
diff --git a/src/partitions.erl b/src/partitions.erl
new file mode 100644
index 00000000..942968e1
--- /dev/null
+++ b/src/partitions.erl
@@ -0,0 +1,334 @@
+%%%-------------------------------------------------------------------
+%%% File:      partitions.erl
+%%% @author    Cliff Moon <cliff@powerset.com> [http://www.powerset.com/]
+%%% @copyright 2008 Cliff Moon
+%%% @doc  Partition-table creation and manipulation for the consistent
+%%%       hashing ring (SHA-1 keyspace split into 2^Q partitions).
+%%% @end
+%%%
+%%% @since 2008-10-12 by Cliff Moon
+%%%-------------------------------------------------------------------
+-module(partitions).
+-author('cliff@powerset.com').
+
+%% API
+-export([partition_range/1, create_partitions/3, map_partitions/2,
+         diff/2, pp_diff/1, int_to_partition/2,
+         join/3, leave/3, hash/1, hash_to_partition/2, item_to_nodepart/1,
+         shard_name/2, hash_to_hex/2]).
+
+-define(RINGTOP, trunc(math:pow(2,160)-1)).  % SHA-1 space
+
+-include("../../couch/src/couch_db.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+%% -ifdef(TEST).
+%% -include("etest/partitions_test.erl").
+%% -endif.
+
+%%====================================================================
+%% API
+%%====================================================================
+
+%% @doc size of one partition's slice of the ring for a given Q
+partition_range(Q) ->
+  trunc( ?RINGTOP / math:pow(2,Q) ).  % SHA-1 space / 2^Q
+
+%% @doc build a fresh table of 2^Q partitions, all owned by Node
+create_partitions(Q, Node, _Nodes) ->
+  fresh(trunc(math:pow(2,Q)), Node).
+  % map_partitions(Table, Nodes).
+
+
+%% @spec map_partitions(Table::proplist(),Nodes::list()) -> proplist()
+%% @doc maps partitions to nodes.  The resulting list should be Dynomite
+%%      format, namely {Node,Part}
+%% @end
+map_partitions(Table, Nodes) ->
+  {_Nodes, Parts} = lists:unzip(Table),
+  do_map(Nodes, Parts).
+
+
+%% @doc in case Hints is undefined, turn it into a list for clauses below.
+join(Node, Table, undefined) ->
+  join(Node, Table, []);
+
+%% @spec join(node(), proplist(), list()) -> {ok, PartTable::proplist()} |
+%%                                           {error, Error}
+%% @doc given a node, current partition table, and hints, this function
+%%      returns the new partition table
+join(Node, Table, Hints) ->
+  {NodeList, Parts} = lists:unzip(Table),
+  OtherNodes = lists:delete(Node, NodeList),
+  OtherDistinctNodes = lists:usort(OtherNodes),
+  %% quick check to see if we have more nodes than partitions
+  %% (equality here means adding this node would exceed one part per node)
+  if
+  length(Parts) == length(OtherDistinctNodes) ->
+    {error, "Too many nodes vs partitions", Table};
+  true ->
+    AlreadyPresent = length(NodeList) - length(OtherNodes),
+    Nodes = lists:usort(NodeList),
+    PartCountToTake = trunc(length(Parts) / (length(Nodes) + 1)),
+    %% calcs done, let's steal some partitions
+    {HintsTaken, NewTable} = steal_hints(Node, Table, Hints),
+    if
+    PartCountToTake - AlreadyPresent - HintsTaken > 0 ->
+      steal_partitions(Node, OtherDistinctNodes, NewTable,
+                       PartCountToTake - AlreadyPresent - HintsTaken);
+    true ->
+      %% no partitions to take
+      {ok, NewTable}
+    end
+  end.
+
+
+%% TODO: implement me
+leave(_Node, Table, _Hints) ->
+  Table.
+
+
+%% @doc compare two full partition maps of equal length; see diff/3 below
+diff(From, To) when length(From) =/= length(To) ->
+  {error, badlength, "Cannot diff partition maps with different length"};
+
+diff(From, To) ->
+  diff(sort_for_diff(From), sort_for_diff(To), []).
+
+
+%% @doc render diff results with hex partition identifiers for readability
+pp_diff(Diff) ->
+  lists:map(
+    fun({F,T,Part}) -> {F,T,showroom_utils:int_to_hexstr(Part)} end,
+    Diff).
+
+
+%% @spec hash(term()) -> Digest::binary()
+%% @doc Showroom uses SHA-1 as its hash
+hash(Item) ->
+  crypto:sha(term_to_binary(Item)).
+
+
+%% @spec hash_to_partition(binary(), integer()) -> integer()
+%% @doc given a hashed value and Q, return the partition
+%%      Partition k*Size owns the interval (k*Size, (k+1)*Size].
+%%      NOTE(review): a Hash of exactly 0 yields -Size here (Factor=0,
+%%      Rem=0) - presumably unreachable for SHA-1 output; confirm.
+hash_to_partition(Hash, Q) ->
+  HashInt = hash_int(Hash),
+  Size = partition_range(Q),
+  Factor = (HashInt div Size),
+  Rem = (HashInt rem Size),
+  if
+  Rem > 0 -> Factor * Size;
+  true -> ((Factor-1) * Size)
+  end.
+
+
+%% @doc as hash_to_partition/2, but rendered as a hex string
+hash_to_hex(Hash, Q) ->
+  Part = hash_to_partition(Hash, Q),
+  showroom_utils:int_to_hexstr(Part).
+
+
+%% @doc given an int and a list of partitions, get the first part greater
+%%      than Int.  Used for a hex part being turned back into an int.
+%%      (dropwhile with '<' keeps parts >= Int, so an exact match returns
+%%      itself)
+int_to_partition(Int, Parts) ->
+  Rem = lists:dropwhile(fun(E) -> E < Int end, lists:sort(Parts)),
+  case Rem of
+  [] -> 0; % wrap-around-ring case (back to 0)
+  [H|_T] -> H
+  end.
+
+
+%% @spec item_to_nodepart(bin()) -> {Node::node(),Part::integer()}
+%% @doc given a raw item, return the node/partition/shard
+%%      name based on consistent hashing
+item_to_nodepart(Item) when is_binary(Item) ->
+  Q = list_to_integer(couch_config:get("cluster","q")),
+  Hash = hash(?b2l(Item)),
+  Part = hash_to_partition(Hash, Q),
+  {ok, Table} = membership2:partitions(),
+  lists:keyfind(Part, 2, Table);
+
+item_to_nodepart(Item) ->
+  item_to_nodepart(term_to_binary(Item)).
+
+
+%% @spec shard_name(integer(), binary()) -> binary()
+%% @doc create shard name
+shard_name(Part, DbName) ->
+  PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)),
+  <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>.
+
+%%====================================================================
+%% Internal functions
+%%====================================================================
+
+%% @doc Create a brand new table.  The size and seednode are specified;
+%%      initially all partitions are owned by the seednode.  If NumPartitions
+%%      is not much larger than the intended eventual number of
+%%      participating nodes, then performance will suffer.
+
+%% from http://code.google.com/p/distributerl (trunk revision 4) chash:fresh/2
+%% @spec fresh(NumPartitions :: integer(), SeedNode :: node()) -> table()
+fresh(NumPartitions, SeedNode) ->
+  Increment = ?RINGTOP div NumPartitions,
+  [{SeedNode, IndexAsInt} || IndexAsInt <- lists:seq(0,(?RINGTOP-1),Increment)].
+
+
+%% @spec steal_hints(node(), proplist(), list( integer() )) ->
+%%          {integer(), proplist()}
+%% @doc move the partitions listed in Hints over to the new owner, Node
+steal_hints(Node, Table, Hints) ->
+  steal_hints(Node, Table, Hints, 0).
+
+
+%% @doc recursive workhorse for hints mechanism, Acc is tracking how many
+%%      hints/partitions were successfully moved to a new Node.
+%% @end
+steal_hints(_Node, Table, [], Acc) ->
+  {Acc, Table};
+
+steal_hints(Node, Table, [Hint|RestHints], Acc) ->
+  % a hint naming a nonexistent partition is skipped ('noswap' status)
+  {Status, NewTable} = swap_node_for_part(Node, Hint, Table),
+  Acc1 = case Status of
+  ok -> Acc+1;
+  _ -> Acc
+  end,
+  steal_hints(Node, NewTable, RestHints, Acc1).
+
+
+%% @doc take a part from one of the other nodes based on most # of parts per
+%%      node.
+%% @end
+%% TODO: This fun does list ops on the Table each time through.  Inefficient?
+%%       Hopefully not, due to small Table sizes
+steal_partitions(_Node, _OtherNodes, Table, 0) ->
+  {ok, Table};
+steal_partitions(Node, OtherNodes, Table, Count) ->
+  %% first, get a list of OtherNodes and their partition counts
+  NPCountFun = fun(N) ->
+      L = proplists:get_all_values(N, Table),
+      {N, length(lists:delete(undefined, L))}
+  end,
+  NPCounts = lists:reverse(lists:keysort(2,lists:map(NPCountFun, OtherNodes))),
+  %% grab the node that has the most partitions
+  [{TakeFrom, _PartsCount}|_RestOfTable] = NPCounts,
+  %% get the highest # partition of the TakeFrom node
+  TakeFromParts = lists:reverse(lists:sort(proplists:get_all_values(TakeFrom,
+                                                                    Table))),
+  [Part|_RestOfParts] = TakeFromParts,
+  {ok, NewTable} = swap_node_for_part(Node, Part, Table),
+  steal_partitions(Node, OtherNodes, NewTable, Count-1).
+
+
+%% @doc Make Node the owner of the partition beginning at Part.
+%% from http://code.google.com/p/distributerl (trunk revision 4) chash:update/3
+swap_node_for_part(Node, Part, Table) ->
+  case lists:keymember(Part, 2, Table) of
+  true ->
+    % remove the old owner's entry and re-insert {Node, Part} in ring order
+    GapList = [{N,P} || {N,P} <- Table, P /= Part],
+    {A, B} = lists:partition(fun({_,K1}) -> K1 < Part end, GapList),
+    {ok, A ++ [{Node, Part}] ++ B};
+  false ->
+    showroom_log:message(info,
+        "'~p' partition was not found in partition table", [Part]),
+    {noswap, Table}
+  end.
+
+
+%% @doc get the difference between two FullPMaps
+%%      lists need to be sorted by part, then node
+diff([], [], Results) ->
+  lists:reverse(remove_dupes(Results));
+
+diff([{Node,Part,_}|PartsA], [{Node,Part,_}|PartsB], Results) ->
+  % same node owns Part in both maps: nothing to move
+  diff(PartsA, PartsB, Results);
+
+diff([{NodeA,Part,_}|PartsA], [{NodeB,Part,_}|PartsB], Results) ->
+  % Part moved from NodeA to NodeB
+  diff(PartsA, PartsB, [{NodeA,NodeB,Part}|Results]).
+
+
+%% @doc sorts the full map for diff/3.  This may change to get more accurate
+%%      diff w/o dupes
+sort_for_diff(FullMap) ->
+  lists:keysort(2,lists:sort(FullMap)).
+
+
+%% @doc collapse redundant move chains in the diff, per partition
+remove_dupes(Diff) ->
+  {_,_,AllParts} = lists:unzip3(Diff),
+  Parts = lists:usort(AllParts),
+  remove_dupes_from_part(Parts, Diff, []).
+
+
+%% @doc ex: take [{a,b,1},{b,c,1}] diff and make it [{a,c,1}] so we don't go
+%%      moving unnecessary shard files.  'Move partition 1 from a to b and
+%%      then move partition 1 from b to c' is unnecessary.  Just move it a
+%%      to c.
+remove_dupes_from_part([], _Diff, Acc) ->
+  Acc;
+
+remove_dupes_from_part([Part|Rest], Diff, Acc) ->
+  PartData = lists:filter(fun({_,_,P}) -> P =:= Part end, Diff),
+  NewPartData = process_part_data(Part, PartData, PartData, PartData),
+  remove_dupes_from_part(Rest, Diff, lists:concat([NewPartData, Acc])).
+
+
+%% for one partition of the full diff, remove the dupes
+%% chains {From,To,...} followed by {To,NewTo,...} collapse to {From,NewTo,...}
+process_part_data(_Part, _PartData, [], Acc) ->
+  Acc;
+
+process_part_data(Part, PartData, [{From,To,_Part}|Rest], Acc) ->
+  case proplists:lookup(To, PartData) of
+  {To, NewTo, _Part} ->
+
+    Remove1 = proplists:delete(To, PartData),
+    Remove2 = proplists:delete(From, Remove1),
+    NewPartData = [{From, NewTo, Part}|Remove2],
+    %?debugFmt("~nFrom   : ~p~nTo     : ~p~nNewTo: ~p~n"
+    %          "Remove1: ~p~nRemove2: ~p~n"
+    %          "NewPartData: ~p~n"
+    %          , [From, To, NewTo, Remove1, Remove2, NewPartData]),
+    process_part_data(Part, NewPartData, Rest, NewPartData);
+  none ->
+    process_part_data(Part, PartData, Rest, Acc)
+  end.
+
+
+% %% @doc from dynomite
+% diff([], [], Results) ->
+%   lists:reverse(Results);
+
+% diff([{Node,Part}|PartsA], [{Node,Part}|PartsB], Results) ->
+%   diff(PartsA, PartsB, Results);
+
+% diff([{NodeA,Part}|PartsA], [{NodeB,Part}|PartsB], Results) ->
+%   diff(PartsA, PartsB, [{NodeA,NodeB,Part}|Results]).
+
+
+%% @doc does Node/Partition mapping based on Amazon Dynamo paper,
+%%      section 6.2, strategy 3, more or less
+%%      http://www.allthingsdistributed.com/2007/10/amazons_dynamo.html
+%% @end
+do_map([Node|RestNodes], Parts) ->
+  % each node receives at most ceil(length(Parts)/length(Nodes)) partitions
+  Max = length(Parts) / length([Node|RestNodes]),
+  do_map(Node, RestNodes, Parts, [], 1, Max).
+
+
+%% return final mapped list
+do_map(_,_,[],Mapped, _, _) ->
+  lists:keysort(1, Mapped);
+
+%% finish off last node, Cnt & Max no longer needed
+do_map(Node, [], [Part|RestParts], Mapped, _, _) ->
+  do_map(Node, [], RestParts, [{Node, Part}|Mapped], 0,0);
+
+%% workhorse clause, iterates through parts, until Cnt > Max, then advances to
+%% next node, wash, rinse, repeat
+do_map(Node, [NextNode|RestNodes], [Part|RestParts], Mapped, Cnt, Max) ->
+  case Cnt > Max of
+  true ->
+    do_map(NextNode, RestNodes, RestParts, [{Node, Part}|Mapped],
+           1, Max);
+  false ->
+    do_map(Node, [NextNode|RestNodes], RestParts, [{Node, Part}|Mapped],
+           Cnt+1, Max)
+  end.
+
+
+%% @doc Normalize a hash into an integer in the SHA-1 ring space.  Accepts
+%%      either a 20-byte binary digest (as produced by hash/1) or an
+%%      already-converted integer, which is passed through unchanged.
+%% TODO: other guards
+hash_int(Hash) when is_binary(Hash) ->
+  % decode the SHA-1 digest as a single big-endian 160-bit integer
+  % (160 matches ?RINGTOP = 2^160-1); the pattern was corrupted to "<> ="
+  % in transit and is restored here
+  <<IndexAsInt:160/integer>> = Hash,
+  IndexAsInt;
+hash_int(Hash) when is_integer(Hash) ->
+  Hash.
diff --git a/src/replication.erl b/src/replication.erl
new file mode 100644
index 00000000..96be0ad3
--- /dev/null
+++ b/src/replication.erl
@@ -0,0 +1,165 @@
+%%%-------------------------------------------------------------------
+%%% File:      replication.erl
+%%% @author    Brad Anderson <brad@cloudant.com> [http://www.cloudant.com]
+%%% @copyright 2009 Brad Anderson
+%%% @doc  Selection of replication partner nodes for a given node, driven
+%%%       by the configured replication factor N and node metadata.
+%%% @end
+%%%
+%%% @since 2009-06-14 by Brad Anderson
+%%%-------------------------------------------------------------------
+-module(replication).
+-author('brad@cloudant.com').
+
+%% API
+-export([partners/2, partners/3, partners_plus/2]).
+
+-include_lib("eunit/include/eunit.hrl").
+-include("../include/config.hrl").
+-include("../include/common.hrl").
+
+
+%%====================================================================
+%% API
+%%====================================================================
+
+%% @doc as partners/3, using the current configuration
+partners(Node, Nodes) ->
+  partners(Node, Nodes, configuration:get_config()).
+
+
+%%--------------------------------------------------------------------
+%% @spec partners(Node::atom(), Nodes::list(), Config::config()) ->
+%%          list()
+%% @doc returns the list of all replication partners for the specified node
+%%      (N - 1 partners, where N is the configured replication factor)
+%% @end
+%%--------------------------------------------------------------------
+partners(Node, Nodes, Config) ->
+  N = Config#config.n,
+  Meta = Config#config.meta,
+  pick_partners(Meta, Node, Nodes, [], N - 1).
+
+
+%% return a list of live/up Partners, and if all Partners are down,
+%% walk the ring to get one other remote node and return it.
%% @doc Return the live/up partners of Node; if every partner is down, walk
%% the rest of the ring (in ring order) for one other reachable node and
%% return it as a single-element list. Returns [] when nothing is reachable.
partners_plus(Node, Nodes) ->
    Partners = partners(Node, Nodes),
    Down = lists:subtract(Partners, erlang:nodes()),
    case lists:subtract(Partners, Down) of
        [] ->
            %% every partner is down: consider all remaining nodes, ring-ordered
            Ring = target_list(Node, Nodes),
            Candidates = lists:subtract(Ring, lists:flatten([Node, Partners])),
            walk_ring(Candidates);
        Up ->
            %% at least one partner is up, so gossip w/ them
            Up
    end.


%%====================================================================
%% Internal functions
%%====================================================================

%% @spec pick_partners(proplist(), Node::dynomite_node(), [Node], [Node],
%%                     integer()) -> list()
%% @doc iterate through N-1 partner picks, returning the resulting list sorted
%% (with Node itself removed from the accumulated picks)
pick_partners(_Meta, Node, _Nodes, Acc, 0) ->
    lists:sort(lists:delete(Node, Acc));
pick_partners(Meta, Node, Nodes, Acc, Count) ->
    Partner = pick_partner(Meta, Node, Nodes, Acc, 1),
    %% remove the chosen partner (and any failed-pick marker) from the pool
    Remaining = [Elem || Elem <- Nodes,
                         Elem =/= Partner,
                         Elem =/= no_partner_found],
    NewAcc = case Partner of
                 no_partner_found -> Acc;
                 _ -> [Partner | Acc]
             end,
    pick_partners(Meta, Node, Remaining, NewAcc, Count - 1).
%% @spec pick_partner(proplist(), Node::dynomite_node(), [Node], [Node],
%%                    integer()) -> Node::dynomite_node()
%% @doc pick a specific replication partner at the given level
pick_partner([], Node, Nodes, _Acc, 1) ->
    %% handle the no metadata situation
    %% Note: This clause must be before the Level > length(Meta) guarded clause
    target_key(node:name(Node), lists:map(fun node:name/1, Nodes), roundrobin);

pick_partner(Meta, _Node, _Nodes, Acc, Level) when Level > length(Meta) ->
    Acc;

pick_partner(Meta, Node, Nodes, Acc, Level) ->
    MetaDict = meta_dict(Nodes, Level, dict:new()),
    NodeKey = lists:sublist(node:attributes(Node), Level),
    Keys = dict:fetch_keys(MetaDict),
    {_MetaName, Strategy} = lists:nth(Level, Meta),
    TargetKey = target_key(NodeKey, Keys, Strategy),
    Candidates = dict:fetch(TargetKey, MetaDict),
    %% IMPROVED: pattern-match the candidate list instead of computing
    %% length/1 (O(n)) just to branch on 0/1/many — same behavior.
    case Candidates of
        [] ->
            %% didn't find a candidate
            no_partner_found;
        [Partner] ->
            %% found only one candidate, return it
            Partner;
        _ ->
            %% ambiguous: refine the search at the next metadata level
            pick_partner(Meta, Node, Nodes, Acc, Level + 1)
    end.


%% @doc construct a dict that holds the key of metadata values so far (up to
%%      the current level), and dynomite_node() list as the value.  This is
%%      used to select a partner in pick_partner/5
%% @end
meta_dict([], _Level, Dict) ->
    Dict;
meta_dict([Node | Rest], Level, Dict) ->
    Key = lists:sublist(node:attributes(Node), Level),
    meta_dict(Rest, Level, dict:append(Key, Node, Dict)).


%% @spec target_key(term(), list(), Strategy::atom()) -> term()
%% @doc given the key and keys, sort the list of keys based on strategy (i.e.
%%      for roundrobin, sort them, put the NodeKey on the end of the list, and
%%      then return the head of the list as the target.
%% @end
%% TODO: moar strategies other than roundrobin?
target_key(NodeKey, Keys, roundrobin) ->
    %% rotate the sorted keys past NodeKey and take the first one
    case target_list(NodeKey, lists:sort(Keys)) of
        [] -> no_partner_found;
        [Key | _Rest] -> Key
    end.


%% @spec target_list(term(), list()) -> list()
%% @doc split the list of keys into 'lessthan NodeKey', NodeKey, and
%%      'greaterthan NodeKey' and then put the lessthan section on the end
%%      of the list
%% @end
%% NOTE(review): badmatch if NodeKey is absent from Keys — callers appear
%% to guarantee membership; confirm.
target_list(_NodeKey, []) ->
    [];
target_list(NodeKey, Keys) ->
    {Before, [NodeKey | After]} =
        lists:splitwith(fun(K) -> K /= NodeKey end, Keys),
    After ++ Before ++ [NodeKey].


%% @doc walk the candidate list, returning the first node that is currently
%% connected (wrapped in a list); [] when none are reachable.
walk_ring([]) ->
    %% TODO: should we be more forceful here and throw? not for now
    showroom_log:message(info,
        "~p:walk_ring/1 - could not find node for gossip", [?MODULE]),
    [];
walk_ring([Node | Rest]) ->
    case lists:member(Node, erlang:nodes()) of
        true -> [Node];
        _ -> walk_ring(Rest)
    end.

%% ====================================================================
%% ==== patch file boundary: new file src/vector_clock.erl        ====
%% ====================================================================

%%% @author Cliff Moon [] 
%%% @copyright 2008 Cliff Moon

-module (vector_clock).
-export ([create/1, truncate/1, increment/2, compare/2, resolve/2, merge/2,
          equals/2]).

%% -ifdef(TEST).
%% -include("etest/vector_clock_test.erl").
%% -endif.

%% @doc new clock with a single entry for NodeName, stamped with now.
create(NodeName) -> [{NodeName, lib_misc:now_float()}].

%% @doc cap a clock at its 10 most recent entries (ordered by timestamp).
truncate(Clock) when length(Clock) > 10 ->
    lists:nthtail(length(Clock) - 10, lists:keysort(2, Clock));
truncate(Clock) ->
    Clock.

%% @doc re-stamp NodeName's entry with the current time, appending a fresh
%% entry if the node is not yet in the clock.
increment(NodeName, [{NodeName, _Version} | Clocks]) ->
    [{NodeName, lib_misc:now_float()} | Clocks];
increment(NodeName, [NodeClock | Clocks]) ->
    [NodeClock | increment(NodeName, Clocks)];
increment(NodeName, []) ->
    [{NodeName, lib_misc:now_float()}].
%% @doc Resolve two clock/value pairs: the causally-later pair wins outright;
%% concurrent clocks are merged and both value sets are kept for the caller
%% to reconcile. not_found on either side yields the other pair.
resolve({ClockA, ValuesA}, {ClockB, ValuesB}) ->
    case compare(ClockA, ClockB) of
        less    -> {ClockB, ValuesB};
        greater -> {ClockA, ValuesA};
        equal   -> {ClockA, ValuesA};
        concurrent ->
            io:format("~nConcurrent Clocks~n"
                      "ClockA : ~p~nClockB : ~p~n"
                      "ValuesA: ~p~nValuesB: ~p~n"
                      , [ClockA, ClockB, ValuesA, ValuesB]),
            {merge(ClockA, ClockB), ValuesA ++ ValuesB}
    end;
resolve(not_found, {Clock, Values}) ->
    {Clock, Values};
resolve({Clock, Values}, not_found) ->
    {Clock, Values}.

%% @doc entry-wise maximum of two clocks, result sorted by node name.
merge(ClockA, ClockB) ->
    merge([], ClockA, ClockB).

merge(Acc, [], ClockB) ->
    lists:keysort(1, Acc ++ ClockB);
merge(Acc, ClockA, []) ->
    lists:keysort(1, Acc ++ ClockA);
merge(Acc, [{Node, VersionA} | RestA], ClockB) ->
    case lists:keytake(Node, 1, ClockB) of
        {value, {Node, VersionB}, RestB} when VersionA > VersionB ->
            merge([{Node, VersionA} | Acc], RestA, RestB);
        {value, {Node, VersionB}, RestB} ->
            %% VersionA =< VersionB: keep the larger (B) version
            merge([{Node, VersionB} | Acc], RestA, RestB);
        false ->
            %% node only present in A
            merge([{Node, VersionA} | Acc], RestA, ClockB)
    end.

%% @doc causal ordering of two clocks: less | greater | equal | concurrent.
compare(ClockA, ClockB) ->
    case less_than(ClockA, ClockB) of
        true -> less;
        false ->
            case less_than(ClockB, ClockA) of
                true -> greater;
                false ->
                    case equals(ClockA, ClockB) of
                        true -> equal;
                        false -> concurrent
                    end
            end
    end.
%% ClockA is less than ClockB if and only if ClockA[z] <= ClockB[z] for all
%% instances z and there exists an index z' such that ClockA[z'] < ClockB[z']
less_than(ClockA, ClockB) ->
    DominatedEverywhere =
        lists:all(fun({Node, VersionA}) ->
                      case lists:keysearch(Node, 1, ClockB) of
                          {value, {_NodeB, VersionB}} -> VersionA =< VersionB;
                          false -> false
                      end
                  end, ClockA),
    StrictSomewhere =
        lists:any(fun({NodeA, VersionA}) ->
                      case lists:keysearch(NodeA, 1, ClockB) of
                          {value, {_NodeB, VersionB}} -> VersionA /= VersionB;
                          false -> true
                      end
                  end, ClockA),
    %% the length comparison takes care of the case when ClockA is shorter
    %% than (a strict prefix of) ClockB
    DominatedEverywhere and
        (StrictSomewhere or (length(ClockA) < length(ClockB))).

%% @doc true when both clocks hold exactly the same set of entries.
equals(ClockA, ClockB) ->
    %% lists:member/2 compares with =:=, same as the original pattern match
    AllShared = lists:all(fun(Entry) -> lists:member(Entry, ClockB) end,
                          ClockA),
    AllShared and (length(ClockA) == length(ClockB)).

%% ====================================================================
%% ==== patch file boundary: new file test/Emakefile              ====
%% ==== (Emakefile term, preserved as a comment here)             ====
%% ====================================================================
%% {"*", [warn_obsolete_guard, warn_unused_import,
%%        warn_shadow_vars, warn_export_vars, debug_info,
%%        {i, "../include"},
%%        {outdir, "../tests_ebin"}]}.
%%
%% ==== patch file boundary: new file test/Makefile (make syntax) ====
%% include ../support/include.mk
%%
%% all: $(EBIN_FILES_NO_DOCS)
%%
%% doc: $(EBIN_FILES)
%%
%% debug:
%% 	$(MAKE) DEBUG=-DDEBUG
%%
%% clean:
%% 	rm -rf $(EBIN_FILES)
%% 	rm -rf ../tests_ebin
%% (no newline at end of file)
%%
%% ==== patch file boundary: new file test/cluster_ops_test.erl   ====
%% ==== header preserved as comments: every test in this module   ====
%% ==== is commented out in the patch                             ====
%% -module(cluster_ops_test).
%%
%% -include("../../couchdb/couch_db.hrl").
%% -include_lib("eunit/include/eunit.hrl").


% read_quorum_test() ->
%     % we need to be running a cluster here...
+% % not sure how to start things up for unit tests + +% % but we're testing reads when a node is missing a doc, so disable internal +% % replication - a bit harsh if anything else is here, but hey, it's a test +% rpc:multicall(showroom, stop, []), +% rpc:multicall(supervisor, terminate_child, +% [couch_primary_services, couch_replication_supervisor]), +% rpc:multicall(supervisor, delete_child, +% [couch_primary_services, couch_replication_supervisor]), + +% % create db +% DbName = <<"cluster_ops_test">>, +% showroom_db:delete_db(DbName, []), +% {Status, #db{name=DbName}} = showroom_db:create_db(DbName, []), +% ?assertEqual(ok, Status), + +% % open db +% {ok, Db} = showroom_db:open_db(DbName, []), + +% % make a test doc +% Key = <<"a">>, +% Json = {[{<<"_id">>,Key}]}, +% Doc = couch_doc:from_json_obj(Json), +% Clock = vector_clock:create(node()), +% NewDoc = Doc#doc{clock=Clock}, + +% % insert a doc in two shards out of three +% % TODO: we need N=3, need to fix that at db create time Options above +% % (fb 1001) +% {M,F,A} = {dynomite_couch_api, put,[Db, NewDoc, []]}, +% CorrectNodeParts = membership2:nodeparts_for_key(Key), +% [{MissingNode, MissingPart} | BadNodeParts] = CorrectNodeParts, +% MapFun = fun({Node,Part}) -> +% rpc:call(Node, M, F, [[Part | A]]) +% end, +% {Good, Bad} = pcall(MapFun, BadNodeParts, 2), +% ?assertEqual(2, length(Good)), +% ?assertEqual([], Bad), + +% % make sure it's notfound on the MissingNode +% MissingNodeGet = rpc:call(MissingNode, dynomite_couch_api, get, +% [[MissingPart, Db, Key, nil, []]]), +% ?assertEqual({not_found, {[], [missing]}}, MissingNodeGet), + +% JsonDoc = {[{<<"_id">>,<<"a">>}, +% {<<"_rev">>, +% <<"1-967a00dff5e02add41819138abb3284d">>}]}, + +% % r=3 should fail +% {r_quorum_not_met, {[{message, _M}, {good, G}, {bad, B}]}} = +% showroom_doc:open_doc(Db, Key, nil, [{r, "3"}]), +% ?assertEqual([JsonDoc,JsonDoc], G), +% ?assertEqual([{not_found, missing}], B), + +% % r=2 should never fail (run it many times to make 
%     sure)
%     do_opens({Db, Key, nil, [{r, "2"}]}, 20),

%     ok.


% pcall(MapFun, Servers, Const) ->
%     Replies = lib_misc:pmap(MapFun, Servers, Const),
%     lists:partition(fun valid/1, Replies).


% valid({ok, _}) -> true;
% valid(ok) -> true;
% valid(_) -> false.


% do_opens(_,0) -> ok;
% do_opens({Db, DocId, Refs, Options} = Payload, Times) ->
%     {Status, _Doc} = showroom_doc:open_doc(Db, DocId, Refs, Options),
%     ?assertEqual(ok, Status),
%     do_opens(Payload, Times-1).

%% ====================================================================
%% ==== patch file boundary: new file test/mem2_code_change.erl   ====
%% ====================================================================

-module(mem2_code_change).

-export([run/0]).

%% @doc Hot-downgrade helper: suspends the registered membership process,
%% swaps in membership2 code at the old version, then resumes it.
run() ->
    Pid = whereis(membership),
    OldVsn = "0.7.1-cloudant",
    Extra = "",
    sys:suspend(Pid),
    sys:change_code(Pid, membership2, OldVsn, Extra),
    sys:resume(Pid).

%% ====================================================================
%% ==== patch file boundary: new file test/mem_utils_test.erl     ====
%% ====================================================================

-module(mem_utils_test).

-include_lib("eunit/include/eunit.hrl").


%% join_type/3 passes the {replace, Node} option through untouched
join_type_test() ->
    Options = [{replace, node3}],
    ?assertEqual({replace, node3},
                 mem_utils:join_type(dummy, dummy, Options)).


%% a pmap is derived from the primary entries of the fullmap
pmap_from_full_test() ->
    ?assertEqual([{n1,0},{n2,1},{n3,2},{n4,3}],
                 mem_utils:pmap_from_full(t_fullmap(0))).


fix_mappings_nodedown_test() ->
    {PMap, Fullmap} = mem_utils:fix_mappings(nodedown, n3, t_fullmap(0)),
    % with n3 down, n1 takes over
    ?assertEqual([{n1,0},{n2,1},{n1,2},{n4,3}], PMap),
    ?assertEqual(t_fullmap(1), lists:sort(Fullmap)).
%% a downed node's partitions can be handed back when it rejoins
fix_mappings_rejoin_test() ->
    {PMapDown, FullmapDown} =
        mem_utils:fix_mappings(nodedown, n3, t_fullmap(0)),
    % with n3 down, n1 takes over
    ?assertEqual([{n1,0},{n2,1},{n1,2},{n4,3}], PMapDown),
    ?assertEqual(t_fullmap(1), lists:sort(FullmapDown)),
    % now have n3 rejoin
    {PMapBack, FullmapBack} =
        mem_utils:fix_mappings(rejoin, n3, FullmapDown),
    ?assertEqual([{n1,0},{n2,1},{n3,2},{n4,3}], PMapBack),
    ?assertEqual(lists:sort(t_fullmap(0)), lists:sort(FullmapBack)).


%% a downed node can be replaced by a brand new node
fix_mappings_replace_test() ->
    {PMapDown, FullmapDown} =
        mem_utils:fix_mappings(nodedown, n3, t_fullmap(0)),
    % with n3 down, n1 takes over
    ?assertEqual([{n1,0},{n2,1},{n1,2},{n4,3}], PMapDown),
    ?assertEqual(t_fullmap(1), lists:sort(FullmapDown)),
    % now replace n3 with n5
    {PMapNew, FullmapNew} =
        mem_utils:fix_mappings(replace, {n3, n5}, FullmapDown),
    ?assertEqual([{n1,0},{n2,1},{n5,2},{n4,3}], PMapNew),
    ?assertEqual(lists:sort(t_fullmap(2)), lists:sort(FullmapNew)).


%% marking an already-down node down again is a no-op
fix_mappings_already_down_test() ->
    {_PMap, Fullmap} = mem_utils:fix_mappings(nodedown, n3, t_fullmap(1)),
    ?assertEqual(t_fullmap(1), lists:sort(Fullmap)).


was_i_nodedown_test() ->
    ?assertEqual(true, mem_utils:was_i_nodedown(n3, t_fullmap(1))),
    ?assertEqual(false, mem_utils:was_i_nodedown(n3, t_fullmap(0))).
%% test helper funs

%% @doc canned fullmap fixtures, keyed by scenario number; any other
%% argument yields the sentinel atom 'huh'.
t_fullmap(0) -> % four node, four part fullmap (unsorted)
    [{n1,0,primary},
     {n2,0,partner},
     {n3,0,partner},
     {n2,1,primary},
     {n3,1,partner},
     {n4,1,partner},
     {n3,2,primary},
     {n4,2,partner},
     {n1,2,partner},
     {n4,3,primary},
     {n1,3,partner},
     {n2,3,partner}];
t_fullmap(1) -> % like (0) above, but n3 is down (sorted)
    [{n1,0,primary},
     {n1,2,partner},
     {n1,3,partner},
     {n2,0,partner},
     {n2,1,primary},
     {n2,3,partner},
     {n3,0,{nodedown,partner}},
     {n3,1,{nodedown,partner}},
     {n3,2,{nodedown,primary}},
     {n4,1,partner},
     {n4,2,partner},
     {n4,3,primary}];
t_fullmap(2) -> % like (0) above, but n3 is replaced w/ n5 (unsorted)
    [{n1,0,primary},
     {n2,0,partner},
     {n5,0,partner},
     {n2,1,primary},
     {n5,1,partner},
     {n4,1,partner},
     {n5,2,primary},
     {n4,2,partner},
     {n1,2,partner},
     {n4,3,primary},
     {n1,3,partner},
     {n2,3,partner}];
t_fullmap(_Huh) ->
    huh.

%% ====================================================================
%% ==== patch file boundary: new file test/membership2_test.erl   ====
%% ==== header preserved as comments: every test in this module   ====
%% ==== is commented out in the patch                             ====
%% ====================================================================
%% %%% -*- erlang-indent-level:2 -*-
%% -module(membership2_test).
%% -author('cliff@powerset.com').
%% -author('brad@cloudant.com').
%%
%% -include("../include/config.hrl").
%% -include("../include/common.hrl").
%% -include("../include/test.hrl").
%%
%% -include_lib("eunit/include/eunit.hrl").
+ +% singular_startup_sequence_test() -> +% %% configuration:start_link(#config{n=1,r=1,w=1,q=6,directory=?TMP_DIR}), +% {ok, _} = mock:mock(configuration), +% mock:expects(configuration, get_config, fun(_Args) -> true end, +% #config{n=1,r=1,w=1,q=6,directory=?TMP_DIR}, 3), +% {ok, _} = mock:mock(replication), +% mock:expects(replication, partners, fun({_, [a], _}) -> true end, []), +% mock:expects(replication, partners_plus, fun({a, [a]}) -> true end, []), +% {ok, M} = membership2:start_link(a, [a]), +% State = gen_server:call(M, state), +% ?assertEqual(a, State#membership.node), +% ?assertEqual([a], State#membership.nodes), +% mock:verify_and_stop(replication), +% membership2:stop(M), +% %% configuration:stop(), +% mock:verify_and_stop(configuration), +% ?assertMatch({ok, [[a]]}, file:consult(?TMP_FILE("a.world"))), +% file:delete(?TMP_FILE("a.world")). + +% -define(NODEA, {a, ["d", "1", "4"]}). +% -define(NODEB, {b, ["e", "3", "1"]}). +% -define(NODEC, {c, ["f", "1", "2"]}). +% -define(NODES, [?NODEA, ?NODEB, ?NODEC]). 
+ +% multi_startup_sequence_test() -> +% {ok, _} = mock:mock(configuration), +% mock:expects(configuration, get_config, fun(_Args) -> true end, +% (#config{n=3,r=1,w=1,q=6,directory=?TMP_DIR}), 3), +% {ok, _} = mock:mock(replication), +% VersionOne = vector_clock:create(make_ref()), +% Pid1 = make_ref(), +% VersionTwo = vector_clock:create(make_ref()), +% Pid2 = make_ref(), +% mock:expects(replication, partners, fun({_, ?NODES, _}) -> true end, [?NODEB, ?NODEC]), +% {ok, _} = stub:stub(membership2, call_join, fun(?NODEB, ?NODEA) -> +% {VersionOne, ?NODES, [{1,Pid1}]}; +% (?NODEC, ?NODEA) -> +% {VersionTwo, ?NODES, [{2,Pid2}]} +% end, 2), +% ?debugMsg("proxied"), +% ?debugFmt("check process code: ~p", [erlang:check_process_code(self(), membership2)]), +% {ok, M} = membership2:start_link(?NODEA, ?NODES), +% State = gen_server:call(M, state), +% ?assertEqual(?NODEA, State#membership.node), +% ?assertEqual(?NODES, State#membership.nodes), +% % Servers = State#membership.servers, +% % ?assertMatch([{1,Pid1},{2,Pid2}], membership2:servers_to_list(Servers)), +% ?assertEqual(greater, vector_clock:compare(State#membership.version, VersionOne)), +% ?assertEqual(greater, vector_clock:compare(State#membership.version, VersionTwo)), +% mock:verify_and_stop(replication), +% membership2:stop(M), +% mock:verify_and_stop(configuration), +% ?assertMatch({ok, [?NODES]}, file:consult(?TMP_FILE("a.world"))), +% file:delete(?TMP_FILE("a.world")). 
+ +% startup_and_first_servers_for_key_test() -> +% configuration:start_link(#config{n=1,r=1,w=1,q=6,directory=?TMP_DIR}), +% {ok, _} = mock:mock(replication), +% mock:expects(replication, partners, fun({_, [a], _}) -> true end, []), +% {ok, M} = membership2:start_link(a, [a]), +% _State = gen_server:call(M, state), +% ?assertEqual([], membership2:servers_for_key("blah")), +% mock:verify_and_stop(replication), +% membership2:stop(M), +% configuration:stop(), +% ?assertMatch({ok, [[a]]}, file:consult(?TMP_FILE("a.world"))), +% file:delete(?TMP_FILE("a.world")). + +% startup_and_register_test() -> +% configuration:start_link(#config{n=1,r=1,w=1,q=0,directory=?TMP_DIR}), +% {ok, _} = mock:mock(replication), +% mock:expects(replication, partners, fun({_, [?NODEA], _}) -> true end, [], 3), +% {ok, M} = membership2:start_link(?NODEA, [?NODEA]), +% SServer1 = make_server(), +% SServer2 = make_server(), +% membership2:register(1, SServer1), +% membership2:register(1, SServer2), +% ?assertEqual([SServer1, SServer2], membership2:servers_for_key("blah")), +% mock:verify_and_stop(replication), +% membership2:stop(M), +% configuration:stop(), +% SServer1 ! stop, +% SServer2 ! stop, +% file:delete(?TMP_FILE("a.world")). + +% handle_local_server_outage_test() -> +% configuration:start_link(#config{n=1,r=1,w=1,q=0,directory=?TMP_DIR}), +% {ok, _} = mock:mock(replication), +% mock:expects(replication, partners, fun({_, [?NODEA], _}) -> true end, [], 4), +% {ok, M} = membership2:start_link(?NODEA, [?NODEA]), +% SServer1 = make_server(), +% SServer2 = make_server(), +% membership2:register(1, SServer1), +% membership2:register(1, SServer2), +% SServer1 ! stop, +% timer:sleep(1), +% ?assertEqual([SServer2], membership2:servers_for_key("blah")), +% mock:verify_and_stop(replication), +% membership2:stop(M), +% configuration:stop(), +% SServer2 ! stop, +% file:delete(?TMP_FILE("a.world")). 
% full_gossip_test() ->
%     configuration:start_link(#config{n=1,r=1,w=1,q=2,directory=priv_dir()}),
%     {ok, _} = mock:mock(replication),
%     mock:expects(replication, partners, fun({_, ?NODES, _}) -> true end, [?NODEB, ?NODEC],4),


% make_server() ->
%     spawn(fun() ->
%               receive
%                   stop -> ok
%               end
%           end).

%% ====================================================================
%% ==== patch file boundary: new file test/mock.erl               ====
%% ====================================================================

%%% -*- erlang-indent-level:2 -*-
%%%-------------------------------------------------------------------
%%% File:      mock.erl
%%% @author    Cliff Moon <> []
%%% @copyright 2009 Cliff Moon
%%% @doc
%%%
%%% @end
%%%
%%% @since 2009-01-04 by Cliff Moon
%%%-------------------------------------------------------------------
-module(mock).
-author('cliff@powerset.com').

%% API
-export([mock/1, proxy_call/2, proxy_call/3, expects/4, expects/5,
         verify_and_stop/1, verify/1, stub_proxy_call/3, stop/1]).

-include_lib("eunit/include/eunit.hrl").
-include("../include/common.hrl").

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

%% old_code: the real module's object code, restored in terminate/2
%% expectations: ordered list of {Function, Matcher, Ret, Times, Called}
-record(mockstate, {old_code, module, expectations=[]}).

%%====================================================================
%% API
%%====================================================================
%%--------------------------------------------------------------------
%% @spec mock(Module::atom()) -> {ok,Mock::record()} | ignore | {error,Error}
%% @doc Starts the server
%% @end
%%--------------------------------------------------------------------
mock(Module) ->
    %% FIX: the previous version wrapped this call in a case that rebuilt
    %% the identical {ok, Pid} / {error, Reason} tuples and crashed with
    %% case_clause on 'ignore', even though the @spec above allows it.
    %% Returning the result directly lets all three documented outcomes
    %% pass through unchanged.
    gen_server:start_link({local, mod_to_name(Module)}, mock, Module, []).
%% @spec proxy_call(Module::atom(), Function::atom()) -> term()
%% @doc Proxies a call to the mock server for Module without arguments
%% @end
proxy_call(Module, Function) ->
    gen_server:call(mod_to_name(Module), {proxy_call, Function, {}}).

%% @spec proxy_call(Module::atom(), Function::atom(), Args::tuple()) -> term()
%% @doc Proxies a call to the mock server for Module with arguments
%% @end
proxy_call(Module, Function, Args) ->
    gen_server:call(mod_to_name(Module), {proxy_call, Function, Args}).

%% @doc Hand-rolled rendezvous with a registered stub process: sends the
%% args tagged with a fresh ref and waits (indefinitely) for the reply.
stub_proxy_call(Module, Function, Args) ->
    RegName = list_to_atom(lists:concat([Module, "_", Function, "_stub"])),
    Ref = make_ref(),
    RegName ! {Ref, self(), Args},
    ?debugFmt("sending {~p,~p,~p}", [Ref, self(), Args]),
    receive
        {Ref, Answer} -> Answer
    end.

%% @spec expects(Module::atom(),
%%               Function::atom(),
%%               Args::function(),
%%               Ret::function() | term() ) -> term()

%% Times:: {at_least, integer()} | never | {no_more_than, integer()} | integer()) -> term()

%% @doc Sets the expectation that Function of Module will be called during a
%% test with Args.  Args should be a fun predicate that will return true or
%% false whether or not the argument list matches.  The argument list of the
%% function is passed in as a tuple.  Ret is either a value to return or a fun
%% of arity 2 to be evaluated in response to a proxied call.  The first argument
%% is the actual args from the call, the second is the call count starting
%% with 1.
expects(Module, Function, Args, Ret) ->
    expects(Module, Function, Args, Ret, 1).

expects(Module, Function, Args, Ret, Times) ->
    gen_server:call(mod_to_name(Module),
                    {expects, Function, Args, Ret, Times}).

%% stub(Module, Function, Args, Ret) ->
%%   gen_server:call(mod_to_name(Module), {stub, Function, Args, Ret}).

%% @doc verify/1 then stop/1 in one step.
verify_and_stop(Module) ->
    verify(Module),
    stop(Module).

%% @doc assert that every registered expectation was satisfied.
verify(Module) ->
    ?assertEqual(ok, gen_server:call(mod_to_name(Module), verify)).
%% @doc cast-based stop; the short sleep gives the server time to restore
%% the real module's code in terminate/2 before the caller proceeds.
stop(Module) ->
    gen_server:cast(mod_to_name(Module), stop),
    timer:sleep(10).


%%====================================================================
%% gen_server callbacks
%%====================================================================

%%--------------------------------------------------------------------
%% @spec init(Args) -> {ok, State} |
%%                     {ok, State, Timeout} |
%%                     ignore               |
%%                     {stop, Reason}
%% @doc Initiates the server: saves the real module's object code, then
%% hot-swaps in a generated proxy module that forwards every exported
%% function to this server.
%% @end
%%--------------------------------------------------------------------
init(Module) ->
    case code:get_object_code(Module) of
        {Module, Bin, Filename} ->
            case replace_code(Module) of
                ok -> {ok, #mockstate{module=Module,
                                      old_code={Module, Bin, Filename}}};
                {error, Reason} -> {stop, Reason}
            end;
        error ->
            {stop, ?fmt("Could not get object code for module ~p", [Module])}
    end.

%%--------------------------------------------------------------------
%% @spec
%% handle_call(Request, From, State) -> {reply, Reply, State} |
%%                                      {reply, Reply, State, Timeout} |
%%                                      {noreply, State} |
%%                                      {noreply, State, Timeout} |
%%                                      {stop, Reason, Reply, State} |
%%                                      {stop, Reason, State}
%% @doc Handling call messages
%% @end
%%--------------------------------------------------------------------
handle_call({proxy_call, Function, Args}, _From,
            State = #mockstate{module=Mod, expectations=Expects}) ->
    case match_expectation(Function, Args, Expects) of
        {matched, ReturnTerm, NewExpects} ->
            {reply, ReturnTerm, State#mockstate{expectations=NewExpects}};
        unmatched ->
            %% FIX: handle_call/3 must return {stop, Reason, State} (or
            %% {stop, Reason, Reply, State}); the previous {stop, Reason}
            %% 2-tuple is not a valid gen_server return and killed the
            %% server with bad_return_value instead of this reason.
            {stop, ?fmt("got unexpected call to ~p:~p", [Mod,Function]), State}
    end;

handle_call({expects, Function, Args, Ret, Times}, _From,
            State = #mockstate{expectations=Expects}) ->
    {reply, ok, State#mockstate{
        expectations=add_expectation(Function, Args, Ret, Times, Expects)}};

handle_call(verify, _From,
            State = #mockstate{expectations=Expects, module=Mod}) ->
    ?infoFmt("verifying ~p~n", [Mod]),
    %% IMPROVED: pattern-match on the list instead of the O(n)
    %% length(Expects) > 0 guard — same behavior.
    case Expects of
        [] -> {reply, ok, State};
        _ -> {reply, {mismatch, format_missing_expectations(Expects, Mod)},
              State}
    end.

%%--------------------------------------------------------------------
%% @spec handle_cast(Msg, State) -> {noreply, State} |
%%                                  {noreply, State, Timeout} |
%%                                  {stop, Reason, State}
%% @doc Handling cast messages
%% @end
%%--------------------------------------------------------------------
handle_cast(stop, State) ->
    timer:sleep(10),
    {stop, normal, State}.

%%--------------------------------------------------------------------
%% @spec handle_info(Info, State) -> {noreply, State} |
%%                                   {noreply, State, Timeout} |
%%                                   {stop, Reason, State}
%% @doc Handling all non call/cast messages
%% @end
%%--------------------------------------------------------------------
handle_info(_Info, State) ->
    {noreply, State}.

%%--------------------------------------------------------------------
%% @spec terminate(Reason, State) -> void()
%% @doc This function is called by a gen_server when it is about to
%% terminate. It should be the opposite of Module:init/1 and do any necessary
%% cleaning up. When it returns, the gen_server terminates with Reason.
%% The return value is ignored. Here: restores the real module's code.
%% @end
%%--------------------------------------------------------------------
terminate(_Reason, #mockstate{old_code={Module, Binary, Filename}}) ->
    code:purge(Module),
    code:delete(Module),
    code:load_binary(Module, Filename, Binary),
    timer:sleep(10).

%%--------------------------------------------------------------------
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @doc Convert process state when code is changed
%% @end
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
%%--------------------------------------------------------------------
%%% Internal functions
%%--------------------------------------------------------------------

%% @doc render one message per unmet expectation.
format_missing_expectations(Expects, Mod) ->
    format_missing_expectations(Expects, Mod, []).

format_missing_expectations([], _, Msgs) ->
    lists:reverse(Msgs);
format_missing_expectations([{Function, _Args, _Ret, Times, Called} | Rest],
                            Mod, Msgs) ->
    Msg = ?fmt("expected ~p:~p to be called ~p times but was called ~p",
               [Mod, Function, Times, Called]),
    format_missing_expectations(Rest, Mod, [Msg | Msgs]).

%% @doc append keeps expectations in registration order, which is also
%% the order match_expectation/3 consults them in.
add_expectation(Function, Args, Ret, Times, Expects) ->
    Expects ++ [{Function, Args, Ret, Times, 0}].

%% @doc find the first expectation whose function name and arg-matcher fit;
%% bump (or retire) its call count and compute the value to return.
match_expectation(Function, Args, Expectations) ->
    match_expectation(Function, Args, Expectations, []).

match_expectation(_Function, _Args, [], _Checked) ->
    unmatched;
match_expectation(Function, Args,
                  [{Function, Matcher, Ret, MaxTimes, Invoked} | Remaining],
                  Checked) ->
    case Matcher(Args) of
        true ->
            ReturnTerm = prepare_return(Args, Ret, Invoked + 1),
            case Invoked + 1 >= MaxTimes of
                true ->
                    %% expectation exhausted: drop it from the list
                    {matched, ReturnTerm, lists:reverse(Checked) ++ Remaining};
                false ->
                    Bumped = {Function, Matcher, Ret, MaxTimes, Invoked + 1},
                    {matched, ReturnTerm,
                     lists:reverse(Checked) ++ [Bumped] ++ Remaining}
            end;
        false ->
            match_expectation(Function, Args, Remaining,
                              [{Function, Matcher, Ret, MaxTimes, Invoked}
                               | Checked])
    end;
match_expectation(Function, Args, [Other | Remaining], Checked) ->
    match_expectation(Function, Args, Remaining, [Other | Checked]).

%% @doc a fun Ret is evaluated with the actual args and 1-based call count;
%% any other Ret is returned as-is.
prepare_return(Args, Ret, Invoked) when is_function(Ret) ->
    Ret(Args, Invoked);
prepare_return(_Args, Ret, _Invoked) ->
    Ret.
%% @doc Hot-swap Module for a generated proxy: every exported function of the
%% real module is replaced by a forwarder into mock:proxy_call/2,3.
replace_code(Module) ->
    Exports = get_exports(Module:module_info()),
    unload_code(Module),
    Forms = [{attribute, 1, module, Module},
             {attribute, 2, export, Exports}
             | generate_functions(Module, Exports)],
    case compile:forms(Forms, [binary]) of
        {ok, Module, Binary} ->
            case code:load_binary(Module,
                                  atom_to_list(Module) ++ ".erl", Binary) of
                {module, Module} -> ok;
                {error, Reason} -> {error, Reason}
            end;
        error ->
            {error, "An undefined error happened when compiling."};
        {error, Errors, Warnings} ->
            {error, Errors ++ Warnings}
    end.

%% @doc purge and delete Module from the code server.
unload_code(Module) ->
    code:purge(Module),
    code:delete(Module).

%% @doc pull the export list out of module_info() output, minus the
%% auto-generated module_info/0,1 entries.
get_exports(Info) ->
    get_exports(Info, []).

get_exports(Info, Acc) ->
    case lists:keytake(exports, 1, Info) of
        {value, {exports, Exports}, ModInfo} ->
            Wanted = lists:filter(fun({module_info, _}) -> false;
                                     (_) -> true
                                  end, Exports),
            get_exports(ModInfo, Acc ++ Wanted);
        _ ->
            Acc
    end.

%% stub_function_loop(Fun) ->
%%   receive
%%     {Ref, Pid, Args} ->
%%       ?debugFmt("received {~p,~p,~p}", [Ref, Pid, Args]),
%%       Ret = (catch Fun(Args) ),
%%       ?debugFmt("sending {~p,~p}", [Ref,Ret]),
%%       Pid ! {Ref, Ret},
%%       stub_function_loop(Fun)
%%   end.

% Function -> {function, Lineno, Name, Arity, [Clauses]}
% Clause -> {clause, Lineno, [Variables], [Guards], [Expressions]}
% Variable -> {var, Line, Name}
%
%% @doc one abstract-format forwarder function per export.
generate_functions(Module, Exports) ->
    generate_functions(Module, Exports, []).

generate_functions(_Module, [], FunctionForms) ->
    lists:reverse(FunctionForms);
generate_functions(Module, [{Name, Arity} | Exports], FunctionForms) ->
    generate_functions(Module, Exports,
                       [generate_function(Module, Name, Arity)
                        | FunctionForms]).

generate_function(Module, Name, Arity) ->
    {function, 1, Name, Arity,
     [{clause, 1, generate_variables(Arity), [],
       generate_expression(mock, proxy_call, Module, Name, Arity)}]}.
%% @doc abstract-format argument variables Arg1..ArgN (none for arity 0).
generate_variables(0) -> [];
generate_variables(Arity) ->
    [{var, 1, list_to_atom(lists:concat(['Arg', N]))}
     || N <- lists:seq(1, Arity)].

%% @doc abstract-format body: M:F(Module, Name) for arity 0, otherwise
%% M:F(Module, Name, {Arg1, ..., ArgN}) — the args travel as one tuple.
generate_expression(M, F, Module, Name, 0) ->
    [{call, 1, {remote, 1, {atom, 1, M}, {atom, 1, F}},
      [{atom, 1, Module}, {atom, 1, Name}]}];
generate_expression(M, F, Module, Name, Arity) ->
    ArgTuple = {tuple, 1,
                [{var, 1, list_to_atom(lists:concat(['Arg', N]))}
                 || N <- lists:seq(1, Arity)]},
    [{call, 1, {remote, 1, {atom, 1, M}, {atom, 1, F}},
      [{atom, 1, Module}, {atom, 1, Name}, ArgTuple]}].

%% @doc registered name of the mock server for Module, e.g. mock_foo.
mod_to_name(Module) ->
    list_to_atom(lists:concat([mock_, Module])).

%% replace_function(FF, Forms) ->
%%   replace_function(FF, Forms, []).

%% replace_function(FF, [], Ret) ->
%%   [FF|lists:reverse(Ret)];

%% replace_function({function,_,Name,Arity,Clauses}, [{function,Line,Name,Arity,_}|Forms], Ret) ->
%%   lists:reverse(Ret) ++ [{function,Line,Name,Arity,Clauses}|Forms];

%% replace_function(FF, [FD|Forms], Ret) ->
%%   replace_function(FF, Forms, [FD|Ret]).

%% ====================================================================
%% ==== patch file boundary: new file test/mock_genserver.erl     ====
%% ====================================================================

%%%-------------------------------------------------------------------
%%% File:      mock_genserver.erl
%%% @author    Cliff Moon <> []
%%% @copyright 2009 Cliff Moon
%%% @doc
%%%
%%% @end
%%%
%%% @since 2009-01-02 by Cliff Moon
%%%-------------------------------------------------------------------
-module(mock_genserver).
-author('cliff@powerset.com').

-behaviour(gen_server).

-include_lib("eunit/include/eunit.hrl").

%% API
-export([start_link/1, stub_call/3, expects_call/3, expects_call/4, stop/1]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

%% call_stubs: [{Sym, Fun}], call_expects: [{ArgsMatcher, Fun, Times}]
-record(state, {call_stubs=[], call_expects=[], cast_expectations,
                info_expectations}).
%%====================================================================
%% API
%%====================================================================
%%--------------------------------------------------------------------
%% @spec start_link(Reference::atom()) -> {ok,Pid} | ignore | {error,Error}
%% @doc Starts the server
%% @end
%%--------------------------------------------------------------------
start_link(Reference) ->
    gen_server:start_link(Reference, ?MODULE, [], []).

%% @doc register a stub: Fun is invoked for any later call matching Sym.
stub_call(Server, Sym, Fun) when is_function(Fun) ->
    gen_server:call(Server, {mock_stub_call, Sym, Fun}).

%% @doc register an expectation that may fire any number of times.
expects_call(Server, Args, Fun) when is_function(Fun) ->
    gen_server:call(Server, {mock_expects_call, Args, Fun}).

%% @doc register an expectation limited to Times invocations.
expects_call(Server, Args, Fun, Times) when is_function(Fun) ->
    gen_server:call(Server, {mock_expects_call, Args, Fun, Times}).

%% @doc synchronous stop.
stop(Server) ->
    gen_server:call(Server, mock_stop).

%%====================================================================
%% gen_server callbacks
%%====================================================================

%%--------------------------------------------------------------------
%% @spec init(Args) -> {ok, State} |
%%                     {ok, State, Timeout} |
%%                     ignore               |
%%                     {stop, Reason}
%% @doc Initiates the server
%% @end
%%--------------------------------------------------------------------
init([]) ->
    {ok, #state{}}.
+ +%%-------------------------------------------------------------------- +%% @spec +%% handle_call(Request, From, State) -> {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | +%% {stop, Reason, State} +%% @doc Handling call messages +%% @end +%%-------------------------------------------------------------------- +handle_call({mock_stub_call, Sym, Fun}, _From, State = #state{call_stubs=Stubs}) -> + {reply, ok, State#state{call_stubs=[{Sym, Fun}|Stubs]}}; + +handle_call({mock_expects_call, Args, Fun}, _From, State = #state{call_expects=Expects}) -> + {reply, ok, State#state{call_expects=add_expectation(Args, Fun, at_least_once, Expects)}}; + +handle_call({mock_expects_call, Args, Fun, Times}, _From, State = #state{call_expects=Expects}) -> + {reply, ok, State#state{call_expects=add_expectation(Args, Fun, Times, Expects)}}; + +handle_call(mock_stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(Request, _From, State = #state{call_stubs=Stubs,call_expects=Expects}) -> + % expectations have a higher priority + case find_expectation(Request, Expects) of + {found, {_, Fun, Time}, NewExpects} -> {reply, Fun(Request, Time), State#state{call_expects=NewExpects}}; + not_found -> % look for a stub + case find_stub(Request, Stubs) of + {found, {_, Fun}} -> {reply, Fun(Request), State}; + not_found -> + {stop, {unexpected_call, Request}, State} + end + end. + +%%-------------------------------------------------------------------- +%% @spec handle_cast(Msg, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @doc Handling cast messages +%% @end +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> + {noreply, State}. 
+ +%%-------------------------------------------------------------------- +%% @spec handle_info(Info, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @doc Handling all non call/cast messages +%% @end +%%-------------------------------------------------------------------- +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% @spec terminate(Reason, State) -> void() +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any necessary +%% cleaning up. When it returns, the gen_server terminates with Reason. +%% The return value is ignored. +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} +%% @doc Convert process state when code is changed +%% @end +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + + +add_expectation(Args, Fun, Times, Expects) -> + Expects ++ [{Args, Fun, Times}]. + +find_expectation(Request, Expects) -> + find_expectation(Request, Expects, []). 
+ +find_expectation(_Request, [], _Rest) -> + not_found; + +find_expectation(Request, [{Args, Fun, Times}|Expects], Rest) -> + MatchFun = generate_match_fun(Args), + case MatchFun(Request) of + true -> + if + Times == at_least_once -> {found, {Args, Fun, Times}, lists:reverse(Rest) ++ [{Args, Fun, Times}] ++ Expects}; + Times == 1 -> {found, {Args, Fun, Times}, lists:reverse(Rest) ++ Expects}; + true -> {found, {Args, Fun, Times}, lists:reverse(Rest) ++ [{Args, Fun, Times-1}] ++ Expects} + end; + false -> find_expectation(Request, Expects, [{Args, Fun, Times}|Rest]) + end. + +find_stub(Request, Stub) when is_tuple(Request) -> + Sym = element(1, Request), + find_stub(Sym, Stub); + +find_stub(_Sym, []) -> + not_found; + +find_stub(Sym, _Stubs) when not is_atom(Sym) -> + not_found; + +find_stub(Sym, [{Sym, Fun}|_Stubs]) -> + {found, {Sym, Fun}}; + +find_stub(Sym, [_Stub|Stubs]) -> + find_stub(Sym, Stubs). + +generate_match_fun(Args) when is_tuple(Args) -> + generate_match_fun(tuple_to_list(Args)); + +generate_match_fun(Args) when not is_list(Args) -> + generate_match_fun([Args]); + +generate_match_fun(Args) when is_list(Args) -> + Src = generate_match_fun("fun({", Args), + {ok, Tokens, _} = erl_scan:string(Src), + {ok, [Form]} = erl_parse:parse_exprs(Tokens), + {value, Fun, _} = erl_eval:expr(Form, erl_eval:new_bindings()), + Fun. + +generate_match_fun(Src, []) -> + Src ++ "}) -> true; (_) -> false end."; + +% unbound atom means you don't care about an arg +generate_match_fun(Src, [unbound|Args]) -> + if + length(Args) > 0 -> generate_match_fun(Src ++ "_,", Args); + true -> generate_match_fun(Src ++ "_", Args) + end; + +generate_match_fun(Src, [Bound|Args]) -> + Term = lists:flatten(io_lib:format("~w", [Bound])), + if + length(Args) > 0 -> generate_match_fun(Src ++ Term ++ ",", Args); + true -> generate_match_fun(Src ++ Term, Args) + end. 
diff --git a/test/partitions_test.erl b/test/partitions_test.erl new file mode 100644 index 00000000..20effd8a --- /dev/null +++ b/test/partitions_test.erl @@ -0,0 +1,121 @@ +%%% -*- erlang-indent-level:2 -*- +-module(partitions_test). +-author('brad@cloudant.com'). + +-include("../include/config.hrl"). +-include("../include/common.hrl"). +-include("../include/test.hrl"). + + +join_test() -> + TableA = [{a,1},{a,2},{a,3},{a,4},{a,5},{a,6},{a,7},{a,8}], + TableB = [{a,1},{a,2},{a,3},{a,4},{b,5},{b,6},{b,7},{b,8}], + TableC = [{a,1},{a,2},{a,3},{c,4},{b,5},{b,6},{b,7},{c,8}], + TableD = [{a,1},{a,2},{d,3},{c,4},{b,5},{b,6},{d,7},{c,8}], + TableE = [{a,1},{a,2},{d,3},{c,4},{b,5},{b,6},{e,7},{c,8}], + TableF = [{a,1},{a,2},{d,3},{c,4},{b,5},{b,6},{e,7},{f,8}], + TableG = [{a,1},{a,2},{d,3},{c,4},{b,5},{g,6},{e,7},{f,8}], + TableH = [{a,1},{h,2},{d,3},{c,4},{b,5},{g,6},{e,7},{f,8}], + ?assertEqual({ok,TableA}, partitions:join(a, TableA, [])), + ?assertEqual({ok,TableB}, partitions:join(b, TableA, [])), + ?assertEqual({ok,TableC}, partitions:join(c, TableB, [])), + ?assertEqual({ok,TableD}, partitions:join(d, TableC, [])), + ?assertEqual({ok,TableE}, partitions:join(e, TableD, [])), + ?assertEqual({ok,TableF}, partitions:join(f, TableE, [])), + ?assertEqual({ok,TableG}, partitions:join(g, TableF, [])), + ?assertEqual({ok,TableH}, partitions:join(h, TableG, [])), + ?assertEqual({error, "Too many nodes vs partitions", TableH}, + partitions:join(i, TableH, [])), + ok. + + +hints_test() -> + TableA = [{a,1},{a,2},{a,3},{a,4},{a,5},{a,6},{a,7},{a,8}], + TableB = [{a,1},{b,2},{a,3},{a,4},{a,5},{b,6},{b,7},{b,8}], + TableC = [{a,1},{a,2},{a,3},{a,4},{c,5},{c,6},{c,7},{c,8}], + TableD = [{d,1},{d,2},{d,3},{d,4},{a,5},{a,6},{a,7},{a,8}], + ?assertEqual({ok, TableB}, partitions:join(b, TableA, [2])), + ?assertEqual({ok, TableC}, partitions:join(c, TableA, [0])), + ?assertEqual({ok, TableD}, partitions:join(d, TableA, [1,2,3,4])), + ok. 
+ + +shard_name_test() -> + ?assertEqual(<<"x000000/dbname_000000">>, + partitions:shard_name(0, <<"dbname">>)), + ok. + + +%% note: fullmaps used here +diff_same_length_test() -> + OldMap = [{a,1, type},{a,2, type},{b,3, type},{b,4, type}], + NewMap = [{a,1, type},{a,2, type},{b,3, type},{c,4, type}], + ?assertEqual([{b,c,4}], partitions:diff(OldMap, NewMap)), + ok. + + +diff_dupes_test() -> + OldMap = [{'node1@node1.boorad.local',0,primary}, + {'node2@node2.boorad.local',0,partner}, + {'node3@node3.boorad.local',0,partner}, + {'node1@node1.boorad.local',182687704666362864775460604089535377456991567872, primary}, + {'node2@node2.boorad.local',182687704666362864775460604089535377456991567872, partner}, + {'node3@node3.boorad.local',182687704666362864775460604089535377456991567872, partner}, + {'node1@node1.boorad.local',365375409332725729550921208179070754913983135744, primary}, + {'node2@node2.boorad.local',365375409332725729550921208179070754913983135744, partner}, + {'node3@node3.boorad.local',365375409332725729550921208179070754913983135744, partner}, + {'node1@node1.boorad.local',548063113999088594326381812268606132370974703616, partner}, + {'node2@node2.boorad.local',548063113999088594326381812268606132370974703616, partner}, + {'node3@node3.boorad.local',548063113999088594326381812268606132370974703616, primary}, + {'node1@node1.boorad.local',730750818665451459101842416358141509827966271488, partner}, + {'node2@node2.boorad.local',730750818665451459101842416358141509827966271488, primary}, + {'node3@node3.boorad.local',730750818665451459101842416358141509827966271488, partner}, + {'node1@node1.boorad.local',913438523331814323877303020447676887284957839360, partner}, + {'node2@node2.boorad.local',913438523331814323877303020447676887284957839360, primary}, + {'node3@node3.boorad.local',913438523331814323877303020447676887284957839360, partner}, + {'node1@node1.boorad.local',1096126227998177188652763624537212264741949407232, partner}, + 
{'node2@node2.boorad.local',1096126227998177188652763624537212264741949407232, primary}, + {'node3@node3.boorad.local',1096126227998177188652763624537212264741949407232, partner}, + {'node1@node1.boorad.local',1278813932664540053428224228626747642198940975104, partner}, + {'node2@node2.boorad.local',1278813932664540053428224228626747642198940975104, partner}, + {'node3@node3.boorad.local',1278813932664540053428224228626747642198940975104, primary}], + NewMap = [{'node1@node1.boorad.local',0,primary}, + {'node2@node2.boorad.local',0,partner}, + {'node3@node3.boorad.local',0,partner}, + {'node1@node1.boorad.local',182687704666362864775460604089535377456991567872, primary}, + {'node2@node2.boorad.local',182687704666362864775460604089535377456991567872, partner}, + {'node3@node3.boorad.local',182687704666362864775460604089535377456991567872, partner}, + {'node1@node1.boorad.local',365375409332725729550921208179070754913983135744, partner}, + {'node2@node2.boorad.local',365375409332725729550921208179070754913983135744, partner}, + {'node4@node4.boorad.local',365375409332725729550921208179070754913983135744, primary}, + {'node1@node1.boorad.local',548063113999088594326381812268606132370974703616, partner}, + {'node3@node3.boorad.local',548063113999088594326381812268606132370974703616, primary}, + {'node4@node4.boorad.local',548063113999088594326381812268606132370974703616, partner}, + {'node2@node2.boorad.local',730750818665451459101842416358141509827966271488, primary}, + {'node3@node3.boorad.local',730750818665451459101842416358141509827966271488, partner}, + {'node4@node4.boorad.local',730750818665451459101842416358141509827966271488, partner}, + {'node2@node2.boorad.local',913438523331814323877303020447676887284957839360, primary}, + {'node3@node3.boorad.local',913438523331814323877303020447676887284957839360, partner}, + {'node4@node4.boorad.local',913438523331814323877303020447676887284957839360, partner}, + 
{'node1@node1.boorad.local',1096126227998177188652763624537212264741949407232, partner}, + {'node2@node2.boorad.local',1096126227998177188652763624537212264741949407232, partner}, + {'node4@node4.boorad.local',1096126227998177188652763624537212264741949407232, primary}, + {'node1@node1.boorad.local',1278813932664540053428224228626747642198940975104, partner}, + {'node3@node3.boorad.local',1278813932664540053428224228626747642198940975104, primary}, + {'node4@node4.boorad.local',1278813932664540053428224228626747642198940975104, partner}], + + Diff = [{'node3@node3.boorad.local','node4@node4.boorad.local', + 365375409332725729550921208179070754913983135744}, + {'node2@node2.boorad.local','node4@node4.boorad.local', + 548063113999088594326381812268606132370974703616}, + {'node1@node1.boorad.local','node4@node4.boorad.local', + 730750818665451459101842416358141509827966271488}, + {'node1@node1.boorad.local','node4@node4.boorad.local', + 913438523331814323877303020447676887284957839360}, + {'node3@node3.boorad.local','node4@node4.boorad.local', + 1096126227998177188652763624537212264741949407232}, + {'node2@node2.boorad.local','node4@node4.boorad.local', + 1278813932664540053428224228626747642198940975104}], + + ?assertEqual(Diff, partitions:diff(OldMap, NewMap)), + ok. diff --git a/test/replication_test.erl b/test/replication_test.erl new file mode 100644 index 00000000..095e1b44 --- /dev/null +++ b/test/replication_test.erl @@ -0,0 +1,89 @@ +%%% -*- erlang-indent-level:2 -*- +-module(replication_test). +-author('brad@cloudant.com'). + +-include("../include/config.hrl"). +-include("../include/test.hrl"). + +-include_lib("eunit/include/eunit.hrl"). + +-define(NODEA, {a, ["d", "1", "4"]}). +-define(NODEB, {b, ["e", "3", "1"]}). +-define(NODEC, {c, ["f", "1", "2"]}). +-define(NODED, {d, ["e", "1", "2"]}). +-define(NODEE, {e, ["e", "2", "2"]}). +-define(NODES, [?NODEA, ?NODEB, ?NODEC, ?NODED, ?NODEE]). 
+ +%% TODO: give this some effigy love, mock configuration up all of these +%% different ways. + +metadata_level_1_test() -> + configuration:start_link(#config{n=3,r=1,w=1,q=6, + directory=?TMP_DIR, + meta=[{datacenter,roundrobin}, + {rack, roundrobin}, + {slot, roundrobin} + ]}), + Partners = replication:partners(?NODEA, + [?NODEA, ?NODEB, ?NODEC], + configuration:get_config()), + ?assertEqual([?NODEB, ?NODEC], Partners), + configuration:stop(). + + +metadata_level_2_test() -> + configuration:start_link(#config{n=3,r=1,w=1,q=6, + directory=?TMP_DIR, + meta=[{datacenter,roundrobin}, + {rack, roundrobin}, + {slot, roundrobin} + ]}), + Partners = replication:partners(?NODEA, + ?NODES, + configuration:get_config()), + ?assertEqual([?NODED,?NODEE], Partners), + configuration:stop(). + + +no_metadata_test() -> + configuration:start_link(#config{n=2,r=1,w=1,q=6, + directory=?TMP_DIR, + meta=[]}), + Partners = replication:partners(a, + [a,b,c,d], + configuration:get_config()), + ?assertEqual([b], Partners), + configuration:stop(). + + +wrap_test() -> + configuration:start_link(#config{n=3,r=1,w=1,q=6, + directory=?TMP_DIR, + meta=[]}), + Wrap1Partners = replication:partners(c, + [a,b,c,d], + configuration:get_config()), + ?assertEqual([a,d], Wrap1Partners), + Wrap2Partners = replication:partners(d, + [a,b,c,d], + configuration:get_config()), + ?assertEqual([a,b], Wrap2Partners), + configuration:stop(). + + +self_test() -> + configuration:start_link(#config{n=3,r=1,w=1,q=6, + directory=?TMP_DIR, + meta=[]}), + Partners = replication:partners(a, [a], + configuration:get_config()), + ?assertEqual([], Partners), + configuration:stop(). + + +remove_self_test() -> + configuration:start_link( + #config{n=4,r=1,w=1,q=6, directory=?TMP_DIR, meta=[]}), + Partners = replication:partners(a, [a,b], configuration:get_config()), + ?assertEqual([b], Partners), + configuration:stop(). 
diff --git a/test/stub.erl b/test/stub.erl new file mode 100644 index 00000000..2a6173b5 --- /dev/null +++ b/test/stub.erl @@ -0,0 +1,168 @@ +%%%------------------------------------------------------------------- +%%% File: stub.erl +%%% @author Cliff Moon <> [] +%%% @copyright 2009 Cliff Moon +%%% @doc +%%% +%%% @end +%%% +%%% @since 2009-05-10 by Cliff Moon +%%%------------------------------------------------------------------- +-module(stub). +-author('cliff@powerset.com'). + +-behaviour(gen_server). + +%% API +-export([stub/3, stub/4, proxy_call/3]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-include_lib("eunit/include/eunit.hrl"). +-include("../include/common.hrl"). + +-record(state, {old_code, module, stub, times}). + +%%==================================================================== +%% API +%%==================================================================== + +stub(Module, Function, Fun) -> + stub(Module, Function, Fun, 1). + +stub(Module, Function, Fun, Times) when is_function(Fun) -> + gen_server:start({local, name(Module, Function)}, ?MODULE, [Module, Function, Fun, Times], []). + +proxy_call(_, Name, Args) -> + {Times, Reply} = gen_server:call(Name, {proxy_call, Args}), + if + Times =< 0 -> gen_server:cast(Name, stop); + true -> ok + end, + Reply. 
+ +%%==================================================================== +%% gen_server callbacks +%%==================================================================== + +%%-------------------------------------------------------------------- +%% @spec init(Args) -> {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%% @doc Initiates the server +%% @end +%%-------------------------------------------------------------------- +init([Module, Function, Fun, Times]) -> + case code:get_object_code(Module) of + {Module, Bin, Filename} -> + ?debugMsg("stubbing"), + stub_function(Module, Function, arity(Fun)), + {ok, #state{module=Module,old_code={Module,Bin,Filename},times=Times,stub=Fun}}; + error -> {stop, ?fmt("Could not get object code for module ~p", [Module])} + end. + +%%-------------------------------------------------------------------- +%% @spec +%% handle_call(Request, From, State) -> {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | +%% {stop, Reason, State} +%% @doc Handling call messages +%% @end +%%-------------------------------------------------------------------- +handle_call({proxy_call, Args}, _From, State = #state{stub=Fun, times=Times}) -> + Reply = apply(Fun, tuple_to_list(Args)), + {reply, {Times-1, Reply}, State#state{times=Times-1}}. + +%%-------------------------------------------------------------------- +%% @spec handle_cast(Msg, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @doc Handling cast messages +%% @end +%%-------------------------------------------------------------------- +handle_cast(stop, State) -> + sleep:timer(10), + {stop, normal, State}. 
+ +%%-------------------------------------------------------------------- +%% @spec handle_info(Info, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @doc Handling all non call/cast messages +%% @end +%%-------------------------------------------------------------------- +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% @spec terminate(Reason, State) -> void() +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any necessary +%% cleaning up. When it returns, the gen_server terminates with Reason. +%% The return value is ignored. +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, #state{old_code={_Module,_Bin,_Filename}}) -> + ok. + +%%-------------------------------------------------------------------- +%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} +%% @doc Convert process state when code is changed +%% @end +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- +name(Module, Function) -> + list_to_atom(lists:concat([Module, Function, "stub"])). + +stub_function(Module, Function, Arity) -> + {_, Bin, _} = code:get_object_code(Module), + {ok, {Module,[{abstract_code,{raw_abstract_v1,Forms}}]}} = beam_lib:chunks(Bin, [abstract_code]), + ?debugMsg("replacing function"), + StubbedForms = replace_function(Module, Function, Arity, Forms), + case compile:forms(StubbedForms, [binary]) of + {ok, Module, Binary} -> code:load_binary(Module, atom_to_list(Module) ++ ".erl", Binary); + Other -> Other + end. 
+ +arity(Fun) when is_function(Fun) -> + Props = erlang:fun_info(Fun), + proplists:get_value(arity, Props). + +replace_function(Module, Function, Arity, Forms) -> + replace_function(Module, Function, Arity, Forms, []). + +replace_function(_Module, _Function, _Arity, [], Acc) -> + lists:reverse(Acc); +replace_function(Module, Function, Arity, [{function, Line, Function, Arity, _Clauses}|Forms], Acc) -> + lists:reverse(Acc) ++ [{function, Line, Function, Arity, [ + {clause, + Line, + generate_variables(Arity), + [], + generate_expression(stub,proxy_call,Module,name(Module,Function),Arity)}]}] ++ Forms; +replace_function(Module, Function, Arity, [Form|Forms], Acc) -> + replace_function(Module, Function, Arity, Forms, [Form|Acc]). + +generate_variables(0) -> []; +generate_variables(Arity) -> + lists:map(fun(N) -> + {var, 1, list_to_atom(lists:concat(['Arg', N]))} + end, lists:seq(1, Arity)). + +generate_expression(M, F, Module, Name, 0) -> + [{call,1,{remote,1,{atom,1,M},{atom,1,F}}, [{atom,1,Module}, {atom,1,Name}]}]; +generate_expression(M, F, Module, Name, Arity) -> + [{call,1,{remote,1,{atom,1,M},{atom,1,F}}, [{atom,1,Module}, {atom,1,Name}, {tuple,1,lists:map(fun(N) -> + {var, 1, list_to_atom(lists:concat(['Arg', N]))} + end, lists:seq(1, Arity))}]}]. diff --git a/test/test_suite.erl b/test/test_suite.erl new file mode 100644 index 00000000..255ed5a9 --- /dev/null +++ b/test/test_suite.erl @@ -0,0 +1,10 @@ +-module(test_suite). + +-include_lib("eunit/include/eunit.hrl"). + +all_test_() -> + [{module, mem_utils_test}, + {module, membership2_test}, + {module, partitions_test}, + {module, replication_test} + ]. 
-- cgit v1.2.3 From 80f71dc1aff733a29738219f58980427d81d44fe Mon Sep 17 00:00:00 2001 From: Joe Date: Mon, 22 Feb 2010 12:23:00 -0800 Subject: clean up --- ebin/dynomite.appup | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 ebin/dynomite.appup diff --git a/ebin/dynomite.appup b/ebin/dynomite.appup deleted file mode 100644 index d6d7726b..00000000 --- a/ebin/dynomite.appup +++ /dev/null @@ -1,6 +0,0 @@ -{"0.9.0-cloudant", [{"0.9.0-cloudant", [ - {apply, {supervisor, terminate_child, [showroom_sup, dynomite_sup]}}, - {restart_application, dynomite}, - {apply, {supervisor, delete_child, [showroom_sup, dynomite_sup]}}, - {update, showroom_sup, supervisor} -]}],[{"0.9.0-cloudant",[]}]}. -- cgit v1.2.3 From 1fae46b0f019a2100935d51e7a525863b18add0d Mon Sep 17 00:00:00 2001 From: Joe Date: Mon, 22 Feb 2010 12:24:59 -0800 Subject: gitignore --- ebin/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 ebin/.gitignore diff --git a/ebin/.gitignore b/ebin/.gitignore deleted file mode 100644 index 13d94f8b..00000000 --- a/ebin/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.app -- cgit v1.2.3 From 4bef00e4f0dcb6d903a751a6a14c65118c50ca8c Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 2 Mar 2010 11:57:28 -0500 Subject: keep .app and .appup files in main source tree --- ebin/dynomite.app | 30 ++++++++++++++++++++++++++++++ ebin/dynomite.appup | 6 ++++++ 2 files changed, 36 insertions(+) create mode 100644 ebin/dynomite.app create mode 100644 ebin/dynomite.appup diff --git a/ebin/dynomite.app b/ebin/dynomite.app new file mode 100644 index 00000000..5107eced --- /dev/null +++ b/ebin/dynomite.app @@ -0,0 +1,30 @@ +%% dynomite app resource file + +{application, dynomite, + [{description, "Dynomite Clustering System"}, + {mod, {dynomite_app, []}}, + {vsn, "{{dynomite_vsn}}"}, + {modules, + [ + bootstrap_manager, + bootstrap_receiver, + cluster_ops, + configuration, + dynomite, + dynomite_app, + dynomite_couch_api, + dynomite_couch_storage, + 
dynomite_http, + dynomite_prof, + dynomite_sup, + lib_misc, + mem_utils, + membership2, + node, + partitions, + replication, + vector_clock + ]}, + {registered, [membership]}, + {applications, [kernel, stdlib, sasl, crypto, mochiweb]} + ]}. diff --git a/ebin/dynomite.appup b/ebin/dynomite.appup new file mode 100644 index 00000000..57ca056d --- /dev/null +++ b/ebin/dynomite.appup @@ -0,0 +1,6 @@ +{"{{dynomite_vsn}}", [{"{{dynomite_prev_vsn}}", [ + {apply, {supervisor, terminate_child, [showroom_sup, dynomite_sup]}}, + {restart_application, dynomite}, + {apply, {supervisor, delete_child, [showroom_sup, dynomite_sup]}}, + {update, showroom_sup, supervisor} +]}],[{"{{dynomite_prev_vsn}}",[]}]}. -- cgit v1.2.3 From b6bc69c0e47dfb968f3b0b1ab798841e3dfa9997 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 2 Mar 2010 12:11:49 -0500 Subject: un-template app version numbers --- ebin/dynomite.app | 2 +- ebin/dynomite.appup | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ebin/dynomite.app b/ebin/dynomite.app index 5107eced..b339496a 100644 --- a/ebin/dynomite.app +++ b/ebin/dynomite.app @@ -3,7 +3,7 @@ {application, dynomite, [{description, "Dynomite Clustering System"}, {mod, {dynomite_app, []}}, - {vsn, "{{dynomite_vsn}}"}, + {vsn, "0.9.5-cloudant"}, {modules, [ bootstrap_manager, diff --git a/ebin/dynomite.appup b/ebin/dynomite.appup index 57ca056d..c88a78bd 100644 --- a/ebin/dynomite.appup +++ b/ebin/dynomite.appup @@ -1,6 +1,6 @@ -{"{{dynomite_vsn}}", [{"{{dynomite_prev_vsn}}", [ +{"0.9.5-cloudant", [{"0.9.4-cloudant", [ {apply, {supervisor, terminate_child, [showroom_sup, dynomite_sup]}}, {restart_application, dynomite}, {apply, {supervisor, delete_child, [showroom_sup, dynomite_sup]}}, {update, showroom_sup, supervisor} -]}],[{"{{dynomite_prev_vsn}}",[]}]}. +]}],[{"0.9.4-cloudant",[]}]}. 
-- cgit v1.2.3 From a236c1b179eaf85c5a8eca8e5d2ea022e0889585 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 8 Mar 2010 22:36:06 -0500 Subject: minor path fix --- test/cluster_ops_test.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cluster_ops_test.erl b/test/cluster_ops_test.erl index 1c692dcf..7bc8fdeb 100644 --- a/test/cluster_ops_test.erl +++ b/test/cluster_ops_test.erl @@ -1,6 +1,6 @@ -module(cluster_ops_test). --include("../../couchdb/couch_db.hrl"). +-include("../../couch/src/couch_db.hrl"). -include_lib("eunit/include/eunit.hrl"). -- cgit v1.2.3 From 5f4714f15226719c991bba8116d1f09b58dcf264 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 9 Mar 2010 11:23:12 -0500 Subject: include couch and showroom in eunit code path, BugzID 9844 --- src/configuration.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/configuration.erl b/src/configuration.erl index 1caca5ec..2d5582da 100644 --- a/src/configuration.erl +++ b/src/configuration.erl @@ -36,6 +36,7 @@ %% @doc starts couch_config gen_server if it's not already started start_link(DynomiteConfig) -> + couch_config_event:start_link(), couch_config:start_link([]), set_config(DynomiteConfig). 
-- cgit v1.2.3 From d645998e35762b78fc845afdda68236fb7c28b26 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 9 Mar 2010 11:36:19 -0500 Subject: removing old Makefiles --- src/Makefile | 11 ----------- test/Emakefile | 4 ---- test/Makefile | 12 ------------ 3 files changed, 27 deletions(-) delete mode 100644 src/Makefile delete mode 100644 test/Emakefile delete mode 100644 test/Makefile diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 32aa1872..00000000 --- a/src/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -include ../support/include.mk - -all: $(EBIN_FILES_NO_DOCS) - -doc: $(EBIN_FILES) - -debug: - $(MAKE) DEBUG=-DDEBUG - -clean: - rm -rf $(EBIN_FILES) diff --git a/test/Emakefile b/test/Emakefile deleted file mode 100644 index d05e4d94..00000000 --- a/test/Emakefile +++ /dev/null @@ -1,4 +0,0 @@ -{"*", [warn_obsolete_guard, warn_unused_import, - warn_shadow_vars, warn_export_vars, debug_info, - {i, "../include"}, - {outdir, "../tests_ebin"}]}. diff --git a/test/Makefile b/test/Makefile deleted file mode 100644 index 45998c6e..00000000 --- a/test/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -include ../support/include.mk - -all: $(EBIN_FILES_NO_DOCS) - -doc: $(EBIN_FILES) - -debug: - $(MAKE) DEBUG=-DDEBUG - -clean: - rm -rf $(EBIN_FILES) - rm -rf ../tests_ebin \ No newline at end of file -- cgit v1.2.3 From c9f601a5d2950ca47eefeb3336993cd65c77456a Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 9 Mar 2010 15:39:38 -0500 Subject: trust VM to start our dependent apps --- src/dynomite_app.erl | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl index 6ee0b978..2243af4c 100644 --- a/src/dynomite_app.erl +++ b/src/dynomite_app.erl @@ -20,7 +20,6 @@ %% Application callbacks -export([start/2, stop/1]). --define(APPS, [crypto,sasl,mochiweb]). -define(DEFAULT_CLUSTER_URL, "http://localhost:5984/_cluster"). 
%%==================================================================== @@ -49,9 +48,6 @@ start(_Type, _StartArgs) -> Args end, - % start required apps - State = start_apps(), - % start dynomite supervisor ok = start_node(), case dynomite_sup:start_link(PdStartArgs) of @@ -79,24 +75,6 @@ stop({_, Sup}) -> %% Internal functions %%==================================================================== -start_apps() -> - Fun = fun(App, AccIn) -> - Result = case application:start(App) of - ok -> - App; - {error, {already_started, App}} -> - nil; - _Error -> - exit(app_start_fail) - end, - if - Result =/= nil -> [App|AccIn]; - true -> AccIn - end - end, - lists:foldl(Fun, [], ?APPS). - - %% @spec start_node() -> ok | {error, Reason} %% @doc start this node (join to dist. erlang cluster) start_node() -> -- cgit v1.2.3 From c539a94b4adc5b20ee89295b062759528ca7dfcc Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 9 Mar 2010 15:59:01 -0500 Subject: bugfix from bbdd88 --- src/dynomite_app.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl index 2243af4c..b31b2e57 100644 --- a/src/dynomite_app.erl +++ b/src/dynomite_app.erl @@ -52,7 +52,7 @@ start(_Type, _StartArgs) -> ok = start_node(), case dynomite_sup:start_link(PdStartArgs) of {ok, Supervisor} -> - {ok, Supervisor, State}; + {ok, Supervisor}; Error -> Error end. -- cgit v1.2.3 From cddfba8e31fa09b2a4ac9256a1a413208193ea06 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 18 Mar 2010 19:31:13 -0400 Subject: faster grouping by NodePart --- src/cluster_ops.erl | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl index bd2ad83d..5bcb6bfa 100644 --- a/src/cluster_ops.erl +++ b/src/cluster_ops.erl @@ -242,32 +242,23 @@ strip_ok(Val) -> Val. 
%% but is a list so we can use ?PMAP with the results %% @end get_dist_tasks(KeyFun, SeqsKVPairs) -> - %% loop thru SeqsKVPairs adding node/part to each - NPSV = lists:flatmap( - fun({Seq,KVPair}) -> - NodeParts = membership2:nodeparts_for_key(KeyFun(KVPair)), - lists:map( - fun(NodePart) -> - {NodePart, {Seq, KVPair}} - end, NodeParts) - end, SeqsKVPairs), - nodepart_values_list(NPSV). - - -%% pile up the List by NodePart (like a dict) -nodepart_values_list(List) -> - DistTasks = - lists:foldl( - fun(NodePart, AccIn) -> - Values = proplists:get_all_values(NodePart, List), - case length(Values) of - 0 -> AccIn; - _ -> [{NodePart, Values} | AccIn] - end - end, [], membership2:all_nodes_parts(true)), - % ?LOG_DEBUG("~nDistTasks: ~p~n", [DistTasks]), - DistTasks. - + NPSV = lists:flatmap(fun({_,KVPair} = Elem) -> + [{NP, Elem} || NP <- membership2:nodeparts_for_key(KeyFun(KVPair))] + end, SeqsKVPairs), + group_by_key(NPSV). + +group_by_key([]) -> + []; +group_by_key(List) -> + [{FirstK,FirstV} | Rest] = lists:keysort(1,List), + Acc0 = {FirstK, [FirstV], []}, + FoldFun = fun({K,V}, {K,Vs,Acc}) -> + {K, [V|Vs], Acc}; + ({NewKey,V}, {OldKey,Vs,Acc}) -> + {NewKey, [V], [{OldKey,Vs}|Acc]} + end, + {LastK, LastVs, Acc} = lists:foldl(FoldFun, Acc0, Rest), + [{LastK, LastVs} | Acc]. get_const(Access) -> get_const(Access, unpack_config(configuration:get_config())). 
-- cgit v1.2.3 From 5fa64c1413f28b020ca8988b7db222151a69c0ca Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 18 Mar 2010 20:05:41 -0400 Subject: faster lookups for cluster constants --- src/cluster_ops.erl | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl index 5bcb6bfa..3889956f 100644 --- a/src/cluster_ops.erl +++ b/src/cluster_ops.erl @@ -31,9 +31,8 @@ %% %% This fun uses quorum constants from config key_lookup(Key, {M,F,A}, Access) -> - {N,_R,_W} = Consts = unpack_config(configuration:get_config()), - Const = get_const(Access, Consts), - key_lookup(Key, {M,F,A}, Access, Const, N). + N = list_to_integer(couch_config:get("cluster", "n", "3")), + key_lookup(Key, {M,F,A}, Access, get_const(Access), N). %% @doc Get to the proper shard on N nodes by key lookup @@ -78,8 +77,7 @@ all_parts({M,F,A}, Access, AndPartners, ResolveFun) -> %% %% This fun uses quorum constants from config some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access) -> - Const = get_const(Access), - some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access, Const). + some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access, get_const(Access)). %% @doc Do op on some shards, depending on list of keys sent in. @@ -215,10 +213,6 @@ error_message(Good, Bad, N, T, Access) -> [{error, Msg}, {good, Good}, {bad, Bad}]. -unpack_config(#config{n=N,r=R,w=W}) -> - {N, R, W}. - - pcall(MapFun, Servers, Const) -> Replies = lib_misc:pmap(MapFun, Servers, Const), lists:partition(fun valid/1, Replies). @@ -260,14 +254,11 @@ group_by_key(List) -> {LastK, LastVs, Acc} = lists:foldl(FoldFun, Acc0, Rest), [{LastK, LastVs} | Acc]. -get_const(Access) -> - get_const(Access, unpack_config(configuration:get_config())). - - -get_const(Access, {_N,R,W}) -> - case Access of - r -> R; - w -> W; - r1 -> 1; - Other -> throw({bad_access_term, Other}) - end. 
+get_const(r) -> + list_to_integer(couch_config:get("cluster", "r", "2")); +get_const(w) -> + list_to_integer(couch_config:get("cluster", "w", "2")); +get_const(r1) -> + 1; +get_const(Other) -> + throw({bad_access_term, Other}). -- cgit v1.2.3 From cb8ce418615ad18041ec5ae9ffff88414fc360db Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 15 Apr 2010 15:03:45 -0400 Subject: quickfix so we do N single-doc requests instead of R/W. BugzID 10021 --- src/cluster_ops.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl index 3889956f..72bba92f 100644 --- a/src/cluster_ops.erl +++ b/src/cluster_ops.erl @@ -52,7 +52,7 @@ key_lookup(Key, {M,F,A}, Access, Const, N) -> {error, Class, Exception} end end, - {GoodReplies, Bad} = pcall(MapFun, NodeParts, Const), + {GoodReplies, Bad} = pcall(MapFun, NodeParts, N), if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, Good = lists:map(fun strip_ok/1, GoodReplies), final_key_lookup(Good, Bad, N, Const, ResolveFun, NotFoundFun, Access). -- cgit v1.2.3 From d9f7834a7618f1c8e7334e98221127519fdc4ecb Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 21 Apr 2010 14:16:56 -0400 Subject: use keysearch BIF to traverse ejson. 
BugzID 10064 --- src/configuration.erl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/configuration.erl b/src/configuration.erl index 2d5582da..db44e83c 100644 --- a/src/configuration.erl +++ b/src/configuration.erl @@ -69,14 +69,14 @@ stop() -> %% @doc turn a couch config proplist into a dynomite configuration record couch2dynomite_config(ClusterConfig, Directory) -> - Q = ?l2i(proplists:get_value("q", ClusterConfig, "3")), - R = ?l2i(proplists:get_value("r", ClusterConfig, "2")), - W = ?l2i(proplists:get_value("w", ClusterConfig, "1")), - N = ?l2i(proplists:get_value("n", ClusterConfig, "4")), + Q = ?l2i(couch_util:get_value("q", ClusterConfig, "3")), + R = ?l2i(couch_util:get_value("r", ClusterConfig, "2")), + W = ?l2i(couch_util:get_value("w", ClusterConfig, "1")), + N = ?l2i(couch_util:get_value("n", ClusterConfig, "4")), %% use couch's database_dir here, to avoid /tmp/data not existing - Webport = ?l2i(proplists:get_value("webport", ClusterConfig, "8080")), - Meta = proplists:get_value("meta", ClusterConfig, []), - StorageMod = proplists:get_value("storage_mod", ClusterConfig, []), + Webport = ?l2i(couch_util:get_value("webport", ClusterConfig, "8080")), + Meta = couch_util:get_value("meta", ClusterConfig, []), + StorageMod = couch_util:get_value("storage_mod", ClusterConfig, []), #config{q=Q, r=R, w=W, n=N, directory=Directory, web_port=Webport, meta=Meta, storage_mod=StorageMod}. -- cgit v1.2.3 From 651bf4999d89c7375498684e2211bec549e61b29 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 8 Mar 2010 08:21:34 -0800 Subject: new skeleton membership module for mem3 --- include/common.hrl | 6 ++++++ src/dynomite_sup.erl | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/common.hrl b/include/common.hrl index 2299950d..608a23ad 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -39,3 +39,9 @@ version, fullmap }). 
+ +%% version 3 of membership state +-record(mem, {header=3, + node, + nodes, + clock}). diff --git a/src/dynomite_sup.erl b/src/dynomite_sup.erl index f8136934..1bf0de22 100644 --- a/src/dynomite_sup.erl +++ b/src/dynomite_sup.erl @@ -51,11 +51,11 @@ init(Args) -> Node = node(), Nodes = running_nodes() ++ [node()], Membership = {membership, - {membership2, start_link, [Node, Nodes, Args]}, + {mem3, start_link, [Node, Nodes, Args]}, permanent, 1000, worker, - [membership2]}, + [mem3]}, MemEventMgr = {mem_event_manager, {gen_event, start_link, [{local, membership_events}]}, permanent, -- cgit v1.2.3 From cb079ba3ee4496b97e73e9fa08525878b88de413 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 9 Mar 2010 09:56:30 -0800 Subject: creating fullmap based on node list --- src/partitions.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/partitions.erl b/src/partitions.erl index 942968e1..27d2a5a1 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -12,8 +12,8 @@ -author('cliff@powerset.com'). %% API --export([partition_range/1, create_partitions/3, map_partitions/2, - diff/2, pp_diff/1, int_to_partition/2, +-export([partition_range/1, create_partitions/2, create_partitions/3, + diff/2, pp_diff/1, int_to_partition/2, map_partitions/2, join/3, leave/3, hash/1, hash_to_partition/2, item_to_nodepart/1, shard_name/2, hash_to_hex/2]). @@ -33,6 +33,11 @@ partition_range(Q) -> trunc( ?RINGTOP / math:pow(2,Q) ). % SHA-1 space / 2^Q + +create_partitions(Q, Node) -> + create_partitions(Q, Node, []). + + create_partitions(Q, Node, _Nodes) -> fresh(trunc(math:pow(2,Q)), Node). % map_partitions(Table, Nodes). 
-- cgit v1.2.3 From e522c645e3d34403ae64193db3422ad8c574d256 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 9 Mar 2010 11:54:53 -0800 Subject: moving mem3 into rebar position --- src/mem3.erl | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 src/mem3.erl diff --git a/src/mem3.erl b/src/mem3.erl new file mode 100644 index 00000000..6f53ed23 --- /dev/null +++ b/src/mem3.erl @@ -0,0 +1,204 @@ + +-module(mem3). +-author('brad@cloudant.com'). + +-behaviour(gen_server). + +%% API +-export([start_link/2, start_link/3, stop/0, stop/1]). +-export([clock/0, state/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% includes +-include("../include/config.hrl"). +-include("../include/common.hrl"). + + +%%==================================================================== +%% API +%%==================================================================== + +start_link(Node, ErlNodes) -> + start_link(Node, ErlNodes, []). + + +start_link(Node, ErlNodes, Args) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [Node, ErlNodes, Args], []). + + +stop() -> + stop(?MODULE). + + +stop(Server) -> + gen_server:cast(Server, stop). + + +clock() -> + gen_server:call(?MODULE, clock). + + +state() -> + gen_server:call(?MODULE, state). + + +%%==================================================================== +%% gen_server callbacks +%%==================================================================== + +%% start up membership server +init([Node, Nodes, Args]) -> + process_flag(trap_exit,true), + showroom_log:message(info, "membership: membership server starting...", []), + net_kernel:monitor_nodes(true), + Options = lists:flatten(Args), + Config = configuration:get_config(), + OldState = read_latest_state_file(Config), + State = handle_init(Node, Nodes, Options, OldState, Config), + {ok, State}. 
+ + +%% new node joining to this node +handle_call({join, _JoiningNode, _Options}, _From, State) -> + {reply, ok, State}; + +%% clock +handle_call(clock, _From, State = #mem{clock=Clock}) -> + {reply, Clock, State}; + +%% state +handle_call(state, _From, State) -> + {reply, State, State}; + +%% ignored call +handle_call(Msg, _From, State) -> + showroom_log:message(info, "membership: ignored call: ~p", [Msg]), + {reply, ignored, State}. + + +%% stop +handle_cast(stop, State) -> + {stop, normal, State}; + +%% ignored cast +handle_cast(Msg, State) -> + showroom_log:message(info, "membership: ignored cast: ~p", [Msg]), + {noreply, State}. + + +%% @doc handle nodedown messages because we have +%% net_kernel:monitor_nodes(true) +handle_info({nodedown, Node}, State) -> + showroom_log:message(alert, "membership: nodedown from ~p", [Node]), + {noreply, State}; + +%% @doc handle nodeup messages because we have +%% net_kernel:monitor_nodes(true) +handle_info({nodeup, Node}, State) -> + showroom_log:message(alert, "membership: nodeup Node: ~p", [Node]), + {noreply, State}; + +%% ignored info +handle_info(Info, State) -> + showroom_log:message(info, "membership: ignored info: ~p", [Info]), + {noreply, State}. + + +% terminate +terminate(_Reason, _State) -> + ok. + + +% ignored code change +code_change(OldVsn, State, _Extra) -> + io:format("Unknown Old Version!~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), + {ok, State}. + + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +% we could be: +% 1. starting fresh node into a fresh cluster (we're one of first nodes) +% 2. starting fresh node into an existing cluster (need to join) +% 3. rejoining a cluster after some downtime +% 4. 
replacing a node in an existing cluster + +handle_init(Node, [], nil, Options, Config) -> + % no other erlang nodes, no old state + Hints = proplists:get_value(hints, Options), + Map = create_map(Config, [{Node, Hints}]), + ?debugFmt("~nmap: ~p~n", [Map]); + +handle_init(_Node, [], _OldState, _Options, _Config) -> + % no other erlang nodes, old state + % network partition? + ok; + +handle_init(_Node, _ErlNodes, nil, _Options, _Config) -> + % other erlang nodes, no old state + ok; + +handle_init(_Node, _ErlNodes, _OldState, _Options, _Config) -> + % other erlang nodes, old state + % network partition? + ok. + + +find_latest_state_filename(Config) -> + ?debugFmt("~nConfig: ~p~n", [Config]), + Dir = Config#config.directory, + case file:list_dir(Dir) of + {ok, Filenames} -> + Timestamps = [list_to_integer(TS) || {"state", TS} <- + [list_to_tuple(string:tokens(FN, ".")) || FN <- Filenames]], + SortedTimestamps = lists:reverse(lists:sort(Timestamps)), + case SortedTimestamps of + [Latest | _] -> + {ok, Dir ++ "/state." ++ integer_to_list(Latest)}; + _ -> + throw({error, not_found}) + end; + {error, Reason} -> + throw({error, Reason}) + end. + + +read_latest_state_file(Config) -> + try + {ok, File} = find_latest_state_filename(Config), + case file:consult(File) of + {ok, #mem{}=State} -> State; + _Else -> throw({error, bad_mem_state_file}) + end + catch + _:Error -> + showroom_log:message(info, "membership: ~p", [Error]), + nil + end. + + +%% @doc given Config and a list of Nodes, construct a Fullmap +create_map(#config{q=Q}, Nodes) -> + [{FirstNode,_}|_] = Nodes, + Pmap = lists:foldl(fun({Node, Hints}, Map) -> + partitions:join(Node, Map, Hints) + end, partitions:create(Q, FirstNode), Nodes), + make_fullmap(Pmap). + + +%% @doc construct a table with all partitions, with the primary node and all +%% replication partner nodes as well. 
+make_fullmap(PMap) -> + {Nodes, _Parts} = lists:unzip(PMap), + NodeParts = lists:flatmap( + fun({Node,Part}) -> + Partners = replication:partners(Node, lists:usort(Nodes)), + PartnerList = [{Partner, Part, partner} || Partner <- Partners], + [{Node, Part, primary} | PartnerList] + end, PMap), + NodeParts. -- cgit v1.2.3 From 8429ee374325a6a2c9779c5001c143ce4b35b1c6 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 15 Mar 2010 16:11:33 -0400 Subject: more work on mem3 init, handling different types of joins, requiring more human-intervention, reworking startargs to strip out most everything --- ebin/dynomite.app | 1 + include/common.hrl | 5 ++- src/dynomite_app.erl | 10 +----- src/dynomite_sup.erl | 39 ++++---------------- src/mem3.erl | 100 ++++++++++++++++++++++++++++++++------------------- test/mem3_test.erl | 38 ++++++++++++++++++++ 6 files changed, 114 insertions(+), 79 deletions(-) create mode 100644 test/mem3_test.erl diff --git a/ebin/dynomite.app b/ebin/dynomite.app index b339496a..e6e71af2 100644 --- a/ebin/dynomite.app +++ b/ebin/dynomite.app @@ -18,6 +18,7 @@ dynomite_prof, dynomite_sup, lib_misc, + mem3, mem_utils, membership2, node, diff --git a/include/common.hrl b/include/common.hrl index 608a23ad..6d92d9fa 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -44,4 +44,7 @@ -record(mem, {header=3, node, nodes, - clock}). + clock, + ets, + test=false + }). diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl index b31b2e57..417f4c76 100644 --- a/src/dynomite_app.erl +++ b/src/dynomite_app.erl @@ -40,17 +40,9 @@ %% @doc start required apps, join cluster, start dynomite supervisor start(_Type, _StartArgs) -> - % get process_dict hack for startargs (i.e. 
not from .app file) - PdStartArgs = case erase(startargs) of - undefined -> - []; - Args -> - Args - end, - % start dynomite supervisor ok = start_node(), - case dynomite_sup:start_link(PdStartArgs) of + case dynomite_sup:start_link() of {ok, Supervisor} -> {ok, Supervisor}; Error -> diff --git a/src/dynomite_sup.erl b/src/dynomite_sup.erl index 1bf0de22..b60824ac 100644 --- a/src/dynomite_sup.erl +++ b/src/dynomite_sup.erl @@ -1,20 +1,10 @@ -%%%------------------------------------------------------------------- -%%% File: dynomite_sup.erl -%%% @author Cliff Moon [] -%%% @copyright 2008 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2008-06-27 by Cliff Moon -%%%------------------------------------------------------------------- -module(dynomite_sup). --author('cliff@powerset.com'). +-author('brad@cloudant.com'). -behaviour(supervisor). %% API --export([start_link/1]). +-export([start_link/0]). %% Supervisor callbacks -export([init/1]). @@ -31,8 +21,8 @@ %% @doc Starts the supervisor %% @end %%-------------------------------------------------------------------- -start_link(Hints) -> - supervisor:start_link(?MODULE, [Hints]). +start_link() -> + supervisor:start_link(?MODULE, []). %%==================================================================== %% Supervisor callbacks @@ -47,11 +37,9 @@ start_link(Hints) -> %% specifications. %% @end %%-------------------------------------------------------------------- -init(Args) -> - Node = node(), - Nodes = running_nodes() ++ [node()], +init(_Args) -> Membership = {membership, - {mem3, start_link, [Node, Nodes, Args]}, + {mem3, start_link, []}, permanent, 1000, worker, @@ -68,18 +56,3 @@ init(Args) -> %%==================================================================== %% Internal functions %%==================================================================== - -%% @doc get a list of running nodes visible to this local node -running_nodes() -> - [Node || Node <- nodes([this,visible]), running(Node)]. 
- -%% @doc monitor the membership server on Node from here -running(Node) -> - Ref = erlang:monitor(process, {membership, Node}), - R = receive - {'DOWN', Ref, _, _, _} -> false - after 1 -> - true - end, - erlang:demonitor(Ref), - R. diff --git a/src/mem3.erl b/src/mem3.erl index 6f53ed23..1018af29 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -5,8 +5,8 @@ -behaviour(gen_server). %% API --export([start_link/2, start_link/3, stop/0, stop/1]). --export([clock/0, state/0]). +-export([start_link/0, start_link/1, stop/0, stop/1]). +-export([join/2, clock/0, state/0]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -21,12 +21,12 @@ %% API %%==================================================================== -start_link(Node, ErlNodes) -> - start_link(Node, ErlNodes, []). +start_link() -> + start_link([]). -start_link(Node, ErlNodes, Args) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [Node, ErlNodes, Args], []). +start_link(Args) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, Args, []). stop() -> @@ -37,6 +37,10 @@ stop(Server) -> gen_server:cast(Server, stop). +join(JoinType, Nodes) -> + gen_server:call(?MODULE, {join, JoinType, Nodes}). + + clock() -> gen_server:call(?MODULE, clock). @@ -50,20 +54,23 @@ state() -> %%==================================================================== %% start up membership server -init([Node, Nodes, Args]) -> +init(Args) -> process_flag(trap_exit,true), - showroom_log:message(info, "membership: membership server starting...", []), - net_kernel:monitor_nodes(true), - Options = lists:flatten(Args), Config = configuration:get_config(), - OldState = read_latest_state_file(Config), - State = handle_init(Node, Nodes, Options, OldState, Config), - {ok, State}. + OldState = case Args of + test -> nil; + _ -> read_latest_state_file(Config) + end, + State = handle_init(OldState), + {ok, State#mem{test=(Args == test)}}. 
+ %% new node joining to this node -handle_call({join, _JoiningNode, _Options}, _From, State) -> - {reply, ok, State}; +handle_call({join, JoinType, ExtNodes}, _From, State) -> + Config = configuration:get_config(), + Reply = handle_join(JoinType, ExtNodes, State, Config), + {reply, Reply, State}; %% clock handle_call(clock, _From, State = #mem{clock=Clock}) -> @@ -114,7 +121,7 @@ terminate(_Reason, _State) -> % ignored code change code_change(OldVsn, State, _Extra) -> - io:format("Unknown Old Version!~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), + io:format("Unknown Old Version~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), {ok, State}. @@ -122,35 +129,50 @@ code_change(OldVsn, State, _Extra) -> %%% Internal functions %%-------------------------------------------------------------------- -% we could be: -% 1. starting fresh node into a fresh cluster (we're one of first nodes) -% 2. starting fresh node into an existing cluster (need to join) -% 3. rejoining a cluster after some downtime +% we could be automatically: +% 1. rejoining a cluster after some downtime +% +% we could be manually: +% 2. beginning a cluster with only this node +% 3. joining a cluster as a new node % 4. replacing a node in an existing cluster -handle_init(Node, [], nil, Options, Config) -> - % no other erlang nodes, no old state - Hints = proplists:get_value(hints, Options), - Map = create_map(Config, [{Node, Hints}]), - ?debugFmt("~nmap: ~p~n", [Map]); - -handle_init(_Node, [], _OldState, _Options, _Config) -> - % no other erlang nodes, old state - % network partition? 
+handle_init(nil) -> + showroom_log:message(info, "membership: membership server starting...", []), + net_kernel:monitor_nodes(true), + Table = init_ets_table(), + Node = node(), + Nodes = [{Node, []}], + Clock = vector_clock:create(Node), + #mem{node=Node, nodes=Nodes, clock=Clock, ets=Table}; + +handle_init(_OldState) -> + ?debugHere, + % there's an old state, let's try to rejoin automatically + % TODO implement me + Table = init_ets_table(), + #mem{ets=Table}. + + +%% handle join activities +handle_join(first, ExtNodes, State, Config) -> + Map = create_map(Config, ExtNodes), + ?debugFmt("~nmap: ~p~n", [Map]), + State#mem{}; + +handle_join(new, _ExtNodes, _State, _Config) -> ok; -handle_init(_Node, _ErlNodes, nil, _Options, _Config) -> - % other erlang nodes, no old state +handle_join(replace, [_OldNode | _], _State, _Config) -> ok; -handle_init(_Node, _ErlNodes, _OldState, _Options, _Config) -> - % other erlang nodes, old state - % network partition? - ok. +handle_join(JoinType, _, _, _) -> + showroom_log:message(info, "membership: unknown join type: ~p", [JoinType]), + {error, {unknown_join_type, JoinType}}. +%% @doc find the latest state file on disk find_latest_state_filename(Config) -> - ?debugFmt("~nConfig: ~p~n", [Config]), Dir = Config#config.directory, case file:list_dir(Dir) of {ok, Filenames} -> @@ -202,3 +224,9 @@ make_fullmap(PMap) -> [{Node, Part, primary} | PartnerList] end, PMap), NodeParts. + + +init_ets_table() -> + Table = list_to_atom(lists:concat(["mem_", atom_to_list(node())])), + ets:new(Table, [public, set, named_table]), + Table. diff --git a/test/mem3_test.erl b/test/mem3_test.erl new file mode 100644 index 00000000..6286936e --- /dev/null +++ b/test/mem3_test.erl @@ -0,0 +1,38 @@ +-module(mem3_test). + +-include("../include/common.hrl"). +-include_lib("eunit/include/eunit.hrl"). 
+ +%% TEST SETUP + +all_tests_test_() -> + {"membership3 tests", + [ + {setup, + fun test_setup/0, + fun test_teardown/1, + fun(Pid) -> + {with, Pid, + [ + fun init/1 + ]} + end} + ] + }. + + +test_setup() -> + {ok, Pid} = mem3:start_link(test), + Pid. + + +test_teardown(Pid) -> + exit(Pid, shutdown). + + +%% TESTS + +init(_Pid) -> + State = #mem{test=Test} = mem3:state(), + ?debugFmt("~nState: ~p~n", [State]), + ?assertEqual(true, Test). -- cgit v1.2.3 From 78b2e92c8c2bc22c17a7670437dcd6852768690d Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Thu, 18 Mar 2010 23:56:13 -0400 Subject: some docs, and handling the joining of the first node (which is given a list of nodes for the cluster) --- src/mem3.erl | 52 ++++++++++++++++++++++++++++++++++++---------------- test/mem3_test.erl | 12 +++++++++--- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 1018af29..b59a1d60 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -1,4 +1,18 @@ - +%%% membership module +%%% +%%% State of the gen_server is a #mem record +%%% +%%% Nodes and Gossip are the same thing, and are a list of three-tuples like: +%%% +%%% [ {Pos,NodeName,Options} | _ ] +%%% +%%% Position is 1-based incrementing in order of node joining +%%% +%%% Options is a proplist, with [{hints, [Part1|_]}] denoting that the node +%%% is responsible for the extra partitions too. +%%% +%%% TODO: dialyzer type specs +%%% -module(mem3). -author('brad@cloudant.com'). @@ -57,10 +71,7 @@ state() -> init(Args) -> process_flag(trap_exit,true), Config = configuration:get_config(), - OldState = case Args of - test -> nil; - _ -> read_latest_state_file(Config) - end, + OldState = read_latest_state_file(Args, Config), State = handle_init(OldState), {ok, State#mem{test=(Args == test)}}. 
@@ -69,8 +80,8 @@ init(Args) -> %% new node joining to this node handle_call({join, JoinType, ExtNodes}, _From, State) -> Config = configuration:get_config(), - Reply = handle_join(JoinType, ExtNodes, State, Config), - {reply, Reply, State}; + NewState = handle_join(JoinType, ExtNodes, State, Config), + {reply, ok, NewState}; %% clock handle_call(clock, _From, State = #mem{clock=Clock}) -> @@ -142,23 +153,26 @@ handle_init(nil) -> net_kernel:monitor_nodes(true), Table = init_ets_table(), Node = node(), - Nodes = [{Node, []}], + Nodes = [{0, Node, []}], Clock = vector_clock:create(Node), #mem{node=Node, nodes=Nodes, clock=Clock, ets=Table}; handle_init(_OldState) -> ?debugHere, % there's an old state, let's try to rejoin automatically + % but only if we can compare our old state to all other + % available nodes and get a match... otherwise get a human involved % TODO implement me Table = init_ets_table(), #mem{ets=Table}. -%% handle join activities -handle_join(first, ExtNodes, State, Config) -> +%% handle join activities, return NewState +handle_join(first, ExtNodes, #mem{node=Node, clock=Clock} = State, Config) -> Map = create_map(Config, ExtNodes), ?debugFmt("~nmap: ~p~n", [Map]), - State#mem{}; + NewClock = vector_clock:increment(Node, Clock), + State#mem{nodes=ExtNodes, clock=NewClock}; handle_join(new, _ExtNodes, _State, _Config) -> ok; @@ -190,7 +204,9 @@ find_latest_state_filename(Config) -> end. 
-read_latest_state_file(Config) -> +read_latest_state_file(test, _) -> + nil; +read_latest_state_file(_, Config) -> try {ok, File} = find_latest_state_filename(Config), case file:consult(File) of @@ -206,10 +222,14 @@ read_latest_state_file(Config) -> %% @doc given Config and a list of Nodes, construct a Fullmap create_map(#config{q=Q}, Nodes) -> - [{FirstNode,_}|_] = Nodes, - Pmap = lists:foldl(fun({Node, Hints}, Map) -> - partitions:join(Node, Map, Hints) - end, partitions:create(Q, FirstNode), Nodes), + [{_,FirstNode,_}|_] = Nodes, + Fun = fun({_Pos, Node, Options}, Map) -> + Hints = proplists:get_value(hints, Options), + {ok, NewMap} = partitions:join(Node, Map, Hints), + NewMap + end, + Acc0 = partitions:create_partitions(Q, FirstNode), + Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, Nodes)), make_fullmap(Pmap). diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 6286936e..ae6dcaa5 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -14,7 +14,8 @@ all_tests_test_() -> fun(Pid) -> {with, Pid, [ - fun init/1 + fun init/1, + fun join_first/1 ]} end} ] @@ -33,6 +34,11 @@ test_teardown(Pid) -> %% TESTS init(_Pid) -> - State = #mem{test=Test} = mem3:state(), - ?debugFmt("~nState: ~p~n", [State]), + #mem{test=Test} = mem3:state(), ?assertEqual(true, Test). + + +join_first(_Pid) -> + mem3:join(first, [{1, a, []}, {2, b, []}]), + ?debugFmt("~nState: ~p~n", [mem3:state()]), + ok. 
-- cgit v1.2.3 From b32640350ee550c105258d47629b0b8c3e8775d2 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 24 Mar 2010 20:49:47 -0400 Subject: store Args in mem State, instead of just 'test', and allow for an Args-supplied Config (usually for testing only) --- include/common.hrl | 2 +- src/mem3.erl | 33 ++++++++++++++++++++++----------- test/mem3_test.erl | 8 ++++++-- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/common.hrl b/include/common.hrl index 6d92d9fa..b5d4d7ec 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -46,5 +46,5 @@ nodes, clock, ets, - test=false + args }). diff --git a/src/mem3.erl b/src/mem3.erl index b59a1d60..1bf27bc4 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -70,16 +70,17 @@ state() -> %% start up membership server init(Args) -> process_flag(trap_exit,true), - Config = configuration:get_config(), - OldState = read_latest_state_file(Args, Config), + Config = get_config(Args), + Test = proplists:get_value(test, Args), + OldState = read_latest_state_file(Test, Config), State = handle_init(OldState), - {ok, State#mem{test=(Args == test)}}. - + {ok, State#mem{args=Args}}. %% new node joining to this node -handle_call({join, JoinType, ExtNodes}, _From, State) -> - Config = configuration:get_config(), +handle_call({join, JoinType, ExtNodes}, _From, + State = #mem{args=Args}) -> + Config = get_config(Args), NewState = handle_join(JoinType, ExtNodes, State, Config), {reply, ok, NewState}; @@ -140,6 +141,15 @@ code_change(OldVsn, State, _Extra) -> %%% Internal functions %%-------------------------------------------------------------------- +%% @doc if Args has config use it, otherwise call configuration module +%% most times Args will have config during testing runs +get_config(Args) -> + case proplists:get_value(config, Args) of + undefined -> configuration:get_config(); + Any -> Any + end. + + % we could be automatically: % 1. 
rejoining a cluster after some downtime % @@ -204,7 +214,8 @@ find_latest_state_filename(Config) -> end. -read_latest_state_file(test, _) -> +%% (Test, Config) +read_latest_state_file(true, _) -> nil; read_latest_state_file(_, Config) -> try @@ -221,7 +232,7 @@ read_latest_state_file(_, Config) -> %% @doc given Config and a list of Nodes, construct a Fullmap -create_map(#config{q=Q}, Nodes) -> +create_map(#config{q=Q} = Config, Nodes) -> [{_,FirstNode,_}|_] = Nodes, Fun = fun({_Pos, Node, Options}, Map) -> Hints = proplists:get_value(hints, Options), @@ -230,16 +241,16 @@ create_map(#config{q=Q}, Nodes) -> end, Acc0 = partitions:create_partitions(Q, FirstNode), Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, Nodes)), - make_fullmap(Pmap). + make_fullmap(Pmap, Config). %% @doc construct a table with all partitions, with the primary node and all %% replication partner nodes as well. -make_fullmap(PMap) -> +make_fullmap(PMap, Config) -> {Nodes, _Parts} = lists:unzip(PMap), NodeParts = lists:flatmap( fun({Node,Part}) -> - Partners = replication:partners(Node, lists:usort(Nodes)), + Partners = replication:partners(Node, lists:usort(Nodes), Config), PartnerList = [{Partner, Part, partner} || Partner <- Partners], [{Node, Part, primary} | PartnerList] end, PMap), diff --git a/test/mem3_test.erl b/test/mem3_test.erl index ae6dcaa5..25815d54 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -1,6 +1,7 @@ -module(mem3_test). -include("../include/common.hrl"). +-include("../include/config.hrl"). -include_lib("eunit/include/eunit.hrl"). %% TEST SETUP @@ -23,7 +24,9 @@ all_tests_test_() -> test_setup() -> - {ok, Pid} = mem3:start_link(test), + Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", + storage_mod="dynomite_couch_storage"}, + {ok, Pid} = mem3:start_link([{test,true}, {config, Config}]), Pid. 
@@ -34,7 +37,8 @@ test_teardown(Pid) -> %% TESTS init(_Pid) -> - #mem{test=Test} = mem3:state(), + #mem{args=Args} = mem3:state(), + Test = proplists:get_value(test, Args), ?assertEqual(true, Test). -- cgit v1.2.3 From 98b059115a7fc880716310187fb97855d014d498 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 24 Mar 2010 21:10:17 -0400 Subject: begin storing derived pmap,fullmap in ets, test them a bit --- src/mem3.erl | 45 ++++++++++++++++++++++++++++++++++++++++----- test/mem3_test.erl | 5 ++++- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 1bf27bc4..584a2a14 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -21,6 +21,8 @@ %% API -export([start_link/0, start_link/1, stop/0, stop/1]). -export([join/2, clock/0, state/0]). +-export([partitions/0, fullmap/0]). + %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -63,6 +65,18 @@ state() -> gen_server:call(?MODULE, state). +%% @doc retrieve the primary partition map. This is a list of partitions and +%% their corresponding primary node, no replication partner nodes. +partitions() -> + ets_pmap(). + + +%% @doc retrieve the full partition map, like above, but including replication +%% partner nodes. List should number 2^Q * N +fullmap() -> + lists:keysort(2, ets_fullmap()). + + %%==================================================================== %% gen_server callbacks %%==================================================================== @@ -179,8 +193,8 @@ handle_init(_OldState) -> %% handle join activities, return NewState handle_join(first, ExtNodes, #mem{node=Node, clock=Clock} = State, Config) -> - Map = create_map(Config, ExtNodes), - ?debugFmt("~nmap: ~p~n", [Map]), + {Pmap, Fullmap} = create_maps(Config, ExtNodes), + update_ets(Pmap, Fullmap), NewClock = vector_clock:increment(Node, Clock), State#mem{nodes=ExtNodes, clock=NewClock}; @@ -231,8 +245,8 @@ read_latest_state_file(_, Config) -> end. 
-%% @doc given Config and a list of Nodes, construct a Fullmap -create_map(#config{q=Q} = Config, Nodes) -> +%% @doc given Config and a list of Nodes, construct a {Pmap,Fullmap} +create_maps(#config{q=Q} = Config, Nodes) -> [{_,FirstNode,_}|_] = Nodes, Fun = fun({_Pos, Node, Options}, Map) -> Hints = proplists:get_value(hints, Options), @@ -241,7 +255,7 @@ create_map(#config{q=Q} = Config, Nodes) -> end, Acc0 = partitions:create_partitions(Q, FirstNode), Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, Nodes)), - make_fullmap(Pmap, Config). + {Pmap, make_fullmap(Pmap, Config)}. %% @doc construct a table with all partitions, with the primary node and all @@ -257,7 +271,28 @@ make_fullmap(PMap, Config) -> NodeParts. +%% ets table helper functions init_ets_table() -> Table = list_to_atom(lists:concat(["mem_", atom_to_list(node())])), ets:new(Table, [public, set, named_table]), Table. + + +ets_name(Node) -> + list_to_atom(lists:concat(["mem_", atom_to_list(Node)])). + + +update_ets(Pmap, Fullmap) -> + Table = ets_name(node()), + ets:insert(Table, {pmap, Pmap}), + ets:insert(Table, {fullmap, Fullmap}). + + +ets_pmap() -> + [{pmap, PMap}] = ets:lookup(ets_name(node()), pmap), + PMap. + + +ets_fullmap() -> + [{fullmap, FullMap}] = ets:lookup(ets_name(node()), fullmap), + FullMap. diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 25815d54..99f32ff9 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -44,5 +44,8 @@ init(_Pid) -> join_first(_Pid) -> mem3:join(first, [{1, a, []}, {2, b, []}]), - ?debugFmt("~nState: ~p~n", [mem3:state()]), + Fullmap = mem3:fullmap(), + ?assertEqual(16, length(Fullmap)), + Pmap = mem3:partitions(), + ?assertEqual(8, length(Pmap)), ok. 
-- cgit v1.2.3 From e94e87f219ee898172f060081bca47d62f1908a1 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 30 Mar 2010 18:50:58 -0400 Subject: test clock, and add some dialyzer specs for mem3 --- src/mem3.erl | 22 ++++++++++++++++++++++ test/mem3_test.erl | 7 +++++++ 2 files changed, 29 insertions(+) diff --git a/src/mem3.erl b/src/mem3.erl index 584a2a14..5de00826 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -28,39 +28,60 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). + %% includes -include("../include/config.hrl"). -include("../include/common.hrl"). +%% types +-type join_type() :: first | new | replace. +-type join_order() :: non_neg_integer(). +-type options() :: list(). +-type mem_node() :: {join_order(), node(), options()}. +-type mem_node_list() :: [mem_node()]. +-type arg_options() :: {test, boolean()} | {config, #config{}}. +-type args() :: [] | [arg_options()]. +-type mem_state() :: #mem{}. +-type epoch() :: float(). +-type clock() :: {node(), epoch()}. +-type vector_clock() :: [clock()]. + %%==================================================================== %% API %%==================================================================== +-spec start_link() -> {ok, pid()}. start_link() -> start_link([]). +-spec start_link(args()) -> {ok, pid()}. start_link(Args) -> gen_server:start_link({local, ?MODULE}, ?MODULE, Args, []). +-spec stop() -> ok. stop() -> stop(?MODULE). +-spec stop(atom()) -> ok. stop(Server) -> gen_server:cast(Server, stop). +-spec join(join_type(), mem_node_list()) -> ok. join(JoinType, Nodes) -> gen_server:call(?MODULE, {join, JoinType, Nodes}). +-spec clock() -> vector_clock(). clock() -> gen_server:call(?MODULE, clock). +-spec state() -> mem_state(). state() -> gen_server:call(?MODULE, state). @@ -82,6 +103,7 @@ fullmap() -> %%==================================================================== %% start up membership server +-spec init(args()) -> {ok, mem_state()}. 
init(Args) -> process_flag(trap_exit,true), Config = get_config(Args), diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 99f32ff9..8747cf02 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -16,6 +16,7 @@ all_tests_test_() -> {with, Pid, [ fun init/1, + fun clock/1, fun join_first/1 ]} end} @@ -42,6 +43,12 @@ init(_Pid) -> ?assertEqual(true, Test). +clock(_Pid) -> + Node = node(), + Clock = mem3:clock(), + ?assertMatch([{Node, _}], Clock). + + join_first(_Pid) -> mem3:join(first, [{1, a, []}, {2, b, []}]), Fullmap = mem3:fullmap(), -- cgit v1.2.3 From 70502d9c2ac609a90a427200fccfec747e0b63bd Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 30 Mar 2010 18:56:25 -0400 Subject: change ets to cache in mem3 --- include/common.hrl | 2 +- src/mem3.erl | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/common.hrl b/include/common.hrl index b5d4d7ec..69a9e749 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -45,6 +45,6 @@ node, nodes, clock, - ets, + cache, args }). diff --git a/src/mem3.erl b/src/mem3.erl index 5de00826..d7a6b979 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -89,13 +89,13 @@ state() -> %% @doc retrieve the primary partition map. This is a list of partitions and %% their corresponding primary node, no replication partner nodes. partitions() -> - ets_pmap(). + cache_pmap(). %% @doc retrieve the full partition map, like above, but including replication %% partner nodes. List should number 2^Q * N fullmap() -> - lists:keysort(2, ets_fullmap()). + lists:keysort(2, cache_fullmap()). 
%%==================================================================== @@ -197,11 +197,11 @@ get_config(Args) -> handle_init(nil) -> showroom_log:message(info, "membership: membership server starting...", []), net_kernel:monitor_nodes(true), - Table = init_ets_table(), + Table = init_cache_table(), Node = node(), Nodes = [{0, Node, []}], Clock = vector_clock:create(Node), - #mem{node=Node, nodes=Nodes, clock=Clock, ets=Table}; + #mem{node=Node, nodes=Nodes, clock=Clock, cache=Table}; handle_init(_OldState) -> ?debugHere, @@ -209,14 +209,14 @@ handle_init(_OldState) -> % but only if we can compare our old state to all other % available nodes and get a match... otherwise get a human involved % TODO implement me - Table = init_ets_table(), - #mem{ets=Table}. + Table = init_cache_table(), + #mem{cache=Table}. %% handle join activities, return NewState handle_join(first, ExtNodes, #mem{node=Node, clock=Clock} = State, Config) -> {Pmap, Fullmap} = create_maps(Config, ExtNodes), - update_ets(Pmap, Fullmap), + update_cache(Pmap, Fullmap), NewClock = vector_clock:increment(Node, Clock), State#mem{nodes=ExtNodes, clock=NewClock}; @@ -293,28 +293,28 @@ make_fullmap(PMap, Config) -> NodeParts. -%% ets table helper functions -init_ets_table() -> +%% cache table helper functions +init_cache_table() -> Table = list_to_atom(lists:concat(["mem_", atom_to_list(node())])), ets:new(Table, [public, set, named_table]), Table. -ets_name(Node) -> +cache_name(Node) -> list_to_atom(lists:concat(["mem_", atom_to_list(Node)])). -update_ets(Pmap, Fullmap) -> - Table = ets_name(node()), +update_cache(Pmap, Fullmap) -> + Table = cache_name(node()), ets:insert(Table, {pmap, Pmap}), ets:insert(Table, {fullmap, Fullmap}). -ets_pmap() -> - [{pmap, PMap}] = ets:lookup(ets_name(node()), pmap), +cache_pmap() -> + [{pmap, PMap}] = ets:lookup(cache_name(node()), pmap), PMap. 
-ets_fullmap() -> - [{fullmap, FullMap}] = ets:lookup(ets_name(node()), fullmap), +cache_fullmap() -> + [{fullmap, FullMap}] = ets:lookup(cache_name(node()), fullmap), FullMap. -- cgit v1.2.3 From 411f5f4925e210f281fb0ddb62b5bfe3525cbe9b Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 30 Mar 2010 21:27:17 -0400 Subject: switch mem3 cache from ets to mochiglobal, 20% speedup :) --- src/mem3.erl | 60 +++++++++++++++++++++++++++++------------------ test/mem3_cache_bench.erl | 29 +++++++++++++++++++++++ 2 files changed, 66 insertions(+), 23 deletions(-) create mode 100644 test/mem3_cache_bench.erl diff --git a/src/mem3.erl b/src/mem3.erl index d7a6b979..97f1aa03 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -22,7 +22,7 @@ -export([start_link/0, start_link/1, stop/0, stop/1]). -export([join/2, clock/0, state/0]). -export([partitions/0, fullmap/0]). - +-export([all_nodes_parts/1]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -89,13 +89,23 @@ state() -> %% @doc retrieve the primary partition map. This is a list of partitions and %% their corresponding primary node, no replication partner nodes. partitions() -> - cache_pmap(). + mochiglobal:get(pmap). %% @doc retrieve the full partition map, like above, but including replication %% partner nodes. List should number 2^Q * N fullmap() -> - lists:keysort(2, cache_fullmap()). + lists:keysort(2, mochiglobal:get(fullmap)). + + +%% @doc get all the nodes and partitions in the cluster. Depending on the +%% AllPartners param, you get only primary nodes or replication partner +%% nodes, as well. +%% No nodes/parts currently down are returned. +all_nodes_parts(false) -> + mochiglobal:get(pmap); +all_nodes_parts(true) -> + mem_utils:nodeparts_up(mochiglobal:get(fullmap)). 
%%==================================================================== @@ -197,11 +207,10 @@ get_config(Args) -> handle_init(nil) -> showroom_log:message(info, "membership: membership server starting...", []), net_kernel:monitor_nodes(true), - Table = init_cache_table(), Node = node(), Nodes = [{0, Node, []}], Clock = vector_clock:create(Node), - #mem{node=Node, nodes=Nodes, clock=Clock, cache=Table}; + #mem{node=Node, nodes=Nodes, clock=Clock}; handle_init(_OldState) -> ?debugHere, @@ -209,8 +218,7 @@ handle_init(_OldState) -> % but only if we can compare our old state to all other % available nodes and get a match... otherwise get a human involved % TODO implement me - Table = init_cache_table(), - #mem{cache=Table}. + #mem{}. %% handle join activities, return NewState @@ -294,27 +302,33 @@ make_fullmap(PMap, Config) -> %% cache table helper functions -init_cache_table() -> - Table = list_to_atom(lists:concat(["mem_", atom_to_list(node())])), - ets:new(Table, [public, set, named_table]), - Table. +update_cache(Pmap, Fullmap) -> + mochiglobal:put(pmap, Pmap), + mochiglobal:put(fullmap, Fullmap). -cache_name(Node) -> - list_to_atom(lists:concat(["mem_", atom_to_list(Node)])). +% %% cache table helper functions +% init_cache_table() -> +% Table = list_to_atom(lists:concat(["mem_", atom_to_list(node())])), +% ets:new(Table, [public, set, named_table]), +% Table. -update_cache(Pmap, Fullmap) -> - Table = cache_name(node()), - ets:insert(Table, {pmap, Pmap}), - ets:insert(Table, {fullmap, Fullmap}). +% cache_name(Node) -> +% list_to_atom(lists:concat(["mem_", atom_to_list(Node)])). + + +% update_cache(Pmap, Fullmap) -> +% Table = cache_name(node()), +% ets:insert(Table, {pmap, Pmap}), +% ets:insert(Table, {fullmap, Fullmap}). -cache_pmap() -> - [{pmap, PMap}] = ets:lookup(cache_name(node()), pmap), - PMap. +% cache_pmap() -> +% [{pmap, PMap}] = ets:lookup(cache_name(node()), pmap), +% PMap. 
-cache_fullmap() -> - [{fullmap, FullMap}] = ets:lookup(cache_name(node()), fullmap), - FullMap. +% cache_fullmap() -> +% [{fullmap, FullMap}] = ets:lookup(cache_name(node()), fullmap), +% FullMap. diff --git a/test/mem3_cache_bench.erl b/test/mem3_cache_bench.erl new file mode 100644 index 00000000..a78f029d --- /dev/null +++ b/test/mem3_cache_bench.erl @@ -0,0 +1,29 @@ +-module (mem3_cache_bench). + +-export ([doit/1]). + +-include("../include/config.hrl"). + + +init() -> + Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", + storage_mod="dynomite_couch_storage"}, + {ok, _Pid} = mem3:start_link([{test,true}, {config, Config}]), + mem3:join(first, [{1, a, []}, {2, b, []}]). + + +doit(Reps) -> + init(), + Begin = erlang:now(), + process(Reps), + % seconds to run the test + Time = timer:now_diff(erlang:now(), Begin)/1000000, + mem3:stop(), + Time. + + +process(0) -> + ok; +process(M) -> + mem3:fullmap(), + process(M-1). -- cgit v1.2.3 From 23c494acf0967770a08ba9d72085887dedbc8ccd Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 31 Mar 2010 09:24:34 -0400 Subject: add some api calls to mem3, and test hints for first join --- src/mem3.erl | 63 ++++++++++++++++++++++++++---------------------------- test/mem3_test.erl | 22 ++++++++++++++++++- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 97f1aa03..b64915d6 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -22,7 +22,8 @@ -export([start_link/0, start_link/1, stop/0, stop/1]). -export([join/2, clock/0, state/0]). -export([partitions/0, fullmap/0]). --export([all_nodes_parts/1]). +-export([nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). +-export([parts_for_node/1]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -34,7 +35,7 @@ -include("../include/common.hrl"). -%% types +%% types - stick somewhere in includes? -type join_type() :: first | new | replace. -type join_order() :: non_neg_integer(). 
-type options() :: list(). @@ -89,13 +90,35 @@ state() -> %% @doc retrieve the primary partition map. This is a list of partitions and %% their corresponding primary node, no replication partner nodes. partitions() -> - mochiglobal:get(pmap). + mochiglobal:get(pmap). %% @doc retrieve the full partition map, like above, but including replication %% partner nodes. List should number 2^Q * N fullmap() -> - lists:keysort(2, mochiglobal:get(fullmap)). + lists:keysort(2, mochiglobal:get(fullmap)). + + +%% @doc get all the responsible nodes for a given partition, including +%% replication partner nodes +nodes_for_part(Part) -> + nodes_for_part(Part, mochiglobal:get(fullmap)). + + +nodes_for_part(Part, NodePartList) -> + Filtered = lists:filter(fun({_N, P, _T}) -> P =:= Part end, NodePartList), + {Nodes, _Parts, _Types} = lists:unzip3(Filtered), + lists:usort(Nodes). + + +%% @doc return the partitions that reside on a given node +parts_for_node(Node) -> + lists:sort(lists:foldl(fun({N,P,_Type}, AccIn) -> + case N of + Node -> [P | AccIn]; + _ -> AccIn + end + end, [], mochiglobal:get(fullmap))). %% @doc get all the nodes and partitions in the cluster. Depending on the @@ -103,9 +126,9 @@ fullmap() -> %% nodes, as well. %% No nodes/parts currently down are returned. all_nodes_parts(false) -> - mochiglobal:get(pmap); + mochiglobal:get(pmap); all_nodes_parts(true) -> - mem_utils:nodeparts_up(mochiglobal:get(fullmap)). + mochiglobal:get(fullmap). %%==================================================================== @@ -276,6 +299,7 @@ read_latest_state_file(_, Config) -> %% @doc given Config and a list of Nodes, construct a {Pmap,Fullmap} +%% This is basically replaying all the mem events that have happened. create_maps(#config{q=Q} = Config, Nodes) -> [{_,FirstNode,_}|_] = Nodes, Fun = fun({_Pos, Node, Options}, Map) -> @@ -305,30 +329,3 @@ make_fullmap(PMap, Config) -> update_cache(Pmap, Fullmap) -> mochiglobal:put(pmap, Pmap), mochiglobal:put(fullmap, Fullmap). 
- - -% %% cache table helper functions -% init_cache_table() -> -% Table = list_to_atom(lists:concat(["mem_", atom_to_list(node())])), -% ets:new(Table, [public, set, named_table]), -% Table. - - -% cache_name(Node) -> -% list_to_atom(lists:concat(["mem_", atom_to_list(Node)])). - - -% update_cache(Pmap, Fullmap) -> -% Table = cache_name(node()), -% ets:insert(Table, {pmap, Pmap}), -% ets:insert(Table, {fullmap, Fullmap}). - - -% cache_pmap() -> -% [{pmap, PMap}] = ets:lookup(cache_name(node()), pmap), -% PMap. - - -% cache_fullmap() -> -% [{fullmap, FullMap}] = ets:lookup(cache_name(node()), fullmap), -% FullMap. diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 8747cf02..d5420ba3 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -4,6 +4,9 @@ -include("../include/config.hrl"). -include_lib("eunit/include/eunit.hrl"). +-define(HINT_C1, 365375409332725729550921208179070754913983135744). +-define(HINT_C2, 1096126227998177188652763624537212264741949407232). + %% TEST SETUP all_tests_test_() -> @@ -17,7 +20,8 @@ all_tests_test_() -> [ fun init/1, fun clock/1, - fun join_first/1 + fun join_first/1, + fun join_first_with_hints/1 ]} end} ] @@ -56,3 +60,19 @@ join_first(_Pid) -> Pmap = mem3:partitions(), ?assertEqual(8, length(Pmap)), ok. + + +join_first_with_hints(_Pid) -> + mem3:join(first, [{1, a, []}, + {2, b, []}, + {3, c, [{hints, [?HINT_C1, ?HINT_C2]}]}, + {4, d, []}, + {5, e, []}]), + Fullmap = mem3:fullmap(), + ?assertEqual(24, length(Fullmap)), + Pmap = mem3:partitions(), + ?assertEqual(8, length(Pmap)), + %?debugFmt("~nFullmap: ~p~n", [Fullmap]), + ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C1)), + ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C2)), + ok. 
-- cgit v1.2.3 From 99a4d65b3b4158e26bdf97c644b8a60f6146c92d Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 31 Mar 2010 12:40:50 -0400 Subject: first stab at mem3's join new node functionality --- src/mem3.erl | 26 ++++++++++++++------------ test/mem3_test.erl | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index b64915d6..44ab4d4c 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -106,14 +106,14 @@ nodes_for_part(Part) -> nodes_for_part(Part, NodePartList) -> - Filtered = lists:filter(fun({_N, P, _T}) -> P =:= Part end, NodePartList), - {Nodes, _Parts, _Types} = lists:unzip3(Filtered), + Filtered = lists:filter(fun({_N, P}) -> P =:= Part end, NodePartList), + {Nodes, _Parts} = lists:unzip(Filtered), lists:usort(Nodes). %% @doc return the partitions that reside on a given node parts_for_node(Node) -> - lists:sort(lists:foldl(fun({N,P,_Type}, AccIn) -> + lists:sort(lists:foldl(fun({N,P}, AccIn) -> case N of Node -> [P | AccIn]; _ -> AccIn @@ -245,15 +245,14 @@ handle_init(_OldState) -> %% handle join activities, return NewState -handle_join(first, ExtNodes, #mem{node=Node, clock=Clock} = State, Config) -> - {Pmap, Fullmap} = create_maps(Config, ExtNodes), +handle_join(JoinType, ExtNodes, #mem{node=Node, clock=Clock} = State, Config) + when JoinType == first orelse JoinType == new -> + {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes), update_cache(Pmap, Fullmap), NewClock = vector_clock:increment(Node, Clock), + % TODO: gossip State#mem{nodes=ExtNodes, clock=NewClock}; -handle_join(new, _ExtNodes, _State, _Config) -> - ok; - handle_join(replace, [_OldNode | _], _State, _Config) -> ok; @@ -300,14 +299,17 @@ read_latest_state_file(_, Config) -> %% @doc given Config and a list of Nodes, construct a {Pmap,Fullmap} %% This is basically replaying all the mem events that have happened. 
-create_maps(#config{q=Q} = Config, Nodes) -> +create_maps(#config{q=Q} = Config, JoinType, Nodes) -> [{_,FirstNode,_}|_] = Nodes, Fun = fun({_Pos, Node, Options}, Map) -> Hints = proplists:get_value(hints, Options), {ok, NewMap} = partitions:join(Node, Map, Hints), NewMap end, - Acc0 = partitions:create_partitions(Q, FirstNode), + Acc0 = case JoinType of + first -> partitions:create_partitions(Q, FirstNode); + new -> mochiglobal:get(pmap) + end, Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, Nodes)), {Pmap, make_fullmap(Pmap, Config)}. @@ -319,8 +321,8 @@ make_fullmap(PMap, Config) -> NodeParts = lists:flatmap( fun({Node,Part}) -> Partners = replication:partners(Node, lists:usort(Nodes), Config), - PartnerList = [{Partner, Part, partner} || Partner <- Partners], - [{Node, Part, primary} | PartnerList] + PartnerList = [{Partner, Part} || Partner <- Partners], + [{Node, Part} | PartnerList] end, PMap), NodeParts. diff --git a/test/mem3_test.erl b/test/mem3_test.erl index d5420ba3..139187df 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -6,6 +6,12 @@ -define(HINT_C1, 365375409332725729550921208179070754913983135744). -define(HINT_C2, 1096126227998177188652763624537212264741949407232). +-define(PARTS_FOR_D1, [365375409332725729550921208179070754913983135744, + 548063113999088594326381812268606132370974703616, + 730750818665451459101842416358141509827966271488, + 913438523331814323877303020447676887284957839360, + 1096126227998177188652763624537212264741949407232, + 1278813932664540053428224228626747642198940975104]). %% TEST SETUP @@ -21,7 +27,8 @@ all_tests_test_() -> fun init/1, fun clock/1, fun join_first/1, - fun join_first_with_hints/1 + fun join_first_with_hints/1, + fun join_new_node/1 ]} end} ] @@ -76,3 +83,13 @@ join_first_with_hints(_Pid) -> ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C1)), ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C2)), ok. 
+ + +join_new_node(_Pid) -> + mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), + ?assertEqual(24, length(mem3:fullmap())), + ?assertEqual([], mem3:parts_for_node(d)), + mem3:join(new, [{4, d, []}]), + ?assertEqual(?PARTS_FOR_D1, mem3:parts_for_node(d)), + ?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), + ok. -- cgit v1.2.3 From 40a4e77ced8b8c0b966e4c5aa83f2a45aafe6778 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 12 Apr 2010 20:18:57 -0400 Subject: add a mem3 reset command for tests. test for joins with order num that already exists in state --- src/mem3.erl | 100 ++++++++++++++++++++++++++++++++++++++--------------- test/mem3_test.erl | 34 ++++++++++++++++-- 2 files changed, 105 insertions(+), 29 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 44ab4d4c..ca5bec0b 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -19,7 +19,7 @@ -behaviour(gen_server). %% API --export([start_link/0, start_link/1, stop/0, stop/1]). +-export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). -export([join/2, clock/0, state/0]). -export([partitions/0, fullmap/0]). -export([nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). @@ -87,6 +87,11 @@ state() -> gen_server:call(?MODULE, state). +-spec reset() -> ok | not_reset. +reset() -> + gen_server:call(?MODULE, reset). + + %% @doc retrieve the primary partition map. This is a list of partitions and %% their corresponding primary node, no replication partner nodes. partitions() -> @@ -146,21 +151,37 @@ init(Args) -> {ok, State#mem{args=Args}}. 
-%% new node joining to this node +%% new node(s) joining to this node handle_call({join, JoinType, ExtNodes}, _From, - State = #mem{args=Args}) -> + #mem{args=Args} = State) -> Config = get_config(Args), - NewState = handle_join(JoinType, ExtNodes, State, Config), - {reply, ok, NewState}; + try + NewState = handle_join(JoinType, ExtNodes, State, Config), + {reply, ok, NewState} + catch + _:Error -> + showroom_log:message(error, "~p", [Error]), + {reply, Error, State} + end; %% clock -handle_call(clock, _From, State = #mem{clock=Clock}) -> +handle_call(clock, _From, #mem{clock=Clock} = State) -> {reply, Clock, State}; %% state handle_call(state, _From, State) -> {reply, State, State}; +%% reset - but only if we're in test mode +handle_call(reset, _From, #mem{args=Args} = State) -> + case proplists:get_value(test, Args) of + undefined -> {reply, not_reset, State}; + _ -> + mochiglobal:delete(pmap), + mochiglobal:delete(fullmap), + {reply, ok, int_reset(State)} + end; + %% ignored call handle_call(Msg, _From, State) -> showroom_log:message(info, "membership: ignored call: ~p", [Msg]), @@ -230,10 +251,7 @@ get_config(Args) -> handle_init(nil) -> showroom_log:message(info, "membership: membership server starting...", []), net_kernel:monitor_nodes(true), - Node = node(), - Nodes = [{0, Node, []}], - Clock = vector_clock:create(Node), - #mem{node=Node, nodes=Nodes, clock=Clock}; + int_reset(); handle_init(_OldState) -> ?debugHere, @@ -245,9 +263,10 @@ handle_init(_OldState) -> %% handle join activities, return NewState -handle_join(JoinType, ExtNodes, #mem{node=Node, clock=Clock} = State, Config) +handle_join(JoinType, ExtNodes, + #mem{node=Node, nodes=Nodes, clock=Clock} = State, Config) when JoinType == first orelse JoinType == new -> - {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes), + {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), update_cache(Pmap, Fullmap), NewClock = vector_clock:increment(Node, Clock), % TODO: gossip @@ -297,37 
+316,64 @@ read_latest_state_file(_, Config) -> end. -%% @doc given Config and a list of Nodes, construct a {Pmap,Fullmap} +%% @doc given Config and a list of ExtNodes, construct a {Pmap,Fullmap} %% This is basically replaying all the mem events that have happened. -create_maps(#config{q=Q} = Config, JoinType, Nodes) -> - [{_,FirstNode,_}|_] = Nodes, - Fun = fun({_Pos, Node, Options}, Map) -> +create_maps(#config{q=Q} = Config, JoinType, ExtNodes, Nodes) -> + [{_,FirstNode,_}|_] = ExtNodes, + Fun = fun({Pos, Node, Options}, Pmap) -> + check_pos(Pos, Node, Nodes), Hints = proplists:get_value(hints, Options), - {ok, NewMap} = partitions:join(Node, Map, Hints), - NewMap + {ok, NewPmap} = partitions:join(Node, Pmap, Hints), + NewPmap end, Acc0 = case JoinType of first -> partitions:create_partitions(Q, FirstNode); new -> mochiglobal:get(pmap) end, - Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, Nodes)), + Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, ExtNodes)), {Pmap, make_fullmap(Pmap, Config)}. %% @doc construct a table with all partitions, with the primary node and all %% replication partner nodes as well. make_fullmap(PMap, Config) -> - {Nodes, _Parts} = lists:unzip(PMap), - NodeParts = lists:flatmap( - fun({Node,Part}) -> - Partners = replication:partners(Node, lists:usort(Nodes), Config), - PartnerList = [{Partner, Part} || Partner <- Partners], - [{Node, Part} | PartnerList] - end, PMap), - NodeParts. + {Nodes, _Parts} = lists:unzip(PMap), + NodeParts = lists:flatmap( + fun({Node,Part}) -> + Partners = replication:partners(Node, lists:usort(Nodes), Config), + PartnerList = [{Partner, Part} || Partner <- Partners], + [{Node, Part} | PartnerList] + end, PMap), + NodeParts. %% cache table helper functions update_cache(Pmap, Fullmap) -> mochiglobal:put(pmap, Pmap), mochiglobal:put(fullmap, Fullmap). 
+ + +check_pos(Pos, Node, Nodes) -> + Found = lists:keyfind(Pos, 1, Nodes), + case Found of + false -> ok; + _ -> + {_,OldNode,_} = Found, + if + OldNode =:= Node -> + throw({error, {node_exists_at_position, Pos, Node}}); + true -> + throw({error, {position_exists, Pos, OldNode}}) + end + end. + + +int_reset() -> + int_reset(#mem{}). + + +int_reset(State) -> + Node = node(), + Nodes = [{0, Node, []}], + Clock = vector_clock:create(Node), + State#mem{node=Node, nodes=Nodes, clock=Clock}. diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 139187df..5d8a004c 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -12,6 +12,8 @@ 913438523331814323877303020447676887284957839360, 1096126227998177188652763624537212264741949407232, 1278813932664540053428224228626747642198940975104]). +-define(x40, 365375409332725729550921208179070754913983135744). +-define(x60, 548063113999088594326381812268606132370974703616). %% TEST SETUP @@ -28,7 +30,9 @@ all_tests_test_() -> fun clock/1, fun join_first/1, fun join_first_with_hints/1, - fun join_new_node/1 + fun join_new_node/1, + fun join_two_new_nodes/1, + fun join_with_wrong_order/1 ]} end} ] @@ -61,6 +65,7 @@ clock(_Pid) -> join_first(_Pid) -> + mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}]), Fullmap = mem3:fullmap(), ?assertEqual(16, length(Fullmap)), @@ -70,6 +75,7 @@ join_first(_Pid) -> join_first_with_hints(_Pid) -> + mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, [{hints, [?HINT_C1, ?HINT_C2]}]}, @@ -86,10 +92,34 @@ join_first_with_hints(_Pid) -> join_new_node(_Pid) -> + mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), ?assertEqual(24, length(mem3:fullmap())), ?assertEqual([], mem3:parts_for_node(d)), mem3:join(new, [{4, d, []}]), ?assertEqual(?PARTS_FOR_D1, mem3:parts_for_node(d)), - ?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), + %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), + ok. 
+ + +join_two_new_nodes(_Pid) -> + mem3:reset(), + mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), + ?assertEqual([], mem3:parts_for_node(d)), + Res = mem3:join(new, [{4, d, []}, {5, e, []}]), + ?assertEqual(ok, Res), + ?assertEqual([a,d,e], mem3:nodes_for_part(?x40)), + ?assertEqual([c,d,e], mem3:nodes_for_part(?x60)), + %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), + ok. + + +join_with_wrong_order(_Pid) -> + mem3:reset(), + mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), + ?assertEqual([], mem3:parts_for_node(d)), + %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), + Res = mem3:join(new, [{3, d, []}]), + ?assertEqual({error,{position_exists,3,c}}, Res), + %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), ok. -- cgit v1.2.3 From 0445ae6e7b8169bf00936c5ebeed5e75225a6b90 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 13 Apr 2010 00:07:15 -0400 Subject: not using 'cache' field in #mem, now that we're using mochiglobal --- include/common.hrl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/common.hrl b/include/common.hrl index 69a9e749..88c3fa9a 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -45,6 +45,5 @@ node, nodes, clock, - cache, args }). -- cgit v1.2.3 From cec304180890208561fd64624496c40bc5365667 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 13 Apr 2010 00:09:37 -0400 Subject: changed test in Args to have a test node name for executing node. First cut at adding gossip back in, but gossip during unit tests has been disabled. 
--- src/mem3.erl | 117 ++++++++++++++++++++++++++++++++++++++++++----------- test/mem3_test.erl | 8 ++-- 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index ca5bec0b..eb4feea2 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -147,7 +147,7 @@ init(Args) -> Config = get_config(Args), Test = proplists:get_value(test, Args), OldState = read_latest_state_file(Test, Config), - State = handle_init(OldState), + State = handle_init(Test, OldState), {ok, State#mem{args=Args}}. @@ -157,11 +157,11 @@ handle_call({join, JoinType, ExtNodes}, _From, Config = get_config(Args), try NewState = handle_join(JoinType, ExtNodes, State, Config), + gossip(NewState), {reply, ok, NewState} - catch - _:Error -> - showroom_log:message(error, "~p", [Error]), - {reply, Error, State} + catch _:Error -> + showroom_log:message(error, "~p", [Error]), + {reply, Error, State} end; %% clock @@ -174,12 +174,13 @@ handle_call(state, _From, State) -> %% reset - but only if we're in test mode handle_call(reset, _From, #mem{args=Args} = State) -> - case proplists:get_value(test, Args) of + Test = proplists:get_value(test, Args), + case Test of undefined -> {reply, not_reset, State}; _ -> mochiglobal:delete(pmap), mochiglobal:delete(fullmap), - {reply, ok, int_reset(State)} + {reply, ok, int_reset(Test, State)} end; %% ignored call @@ -192,6 +193,19 @@ handle_call(Msg, _From, State) -> handle_cast(stop, State) -> {stop, normal, State}; +handle_cast({gossip, #mem{node=RemoteNode} = RemoteState}, LocalState) -> + showroom_log:message(info, "membership: received gossip from ~p", + [RemoteNode]), + {MergeType, MergedState} = merge_state(RemoteState, LocalState), + case MergeType of + equal -> {noreply, MergedState}; + merged -> + showroom_log:message(info, "membership: merged new gossip", []), + update_cache(MergedState), + gossip(MergedState), + {noreply, MergedState} + end; + %% ignored cast handle_cast(Msg, State) -> showroom_log:message(info, "membership: ignored 
cast: ~p", [Msg]), @@ -248,12 +262,12 @@ get_config(Args) -> % 3. joining a cluster as a new node % 4. replacing a node in an existing cluster -handle_init(nil) -> +handle_init(Test, nil) -> showroom_log:message(info, "membership: membership server starting...", []), net_kernel:monitor_nodes(true), - int_reset(); + int_reset(Test); -handle_init(_OldState) -> +handle_init(_Test, _OldState) -> ?debugHere, % there's an old state, let's try to rejoin automatically % but only if we can compare our old state to all other @@ -269,10 +283,10 @@ handle_join(JoinType, ExtNodes, {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), update_cache(Pmap, Fullmap), NewClock = vector_clock:increment(Node, Clock), - % TODO: gossip State#mem{nodes=ExtNodes, clock=NewClock}; handle_join(replace, [_OldNode | _], _State, _Config) -> + % TODO implement me ok; handle_join(JoinType, _, _, _) -> @@ -280,6 +294,23 @@ handle_join(JoinType, _, _, _) -> {error, {unknown_join_type, JoinType}}. +gossip(#mem{args=Args} = NewState) -> + Test = proplists:get_value(test, Args), + gossip(Test, NewState). + + +gossip(undefined, #mem{node=Node, nodes=StateNodes} = NewState) -> + {_, Nodes, _} = lists:unzip3(StateNodes), + PartnersPlus = replication:partners_plus(Node, Nodes), + lists:foreach(fun(TargetNode) -> + showroom_log:message(info, "membership: firing gossip from ~p to ~p", + [Node, TargetNode]), + gen_server:cast({?MODULE, TargetNode}, {gossip, NewState}) + end, PartnersPlus); +gossip(_,_) -> + % testing, so don't gossip + ok. 
+ %% @doc find the latest state file on disk find_latest_state_filename(Config) -> Dir = Config#config.directory, @@ -300,20 +331,19 @@ find_latest_state_filename(Config) -> %% (Test, Config) -read_latest_state_file(true, _) -> - nil; -read_latest_state_file(_, Config) -> +read_latest_state_file(undefined, Config) -> try {ok, File} = find_latest_state_filename(Config), case file:consult(File) of {ok, #mem{}=State} -> State; _Else -> throw({error, bad_mem_state_file}) end - catch - _:Error -> - showroom_log:message(info, "membership: ~p", [Error]), - nil - end. + catch _:Error -> + showroom_log:message(info, "membership: ~p", [Error]), + nil + end; +read_latest_state_file(_, _) -> + nil. %% @doc given Config and a list of ExtNodes, construct a {Pmap,Fullmap} @@ -348,6 +378,12 @@ make_fullmap(PMap, Config) -> %% cache table helper functions +update_cache(#mem{nodes=Nodes, args=Args}) -> + Config = get_config(Args), + {Pmap, Fullmap} = create_maps(Config, first, Nodes, []), + update_cache(Pmap, Fullmap). + + update_cache(Pmap, Fullmap) -> mochiglobal:put(pmap, Pmap), mochiglobal:put(fullmap, Fullmap). @@ -368,12 +404,47 @@ check_pos(Pos, Node, Nodes) -> end. -int_reset() -> - int_reset(#mem{}). +int_reset(Test) -> + int_reset(Test, #mem{}). -int_reset(State) -> - Node = node(), +int_reset(Test, State) -> + Node = case Test of + undefined -> node(); + _ -> Test + end, Nodes = [{0, Node, []}], Clock = vector_clock:create(Node), State#mem{node=Node, nodes=Nodes, clock=Clock}. 
+ + +merge_state(_RemoteState=#mem{clock=RemoteClock, nodes=RemoteNodes}, + LocalState=#mem{clock=LocalClock, nodes=LocalNodes}) -> + case vector_clock:equals(RemoteClock, LocalClock) of + true -> + {equal, LocalState}; + false -> + {MergedClock, MergedNodes} = + merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes), + +% % notify of arrivals & departures +% Arrived = MergedNodes -- LocalNodes, +% notify(node_join, Arrived), +% Departed = LocalNodes -- MergedNodes, +% notify(node_leave, Departed), + + {merged, LocalState#mem{clock=MergedClock, nodes=MergedNodes}} + end. + + +merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes) -> + {MergedClock, Merged} = + vector_clock:resolve({RemoteClock, RemoteNodes}, + {LocalClock, LocalNodes}), + {MergedClock, lists:keysort(1, Merged)}. + + +% notify(Type, Nodes) -> +% lists:foreach(fun(Node) -> +% gen_event:notify(membership_events, {Type, Node}) +% end, Nodes). diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 5d8a004c..c47878d1 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -4,6 +4,7 @@ -include("../include/config.hrl"). -include_lib("eunit/include/eunit.hrl"). +-define(TEST_NODE_NAME, a). -define(HINT_C1, 365375409332725729550921208179070754913983135744). -define(HINT_C2, 1096126227998177188652763624537212264741949407232). -define(PARTS_FOR_D1, [365375409332725729550921208179070754913983135744, @@ -42,7 +43,7 @@ all_tests_test_() -> test_setup() -> Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", storage_mod="dynomite_couch_storage"}, - {ok, Pid} = mem3:start_link([{test,true}, {config, Config}]), + {ok, Pid} = mem3:start_link([{test,?TEST_NODE_NAME}, {config, Config}]), Pid. @@ -55,13 +56,12 @@ test_teardown(Pid) -> init(_Pid) -> #mem{args=Args} = mem3:state(), Test = proplists:get_value(test, Args), - ?assertEqual(true, Test). + ?assertEqual(?TEST_NODE_NAME, Test). clock(_Pid) -> - Node = node(), Clock = mem3:clock(), - ?assertMatch([{Node, _}], Clock). 
+ ?assertMatch([{?TEST_NODE_NAME, _}], Clock). join_first(_Pid) -> -- cgit v1.2.3 From 97c4e9946cfdeeaf33334f4697c6ac60ac9f55e3 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 14 Apr 2010 17:36:53 -0400 Subject: added mem3:nodes() back in, and fixed some gen_server calls to return {ok, Value} --- src/mem3.erl | 28 ++++++++++++++++++++-------- test/mem3_test.erl | 4 ++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index eb4feea2..e0bd1df0 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -22,7 +22,7 @@ -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). -export([join/2, clock/0, state/0]). -export([partitions/0, fullmap/0]). --export([nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). +-export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). -export([parts_for_node/1]). %% gen_server callbacks @@ -34,6 +34,7 @@ -include("../include/config.hrl"). -include("../include/common.hrl"). +-define(SERVER, membership). %% types - stick somewhere in includes? -type join_type() :: first | new | replace. @@ -59,7 +60,7 @@ start_link() -> -spec start_link(args()) -> {ok, pid()}. start_link(Args) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, Args, []). + gen_server:start_link({local, ?SERVER}, ?MODULE, Args, []). -spec stop() -> ok. @@ -74,22 +75,22 @@ stop(Server) -> -spec join(join_type(), mem_node_list()) -> ok. join(JoinType, Nodes) -> - gen_server:call(?MODULE, {join, JoinType, Nodes}). + gen_server:call(?SERVER, {join, JoinType, Nodes}). -spec clock() -> vector_clock(). clock() -> - gen_server:call(?MODULE, clock). + gen_server:call(?SERVER, clock). -spec state() -> mem_state(). state() -> - gen_server:call(?MODULE, state). + gen_server:call(?SERVER, state). -spec reset() -> ok | not_reset. reset() -> - gen_server:call(?MODULE, reset). + gen_server:call(?SERVER, reset). %% @doc retrieve the primary partition map. 
This is a list of partitions and @@ -104,6 +105,12 @@ fullmap() -> lists:keysort(2, mochiglobal:get(fullmap)). +%% @doc get the list of cluster nodes (according to membership module) +%% This may differ from erlang:nodes() +nodes() -> + gen_server:call(?SERVER, nodes). + + %% @doc get all the responsible nodes for a given partition, including %% replication partner nodes nodes_for_part(Part) -> @@ -166,11 +173,11 @@ handle_call({join, JoinType, ExtNodes}, _From, %% clock handle_call(clock, _From, #mem{clock=Clock} = State) -> - {reply, Clock, State}; + {reply, {ok, Clock}, State}; %% state handle_call(state, _From, State) -> - {reply, State, State}; + {reply, {ok, State}, State}; %% reset - but only if we're in test mode handle_call(reset, _From, #mem{args=Args} = State) -> @@ -183,6 +190,11 @@ handle_call(reset, _From, #mem{args=Args} = State) -> {reply, ok, int_reset(Test, State)} end; +%% nodes +handle_call(nodes, _From, #mem{nodes=NodeList} = State) -> + {_,Nodes,_} = lists:unzip3(NodeList), + {reply, {ok, Nodes}, State}; + %% ignored call handle_call(Msg, _From, State) -> showroom_log:message(info, "membership: ignored call: ~p", [Msg]), diff --git a/test/mem3_test.erl b/test/mem3_test.erl index c47878d1..8be90ef8 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -54,13 +54,13 @@ test_teardown(Pid) -> %% TESTS init(_Pid) -> - #mem{args=Args} = mem3:state(), + {ok, #mem{args=Args}} = mem3:state(), Test = proplists:get_value(test, Args), ?assertEqual(?TEST_NODE_NAME, Test). clock(_Pid) -> - Clock = mem3:clock(), + {ok, Clock} = mem3:clock(), ?assertMatch([{?TEST_NODE_NAME, _}], Clock). 
-- cgit v1.2.3 From f5bcba1df93803f02e6ecd20b84c5dbbe2f1b4d8 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 14 Apr 2010 23:08:12 -0400 Subject: removed pinging of nodes based on _cluster endpoint --- ebin/dynomite.app | 1 - src/dynomite.erl | 1 - src/dynomite_app.erl | 68 ++------------------------------------------------- src/dynomite_http.erl | 21 ---------------- 4 files changed, 2 insertions(+), 89 deletions(-) delete mode 100644 src/dynomite_http.erl diff --git a/ebin/dynomite.app b/ebin/dynomite.app index e6e71af2..02a9618d 100644 --- a/ebin/dynomite.app +++ b/ebin/dynomite.app @@ -14,7 +14,6 @@ dynomite_app, dynomite_couch_api, dynomite_couch_storage, - dynomite_http, dynomite_prof, dynomite_sup, lib_misc, diff --git a/src/dynomite.erl b/src/dynomite.erl index 1b9798c0..bb50986b 100644 --- a/src/dynomite.erl +++ b/src/dynomite.erl @@ -1,4 +1,3 @@ -%%% @author Brad Anderson %%% @doc convenience start/stop functions for Dynomite %%% -module(dynomite). diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl index 417f4c76..949e29ae 100644 --- a/src/dynomite_app.erl +++ b/src/dynomite_app.erl @@ -1,13 +1,3 @@ -%%%------------------------------------------------------------------- -%%% File: dynomite.erl -%%% @author Cliff Moon [] -%%% @copyright 2008 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2008-06-27 by Cliff Moon -%%%------------------------------------------------------------------- -module(dynomite_app). -author('cliff@powerset.com'). -author('brad@cloudant.com'). @@ -20,8 +10,6 @@ %% Application callbacks -export([start/2, stop/1]). --define(DEFAULT_CLUSTER_URL, "http://localhost:5984/_cluster"). 
- %%==================================================================== %% Application callbacks %%==================================================================== @@ -40,14 +28,9 @@ %% @doc start required apps, join cluster, start dynomite supervisor start(_Type, _StartArgs) -> + couch_api:create_db(<<"users">>, []), % all nodes have local 'users' db % start dynomite supervisor - ok = start_node(), - case dynomite_sup:start_link() of - {ok, Supervisor} -> - {ok, Supervisor}; - Error -> - Error - end. + dynomite_sup:start_link(). %%-------------------------------------------------------------------- @@ -66,50 +49,3 @@ stop({_, Sup}) -> %%==================================================================== %% Internal functions %%==================================================================== - -%% @spec start_node() -> ok | {error, Reason} -%% @doc start this node (join to dist. erlang cluster) -start_node() -> - PingUrl = couch_config:get("cluster","ping", ?DEFAULT_CLUSTER_URL), - ?LOG_DEBUG("PingUrl: ~p", [PingUrl]), - Result = case get_pingnode(PingUrl, 1) of - {ok, PingNode} -> - join(PingNode); - _ -> - ?LOG_INFO("No pingnode found. Becoming single-node cluster", []) - end, - couch_api:create_db(<<"users">>, []), % all nodes have local 'users' db - Result. - - -%% @spec get_pingnode(Url::string(), Retries::int()) -> node() | -%% {error, Reason} -%% @doc make a http get call to Url to get cluster information -get_pingnode(Url, Retries) -> - try couch_rep_httpc:request(#http_db{url=Url, retries=Retries}) of - {[{<<"ping_node">>, Node}]} -> - {ok, list_to_atom(binary_to_list(Node))}; - _ -> - {error, pingnode_not_found} - catch - _:_ -> - {error, pingnode_not_found} - end. 
- - -join(PingNode) -> - if - node() =:= PingNode -> - ok; % we must be brain, so we'll take over the world - true -> - case net_adm:ping(PingNode) of - pong -> - % there is a cluster, we just joined it - ?LOG_DEBUG("ping successful, we're in.", []), - timer:sleep(1000); %% grr, what a hack, erlang. rly? - pang -> - ?LOG_ERROR("ping not successful.", []), - throw({cluster_error, ?l2b("cluster ping not successful")}) - end - end, - ok. diff --git a/src/dynomite_http.erl b/src/dynomite_http.erl deleted file mode 100644 index 8b6f7fbb..00000000 --- a/src/dynomite_http.erl +++ /dev/null @@ -1,21 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File : dynomite_http.erl -%%% Author : Brad Anderson -%%% Description : -%%% -%%% Created : 10 Jan 2010 by Brad Anderson -%%%------------------------------------------------------------------- --module(dynomite_http). --author('Brad Anderson '). - --include("../couch/src/couch_db.hrl"). --include_lib("eunit/include/eunit.hrl"). - --export([handle_cluster_info/1]). - - -%% GET /_cluster -handle_cluster_info(#httpd{method='GET', path_parts=[_]}=Req) -> - ClusterInfo = [{<<"ping_node">>, ?l2b(atom_to_list(node()))}], - showroom_log:message(info, "Cluster Info: ~p", [ClusterInfo]), - couch_httpd:send_json(Req, {ClusterInfo}). 
-- cgit v1.2.3 From 75005e7dfc78bc9232e1364799a03ad541ce0244 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 14 Apr 2010 23:09:38 -0400 Subject: tested gossip with two nodes and a jointype of 'first', added pinging of nodes to set up distributed erlang, and had some logging & throwing cleanup --- src/mem3.erl | 42 +++++++++++++++++++++++++++++------------- src/vector_clock.erl | 9 +++++---- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index e0bd1df0..bc2b1d7e 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -212,7 +212,8 @@ handle_cast({gossip, #mem{node=RemoteNode} = RemoteState}, LocalState) -> case MergeType of equal -> {noreply, MergedState}; merged -> - showroom_log:message(info, "membership: merged new gossip", []), + showroom_log:message(info, "membership: merged new gossip: ~p", + [MergedState]), update_cache(MergedState), gossip(MergedState), {noreply, MergedState} @@ -289,13 +290,13 @@ handle_init(_Test, _OldState) -> %% handle join activities, return NewState -handle_join(JoinType, ExtNodes, - #mem{node=Node, nodes=Nodes, clock=Clock} = State, Config) - when JoinType == first orelse JoinType == new -> - {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), - update_cache(Pmap, Fullmap), - NewClock = vector_clock:increment(Node, Clock), - State#mem{nodes=ExtNodes, clock=NewClock}; +handle_join(first, ExtNodes, State, Config) -> + {_,Nodes,_} = lists:unzip3(ExtNodes), + ping_all_yall(Nodes), + join(first, ExtNodes, State, Config); + +handle_join(new, ExtNodes, State, Config) -> + join(new, ExtNodes, State, Config); handle_join(replace, [_OldNode | _], _State, _Config) -> % TODO implement me @@ -306,6 +307,14 @@ handle_join(JoinType, _, _, _) -> {error, {unknown_join_type, JoinType}}. 
+join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, + Config) -> + {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), + update_cache(Pmap, Fullmap), + NewClock = vector_clock:increment(Node, Clock), + State#mem{nodes=ExtNodes, clock=NewClock}. + + gossip(#mem{args=Args} = NewState) -> Test = proplists:get_value(test, Args), gossip(Test, NewState). @@ -317,7 +326,7 @@ gossip(undefined, #mem{node=Node, nodes=StateNodes} = NewState) -> lists:foreach(fun(TargetNode) -> showroom_log:message(info, "membership: firing gossip from ~p to ~p", [Node, TargetNode]), - gen_server:cast({?MODULE, TargetNode}, {gossip, NewState}) + gen_server:cast({?SERVER, TargetNode}, {gossip, NewState}) end, PartnersPlus); gossip(_,_) -> % testing, so don't gossip @@ -335,7 +344,7 @@ find_latest_state_filename(Config) -> [Latest | _] -> {ok, Dir ++ "/state." ++ integer_to_list(Latest)}; _ -> - throw({error, not_found}) + throw({error, mem_state_file_not_found}) end; {error, Reason} -> throw({error, Reason}) @@ -450,9 +459,12 @@ merge_state(_RemoteState=#mem{clock=RemoteClock, nodes=RemoteNodes}, merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes) -> - {MergedClock, Merged} = - vector_clock:resolve({RemoteClock, RemoteNodes}, - {LocalClock, LocalNodes}), + MergedClock = vector_clock:merge(RemoteClock, LocalClock), + Merged1 = lists:ukeymerge(1, + lists:keysort(1, RemoteNodes), + lists:keysort(1, LocalNodes)), + Merged = lists:keydelete(0, 1, Merged1), + % TODO: make sure we don't have dupe keys ? {MergedClock, lists:keysort(1, Merged)}. @@ -460,3 +472,7 @@ merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes) -> % lists:foreach(fun(Node) -> % gen_event:notify(membership_events, {Type, Node}) % end, Nodes). + + +ping_all_yall(Nodes) -> + lists:map(fun(Node) -> net_adm:ping(Node) end, Nodes). 
diff --git a/src/vector_clock.erl b/src/vector_clock.erl index 0a89d41e..740d1520 100644 --- a/src/vector_clock.erl +++ b/src/vector_clock.erl @@ -31,10 +31,11 @@ resolve({ClockA, ValuesA}, {ClockB, ValuesB}) -> greater -> {ClockA, ValuesA}; equal -> {ClockA, ValuesA}; concurrent -> - io:format("~nConcurrent Clocks~n" - "ClockA : ~p~nClockB : ~p~n" - "ValuesA: ~p~nValuesB: ~p~n" - , [ClockA, ClockB, ValuesA, ValuesB]), + showroom_log:message(info, + "~nConcurrent Clocks~n" + "ClockA : ~p~nClockB : ~p~n" + "ValuesA: ~p~nValuesB: ~p~n" + , [ClockA, ClockB, ValuesA, ValuesB]), {merge(ClockA,ClockB), ValuesA ++ ValuesB} end; resolve(not_found, {Clock, Values}) -> -- cgit v1.2.3 From f23371c2ec884628e73abd783c3beedfaa25d490 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 16 Apr 2010 23:40:13 -0400 Subject: writing state to disk now, and handle_init installs disk state, if vector clocks match other nodes in cluster. Tests Needed --- src/mem3.erl | 109 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 26 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index bc2b1d7e..cb69d27d 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -35,6 +35,7 @@ -include("../include/common.hrl"). -define(SERVER, membership). +-define(STATE_FILE_PREFIX, "membership"). %% types - stick somewhere in includes? -type join_type() :: first | new | replace. @@ -154,6 +155,8 @@ init(Args) -> Config = get_config(Args), Test = proplists:get_value(test, Args), OldState = read_latest_state_file(Test, Config), + showroom_log:message(info, "membership: membership server starting...", []), + net_kernel:monitor_nodes(true), State = handle_init(Test, OldState), {ok, State#mem{args=Args}}. 
@@ -164,7 +167,6 @@ handle_call({join, JoinType, ExtNodes}, _From, Config = get_config(Args), try NewState = handle_join(JoinType, ExtNodes, State, Config), - gossip(NewState), {reply, ok, NewState} catch _:Error -> showroom_log:message(error, "~p", [Error]), @@ -191,9 +193,9 @@ handle_call(reset, _From, #mem{args=Args} = State) -> end; %% nodes -handle_call(nodes, _From, #mem{nodes=NodeList} = State) -> - {_,Nodes,_} = lists:unzip3(NodeList), - {reply, {ok, Nodes}, State}; +handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> + {_,NodeList,_} = lists:unzip3(Nodes), + {reply, {ok, NodeList}, State}; %% ignored call handle_call(Msg, _From, State) -> @@ -214,7 +216,7 @@ handle_cast({gossip, #mem{node=RemoteNode} = RemoteState}, LocalState) -> merged -> showroom_log:message(info, "membership: merged new gossip: ~p", [MergedState]), - update_cache(MergedState), + new_state(MergedState), gossip(MergedState), {noreply, MergedState} end; @@ -276,27 +278,34 @@ get_config(Args) -> % 4. replacing a node in an existing cluster handle_init(Test, nil) -> - showroom_log:message(info, "membership: membership server starting...", []), - net_kernel:monitor_nodes(true), int_reset(Test); -handle_init(_Test, _OldState) -> - ?debugHere, +handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> % there's an old state, let's try to rejoin automatically - % but only if we can compare our old state to all other - % available nodes and get a match... otherwise get a human involved - % TODO implement me - #mem{}. + % but only if we can compare our old state to other available + % nodes and get a match... 
otherwise get a human involved + {_, NodeList, _} = lists:unzip3(Nodes), + ping_all_yall(NodeList), + {RemoteStates, _BadNodes} = get_remote_states(NodeList), + Test = proplists:get_value(test, Args), + case compare_state_with_rest(OldState, RemoteStates) of + match -> + showroom_log:message(info, "membership: rejoined successfully", []), + OldState; + Other -> + showroom_log:message(error, "membership: rejoin failed: ~p", [Other]), + int_reset(Test) + end. %% handle join activities, return NewState handle_join(first, ExtNodes, State, Config) -> {_,Nodes,_} = lists:unzip3(ExtNodes), ping_all_yall(Nodes), - join(first, ExtNodes, State, Config); + int_join(first, ExtNodes, State, Config); handle_join(new, ExtNodes, State, Config) -> - join(new, ExtNodes, State, Config); + int_join(new, ExtNodes, State, Config); handle_join(replace, [_OldNode | _], _State, _Config) -> % TODO implement me @@ -307,12 +316,12 @@ handle_join(JoinType, _, _, _) -> {error, {unknown_join_type, JoinType}}. -join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, +int_join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, Config) -> - {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), - update_cache(Pmap, Fullmap), NewClock = vector_clock:increment(Node, Clock), - State#mem{nodes=ExtNodes, clock=NewClock}. + NewState = State#mem{nodes=ExtNodes, clock=NewClock}, + {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), + new_state(NewState, Pmap, Fullmap, Config). 
gossip(#mem{args=Args} = NewState) -> @@ -337,12 +346,13 @@ find_latest_state_filename(Config) -> Dir = Config#config.directory, case file:list_dir(Dir) of {ok, Filenames} -> - Timestamps = [list_to_integer(TS) || {"state", TS} <- + Timestamps = [list_to_integer(TS) || {?STATE_FILE_PREFIX, TS} <- [list_to_tuple(string:tokens(FN, ".")) || FN <- Filenames]], SortedTimestamps = lists:reverse(lists:sort(Timestamps)), case SortedTimestamps of [Latest | _] -> - {ok, Dir ++ "/state." ++ integer_to_list(Latest)}; + {ok, Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ + integer_to_list(Latest)}; _ -> throw({error, mem_state_file_not_found}) end; @@ -356,8 +366,10 @@ read_latest_state_file(undefined, Config) -> try {ok, File} = find_latest_state_filename(Config), case file:consult(File) of - {ok, #mem{}=State} -> State; - _Else -> throw({error, bad_mem_state_file}) + {ok, [#mem{}=State]} -> State; + Else -> + ?debugFmt("~nElse: ~p~n", [Else]), + throw({error, bad_mem_state_file}) end catch _:Error -> showroom_log:message(info, "membership: ~p", [Error]), @@ -367,6 +379,21 @@ read_latest_state_file(_, _) -> nil. +%% @doc save the state file to disk, with current timestamp. +%% thx to riak_ring_manager:do_write_ringfile/1 +save_state_file(State, Config) -> + Dir = Config#config.directory, + {{Year, Month, Day},{Hour, Minute, Second}} = calendar:universal_time(), + TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + [Year, Month, Day, Hour, Minute, Second]), + FN = Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ TS, + ?debugFmt("~nFilename: ~s~n", [FN]), + ok = filelib:ensure_dir(FN), + {ok, File} = file:open(FN, [binary, write]), + io:format(File, "~w.~n", [State]), + file:close(File). + + %% @doc given Config and a list of ExtNodes, construct a {Pmap,Fullmap} %% This is basically replaying all the mem events that have happened. create_maps(#config{q=Q} = Config, JoinType, ExtNodes, Nodes) -> @@ -398,13 +425,22 @@ make_fullmap(PMap, Config) -> NodeParts. 
-%% cache table helper functions -update_cache(#mem{nodes=Nodes, args=Args}) -> +%% @doc tasks associated with a new state +new_state(#mem{nodes=Nodes, args=Args} = State) -> Config = get_config(Args), {Pmap, Fullmap} = create_maps(Config, first, Nodes, []), - update_cache(Pmap, Fullmap). + new_state(State, Pmap, Fullmap, Config). +%% @doc tasks associated with a new state +new_state(State, Pmap, Fullmap, Config) -> + update_cache(Pmap, Fullmap), + save_state_file(State, Config), + gossip(State), + State. + + +%% cache table helper function update_cache(Pmap, Fullmap) -> mochiglobal:put(pmap, Pmap), mochiglobal:put(fullmap, Fullmap). @@ -476,3 +512,24 @@ merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes) -> ping_all_yall(Nodes) -> lists:map(fun(Node) -> net_adm:ping(Node) end, Nodes). + + +get_remote_states(NodeList) -> + NodeList1 = lists:delete(node(), NodeList), + {States1, BadNodes} = rpc:multicall(NodeList1, mem3, state, [], 5000), + {_Status, States2} = lists:unzip(States1), + {States2, BadNodes}. + + +%% @doc compare state with states based on vector clock +%% return match | {bad_state_match, Node, NodesThatDontMatch} +compare_state_with_rest(#mem{node=Node, clock=Clock} = _State, States) -> + Results = lists:map(fun(#mem{node=Node1, clock=Clock1}) -> + {vector_clock:equals(Clock, Clock1), Node1} + end, States), + BadResults = lists:foldl(fun({true, _N}, AccIn) -> AccIn; + ({false, N}, AccIn) -> [N | AccIn] end, [], Results), + if + length(BadResults) == 0 -> match; + true -> {bad_state_match, Node, BadResults} + end. 
-- cgit v1.2.3 From db3e28aa026a0e2e22356851d7b93fec8247c159 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 23 Apr 2010 15:26:18 -0400 Subject: gossip handling revamped, BugzID 10068 --- src/mem3.erl | 97 ++++++++++++++++++++++++++++-------------------------- test/mem3_test.erl | 19 +++++++++++ 2 files changed, 70 insertions(+), 46 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index cb69d27d..007e1926 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -25,6 +25,9 @@ -export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). -export([parts_for_node/1]). +%% for testing more than anything else +-export([merge_nodes/2]). + %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -207,19 +210,11 @@ handle_call(Msg, _From, State) -> handle_cast(stop, State) -> {stop, normal, State}; +%% gossip handle_cast({gossip, #mem{node=RemoteNode} = RemoteState}, LocalState) -> showroom_log:message(info, "membership: received gossip from ~p", [RemoteNode]), - {MergeType, MergedState} = merge_state(RemoteState, LocalState), - case MergeType of - equal -> {noreply, MergedState}; - merged -> - showroom_log:message(info, "membership: merged new gossip: ~p", - [MergedState]), - new_state(MergedState), - gossip(MergedState), - {noreply, MergedState} - end; + {noreply, handle_gossip(RemoteState, LocalState)}; %% ignored cast handle_cast(Msg, State) -> @@ -324,6 +319,51 @@ int_join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, new_state(NewState, Pmap, Fullmap, Config). 
+handle_gossip(RemoteState=#mem{clock=RemoteClock}, + LocalState=#mem{clock=LocalClock}) -> + case vector_clock:compare(RemoteClock, LocalClock) of + equal -> LocalState; + less -> LocalState; + greater -> + % this node needs updating + new_state(RemoteState); + concurrent -> + % ick, so let's resolve and merge states + showroom_log:message(info, + "~nmembership: Concurrent Clocks~n" + "~nRemoteState : ~p~nLocalState : ~p~n" + , [RemoteState, LocalState]), + MergedState = merge_states(RemoteState, LocalState), + new_state(MergedState) + end. + + +merge_states(#mem{clock=RemoteClock, nodes=RemoteNodes} = _RemoteState, + #mem{clock=LocalClock, nodes=LocalNodes} = LocalState) -> + MergedClock = vector_clock:merge(RemoteClock, LocalClock), + MergedNodes = merge_nodes(RemoteNodes, LocalNodes), + LocalState#mem{clock=MergedClock, nodes=MergedNodes}. + + +%% this will give one of the lists back, deterministically +merge_nodes(Remote, Local) -> + % get rid of the initial 0 node if it's still there, and sort + Remote1 = lists:usort(lists:keydelete(0,1,Remote)), + Local1 = lists:usort(lists:keydelete(0,1,Local)), + % handle empty lists as well as other cases + case {Remote1, Local1} of + {[], L} -> L; + {R, []} -> R; + _ -> erlang:min(Remote1, Local1) + end. + + + +% notify(Type, Nodes) -> +% lists:foreach(fun(Node) -> +% gen_event:notify(membership_events, {Type, Node}) +% end, Nodes). + gossip(#mem{args=Args} = NewState) -> Test = proplists:get_value(test, Args), gossip(Test, NewState). @@ -341,6 +381,7 @@ gossip(_,_) -> % testing, so don't gossip ok. + %% @doc find the latest state file on disk find_latest_state_filename(Config) -> Dir = Config#config.directory, @@ -387,7 +428,6 @@ save_state_file(State, Config) -> TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", [Year, Month, Day, Hour, Minute, Second]), FN = Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." 
++ TS, - ?debugFmt("~nFilename: ~s~n", [FN]), ok = filelib:ensure_dir(FN), {ok, File} = file:open(FN, [binary, write]), io:format(File, "~w.~n", [State]), @@ -475,41 +515,6 @@ int_reset(Test, State) -> State#mem{node=Node, nodes=Nodes, clock=Clock}. -merge_state(_RemoteState=#mem{clock=RemoteClock, nodes=RemoteNodes}, - LocalState=#mem{clock=LocalClock, nodes=LocalNodes}) -> - case vector_clock:equals(RemoteClock, LocalClock) of - true -> - {equal, LocalState}; - false -> - {MergedClock, MergedNodes} = - merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes), - -% % notify of arrivals & departures -% Arrived = MergedNodes -- LocalNodes, -% notify(node_join, Arrived), -% Departed = LocalNodes -- MergedNodes, -% notify(node_leave, Departed), - - {merged, LocalState#mem{clock=MergedClock, nodes=MergedNodes}} - end. - - -merge_nodes(RemoteClock, RemoteNodes, LocalClock, LocalNodes) -> - MergedClock = vector_clock:merge(RemoteClock, LocalClock), - Merged1 = lists:ukeymerge(1, - lists:keysort(1, RemoteNodes), - lists:keysort(1, LocalNodes)), - Merged = lists:keydelete(0, 1, Merged1), - % TODO: make sure we don't have dupe keys ? - {MergedClock, lists:keysort(1, Merged)}. - - -% notify(Type, Nodes) -> -% lists:foreach(fun(Node) -> -% gen_event:notify(membership_events, {Type, Node}) -% end, Nodes). - - ping_all_yall(Nodes) -> lists:map(fun(Node) -> net_adm:ping(Node) end, Nodes). diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 8be90ef8..03e55978 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -123,3 +123,22 @@ join_with_wrong_order(_Pid) -> ?assertEqual({error,{position_exists,3,c}}, Res), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), ok. 
+ + +merge_nodes_test() -> + A = [{1,a1,[]},{2,a2,[]},{3,a3,[]}], + B = [{1,a1,[]},{2,a2,[]},{3,b3,[]}], + ?assertEqual(A, mem3:merge_nodes(A,B)), + ?assertEqual(mem3:merge_nodes(A,B), mem3:merge_nodes(B,A)), + C = [{1,c1,[]},{2,c2,[]},{3,c3,[]}], + ?assertEqual(A, mem3:merge_nodes(A,C)), + ?assertEqual(A, mem3:merge_nodes(C,A)), + ok. + + +merge_nodes_with_init_nodelist_test() -> + A = [{1,a1,[]},{2,a2,[]},{3,a3,[]}], + B = [{0, b, []}], + ?assertEqual(A, mem3:merge_nodes(A,B)), + ?assertEqual(mem3:merge_nodes(A,B), mem3:merge_nodes(B,A)), + ok. -- cgit v1.2.3 From f37f2fb12a65fc5ca5ab7bcc6e9f5272e0570fc9 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 23 Apr 2010 23:20:43 -0400 Subject: dbg and doc --- src/mem3.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 007e1926..211fdf08 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -225,13 +225,13 @@ handle_cast(Msg, State) -> %% @doc handle nodedown messages because we have %% net_kernel:monitor_nodes(true) handle_info({nodedown, Node}, State) -> - showroom_log:message(alert, "membership: nodedown from ~p", [Node]), + showroom_log:message(alert, "membership: nodedown ~p", [Node]), {noreply, State}; %% @doc handle nodeup messages because we have %% net_kernel:monitor_nodes(true) handle_info({nodeup, Node}, State) -> - showroom_log:message(alert, "membership: nodeup Node: ~p", [Node]), + showroom_log:message(alert, "membership: nodeup ~p", [Node]), {noreply, State}; %% ignored info @@ -319,6 +319,8 @@ int_join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, new_state(NewState, Pmap, Fullmap, Config). +%% @doc handle the gossip messages +%% We're not using vector_clock:resolve b/c we need custom merge strategy handle_gossip(RemoteState=#mem{clock=RemoteClock}, LocalState=#mem{clock=LocalClock}) -> case vector_clock:compare(RemoteClock, LocalClock) of @@ -358,7 +360,6 @@ merge_nodes(Remote, Local) -> end. 
- % notify(Type, Nodes) -> % lists:foreach(fun(Node) -> % gen_event:notify(membership_events, {Type, Node}) -- cgit v1.2.3 From 0e88f2bd418737f9df5383b82811b07135e179c8 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 23 Apr 2010 23:25:05 -0400 Subject: remote node should receive gossip if their clock was less than local --- src/mem3.erl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 211fdf08..cec2631c 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -321,13 +321,16 @@ int_join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, %% @doc handle the gossip messages %% We're not using vector_clock:resolve b/c we need custom merge strategy -handle_gossip(RemoteState=#mem{clock=RemoteClock}, +handle_gossip(RemoteState=#mem{clock=RemoteClock, node=RemoteNode}, LocalState=#mem{clock=LocalClock}) -> case vector_clock:compare(RemoteClock, LocalClock) of equal -> LocalState; - less -> LocalState; + less -> + % remote node needs updating + gen_server:cast({?SERVER, RemoteNode}, {gossip, LocalState}), + LocalState; greater -> - % this node needs updating + % local node needs updating new_state(RemoteState); concurrent -> % ick, so let's resolve and merge states -- cgit v1.2.3 From eb593c0557710c29f8c476f43d72fb54172b8e4e Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 26 Apr 2010 15:54:17 -0400 Subject: reworking gossip, BugzID 10069 --- include/common.hrl | 4 +- src/mem3.erl | 163 +++++++++++++++++++++-------------------------------- test/mem3_test.erl | 36 ++++++------ 3 files changed, 83 insertions(+), 120 deletions(-) diff --git a/include/common.hrl b/include/common.hrl index 88c3fa9a..4315a54c 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -43,7 +43,7 @@ %% version 3 of membership state -record(mem, {header=3, node, - nodes, - clock, + nodes=[], + clock=[], args }). 
diff --git a/src/mem3.erl b/src/mem3.erl index cec2631c..c5c558da 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -165,11 +165,9 @@ init(Args) -> %% new node(s) joining to this node -handle_call({join, JoinType, ExtNodes}, _From, - #mem{args=Args} = State) -> - Config = get_config(Args), +handle_call({join, JoinType, ExtNodes}, _From, State) -> try - NewState = handle_join(JoinType, ExtNodes, State, Config), + NewState = handle_join(JoinType, ExtNodes, State), {reply, ok, NewState} catch _:Error -> showroom_log:message(error, "~p", [Error]), @@ -189,10 +187,7 @@ handle_call(reset, _From, #mem{args=Args} = State) -> Test = proplists:get_value(test, Args), case Test of undefined -> {reply, not_reset, State}; - _ -> - mochiglobal:delete(pmap), - mochiglobal:delete(fullmap), - {reply, ok, int_reset(Test, State)} + _ -> {reply, ok, int_reset(Test, State)} end; %% nodes @@ -200,6 +195,12 @@ handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> {_,NodeList,_} = lists:unzip3(Nodes), {reply, {ok, NodeList}, State}; +%% gossip +handle_call({gossip, #mem{node=RemoteNode} = RemoteState}, From, LocalState) -> + showroom_log:message(info, "membership: received gossip from ~p", + [RemoteNode]), + handle_gossip(From, RemoteState, LocalState); + %% ignored call handle_call(Msg, _From, State) -> showroom_log:message(info, "membership: ignored call: ~p", [Msg]), @@ -210,12 +211,6 @@ handle_call(Msg, _From, State) -> handle_cast(stop, State) -> {stop, normal, State}; -%% gossip -handle_cast({gossip, #mem{node=RemoteNode} = RemoteState}, LocalState) -> - showroom_log:message(info, "membership: received gossip from ~p", - [RemoteNode]), - {noreply, handle_gossip(RemoteState, LocalState)}; - %% ignored cast handle_cast(Msg, State) -> showroom_log:message(info, "membership: ignored cast: ~p", [Msg]), @@ -294,52 +289,60 @@ handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> %% handle join activities, return NewState -handle_join(first, ExtNodes, State, Config) -> 
+handle_join(first, ExtNodes, State) -> {_,Nodes,_} = lists:unzip3(ExtNodes), ping_all_yall(Nodes), - int_join(first, ExtNodes, State, Config); + int_join(ExtNodes, State); -handle_join(new, ExtNodes, State, Config) -> - int_join(new, ExtNodes, State, Config); +handle_join(new, ExtNodes, State) -> + {_,Nodes,_} = lists:unzip3(ExtNodes), + ping_all_yall(Nodes), + int_join(ExtNodes, State); -handle_join(replace, [_OldNode | _], _State, _Config) -> +handle_join(replace, [_OldNode | _], _State) -> % TODO implement me ok; -handle_join(JoinType, _, _, _) -> +handle_join(JoinType, _, _) -> showroom_log:message(info, "membership: unknown join type: ~p", [JoinType]), {error, {unknown_join_type, JoinType}}. -int_join(JoinType, ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State, - Config) -> +int_join(ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State) -> + NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> + check_pos(Pos, N, Nodes), + [New|AccIn] + end, Nodes, ExtNodes), + NewNodes1 = lists:sort(NewNodes), NewClock = vector_clock:increment(Node, Clock), - NewState = State#mem{nodes=ExtNodes, clock=NewClock}, - {Pmap, Fullmap} = create_maps(Config, JoinType, ExtNodes, Nodes), - new_state(NewState, Pmap, Fullmap, Config). + NewState = State#mem{nodes=NewNodes1, clock=NewClock}, + install_new_state(NewState), + NewState. 
%% @doc handle the gossip messages %% We're not using vector_clock:resolve b/c we need custom merge strategy -handle_gossip(RemoteState=#mem{clock=RemoteClock, node=RemoteNode}, +handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, LocalState=#mem{clock=LocalClock}) -> case vector_clock:compare(RemoteClock, LocalClock) of - equal -> LocalState; + equal -> + {reply, ok, LocalState}; less -> % remote node needs updating - gen_server:cast({?SERVER, RemoteNode}, {gossip, LocalState}), - LocalState; + {reply, {new_state, LocalState}, LocalState}; greater -> % local node needs updating - new_state(RemoteState); + gen_server:reply(From, ok), % reply to sender first + install_new_state(RemoteState); concurrent -> % ick, so let's resolve and merge states showroom_log:message(info, - "~nmembership: Concurrent Clocks~n" - "~nRemoteState : ~p~nLocalState : ~p~n" + "membership: Concurrent Clocks~n" + "RemoteState : ~p~nLocalState : ~p~n" , [RemoteState, LocalState]), MergedState = merge_states(RemoteState, LocalState), - new_state(MergedState) + gen_server:reply(From, {new_state, MergedState}), % reply to sender + install_new_state(MergedState) end. @@ -363,29 +366,39 @@ merge_nodes(Remote, Local) -> end. -% notify(Type, Nodes) -> -% lists:foreach(fun(Node) -> -% gen_event:notify(membership_events, {Type, Node}) -% end, Nodes). - gossip(#mem{args=Args} = NewState) -> Test = proplists:get_value(test, Args), gossip(Test, NewState). 
-gossip(undefined, #mem{node=Node, nodes=StateNodes} = NewState) -> +gossip(undefined, #mem{node=Node, nodes=StateNodes} = State) -> {_, Nodes, _} = lists:unzip3(StateNodes), - PartnersPlus = replication:partners_plus(Node, Nodes), - lists:foreach(fun(TargetNode) -> - showroom_log:message(info, "membership: firing gossip from ~p to ~p", + TargetNode = next_up_node(Node, Nodes), + showroom_log:message(info, "membership: firing gossip from ~p to ~p", [Node, TargetNode]), - gen_server:cast({?SERVER, TargetNode}, {gossip, NewState}) - end, PartnersPlus); + case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of + ok -> ok; + {new_state, _NewState} -> ?debugHere,ok; + Error -> throw({unknown_gossip_response, Error}) + end; + gossip(_,_) -> % testing, so don't gossip ok. +next_up_node(Node, Nodes) -> + {A, [Node|B]} = lists:splitwith(fun(N) -> N /= Node end, Nodes), + List = lists:append([B, A, [Node]]), + UpNodes = lists:delete(fun(N) -> lists:member(N, up_nodes()) end, List), + hd(UpNodes). % TODO: empty list? + + +up_nodes() -> + % TODO: implement cache (fb 9704 & 9449) + erlang:nodes(). + + %% @doc find the latest state file on disk find_latest_state_filename(Config) -> Dir = Config#config.directory, @@ -424,6 +437,12 @@ read_latest_state_file(_, _) -> nil. +install_new_state(#mem{args=Args} = State) -> + Config = get_config(Args), + save_state_file(State, Config), + gossip(State). + + %% @doc save the state file to disk, with current timestamp. %% thx to riak_ring_manager:do_write_ringfile/1 save_state_file(State, Config) -> @@ -438,58 +457,6 @@ save_state_file(State, Config) -> file:close(File). -%% @doc given Config and a list of ExtNodes, construct a {Pmap,Fullmap} -%% This is basically replaying all the mem events that have happened. 
-create_maps(#config{q=Q} = Config, JoinType, ExtNodes, Nodes) -> - [{_,FirstNode,_}|_] = ExtNodes, - Fun = fun({Pos, Node, Options}, Pmap) -> - check_pos(Pos, Node, Nodes), - Hints = proplists:get_value(hints, Options), - {ok, NewPmap} = partitions:join(Node, Pmap, Hints), - NewPmap - end, - Acc0 = case JoinType of - first -> partitions:create_partitions(Q, FirstNode); - new -> mochiglobal:get(pmap) - end, - Pmap = lists:foldl(Fun, Acc0, lists:keysort(1, ExtNodes)), - {Pmap, make_fullmap(Pmap, Config)}. - - -%% @doc construct a table with all partitions, with the primary node and all -%% replication partner nodes as well. -make_fullmap(PMap, Config) -> - {Nodes, _Parts} = lists:unzip(PMap), - NodeParts = lists:flatmap( - fun({Node,Part}) -> - Partners = replication:partners(Node, lists:usort(Nodes), Config), - PartnerList = [{Partner, Part} || Partner <- Partners], - [{Node, Part} | PartnerList] - end, PMap), - NodeParts. - - -%% @doc tasks associated with a new state -new_state(#mem{nodes=Nodes, args=Args} = State) -> - Config = get_config(Args), - {Pmap, Fullmap} = create_maps(Config, first, Nodes, []), - new_state(State, Pmap, Fullmap, Config). - - -%% @doc tasks associated with a new state -new_state(State, Pmap, Fullmap, Config) -> - update_cache(Pmap, Fullmap), - save_state_file(State, Config), - gossip(State), - State. - - -%% cache table helper function -update_cache(Pmap, Fullmap) -> - mochiglobal:put(pmap, Pmap), - mochiglobal:put(fullmap, Fullmap). - - check_pos(Pos, Node, Nodes) -> Found = lists:keyfind(Pos, 1, Nodes), case Found of @@ -514,9 +481,7 @@ int_reset(Test, State) -> undefined -> node(); _ -> Test end, - Nodes = [{0, Node, []}], - Clock = vector_clock:create(Node), - State#mem{node=Node, nodes=Nodes, clock=Clock}. + State#mem{node=Node, nodes=[], clock=[]}. 
ping_all_yall(Nodes) -> diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 03e55978..069d897b 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -61,16 +61,14 @@ init(_Pid) -> clock(_Pid) -> {ok, Clock} = mem3:clock(), - ?assertMatch([{?TEST_NODE_NAME, _}], Clock). + ?assertMatch([], Clock). join_first(_Pid) -> mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}]), - Fullmap = mem3:fullmap(), - ?assertEqual(16, length(Fullmap)), - Pmap = mem3:partitions(), - ?assertEqual(8, length(Pmap)), + {ok, Nodes} = mem3:nodes(), + ?assertEqual(2, length(Nodes)), ok. @@ -81,35 +79,35 @@ join_first_with_hints(_Pid) -> {3, c, [{hints, [?HINT_C1, ?HINT_C2]}]}, {4, d, []}, {5, e, []}]), - Fullmap = mem3:fullmap(), - ?assertEqual(24, length(Fullmap)), - Pmap = mem3:partitions(), - ?assertEqual(8, length(Pmap)), + {ok, Nodes} = mem3:nodes(), + ?assertEqual(5, length(Nodes)), %?debugFmt("~nFullmap: ~p~n", [Fullmap]), - ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C1)), - ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C2)), +% ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C1)), +% ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C2)), ok. join_new_node(_Pid) -> mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), - ?assertEqual(24, length(mem3:fullmap())), - ?assertEqual([], mem3:parts_for_node(d)), + {ok, Nodes1} = mem3:nodes(), + ?assertEqual(3, length(Nodes1)), mem3:join(new, [{4, d, []}]), - ?assertEqual(?PARTS_FOR_D1, mem3:parts_for_node(d)), - %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), + {ok, Nodes2} = mem3:nodes(), + ?assertEqual(4, length(Nodes2)), + ?debugFmt("~nNodes: ~p~n", [Nodes2]), ok. 
join_two_new_nodes(_Pid) -> mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), - ?assertEqual([], mem3:parts_for_node(d)), + {ok, Nodes1} = mem3:nodes(), + ?assertEqual(3, length(Nodes1)), Res = mem3:join(new, [{4, d, []}, {5, e, []}]), ?assertEqual(ok, Res), - ?assertEqual([a,d,e], mem3:nodes_for_part(?x40)), - ?assertEqual([c,d,e], mem3:nodes_for_part(?x60)), + {ok, Nodes2} = mem3:nodes(), + ?assertEqual(5, length(Nodes2)), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), ok. @@ -117,7 +115,7 @@ join_two_new_nodes(_Pid) -> join_with_wrong_order(_Pid) -> mem3:reset(), mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), - ?assertEqual([], mem3:parts_for_node(d)), +% ?assertEqual([], mem3:parts_for_node(d)), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), Res = mem3:join(new, [{3, d, []}]), ?assertEqual({error,{position_exists,3,c}}, Res), -- cgit v1.2.3 From 454b9aff017c4bbda9fa01cf9875c44f04644210 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 28 Apr 2010 10:41:20 -0400 Subject: dang, large commit. * node removed from #mem{} * start_gossip api call added * some dialyzer specs * 'new' join accepts PingNode, calls into cluster itself * get_test convenience method * don't save state when testing --- include/common.hrl | 1 - src/mem3.erl | 142 +++++++++++++++++++++++++++++++++-------------------- test/mem3_test.erl | 33 +++++++++---- 3 files changed, 114 insertions(+), 62 deletions(-) diff --git a/include/common.hrl b/include/common.hrl index 4315a54c..59f5b9a1 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -42,7 +42,6 @@ %% version 3 of membership state -record(mem, {header=3, - node, nodes=[], clock=[], args diff --git a/src/mem3.erl b/src/mem3.erl index c5c558da..25945761 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -20,13 +20,13 @@ %% API -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/2, clock/0, state/0]). +-export([join/3, clock/0, state/0, start_gossip/0]). 
-export([partitions/0, fullmap/0]). -export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). -export([parts_for_node/1]). %% for testing more than anything else --export([merge_nodes/2]). +-export([merge_nodes/2, next_up_node/1, next_up_node/3]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, @@ -46,9 +46,11 @@ -type options() :: list(). -type mem_node() :: {join_order(), node(), options()}. -type mem_node_list() :: [mem_node()]. --type arg_options() :: {test, boolean()} | {config, #config{}}. +-type config() :: #config{}. +-type arg_options() :: {test, boolean()} | {config, config()}. -type args() :: [] | [arg_options()]. -type mem_state() :: #mem{}. +-type test() :: undefined | node(). -type epoch() :: float(). -type clock() :: {node(), epoch()}. -type vector_clock() :: [clock()]. @@ -77,9 +79,9 @@ stop(Server) -> gen_server:cast(Server, stop). --spec join(join_type(), mem_node_list()) -> ok. -join(JoinType, Nodes) -> - gen_server:call(?SERVER, {join, JoinType, Nodes}). +-spec join(join_type(), mem_node_list(), node() | nil) -> ok. +join(JoinType, Nodes, PingNode) -> + gen_server:call(?SERVER, {join, JoinType, Nodes, PingNode}). -spec clock() -> vector_clock(). @@ -92,6 +94,11 @@ state() -> gen_server:call(?SERVER, state). +-spec start_gossip() -> ok. +start_gossip() -> + gen_server:call(?SERVER, start_gossip). + + -spec reset() -> ok | not_reset. reset() -> gen_server:call(?SERVER, reset). 
@@ -156,7 +163,7 @@ all_nodes_parts(true) -> init(Args) -> process_flag(trap_exit,true), Config = get_config(Args), - Test = proplists:get_value(test, Args), + Test = get_test(Args), OldState = read_latest_state_file(Test, Config), showroom_log:message(info, "membership: membership server starting...", []), net_kernel:monitor_nodes(true), @@ -165,9 +172,11 @@ init(Args) -> %% new node(s) joining to this node -handle_call({join, JoinType, ExtNodes}, _From, State) -> +handle_call({join, JoinType, ExtNodes, PingNode}, _From, State) -> + % NewState = handle_join(JoinType, ExtNodes, PingNode, State), + % {reply, ok, NewState}; try - NewState = handle_join(JoinType, ExtNodes, State), + NewState = handle_join(JoinType, ExtNodes, PingNode, State), {reply, ok, NewState} catch _:Error -> showroom_log:message(error, "~p", [Error]), @@ -184,7 +193,7 @@ handle_call(state, _From, State) -> %% reset - but only if we're in test mode handle_call(reset, _From, #mem{args=Args} = State) -> - Test = proplists:get_value(test, Args), + Test = get_test(Args), case Test of undefined -> {reply, not_reset, State}; _ -> {reply, ok, int_reset(Test, State)} @@ -196,11 +205,16 @@ handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> {reply, {ok, NodeList}, State}; %% gossip -handle_call({gossip, #mem{node=RemoteNode} = RemoteState}, From, LocalState) -> +handle_call({gossip, RemoteState}, {Pid,_Tag} = From, LocalState) -> showroom_log:message(info, "membership: received gossip from ~p", - [RemoteNode]), + [erlang:node(Pid)]), handle_gossip(From, RemoteState, LocalState); +% start_gossip +handle_call(start_gossip, _From, State) -> + NewState = gossip(State), + {reply, ok, NewState}; + %% ignored call handle_call(Msg, _From, State) -> showroom_log:message(info, "membership: ignored call: ~p", [Msg]), @@ -259,6 +273,10 @@ get_config(Args) -> end. +get_test(Args) -> + proplists:get_value(test, Args). + + % we could be automatically: % 1. 
rejoining a cluster after some downtime % @@ -277,7 +295,7 @@ handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> {_, NodeList, _} = lists:unzip3(Nodes), ping_all_yall(NodeList), {RemoteStates, _BadNodes} = get_remote_states(NodeList), - Test = proplists:get_value(test, Args), + Test = get_test(Args), case compare_state_with_rest(OldState, RemoteStates) of match -> showroom_log:message(info, "membership: rejoined successfully", []), @@ -289,32 +307,43 @@ handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> %% handle join activities, return NewState -handle_join(first, ExtNodes, State) -> +handle_join(first, ExtNodes, nil, State) -> {_,Nodes,_} = lists:unzip3(ExtNodes), ping_all_yall(Nodes), int_join(ExtNodes, State); -handle_join(new, ExtNodes, State) -> - {_,Nodes,_} = lists:unzip3(ExtNodes), - ping_all_yall(Nodes), - int_join(ExtNodes, State); +handle_join(new, ExtNodes, PingNode, #mem{args=Args} = State) -> + NewState = case get_test(Args) of + undefined -> + % ping the PingNode and get its state + pong = net_adm:ping(PingNode), + timer:sleep(1000), % let dist. erl get set up... sigh. + {ok, RemoteState} = rpc:call(PingNode, mem3, state, []), + RemoteState; + _ -> + % testing, so meh + State + end, + % now use this info to join the ring + int_join(ExtNodes, NewState); -handle_join(replace, [_OldNode | _], _State) -> +handle_join(replace, [_OldNode | _], _PingNode, _State) -> % TODO implement me ok; -handle_join(JoinType, _, _) -> - showroom_log:message(info, "membership: unknown join type: ~p", [JoinType]), +handle_join(JoinType, _, PingNode, _) -> + showroom_log:message(info, "membership: unknown join type: ~p " + "for ping node: ~p", [JoinType, PingNode]), {error, {unknown_join_type, JoinType}}. 
-int_join(ExtNodes, #mem{node=Node, nodes=Nodes, clock=Clock} = State) -> +int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> check_pos(Pos, N, Nodes), [New|AccIn] end, Nodes, ExtNodes), NewNodes1 = lists:sort(NewNodes), - NewClock = vector_clock:increment(Node, Clock), + NewClock = vector_clock:increment(node(), Clock), NewState = State#mem{nodes=NewNodes1, clock=NewClock}, install_new_state(NewState), NewState. @@ -333,7 +362,7 @@ handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, greater -> % local node needs updating gen_server:reply(From, ok), % reply to sender first - install_new_state(RemoteState); + {noreply, install_new_state(RemoteState)}; concurrent -> % ick, so let's resolve and merge states showroom_log:message(info, @@ -342,7 +371,7 @@ handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, , [RemoteState, LocalState]), MergedState = merge_states(RemoteState, LocalState), gen_server:reply(From, {new_state, MergedState}), % reply to sender - install_new_state(MergedState) + {noreply, install_new_state(MergedState)} end. @@ -367,18 +396,19 @@ merge_nodes(Remote, Local) -> gossip(#mem{args=Args} = NewState) -> - Test = proplists:get_value(test, Args), + Test = get_test(Args), gossip(Test, NewState). -gossip(undefined, #mem{node=Node, nodes=StateNodes} = State) -> +-spec gossip(test(), mem_state()) -> mem_state(). +gossip(undefined, #mem{nodes=StateNodes} = State) -> {_, Nodes, _} = lists:unzip3(StateNodes), - TargetNode = next_up_node(Node, Nodes), + TargetNode = next_up_node(Nodes), showroom_log:message(info, "membership: firing gossip from ~p to ~p", - [Node, TargetNode]), + [node(), TargetNode]), case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of - ok -> ok; - {new_state, _NewState} -> ?debugHere,ok; + ok -> State; + {new_state, NewState} -> NewState; Error -> throw({unknown_gossip_response, Error}) end; @@ -387,11 +417,19 @@ gossip(_,_) -> ok. 
-next_up_node(Node, Nodes) -> +next_up_node(Nodes) -> + Node = node(), + next_up_node(Node, Nodes, up_nodes()). + + +next_up_node(Node, Nodes, UpNodes) -> {A, [Node|B]} = lists:splitwith(fun(N) -> N /= Node end, Nodes), - List = lists:append([B, A, [Node]]), - UpNodes = lists:delete(fun(N) -> lists:member(N, up_nodes()) end, List), - hd(UpNodes). % TODO: empty list? + List = lists:append(B, A), % be sure to eliminate Node + DownNodes = Nodes -- UpNodes, + case List -- DownNodes of + [Target|_] -> Target; + [] -> throw({error, no_gossip_targets_available}) + end. up_nodes() -> @@ -425,8 +463,7 @@ read_latest_state_file(undefined, Config) -> {ok, File} = find_latest_state_filename(Config), case file:consult(File) of {ok, [#mem{}=State]} -> State; - Else -> - ?debugFmt("~nElse: ~p~n", [Else]), + _Else -> throw({error, bad_mem_state_file}) end catch _:Error -> @@ -439,13 +476,15 @@ read_latest_state_file(_, _) -> install_new_state(#mem{args=Args} = State) -> Config = get_config(Args), - save_state_file(State, Config), + Test = get_test(Args), + save_state_file(Test, State, Config), gossip(State). %% @doc save the state file to disk, with current timestamp. %% thx to riak_ring_manager:do_write_ringfile/1 -save_state_file(State, Config) -> +-spec save_state_file(test(), mem_state(), config()) -> ok. +save_state_file(undefined, State, Config) -> Dir = Config#config.directory, {{Year, Month, Day},{Hour, Minute, Second}} = calendar:universal_time(), TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", @@ -454,7 +493,9 @@ save_state_file(State, Config) -> ok = filelib:ensure_dir(FN), {ok, File} = file:open(FN, [binary, write]), io:format(File, "~w.~n", [State]), - file:close(File). + file:close(File); + +save_state_file(_,_,_) -> ok. % don't save if testing check_pos(Pos, Node, Nodes) -> @@ -476,12 +517,8 @@ int_reset(Test) -> int_reset(Test, #mem{}). 
-int_reset(Test, State) -> - Node = case Test of - undefined -> node(); - _ -> Test - end, - State#mem{node=Node, nodes=[], clock=[]}. +int_reset(_Test, State) -> + State#mem{nodes=[], clock=[]}. ping_all_yall(Nodes) -> @@ -492,18 +529,19 @@ get_remote_states(NodeList) -> NodeList1 = lists:delete(node(), NodeList), {States1, BadNodes} = rpc:multicall(NodeList1, mem3, state, [], 5000), {_Status, States2} = lists:unzip(States1), - {States2, BadNodes}. + {lists:zip(NodeList1,States2), BadNodes}. %% @doc compare state with states based on vector clock %% return match | {bad_state_match, Node, NodesThatDontMatch} -compare_state_with_rest(#mem{node=Node, clock=Clock} = _State, States) -> - Results = lists:map(fun(#mem{node=Node1, clock=Clock1}) -> - {vector_clock:equals(Clock, Clock1), Node1} +compare_state_with_rest(#mem{clock=Clock} = _State, States) -> + Results = lists:map(fun({Node, #mem{clock=Clock1}}) -> + {vector_clock:equals(Clock, Clock1), Node} end, States), BadResults = lists:foldl(fun({true, _N}, AccIn) -> AccIn; - ({false, N}, AccIn) -> [N | AccIn] end, [], Results), + ({false, N}, AccIn) -> [N | AccIn] + end, [], Results), if length(BadResults) == 0 -> match; - true -> {bad_state_match, Node, BadResults} + true -> {bad_state_match, node(), BadResults} end. diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 069d897b..80699559 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -66,7 +66,7 @@ clock(_Pid) -> join_first(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}]), + mem3:join(first, [{1, a, []}, {2, b, []}], nil), {ok, Nodes} = mem3:nodes(), ?assertEqual(2, length(Nodes)), ok. 
@@ -78,7 +78,8 @@ join_first_with_hints(_Pid) -> {2, b, []}, {3, c, [{hints, [?HINT_C1, ?HINT_C2]}]}, {4, d, []}, - {5, e, []}]), + {5, e, []}], + nil), {ok, Nodes} = mem3:nodes(), ?assertEqual(5, length(Nodes)), %?debugFmt("~nFullmap: ~p~n", [Fullmap]), @@ -89,22 +90,21 @@ join_first_with_hints(_Pid) -> join_new_node(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), + mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}], nil), {ok, Nodes1} = mem3:nodes(), ?assertEqual(3, length(Nodes1)), - mem3:join(new, [{4, d, []}]), + mem3:join(new, [{4, d, []}], a), {ok, Nodes2} = mem3:nodes(), ?assertEqual(4, length(Nodes2)), - ?debugFmt("~nNodes: ~p~n", [Nodes2]), ok. join_two_new_nodes(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), + mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}], nil), {ok, Nodes1} = mem3:nodes(), ?assertEqual(3, length(Nodes1)), - Res = mem3:join(new, [{4, d, []}, {5, e, []}]), + Res = mem3:join(new, [{4, d, []}, {5, e, []}], b), ?assertEqual(ok, Res), {ok, Nodes2} = mem3:nodes(), ?assertEqual(5, length(Nodes2)), @@ -114,15 +114,18 @@ join_two_new_nodes(_Pid) -> join_with_wrong_order(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}]), + mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}], nil), % ?assertEqual([], mem3:parts_for_node(d)), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), - Res = mem3:join(new, [{3, d, []}]), + Res = mem3:join(new, [{3, d, []}], c), ?assertEqual({error,{position_exists,3,c}}, Res), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), ok. +%% +%% tests without running gen_server +%% merge_nodes_test() -> A = [{1,a1,[]},{2,a2,[]},{3,a3,[]}], B = [{1,a1,[]},{2,a2,[]},{3,b3,[]}], @@ -140,3 +143,15 @@ merge_nodes_with_init_nodelist_test() -> ?assertEqual(A, mem3:merge_nodes(A,B)), ?assertEqual(mem3:merge_nodes(A,B), mem3:merge_nodes(B,A)), ok. 
+ + +next_up_nodes_test() -> + Nodes = [a,b,c,d], + UpNodes = [a,b,d], + ?assertEqual(b, mem3:next_up_node(a,Nodes,UpNodes)), + ?assertEqual(d, mem3:next_up_node(b,Nodes,UpNodes)), + ?assertEqual(a, mem3:next_up_node(d,Nodes,UpNodes)), + ?assertThrow({error, no_gossip_targets_available}, + mem3:next_up_node(a,[a,b,c],[])), + ?assertEqual(b, mem3:next_up_node(a,[a,b],[a,b])), + ok. -- cgit v1.2.3 From 7233933976709eca1eddf5471790ec786cb33bc2 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 28 Apr 2010 15:26:22 -0400 Subject: fix bug in get_remote_states --- src/mem3.erl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 25945761..b3416219 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -314,15 +314,8 @@ handle_join(first, ExtNodes, nil, State) -> handle_join(new, ExtNodes, PingNode, #mem{args=Args} = State) -> NewState = case get_test(Args) of - undefined -> - % ping the PingNode and get its state - pong = net_adm:ping(PingNode), - timer:sleep(1000), % let dist. erl get set up... sigh. - {ok, RemoteState} = rpc:call(PingNode, mem3, state, []), - RemoteState; - _ -> - % testing, so meh - State + undefined -> get_pingnode_state(PingNode); + _ -> State % testing, so meh end, % now use this info to join the ring int_join(ExtNodes, NewState); @@ -349,6 +342,14 @@ int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> NewState. +get_pingnode_state(PingNode) -> + % ping the PingNode and get its state + pong = net_adm:ping(PingNode), + timer:sleep(1000), % let dist. erl get set up... sigh. + {ok, RemoteState} = rpc:call(PingNode, mem3, state, []), + RemoteState. 
+ + %% @doc handle the gossip messages %% We're not using vector_clock:resolve b/c we need custom merge strategy handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, @@ -529,7 +530,8 @@ get_remote_states(NodeList) -> NodeList1 = lists:delete(node(), NodeList), {States1, BadNodes} = rpc:multicall(NodeList1, mem3, state, [], 5000), {_Status, States2} = lists:unzip(States1), - {lists:zip(NodeList1,States2), BadNodes}. + NodeList2 = NodeList1 -- BadNodes, + {lists:zip(NodeList2,States2), BadNodes}. %% @doc compare state with states based on vector clock -- cgit v1.2.3 From ccf67d04af1cb14d91ba5a5ac1c4bc85317ffe01 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 28 Apr 2010 22:24:34 -0400 Subject: switch join types to init,join,replace,leave and add first json-rpc handler for membership (init) - BugzID 9726 --- src/mem3.erl | 42 +++++++++++++++++++++++++----------------- test/mem3_test.erl | 24 ++++++++++++------------ 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index b3416219..b2aa4394 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -41,7 +41,7 @@ -define(STATE_FILE_PREFIX, "membership"). %% types - stick somewhere in includes? --type join_type() :: first | new | replace. +-type join_type() :: init | join | replace | leave. -type join_order() :: non_neg_integer(). -type options() :: list(). -type mem_node() :: {join_order(), node(), options()}. 
@@ -173,15 +173,17 @@ init(Args) -> %% new node(s) joining to this node handle_call({join, JoinType, ExtNodes, PingNode}, _From, State) -> - % NewState = handle_join(JoinType, ExtNodes, PingNode, State), - % {reply, ok, NewState}; - try - NewState = handle_join(JoinType, ExtNodes, PingNode, State), - {reply, ok, NewState} - catch _:Error -> - showroom_log:message(error, "~p", [Error]), - {reply, Error, State} - end; + % {ok, NewState} = handle_join(JoinType, ExtNodes, PingNode, State), + % {reply, ok, NewState}; + try + case handle_join(JoinType, ExtNodes, PingNode, State) of + {ok, NewState} -> {reply, ok, NewState}; + Other -> {reply, Other, State} + end + catch _:Error -> + showroom_log:message(error, "~p", [Error]), + {reply, Error, State} + end; %% clock handle_call(clock, _From, #mem{clock=Clock} = State) -> @@ -306,13 +308,13 @@ handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> end. -%% handle join activities, return NewState -handle_join(first, ExtNodes, nil, State) -> +%% handle join activities, return {ok,NewState} +handle_join(init, ExtNodes, nil, State) -> {_,Nodes,_} = lists:unzip3(ExtNodes), ping_all_yall(Nodes), int_join(ExtNodes, State); -handle_join(new, ExtNodes, PingNode, #mem{args=Args} = State) -> +handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> NewState = case get_test(Args) of undefined -> get_pingnode_state(PingNode); _ -> State % testing, so meh @@ -324,10 +326,14 @@ handle_join(replace, [_OldNode | _], _PingNode, _State) -> % TODO implement me ok; +handle_join(leave, [_OldNode | _], _PingNode, _State) -> + % TODO implement me + ok; + handle_join(JoinType, _, PingNode, _) -> showroom_log:message(info, "membership: unknown join type: ~p " "for ping node: ~p", [JoinType, PingNode]), - {error, {unknown_join_type, JoinType}}. + {error, unknown_join_type}. 
int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> @@ -339,7 +345,7 @@ int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> NewClock = vector_clock:increment(node(), Clock), NewState = State#mem{nodes=NewNodes1, clock=NewClock}, install_new_state(NewState), - NewState. + {ok, NewState}. get_pingnode_state(PingNode) -> @@ -507,9 +513,11 @@ check_pos(Pos, Node, Nodes) -> {_,OldNode,_} = Found, if OldNode =:= Node -> - throw({error, {node_exists_at_position, Pos, Node}}); + Msg = "node_exists_at_position_" ++ integer_to_list(Pos), + throw({error, list_to_binary(Msg)}); true -> - throw({error, {position_exists, Pos, OldNode}}) + Msg = "position_exists_" ++ integer_to_list(Pos), + throw({error, list_to_binary(Msg)}) end end. diff --git a/test/mem3_test.erl b/test/mem3_test.erl index 80699559..b8622005 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -29,8 +29,8 @@ all_tests_test_() -> [ fun init/1, fun clock/1, - fun join_first/1, - fun join_first_with_hints/1, + fun join_init/1, + fun join_init_with_hints/1, fun join_new_node/1, fun join_two_new_nodes/1, fun join_with_wrong_order/1 @@ -64,17 +64,17 @@ clock(_Pid) -> ?assertMatch([], Clock). -join_first(_Pid) -> +join_init(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}], nil), + mem3:join(init, [{1, a, []}, {2, b, []}], nil), {ok, Nodes} = mem3:nodes(), ?assertEqual(2, length(Nodes)), ok. 
-join_first_with_hints(_Pid) -> +join_init_with_hints(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, + mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, [{hints, [?HINT_C1, ?HINT_C2]}]}, {4, d, []}, @@ -90,10 +90,10 @@ join_first_with_hints(_Pid) -> join_new_node(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}], nil), + mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, []}], nil), {ok, Nodes1} = mem3:nodes(), ?assertEqual(3, length(Nodes1)), - mem3:join(new, [{4, d, []}], a), + mem3:join(join, [{4, d, []}], a), {ok, Nodes2} = mem3:nodes(), ?assertEqual(4, length(Nodes2)), ok. @@ -101,10 +101,10 @@ join_new_node(_Pid) -> join_two_new_nodes(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}], nil), + mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, []}], nil), {ok, Nodes1} = mem3:nodes(), ?assertEqual(3, length(Nodes1)), - Res = mem3:join(new, [{4, d, []}, {5, e, []}], b), + Res = mem3:join(join, [{4, d, []}, {5, e, []}], b), ?assertEqual(ok, Res), {ok, Nodes2} = mem3:nodes(), ?assertEqual(5, length(Nodes2)), @@ -114,10 +114,10 @@ join_two_new_nodes(_Pid) -> join_with_wrong_order(_Pid) -> mem3:reset(), - mem3:join(first, [{1, a, []}, {2, b, []}, {3, c, []}], nil), + mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, []}], nil), % ?assertEqual([], mem3:parts_for_node(d)), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), - Res = mem3:join(new, [{3, d, []}], c), + Res = mem3:join(join, [{3, d, []}], c), ?assertEqual({error,{position_exists,3,c}}, Res), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), ok. 
-- cgit v1.2.3 From b8c3322645395de3775ad8b302133a8d80a39fef Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 5 May 2010 17:54:16 -0400 Subject: add 0.5sec sleep to ping_all, seems to have solved 'init' bug picked up by integration tester --- src/mem3.erl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index b2aa4394..b932ca9b 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -348,6 +348,13 @@ int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> {ok, NewState}. +install_new_state(#mem{args=Args} = State) -> + Config = get_config(Args), + Test = get_test(Args), + save_state_file(Test, State, Config), + gossip(Test, State). + + get_pingnode_state(PingNode) -> % ping the PingNode and get its state pong = net_adm:ping(PingNode), @@ -411,6 +418,7 @@ gossip(#mem{args=Args} = NewState) -> gossip(undefined, #mem{nodes=StateNodes} = State) -> {_, Nodes, _} = lists:unzip3(StateNodes), TargetNode = next_up_node(Nodes), + ?debugFmt("~nNodes: ~p~nTarget: ~p~n", [Nodes, TargetNode]), showroom_log:message(info, "membership: firing gossip from ~p to ~p", [node(), TargetNode]), case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of @@ -481,13 +489,6 @@ read_latest_state_file(_, _) -> nil. -install_new_state(#mem{args=Args} = State) -> - Config = get_config(Args), - Test = get_test(Args), - save_state_file(Test, State, Config), - gossip(State). - - %% @doc save the state file to disk, with current timestamp. %% thx to riak_ring_manager:do_write_ringfile/1 -spec save_state_file(test(), mem_state(), config()) -> ok. @@ -531,7 +532,10 @@ int_reset(_Test, State) -> ping_all_yall(Nodes) -> - lists:map(fun(Node) -> net_adm:ping(Node) end, Nodes). + lists:foreach(fun(Node) -> + net_adm:ping(Node) + end, Nodes), + timer:sleep(500). % sigh. 
get_remote_states(NodeList) -> -- cgit v1.2.3 From 7896702038b9b5c9adb3951a8196b198046783a2 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 7 May 2010 11:18:25 -0400 Subject: disable some partition calls, and results from http admin layer --- src/mem3.erl | 72 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index b932ca9b..b0105286 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -20,10 +20,10 @@ %% API -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/3, clock/0, state/0, start_gossip/0]). --export([partitions/0, fullmap/0]). --export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). --export([parts_for_node/1]). +-export([join/3, clock/0, state/0, nodes/0, start_gossip/0]). +%-export([partitions/0, fullmap/0]). +%-export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). +%-export([parts_for_node/1]). %% for testing more than anything else -export([merge_nodes/2, next_up_node/1, next_up_node/3]). @@ -104,16 +104,16 @@ reset() -> gen_server:call(?SERVER, reset). -%% @doc retrieve the primary partition map. This is a list of partitions and -%% their corresponding primary node, no replication partner nodes. -partitions() -> - mochiglobal:get(pmap). +% %% @doc retrieve the primary partition map. This is a list of partitions and +% %% their corresponding primary node, no replication partner nodes. +% partitions() -> +% mochiglobal:get(pmap). -%% @doc retrieve the full partition map, like above, but including replication -%% partner nodes. List should number 2^Q * N -fullmap() -> - lists:keysort(2, mochiglobal:get(fullmap)). +% %% @doc retrieve the full partition map, like above, but including replication +% %% partner nodes. List should number 2^Q * N +% fullmap() -> +% lists:keysort(2, mochiglobal:get(fullmap)). 
%% @doc get the list of cluster nodes (according to membership module) @@ -122,36 +122,36 @@ nodes() -> gen_server:call(?SERVER, nodes). -%% @doc get all the responsible nodes for a given partition, including -%% replication partner nodes -nodes_for_part(Part) -> - nodes_for_part(Part, mochiglobal:get(fullmap)). +% %% @doc get all the responsible nodes for a given partition, including +% %% replication partner nodes +% nodes_for_part(Part) -> +% nodes_for_part(Part, mochiglobal:get(fullmap)). -nodes_for_part(Part, NodePartList) -> - Filtered = lists:filter(fun({_N, P}) -> P =:= Part end, NodePartList), - {Nodes, _Parts} = lists:unzip(Filtered), - lists:usort(Nodes). +% nodes_for_part(Part, NodePartList) -> +% Filtered = lists:filter(fun({_N, P}) -> P =:= Part end, NodePartList), +% {Nodes, _Parts} = lists:unzip(Filtered), +% lists:usort(Nodes). -%% @doc return the partitions that reside on a given node -parts_for_node(Node) -> - lists:sort(lists:foldl(fun({N,P}, AccIn) -> - case N of - Node -> [P | AccIn]; - _ -> AccIn - end - end, [], mochiglobal:get(fullmap))). +% %% @doc return the partitions that reside on a given node +% parts_for_node(Node) -> +% lists:sort(lists:foldl(fun({N,P}, AccIn) -> +% case N of +% Node -> [P | AccIn]; +% _ -> AccIn +% end +% end, [], mochiglobal:get(fullmap))). -%% @doc get all the nodes and partitions in the cluster. Depending on the -%% AllPartners param, you get only primary nodes or replication partner -%% nodes, as well. -%% No nodes/parts currently down are returned. -all_nodes_parts(false) -> - mochiglobal:get(pmap); -all_nodes_parts(true) -> - mochiglobal:get(fullmap). +% %% @doc get all the nodes and partitions in the cluster. Depending on the +% %% AllPartners param, you get only primary nodes or replication partner +% %% nodes, as well. +% %% No nodes/parts currently down are returned. +% all_nodes_parts(false) -> +% mochiglobal:get(pmap); +% all_nodes_parts(true) -> +% mochiglobal:get(fullmap). 
%%==================================================================== -- cgit v1.2.3 From bdd612392c4ee759c95deeaccfa471983f4d3c28 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 7 May 2010 23:33:19 -0400 Subject: work on create_db functionality, can now generate fullmap for a db based on its name, its config, and current mem3 nodes - BugzID 10007 --- src/mem3.erl | 3 +- src/partitions.erl | 395 ++++++++++------------------------------------- test/partitions_test.erl | 164 ++++++-------------- 3 files changed, 129 insertions(+), 433 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index b0105286..a95b5fb0 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -118,6 +118,7 @@ reset() -> %% @doc get the list of cluster nodes (according to membership module) %% This may differ from erlang:nodes() +%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) nodes() -> gen_server:call(?SERVER, nodes). @@ -203,7 +204,7 @@ handle_call(reset, _From, #mem{args=Args} = State) -> %% nodes handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> - {_,NodeList,_} = lists:unzip3(Nodes), + {_,NodeList,_} = lists:unzip3(lists:keysort(1, Nodes)), {reply, {ok, NodeList}, State}; %% gossip diff --git a/src/partitions.erl b/src/partitions.erl index 27d2a5a1..f029fedd 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -1,339 +1,102 @@ -%%%------------------------------------------------------------------- -%%% File: partitions.erl -%%% @author Cliff Moon [http://www.powerset.com/] -%%% @copyright 2008 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2008-10-12 by Cliff Moon -%%%------------------------------------------------------------------- -module(partitions). --author('cliff@powerset.com'). +-author('brad@cloudant.com'). 
%% API --export([partition_range/1, create_partitions/2, create_partitions/3, - diff/2, pp_diff/1, int_to_partition/2, map_partitions/2, - join/3, leave/3, hash/1, hash_to_partition/2, item_to_nodepart/1, - shard_name/2, hash_to_hex/2]). +-export([fullmap/3, hash/1]). --define(RINGTOP, trunc(math:pow(2,160)-1)). % SHA-1 space +-define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space -include("../../couch/src/couch_db.hrl"). -include_lib("eunit/include/eunit.hrl"). -%% -ifdef(TEST). -%% -include("etest/partitions_test.erl"). -%% -endif. - %%==================================================================== %% API %%==================================================================== -partition_range(Q) -> - trunc( ?RINGTOP / math:pow(2,Q) ). % SHA-1 space / 2^Q - - -create_partitions(Q, Node) -> - create_partitions(Q, Node, []). - - -create_partitions(Q, Node, _Nodes) -> - fresh(trunc(math:pow(2,Q)), Node). - % map_partitions(Table, Nodes). - - -%% @spec map_partitions(Table::proplist(),Nodes::list()) -> proplist() -%% @doc maps partitions to nodes. The resulting list should be Dynomite format, -%% namely {Node,Part} -%% @end -map_partitions(Table, Nodes) -> - {_Nodes, Parts} = lists:unzip(Table), - do_map(Nodes, Parts). - - -%% @doc in case Hints is undefined, turn it into a list for clauses below. 
-join(Node, Table, undefined) -> - join(Node, Table, []); - -%% @spec join(node(), proplist(), list()) -> {ok, PartTable::proplist()} | -%% {error, Error} -%% @doc given a node, current partition table, and hints, this function returns -%% the new partition table -join(Node, Table, Hints) -> - {NodeList, Parts} = lists:unzip(Table), - OtherNodes = lists:delete(Node, NodeList), - OtherDistinctNodes = lists:usort(OtherNodes), - %% quick check to see if we have more nodes than partitions - if - length(Parts) == length(OtherDistinctNodes) -> - {error, "Too many nodes vs partitions", Table}; - true -> - AlreadyPresent = length(NodeList) - length(OtherNodes), - Nodes = lists:usort(NodeList), - PartCountToTake = trunc(length(Parts) / (length(Nodes) + 1)), - %% calcs done, let's steal some partitions - {HintsTaken, NewTable} = steal_hints(Node, Table, Hints), - if - PartCountToTake - AlreadyPresent - HintsTaken > 0 -> - steal_partitions(Node, OtherDistinctNodes, NewTable, - PartCountToTake - AlreadyPresent - HintsTaken); - true -> - %% no partitions to take - {ok, NewTable} - end - end. - - -%% TODO: implement me -leave(_Node, Table, _Hints) -> - Table. - - -diff(From, To) when length(From) =/= length(To) -> - {error, badlength, "Cannot diff partition maps with different length"}; - -diff(From, To) -> - diff(sort_for_diff(From), sort_for_diff(To), []). - - -pp_diff(Diff) -> - lists:map( - fun({F,T,Part}) -> {F,T,showroom_utils:int_to_hexstr(Part)} end, - Diff). - +%% @doc build a full partition map +fullmap(DbName, Nodes, Options) -> + {N,Q} = db_init_constants(Options), + NewNodes = ordered_nodes(DbName, Nodes), + Pmap = pmap(Q, NewNodes), + int_fullmap(N, Pmap, NewNodes). %% @spec hash(term()) -> Digest::binary() -%% @doc Showroom uses SHA-1 as its hash +%% @doc uses SHA-1 as its hash hash(Item) -> crypto:sha(term_to_binary(Item)). 
- -%% @spec hash_to_partition(binary(), integer()) -> integer() -%% @doc given a hashed value and Q, return the partition -hash_to_partition(Hash, Q) -> - HashInt = hash_int(Hash), - Size = partition_range(Q), - Factor = (HashInt div Size), - Rem = (HashInt rem Size), - if - Rem > 0 -> Factor * Size; - true -> ((Factor-1) * Size) - end. - - -hash_to_hex(Hash, Q) -> - Part = hash_to_partition(Hash, Q), - showroom_utils:int_to_hexstr(Part). - - -%% @doc given an int and a list of partitions, get the first part greater -%% than Int. Used for a hex part being turned back into an int. -int_to_partition(Int, Parts) -> - Rem = lists:dropwhile(fun(E) -> E < Int end, lists:sort(Parts)), - case Rem of - [] -> 0; % wrap-around-ring case (back to 0) - [H|_T] -> H - end. - - -%% @spec item_to_nodepart(bin()) -> {Node::node(),Part::integer()} -%% @doc given a raw item, return the node/partition/shard -%% name based on consistent hashing -item_to_nodepart(Item) when is_binary(Item) -> - Q = list_to_integer(couch_config:get("cluster","q")), - Hash = hash(?b2l(Item)), - Part = hash_to_partition(Hash, Q), - {ok, Table} = membership2:partitions(), - lists:keyfind(Part, 2, Table); - -item_to_nodepart(Item) -> - item_to_nodepart(term_to_binary(Item)). - - -%% @spec shard_name(integer(), binary()) -> binary() -%% @doc create shard name -shard_name(Part, DbName) -> - PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), - <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. - %%==================================================================== %% Internal functions %%==================================================================== -%% @doc Create a brand new table. The size and seednode are specified; -%% initially all partitions are owned by the seednode. If NumPartitions -%% is not much larger than the intended eventual number of -%% participating nodes, then performance will suffer. 
-%% from http://code.google.com/p/distributerl (trunk revision 4) chash:fresh/2 -%% @spec fresh(NumPartitions :: integer(), SeedNode :: node()) -> table() -fresh(NumPartitions, SeedNode) -> - Increment = ?RINGTOP div NumPartitions, - [{SeedNode, IndexAsInt} || IndexAsInt <- lists:seq(0,(?RINGTOP-1),Increment)]. - - -%% @spec steal_hints(node(), proplist(), list( integer() )) -> -%% {integer(), proplist()} -%% @doc move the partitions listed in Hints over to the new owner, Node -steal_hints(Node, Table, Hints) -> - steal_hints(Node, Table, Hints, 0). - - -%% @doc recursive workhorse for hints mechanism, Acc is tracking how many -%% hints/partitions were successfully moved to a new Node. -%% @end -steal_hints(_Node, Table, [], Acc) -> - {Acc, Table}; - -steal_hints(Node, Table, [Hint|RestHints], Acc) -> - {Status, NewTable} = swap_node_for_part(Node, Hint, Table), - Acc1 = case Status of - ok -> Acc+1; - _ -> Acc - end, - steal_hints(Node, NewTable, RestHints, Acc1). - - -%% @doc take a part from one of the other nodes based on most # of parts per -%% node. -%% @end -%% TODO: This fun does list ops on the Table each time through. Inefficient? -%% Hopefully not, due to small Table sizes -steal_partitions(_Node, _OtherNodes, Table, 0) -> - {ok, Table}; -steal_partitions(Node, OtherNodes, Table, Count) -> - %% first, get a list of OtherNodes and their partition counts - NPCountFun = fun(N) -> - L = proplists:get_all_values(N, Table), - {N, length(lists:delete(undefined, L))} - end, - NPCounts = lists:reverse(lists:keysort(2,lists:map(NPCountFun, OtherNodes))), - %% grab the node that has the most partitions - [{TakeFrom, _PartsCount}|_RestOfTable] = NPCounts, - %% get the highest # partition of the TakeFrom node - TakeFromParts = lists:reverse(lists:sort(proplists:get_all_values(TakeFrom, - Table))), - [Part|_RestOfParts] = TakeFromParts, - {ok, NewTable} = swap_node_for_part(Node, Part, Table), - steal_partitions(Node, OtherNodes, NewTable, Count-1). 
- - -%% @doc Make Node the owner of the partition beginning at Part. -%% from http://code.google.com/p/distributerl (trunk revision 4) chash:update/3 -swap_node_for_part(Node, Part, Table) -> - case lists:keymember(Part, 2, Table) of - true -> - GapList = [{N,P} || {N,P} <- Table, P /= Part], - {A, B} = lists:partition(fun({_,K1}) -> K1 < Part end, GapList), - {ok, A ++ [{Node, Part}] ++ B}; - false -> - showroom_log:message(info, - "'~p' partition was not found in partition table", [Part]), - {noswap, Table} - end. - - -%% @doc get the difference between two FullPMaps -%% lists need to be sorted by part, then node -diff([], [], Results) -> - lists:reverse(remove_dupes(Results)); - -diff([{Node,Part,_}|PartsA], [{Node,Part,_}|PartsB], Results) -> - diff(PartsA, PartsB, Results); - -diff([{NodeA,Part,_}|PartsA], [{NodeB,Part,_}|PartsB], Results) -> - diff(PartsA, PartsB, [{NodeA,NodeB,Part}|Results]). - - -%% @doc sorts the full map for diff/3. This may change to get more accurate -%% diff w/o dupes -sort_for_diff(FullMap) -> - lists:keysort(2,lists:sort(FullMap)). - - -remove_dupes(Diff) -> - {_,_,AllParts} = lists:unzip3(Diff), - Parts = lists:usort(AllParts), - remove_dupes_from_part(Parts, Diff, []). - - -%% @doc ex: take [{a,b,1},{b,c,1}] diff and make it [{a,c,1}] so we don't go -%% moving unnecessary shard files. 'Move partition 1 from a to b and -%% then move partition 1 from b to c' is unnecessary. Just move it a to c. -remove_dupes_from_part([], _Diff, Acc) -> - Acc; - -remove_dupes_from_part([Part|Rest], Diff, Acc) -> - PartData = lists:filter(fun({_,_,P}) -> P =:= Part end, Diff), - NewPartData = process_part_data(Part, PartData, PartData, PartData), - remove_dupes_from_part(Rest, Diff, lists:concat([NewPartData, Acc])). 
- - -%% for one partition of the full diff, remove the dupes -process_part_data(_Part, _PartData, [], Acc) -> - Acc; - -process_part_data(Part, PartData, [{From,To,_Part}|Rest], Acc) -> - case proplists:lookup(To, PartData) of - {To, NewTo, _Part} -> - - Remove1 = proplists:delete(To, PartData), - Remove2 = proplists:delete(From, Remove1), - NewPartData = [{From, NewTo, Part}|Remove2], - %?debugFmt("~nFrom : ~p~nTo : ~p~nNewTo: ~p~n" - % "Remove1: ~p~nRemove2: ~p~n" - % "NewPartData: ~p~n" - % , [From, To, NewTo, Remove1, Remove2, NewPartData]), - process_part_data(Part, NewPartData, Rest, NewPartData); - none -> - process_part_data(Part, PartData, Rest, Acc) - end. - - -% %% @doc from dynomite -% diff([], [], Results) -> -% lists:reverse(Results); - -% diff([{Node,Part}|PartsA], [{Node,Part}|PartsB], Results) -> -% diff(PartsA, PartsB, Results); - -% diff([{NodeA,Part}|PartsA], [{NodeB,Part}|PartsB], Results) -> -% diff(PartsA, PartsB, [{NodeA,NodeB,Part}|Results]). - - -%% @doc does Node/Partition mapping based on Amazon Dynamo paper, -%% section 6.2, strategy 3, more or less -%% http://www.allthingsdistributed.com/2007/10/amazons_dynamo.html -%% @end -do_map([Node|RestNodes], Parts) -> - Max = length(Parts) / length([Node|RestNodes]), - do_map(Node, RestNodes, Parts, [], 1, Max). - - -%% return final mapped list -do_map(_,_,[],Mapped, _, _) -> - lists:keysort(1, Mapped); - -%% finish off last node, Cnt & Max no longer needed -do_map(Node, [], [Part|RestParts], Mapped, _, _) -> - do_map(Node, [], RestParts, [{Node, Part}|Mapped], 0,0); - -%% workhorse clause, iterates through parts, until Cnt > Max, then advances to -%% next node, wash, rinse, repeat -do_map(Node, [NextNode|RestNodes], [Part|RestParts], Mapped, Cnt, Max) -> - case Cnt > Max of - true -> - do_map(NextNode, RestNodes, RestParts, [{Node, Part}|Mapped], - 1, Max); - false -> - do_map(Node, [NextNode|RestNodes], RestParts, [{Node, Part}|Mapped], - Cnt+1, Max) - end. 
- - -%% TODO: other guards +%% @doc get cluster constants from options or config +db_init_constants(Options) -> + {const(n, Options), const(q, Options)}. + +%% @doc get individual constant +const(Const, Options) -> + ListResult = case couch_util:get_value(Const, Options) of + undefined -> couch_config:get("cluster", atom_to_list(Const)); + Val -> Val + end, + list_to_integer(ListResult). + +%% @doc hash the dbname, and return the corresponding node for seeding a ring +seednode(DbName, Nodes) -> + Hash = hash(DbName), + HashInt = hash_int(Hash), + Size = partition_range(length(Nodes)), + Factor = (HashInt div Size), + lists:nth(Factor+1, Nodes). + +%% @doc take the list of nodes, and rearrange it, starting with the node that +%% results from hashing the Term +ordered_nodes(Term, Nodes) -> + SeedNode = seednode(Term, Nodes), + {A, B} = lists:splitwith(fun(N) -> N /= SeedNode end, Nodes), + lists:append(B,A). + +%% @doc create a partition map [{node(),part{}|_} +pmap(NumPartitions, Nodes) -> + Increment = ?RINGTOP div NumPartitions + 1, + Parts = lists:seq(0,(?RINGTOP),Increment), + make_map(Nodes, Nodes, Parts, []). + +%% @doc create a full map, which is a pmap with N-1 replication partner nodes +%% added per partition +int_fullmap(N, Pmap, Nodes) -> + Full = lists:foldl(fun({Node,Part}, AccIn) -> + Partners = partners(N, Node, Nodes, Part), + lists:append([ [{Node,Part}], Partners, AccIn]) + end, [], Pmap), + lists:reverse(Full). + +partners(N, Node, Nodes, Part) -> + {A, [Node|B]} = lists:splitwith(fun(Nd) -> Nd /= Node end, Nodes), + Nodes1 = lists:append(B,A), + Partners = lists:sublist(Nodes1, N-1), % N-1 replication partner nodes + lists:map(fun(Partner) -> {Partner, Part} end, Partners). + + +%% @doc turn hash into an integer hash_int(Hash) when is_binary(Hash) -> - <> = Hash, - IndexAsInt; + <> = Hash, + IndexAsInt; hash_int(Hash) when is_integer(Hash) -> - Hash. + Hash. 
+ +%% @doc size of one partition in the ring +partition_range(Q) -> + trunc( ?RINGTOP / Q ). % SHA-1 space / Q + +%% @doc assign nodes to each of the partitions. When you run out of nodes, +%% start at the beginning of the node list again. +%% The provided node list starts with the seed node (seednode fun) +make_map(_,_,[], Acc) -> + lists:keysort(2,Acc); +make_map(AllNodes, [], Parts, Acc) -> + % start back at beginning of node list + make_map(AllNodes, AllNodes, Parts, Acc); +make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> + % add a node/part combo to the Acc + make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). diff --git a/test/partitions_test.erl b/test/partitions_test.erl index 20effd8a..834719b7 100644 --- a/test/partitions_test.erl +++ b/test/partitions_test.erl @@ -1,121 +1,53 @@ -%%% -*- erlang-indent-level:2 -*- -module(partitions_test). -author('brad@cloudant.com'). -include("../include/config.hrl"). -include("../include/common.hrl"). --include("../include/test.hrl"). 
- - -join_test() -> - TableA = [{a,1},{a,2},{a,3},{a,4},{a,5},{a,6},{a,7},{a,8}], - TableB = [{a,1},{a,2},{a,3},{a,4},{b,5},{b,6},{b,7},{b,8}], - TableC = [{a,1},{a,2},{a,3},{c,4},{b,5},{b,6},{b,7},{c,8}], - TableD = [{a,1},{a,2},{d,3},{c,4},{b,5},{b,6},{d,7},{c,8}], - TableE = [{a,1},{a,2},{d,3},{c,4},{b,5},{b,6},{e,7},{c,8}], - TableF = [{a,1},{a,2},{d,3},{c,4},{b,5},{b,6},{e,7},{f,8}], - TableG = [{a,1},{a,2},{d,3},{c,4},{b,5},{g,6},{e,7},{f,8}], - TableH = [{a,1},{h,2},{d,3},{c,4},{b,5},{g,6},{e,7},{f,8}], - ?assertEqual({ok,TableA}, partitions:join(a, TableA, [])), - ?assertEqual({ok,TableB}, partitions:join(b, TableA, [])), - ?assertEqual({ok,TableC}, partitions:join(c, TableB, [])), - ?assertEqual({ok,TableD}, partitions:join(d, TableC, [])), - ?assertEqual({ok,TableE}, partitions:join(e, TableD, [])), - ?assertEqual({ok,TableF}, partitions:join(f, TableE, [])), - ?assertEqual({ok,TableG}, partitions:join(g, TableF, [])), - ?assertEqual({ok,TableH}, partitions:join(h, TableG, [])), - ?assertEqual({error, "Too many nodes vs partitions", TableH}, - partitions:join(i, TableH, [])), - ok. - - -hints_test() -> - TableA = [{a,1},{a,2},{a,3},{a,4},{a,5},{a,6},{a,7},{a,8}], - TableB = [{a,1},{b,2},{a,3},{a,4},{a,5},{b,6},{b,7},{b,8}], - TableC = [{a,1},{a,2},{a,3},{a,4},{c,5},{c,6},{c,7},{c,8}], - TableD = [{d,1},{d,2},{d,3},{d,4},{a,5},{a,6},{a,7},{a,8}], - ?assertEqual({ok, TableB}, partitions:join(b, TableA, [2])), - ?assertEqual({ok, TableC}, partitions:join(c, TableA, [0])), - ?assertEqual({ok, TableD}, partitions:join(d, TableA, [1,2,3,4])), - ok. - - -shard_name_test() -> - ?assertEqual(<<"x000000/dbname_000000">>, - partitions:shard_name(0, <<"dbname">>)), - ok. - - -%% note: fullmaps used here -diff_same_length_test() -> - OldMap = [{a,1, type},{a,2, type},{b,3, type},{b,4, type}], - NewMap = [{a,1, type},{a,2, type},{b,3, type},{c,4, type}], - ?assertEqual([{b,c,4}], partitions:diff(OldMap, NewMap)), - ok. 
- - -diff_dupes_test() -> - OldMap = [{'node1@node1.boorad.local',0,primary}, - {'node2@node2.boorad.local',0,partner}, - {'node3@node3.boorad.local',0,partner}, - {'node1@node1.boorad.local',182687704666362864775460604089535377456991567872, primary}, - {'node2@node2.boorad.local',182687704666362864775460604089535377456991567872, partner}, - {'node3@node3.boorad.local',182687704666362864775460604089535377456991567872, partner}, - {'node1@node1.boorad.local',365375409332725729550921208179070754913983135744, primary}, - {'node2@node2.boorad.local',365375409332725729550921208179070754913983135744, partner}, - {'node3@node3.boorad.local',365375409332725729550921208179070754913983135744, partner}, - {'node1@node1.boorad.local',548063113999088594326381812268606132370974703616, partner}, - {'node2@node2.boorad.local',548063113999088594326381812268606132370974703616, partner}, - {'node3@node3.boorad.local',548063113999088594326381812268606132370974703616, primary}, - {'node1@node1.boorad.local',730750818665451459101842416358141509827966271488, partner}, - {'node2@node2.boorad.local',730750818665451459101842416358141509827966271488, primary}, - {'node3@node3.boorad.local',730750818665451459101842416358141509827966271488, partner}, - {'node1@node1.boorad.local',913438523331814323877303020447676887284957839360, partner}, - {'node2@node2.boorad.local',913438523331814323877303020447676887284957839360, primary}, - {'node3@node3.boorad.local',913438523331814323877303020447676887284957839360, partner}, - {'node1@node1.boorad.local',1096126227998177188652763624537212264741949407232, partner}, - {'node2@node2.boorad.local',1096126227998177188652763624537212264741949407232, primary}, - {'node3@node3.boorad.local',1096126227998177188652763624537212264741949407232, partner}, - {'node1@node1.boorad.local',1278813932664540053428224228626747642198940975104, partner}, - {'node2@node2.boorad.local',1278813932664540053428224228626747642198940975104, partner}, - 
{'node3@node3.boorad.local',1278813932664540053428224228626747642198940975104, primary}], - NewMap = [{'node1@node1.boorad.local',0,primary}, - {'node2@node2.boorad.local',0,partner}, - {'node3@node3.boorad.local',0,partner}, - {'node1@node1.boorad.local',182687704666362864775460604089535377456991567872, primary}, - {'node2@node2.boorad.local',182687704666362864775460604089535377456991567872, partner}, - {'node3@node3.boorad.local',182687704666362864775460604089535377456991567872, partner}, - {'node1@node1.boorad.local',365375409332725729550921208179070754913983135744, partner}, - {'node2@node2.boorad.local',365375409332725729550921208179070754913983135744, partner}, - {'node4@node4.boorad.local',365375409332725729550921208179070754913983135744, primary}, - {'node1@node1.boorad.local',548063113999088594326381812268606132370974703616, partner}, - {'node3@node3.boorad.local',548063113999088594326381812268606132370974703616, primary}, - {'node4@node4.boorad.local',548063113999088594326381812268606132370974703616, partner}, - {'node2@node2.boorad.local',730750818665451459101842416358141509827966271488, primary}, - {'node3@node3.boorad.local',730750818665451459101842416358141509827966271488, partner}, - {'node4@node4.boorad.local',730750818665451459101842416358141509827966271488, partner}, - {'node2@node2.boorad.local',913438523331814323877303020447676887284957839360, primary}, - {'node3@node3.boorad.local',913438523331814323877303020447676887284957839360, partner}, - {'node4@node4.boorad.local',913438523331814323877303020447676887284957839360, partner}, - {'node1@node1.boorad.local',1096126227998177188652763624537212264741949407232, partner}, - {'node2@node2.boorad.local',1096126227998177188652763624537212264741949407232, partner}, - {'node4@node4.boorad.local',1096126227998177188652763624537212264741949407232, primary}, - {'node1@node1.boorad.local',1278813932664540053428224228626747642198940975104, partner}, - 
{'node3@node3.boorad.local',1278813932664540053428224228626747642198940975104, primary}, - {'node4@node4.boorad.local',1278813932664540053428224228626747642198940975104, partner}], - - Diff = [{'node3@node3.boorad.local','node4@node4.boorad.local', - 365375409332725729550921208179070754913983135744}, - {'node2@node2.boorad.local','node4@node4.boorad.local', - 548063113999088594326381812268606132370974703616}, - {'node1@node1.boorad.local','node4@node4.boorad.local', - 730750818665451459101842416358141509827966271488}, - {'node1@node1.boorad.local','node4@node4.boorad.local', - 913438523331814323877303020447676887284957839360}, - {'node3@node3.boorad.local','node4@node4.boorad.local', - 1096126227998177188652763624537212264741949407232}, - {'node2@node2.boorad.local','node4@node4.boorad.local', - 1278813932664540053428224228626747642198940975104}], - - ?assertEqual(Diff, partitions:diff(OldMap, NewMap)), - ok. +-include_lib("eunit/include/eunit.hrl"). + +-define(FOUR_NODES, [a,b,c,d]). +-define(Map1, [{d,0}, + {a,292300327466180583640736966543256603931186508596}, + {b,584600654932361167281473933086513207862373017192}, + {c,876900982398541750922210899629769811793559525788}, + {d,1169201309864722334562947866173026415724746034384}]). +-define(Map2, [{c,0}, + {d,182687704666362864775460604089535377456991567873}, + {a,365375409332725729550921208179070754913983135746}, + {b,548063113999088594326381812268606132370974703619}, + {c,730750818665451459101842416358141509827966271492}, + {d,913438523331814323877303020447676887284957839365}, + {a,1096126227998177188652763624537212264741949407238}, + {b,1278813932664540053428224228626747642198940975111}]). 
+-define(Map3, [{d,0}, + {c,0}, + {a,365375409332725729550921208179070754913983135745}, + {d,365375409332725729550921208179070754913983135745}, + {b,730750818665451459101842416358141509827966271490}, + {a,730750818665451459101842416358141509827966271490}, + {c,1096126227998177188652763624537212264741949407235}, + {b,1096126227998177188652763624537212264741949407235}]). + +%%==================================================================== +%% Tests +%%==================================================================== + +fullmap_n1_test() -> + Map1 = partitions:fullmap(<<"test">>, ?FOUR_NODES, opts(1,5)), + ?assertEqual(?Map1, Map1), + Map2 = partitions:fullmap(<<"boorad">>, ?FOUR_NODES, opts(1,8)), + ?assertEqual(?Map2, Map2), + ok. + +fullmap_Ngt1_test() -> + Map3 = partitions:fullmap(<<"boorad">>, ?FOUR_NODES, opts(2,4)), + ?assertEqual(?Map3, Map3), + ok. + + +%%==================================================================== +%% Internal functions +%%==================================================================== + +opts(N,Q) -> + [{n,integer_to_list(N)},{q,integer_to_list(Q)}]. 
-- cgit v1.2.3 From b28ba2dbc731832eb4b7b22e552fc173c1b6ca95 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Sun, 9 May 2010 22:55:42 -0400 Subject: change cluster_ops:all_parts to accept the list of NodeParts --- src/cluster_ops.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl index 72bba92f..c1edc5b2 100644 --- a/src/cluster_ops.erl +++ b/src/cluster_ops.erl @@ -59,8 +59,7 @@ key_lookup(Key, {M,F,A}, Access, Const, N) -> %% @doc Do op on all shards (and maybe even replication partners) -all_parts({M,F,A}, Access, AndPartners, ResolveFun) -> - NodePartList = membership2:all_nodes_parts(AndPartners), +all_parts({M,F,A}, Access, NodeParts, ResolveFun) -> MapFun = fun({Node, Part}) -> try rpc:call(Node, M, F, [[Part | A]]) @@ -68,9 +67,9 @@ all_parts({M,F,A}, Access, AndPartners, ResolveFun) -> {error, Class, Exception} end end, - Replies = ?PMAP(MapFun, NodePartList), + Replies = ?PMAP(MapFun, NodeParts), {Good, Bad} = lists:partition(fun valid/1, Replies), - final_all_parts(Good, Bad, length(NodePartList), ResolveFun, Access). + final_all_parts(Good, Bad, length(NodeParts), ResolveFun, Access). %% @doc Do op on some shards, depending on list of keys sent in. 
-- cgit v1.2.3 From 800941fb3b397c7f90204c58177d245acd5a5833 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Sun, 9 May 2010 23:10:42 -0400 Subject: undo last commit, let's leave cluster_ops alone, make a new showroom module --- src/cluster_ops.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl index c1edc5b2..72bba92f 100644 --- a/src/cluster_ops.erl +++ b/src/cluster_ops.erl @@ -59,7 +59,8 @@ key_lookup(Key, {M,F,A}, Access, Const, N) -> %% @doc Do op on all shards (and maybe even replication partners) -all_parts({M,F,A}, Access, NodeParts, ResolveFun) -> +all_parts({M,F,A}, Access, AndPartners, ResolveFun) -> + NodePartList = membership2:all_nodes_parts(AndPartners), MapFun = fun({Node, Part}) -> try rpc:call(Node, M, F, [[Part | A]]) @@ -67,9 +68,9 @@ all_parts({M,F,A}, Access, NodeParts, ResolveFun) -> {error, Class, Exception} end end, - Replies = ?PMAP(MapFun, NodeParts), + Replies = ?PMAP(MapFun, NodePartList), {Good, Bad} = lists:partition(fun valid/1, Replies), - final_all_parts(Good, Bad, length(NodeParts), ResolveFun, Access). + final_all_parts(Good, Bad, length(NodePartList), ResolveFun, Access). %% @doc Do op on some shards, depending on list of keys sent in. 
-- cgit v1.2.3 From 8b429742a99b1467aae4ed737ea573d652b764d4 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 11 May 2010 11:39:03 -0400 Subject: move partitions into showroom, now that there's not much left of dynomite code --- ebin/dynomite.app | 1 - src/partitions.erl | 102 ----------------------------------------------------- 2 files changed, 103 deletions(-) delete mode 100644 src/partitions.erl diff --git a/ebin/dynomite.app b/ebin/dynomite.app index 02a9618d..634c09b2 100644 --- a/ebin/dynomite.app +++ b/ebin/dynomite.app @@ -21,7 +21,6 @@ mem_utils, membership2, node, - partitions, replication, vector_clock ]}, diff --git a/src/partitions.erl b/src/partitions.erl deleted file mode 100644 index f029fedd..00000000 --- a/src/partitions.erl +++ /dev/null @@ -1,102 +0,0 @@ --module(partitions). --author('brad@cloudant.com'). - -%% API --export([fullmap/3, hash/1]). - --define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space - --include("../../couch/src/couch_db.hrl"). --include_lib("eunit/include/eunit.hrl"). - -%%==================================================================== -%% API -%%==================================================================== - -%% @doc build a full partition map -fullmap(DbName, Nodes, Options) -> - {N,Q} = db_init_constants(Options), - NewNodes = ordered_nodes(DbName, Nodes), - Pmap = pmap(Q, NewNodes), - int_fullmap(N, Pmap, NewNodes). - -%% @spec hash(term()) -> Digest::binary() -%% @doc uses SHA-1 as its hash -hash(Item) -> - crypto:sha(term_to_binary(Item)). - -%%==================================================================== -%% Internal functions -%%==================================================================== - -%% @doc get cluster constants from options or config -db_init_constants(Options) -> - {const(n, Options), const(q, Options)}. 
- -%% @doc get individual constant -const(Const, Options) -> - ListResult = case couch_util:get_value(Const, Options) of - undefined -> couch_config:get("cluster", atom_to_list(Const)); - Val -> Val - end, - list_to_integer(ListResult). - -%% @doc hash the dbname, and return the corresponding node for seeding a ring -seednode(DbName, Nodes) -> - Hash = hash(DbName), - HashInt = hash_int(Hash), - Size = partition_range(length(Nodes)), - Factor = (HashInt div Size), - lists:nth(Factor+1, Nodes). - -%% @doc take the list of nodes, and rearrange it, starting with the node that -%% results from hashing the Term -ordered_nodes(Term, Nodes) -> - SeedNode = seednode(Term, Nodes), - {A, B} = lists:splitwith(fun(N) -> N /= SeedNode end, Nodes), - lists:append(B,A). - -%% @doc create a partition map [{node(),part{}|_} -pmap(NumPartitions, Nodes) -> - Increment = ?RINGTOP div NumPartitions + 1, - Parts = lists:seq(0,(?RINGTOP),Increment), - make_map(Nodes, Nodes, Parts, []). - -%% @doc create a full map, which is a pmap with N-1 replication partner nodes -%% added per partition -int_fullmap(N, Pmap, Nodes) -> - Full = lists:foldl(fun({Node,Part}, AccIn) -> - Partners = partners(N, Node, Nodes, Part), - lists:append([ [{Node,Part}], Partners, AccIn]) - end, [], Pmap), - lists:reverse(Full). - -partners(N, Node, Nodes, Part) -> - {A, [Node|B]} = lists:splitwith(fun(Nd) -> Nd /= Node end, Nodes), - Nodes1 = lists:append(B,A), - Partners = lists:sublist(Nodes1, N-1), % N-1 replication partner nodes - lists:map(fun(Partner) -> {Partner, Part} end, Partners). - - -%% @doc turn hash into an integer -hash_int(Hash) when is_binary(Hash) -> - <> = Hash, - IndexAsInt; -hash_int(Hash) when is_integer(Hash) -> - Hash. - -%% @doc size of one partition in the ring -partition_range(Q) -> - trunc( ?RINGTOP / Q ). % SHA-1 space / Q - -%% @doc assign nodes to each of the partitions. When you run out of nodes, -%% start at the beginning of the node list again. 
-%% The provided node list starts with the seed node (seednode fun) -make_map(_,_,[], Acc) -> - lists:keysort(2,Acc); -make_map(AllNodes, [], Parts, Acc) -> - % start back at beginning of node list - make_map(AllNodes, AllNodes, Parts, Acc); -make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> - % add a node/part combo to the Acc - make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). -- cgit v1.2.3 From 4e255d8f327b2bb6c179839db5b8d66383a391c4 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 12 May 2010 21:48:32 -0400 Subject: create_db quorum check done, as well as initial install_fullmap function --- src/dynomite_app.erl | 1 - src/mem3.erl | 13 ++++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl index 949e29ae..4b520921 100644 --- a/src/dynomite_app.erl +++ b/src/dynomite_app.erl @@ -28,7 +28,6 @@ %% @doc start required apps, join cluster, start dynomite supervisor start(_Type, _StartArgs) -> - couch_api:create_db(<<"users">>, []), % all nodes have local 'users' db % start dynomite supervisor dynomite_sup:start_link(). diff --git a/src/mem3.erl b/src/mem3.erl index a95b5fb0..189bcf8e 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -20,7 +20,7 @@ %% API -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/3, clock/0, state/0, nodes/0, start_gossip/0]). +-export([join/3, clock/0, state/0, nodes/0, fullnodes/0, start_gossip/0]). %-export([partitions/0, fullmap/0]). %-export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). %-export([parts_for_node/1]). @@ -123,6 +123,13 @@ nodes() -> gen_server:call(?SERVER, nodes). +%% @doc get the list of cluster nodes (according to membership module) +%% This may differ from erlang:nodes() +%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) +fullnodes() -> + gen_server:call(?SERVER, fullnodes). 
+ + % %% @doc get all the responsible nodes for a given partition, including % %% replication partner nodes % nodes_for_part(Part) -> @@ -207,6 +214,10 @@ handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> {_,NodeList,_} = lists:unzip3(lists:keysort(1, Nodes)), {reply, {ok, NodeList}, State}; +%% fullnodes +handle_call(fullnodes, _From, #mem{nodes=Nodes} = State) -> + {reply, {ok, Nodes}, State}; + %% gossip handle_call({gossip, RemoteState}, {Pid,_Tag} = From, LocalState) -> showroom_log:message(info, "membership: received gossip from ~p", -- cgit v1.2.3 From e68c2a65c2227eef1c07c0abbe097fd5aa40413c Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 14 May 2010 14:59:42 -0400 Subject: removing more of dynomite deps from mem3 --- include/common.hrl | 7 ---- src/mem3.erl | 107 +++++++++++++++++++---------------------------------- test/mem3_test.erl | 15 ++++++-- 3 files changed, 49 insertions(+), 80 deletions(-) diff --git a/include/common.hrl b/include/common.hrl index 59f5b9a1..2299950d 100644 --- a/include/common.hrl +++ b/include/common.hrl @@ -39,10 +39,3 @@ version, fullmap }). - -%% version 3 of membership state --record(mem, {header=3, - nodes=[], - clock=[], - args - }). diff --git a/src/mem3.erl b/src/mem3.erl index 189bcf8e..33257c40 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -20,7 +20,8 @@ %% API -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/3, clock/0, state/0, nodes/0, fullnodes/0, start_gossip/0]). +-export([join/3, clock/0, state/0, states/0, nodes/0, fullnodes/0, + start_gossip/0]). %-export([partitions/0, fullmap/0]). %-export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). %-export([parts_for_node/1]). @@ -32,10 +33,15 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). - %% includes --include("../include/config.hrl"). --include("../include/common.hrl"). +-include_lib("eunit/include/eunit.hrl"). 
+ +%% version 3 of membership state +-record(mem, {header=3, + nodes=[], + clock=[], + args + }). -define(SERVER, membership). -define(STATE_FILE_PREFIX, "membership"). @@ -46,8 +52,7 @@ -type options() :: list(). -type mem_node() :: {join_order(), node(), options()}. -type mem_node_list() :: [mem_node()]. --type config() :: #config{}. --type arg_options() :: {test, boolean()} | {config, config()}. +-type arg_options() :: {test, boolean()}. -type args() :: [] | [arg_options()]. -type mem_state() :: #mem{}. -type test() :: undefined | node(). @@ -94,6 +99,15 @@ state() -> gen_server:call(?SERVER, state). +-spec states() -> {ok, [mem_state()]}. +states() -> + {ok, Nodes} = mem3:nodes(), + case rpc:multicall(Nodes, ?MODULE, state, []) of + {States, []} -> {ok, lists:map(fun({ok,S}) -> S end, States)}; + {Good, Bad} -> {error, {[{good,Good},{bad,Bad}]}} + end. + + -spec start_gossip() -> ok. start_gossip() -> gen_server:call(?SERVER, start_gossip). @@ -104,21 +118,10 @@ reset() -> gen_server:call(?SERVER, reset). -% %% @doc retrieve the primary partition map. This is a list of partitions and -% %% their corresponding primary node, no replication partner nodes. -% partitions() -> -% mochiglobal:get(pmap). - - -% %% @doc retrieve the full partition map, like above, but including replication -% %% partner nodes. List should number 2^Q * N -% fullmap() -> -% lists:keysort(2, mochiglobal:get(fullmap)). - - %% @doc get the list of cluster nodes (according to membership module) %% This may differ from erlang:nodes() %% Guaranteed to be in order of State's node list (1st elem in 3-tuple) +-spec nodes() -> {ok, [node()]}. nodes() -> gen_server:call(?SERVER, nodes). @@ -126,42 +129,11 @@ nodes() -> %% @doc get the list of cluster nodes (according to membership module) %% This may differ from erlang:nodes() %% Guaranteed to be in order of State's node list (1st elem in 3-tuple) +-spec fullnodes() -> {ok, [mem_node()]}. fullnodes() -> gen_server:call(?SERVER, fullnodes). 
-% %% @doc get all the responsible nodes for a given partition, including -% %% replication partner nodes -% nodes_for_part(Part) -> -% nodes_for_part(Part, mochiglobal:get(fullmap)). - - -% nodes_for_part(Part, NodePartList) -> -% Filtered = lists:filter(fun({_N, P}) -> P =:= Part end, NodePartList), -% {Nodes, _Parts} = lists:unzip(Filtered), -% lists:usort(Nodes). - - -% %% @doc return the partitions that reside on a given node -% parts_for_node(Node) -> -% lists:sort(lists:foldl(fun({N,P}, AccIn) -> -% case N of -% Node -> [P | AccIn]; -% _ -> AccIn -% end -% end, [], mochiglobal:get(fullmap))). - - -% %% @doc get all the nodes and partitions in the cluster. Depending on the -% %% AllPartners param, you get only primary nodes or replication partner -% %% nodes, as well. -% %% No nodes/parts currently down are returned. -% all_nodes_parts(false) -> -% mochiglobal:get(pmap); -% all_nodes_parts(true) -> -% mochiglobal:get(fullmap). - - %%==================================================================== %% gen_server callbacks %%==================================================================== @@ -170,9 +142,8 @@ fullnodes() -> -spec init(args()) -> {ok, mem_state()}. init(Args) -> process_flag(trap_exit,true), - Config = get_config(Args), Test = get_test(Args), - OldState = read_latest_state_file(Test, Config), + OldState = read_latest_state_file(Test), showroom_log:message(info, "membership: membership server starting...", []), net_kernel:monitor_nodes(true), State = handle_init(Test, OldState), @@ -280,11 +251,11 @@ code_change(OldVsn, State, _Extra) -> %% @doc if Args has config use it, otherwise call configuration module %% most times Args will have config during testing runs -get_config(Args) -> - case proplists:get_value(config, Args) of - undefined -> configuration:get_config(); - Any -> Any - end. +%get_config(Args) -> +% case proplists:get_value(config, Args) of +% undefined -> configuration:get_config(); +% Any -> Any +% end. 
get_test(Args) -> @@ -361,9 +332,8 @@ int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> install_new_state(#mem{args=Args} = State) -> - Config = get_config(Args), Test = get_test(Args), - save_state_file(Test, State, Config), + save_state_file(Test, State), gossip(Test, State). @@ -430,7 +400,6 @@ gossip(#mem{args=Args} = NewState) -> gossip(undefined, #mem{nodes=StateNodes} = State) -> {_, Nodes, _} = lists:unzip3(StateNodes), TargetNode = next_up_node(Nodes), - ?debugFmt("~nNodes: ~p~nTarget: ~p~n", [Nodes, TargetNode]), showroom_log:message(info, "membership: firing gossip from ~p to ~p", [node(), TargetNode]), case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of @@ -465,8 +434,8 @@ up_nodes() -> %% @doc find the latest state file on disk -find_latest_state_filename(Config) -> - Dir = Config#config.directory, +find_latest_state_filename() -> + Dir = couch_config:get("couchdb", "database_dir"), case file:list_dir(Dir) of {ok, Filenames} -> Timestamps = [list_to_integer(TS) || {?STATE_FILE_PREFIX, TS} <- @@ -485,9 +454,9 @@ find_latest_state_filename(Config) -> %% (Test, Config) -read_latest_state_file(undefined, Config) -> +read_latest_state_file(undefined) -> try - {ok, File} = find_latest_state_filename(Config), + {ok, File} = find_latest_state_filename(), case file:consult(File) of {ok, [#mem{}=State]} -> State; _Else -> @@ -497,15 +466,15 @@ read_latest_state_file(undefined, Config) -> showroom_log:message(info, "membership: ~p", [Error]), nil end; -read_latest_state_file(_, _) -> +read_latest_state_file(_) -> nil. %% @doc save the state file to disk, with current timestamp. %% thx to riak_ring_manager:do_write_ringfile/1 --spec save_state_file(test(), mem_state(), config()) -> ok. -save_state_file(undefined, State, Config) -> - Dir = Config#config.directory, +-spec save_state_file(test(), mem_state()) -> ok. 
+save_state_file(undefined, State) -> + Dir = couch_config:get("couchdb", "database_dir"), {{Year, Month, Day},{Hour, Minute, Second}} = calendar:universal_time(), TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", [Year, Month, Day, Hour, Minute, Second]), @@ -515,7 +484,7 @@ save_state_file(undefined, State, Config) -> io:format(File, "~w.~n", [State]), file:close(File); -save_state_file(_,_,_) -> ok. % don't save if testing +save_state_file(_,_) -> ok. % don't save if testing check_pos(Pos, Node, Nodes) -> diff --git a/test/mem3_test.erl b/test/mem3_test.erl index b8622005..01d80eb0 100644 --- a/test/mem3_test.erl +++ b/test/mem3_test.erl @@ -4,6 +4,13 @@ -include("../include/config.hrl"). -include_lib("eunit/include/eunit.hrl"). +%% version 3 of membership state +-record(mem, {header=3, + nodes=[], + clock=[], + args + }). + -define(TEST_NODE_NAME, a). -define(HINT_C1, 365375409332725729550921208179070754913983135744). -define(HINT_C2, 1096126227998177188652763624537212264741949407232). @@ -41,9 +48,9 @@ all_tests_test_() -> test_setup() -> - Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", - storage_mod="dynomite_couch_storage"}, - {ok, Pid} = mem3:start_link([{test,?TEST_NODE_NAME}, {config, Config}]), + % Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", + % storage_mod="dynomite_couch_storage"}, + {ok, Pid} = mem3:start_link([{test,?TEST_NODE_NAME}]), Pid. @@ -118,7 +125,7 @@ join_with_wrong_order(_Pid) -> % ?assertEqual([], mem3:parts_for_node(d)), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), Res = mem3:join(join, [{3, d, []}], c), - ?assertEqual({error,{position_exists,3,c}}, Res), + ?assertEqual({error, <<"position_exists_3">>}, Res), %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), ok. -- cgit v1.2.3 From 151b4400ed229f2820364cd0cf83a550fd602914 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 19 May 2010 14:55:54 -0400 Subject: remove all vector clocks from #doc, and code to go with it. 
Not sure if things still work, but are being rewritten anyway with mem3. Closes BugzID 9959 --- src/dynomite_couch_api.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dynomite_couch_api.erl b/src/dynomite_couch_api.erl index a5ad53c4..554b84f6 100644 --- a/src/dynomite_couch_api.erl +++ b/src/dynomite_couch_api.erl @@ -47,12 +47,12 @@ get([Part, Db, DocId, Revs, Options]) -> end. -put([Part, Db, Doc = #doc{clock=Clock}, Options]) -> +put([Part, Db, Doc, Options]) -> case showroom_db:open_shard(node(), Part, Db) of {ok, Shard} -> {Status, NewRev} = couch_db:update_doc(Shard, Doc, Options), showroom_db:close_shard(Shard), - {Status, {Clock, [NewRev]}}; + {Status, [NewRev]}; Error -> Error end. -- cgit v1.2.3 From dcb044f31136a4f8f19caab19e003b2e0352bc2f Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 19 May 2010 16:35:37 -0400 Subject: mem3 code for node replacement --- src/mem3.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 33257c40..81a383c2 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -305,9 +305,13 @@ handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> % now use this info to join the ring int_join(ExtNodes, NewState); -handle_join(replace, [_OldNode | _], _PingNode, _State) -> - % TODO implement me - ok; +handle_join(replace, [OldNode | _], PingNode, State) -> + handle_join(replace, {OldNode, []}, PingNode, State); +handle_join(replace, {OldNode, NewOpts}, PingNode, _State) -> + OldState = #mem{nodes=OldNodes} = get_pingnode_state(PingNode), + {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), + NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), + int_join([], OldState#mem{nodes=NewNodes}); handle_join(leave, [_OldNode | _], _PingNode, _State) -> % TODO implement me -- cgit v1.2.3 From c8317d0f15ecc0a19c114128ad5b3cbf2aa8cf95 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Thu, 20 May 2010 11:23:08 
-0400 Subject: add another clause for join-replace --- src/mem3.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mem3.erl b/src/mem3.erl index 81a383c2..3a3df1ed 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -305,6 +305,8 @@ handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> % now use this info to join the ring int_join(ExtNodes, NewState); +handle_join(replace, OldNode, PingNode, State) when is_atom(OldNode) -> + handle_join(replace, {OldNode, []}, PingNode, State); handle_join(replace, [OldNode | _], PingNode, State) -> handle_join(replace, {OldNode, []}, PingNode, State); handle_join(replace, {OldNode, NewOpts}, PingNode, _State) -> -- cgit v1.2.3 From c473a194757173150d945970d84edac0533dfbad Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 24 May 2010 09:03:05 -0400 Subject: some mem3 docs/specs --- src/mem3.erl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 3a3df1ed..e4e51100 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -59,6 +59,7 @@ -type epoch() :: float(). -type clock() :: {node(), epoch()}. -type vector_clock() :: [clock()]. +-type ping_node() :: node() | nil. %%==================================================================== %% API @@ -262,14 +263,12 @@ get_test(Args) -> proplists:get_value(test, Args). -% we could be automatically: -% 1. rejoining a cluster after some downtime -% -% we could be manually: -% 2. beginning a cluster with only this node -% 3. joining a cluster as a new node -% 4. replacing a node in an existing cluster - +%% @doc handle_init starts a node +%% Most of the time, this puts the node in a single-node cluster setup, +%% But, we could be automatically rejoining a cluster after some downtime. +%% See handle_join for initing, joining, leaving a cluster, or replacing a +%% node. 
+%% @end handle_init(Test, nil) -> int_reset(Test); @@ -291,7 +290,10 @@ handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> end. -%% handle join activities, return {ok,NewState} +%% @doc handle join activities, return {ok,NewState} +-spec handle_join(join_type(), [mem_node()], ping_node(), #mem{}) -> + {ok, #mem{}}. + handle_join(init, ExtNodes, nil, State) -> {_,Nodes,_} = lists:unzip3(ExtNodes), ping_all_yall(Nodes), @@ -324,7 +326,7 @@ handle_join(JoinType, _, PingNode, _) -> "for ping node: ~p", [JoinType, PingNode]), {error, unknown_join_type}. - +%% @doc common operations for all join types int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> check_pos(Pos, N, Nodes), -- cgit v1.2.3 From 897425141596a17231010bd1fc812c7245eb2355 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 24 May 2010 14:53:01 -0400 Subject: comments --- src/mem3.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index e4e51100..7d60ecd5 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -291,14 +291,14 @@ handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> %% @doc handle join activities, return {ok,NewState} --spec handle_join(join_type(), [mem_node()], ping_node(), #mem{}) -> - {ok, #mem{}}. - +-spec handle_join(join_type(), [mem_node()], ping_node(), mem_state()) -> + {ok, mem_state()}. 
+% init handle_join(init, ExtNodes, nil, State) -> {_,Nodes,_} = lists:unzip3(ExtNodes), ping_all_yall(Nodes), int_join(ExtNodes, State); - +% join handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> NewState = case get_test(Args) of undefined -> get_pingnode_state(PingNode); @@ -306,7 +306,7 @@ handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> end, % now use this info to join the ring int_join(ExtNodes, NewState); - +% replace handle_join(replace, OldNode, PingNode, State) when is_atom(OldNode) -> handle_join(replace, {OldNode, []}, PingNode, State); handle_join(replace, [OldNode | _], PingNode, State) -> @@ -316,7 +316,7 @@ handle_join(replace, {OldNode, NewOpts}, PingNode, _State) -> {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), int_join([], OldState#mem{nodes=NewNodes}); - +% leave handle_join(leave, [_OldNode | _], _PingNode, _State) -> % TODO implement me ok; -- cgit v1.2.3 From 0bcc808ce4787da27bda33dff3c8e377094b7042 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 24 May 2010 18:23:21 -0400 Subject: http admin call for join 'replace' --- src/mem3.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 7d60ecd5..a834010a 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -85,9 +85,10 @@ stop(Server) -> gen_server:cast(Server, stop). --spec join(join_type(), mem_node_list(), node() | nil) -> ok. -join(JoinType, Nodes, PingNode) -> - gen_server:call(?SERVER, {join, JoinType, Nodes, PingNode}). +-spec join(join_type(), mem_node_list() | {node(), options()}, node() | nil) -> + ok. +join(JoinType, Payload, PingNode) -> + gen_server:call(?SERVER, {join, JoinType, Payload, PingNode}). -spec clock() -> vector_clock(). 
@@ -153,8 +154,6 @@ init(Args) -> %% new node(s) joining to this node handle_call({join, JoinType, ExtNodes, PingNode}, _From, State) -> - % {ok, NewState} = handle_join(JoinType, ExtNodes, PingNode, State), - % {reply, ok, NewState}; try case handle_join(JoinType, ExtNodes, PingNode, State) of {ok, NewState} -> {reply, ok, NewState}; -- cgit v1.2.3 From cb2e6b3b258f1ccda74aa7bc96ec07b1da35bceb Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 25 May 2010 15:21:45 -0400 Subject: all_databases now working with/without Customer param, types moved to mem.hrl, view updater thwarted for updates to dbs db. --- include/membership.hrl | 28 ++++++++++++++++++++++++++++ src/mem3.erl | 22 +--------------------- 2 files changed, 29 insertions(+), 21 deletions(-) create mode 100644 include/membership.hrl diff --git a/include/membership.hrl b/include/membership.hrl new file mode 100644 index 00000000..52bc4d1a --- /dev/null +++ b/include/membership.hrl @@ -0,0 +1,28 @@ + +%% version 3 of membership state +-record(mem, {header=3, + nodes=[], + clock=[], + args + }). + +%% types +-type join_type() :: init | join | replace | leave. +-type join_order() :: non_neg_integer(). +-type options() :: list(). +-type mem_node() :: {join_order(), node(), options()}. +-type mem_node_list() :: [mem_node()]. +-type arg_options() :: {test, boolean()}. +-type args() :: [] | [arg_options()]. +-type mem_state() :: #mem{}. +-type test() :: undefined | node(). +-type epoch() :: float(). +-type clock() :: {node(), epoch()}. +-type vector_clock() :: [clock()]. +-type ping_node() :: node() | nil. + +-type part() :: integer(). +-type ref_node_part() :: {reference(), node(), part()}. +-type tref() :: reference(). +-type np() :: {node(), part()}. +-type np_acc() :: [{np(), any()}]. diff --git a/src/mem3.erl b/src/mem3.erl index a834010a..d5a96605 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -34,32 +34,12 @@ terminate/2, code_change/3]). %% includes +-include("../include/membership.hrl"). 
-include_lib("eunit/include/eunit.hrl"). -%% version 3 of membership state --record(mem, {header=3, - nodes=[], - clock=[], - args - }). - -define(SERVER, membership). -define(STATE_FILE_PREFIX, "membership"). -%% types - stick somewhere in includes? --type join_type() :: init | join | replace | leave. --type join_order() :: non_neg_integer(). --type options() :: list(). --type mem_node() :: {join_order(), node(), options()}. --type mem_node_list() :: [mem_node()]. --type arg_options() :: {test, boolean()}. --type args() :: [] | [arg_options()]. --type mem_state() :: #mem{}. --type test() :: undefined | node(). --type epoch() :: float(). --type clock() :: {node(), epoch()}. --type vector_clock() :: [clock()]. --type ping_node() :: node() | nil. %%==================================================================== %% API -- cgit v1.2.3 From 9c348bf6752ed63f611c06a8442b0f7d4f291b9d Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 26 May 2010 15:18:34 -0400 Subject: change {N,P} fullmap over to #part{} record for future partition-splitting hotness --- include/membership.hrl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/membership.hrl b/include/membership.hrl index 52bc4d1a..45d94f8a 100644 --- a/include/membership.hrl +++ b/include/membership.hrl @@ -6,6 +6,9 @@ args }). +%% partition record +-record(part, {dbname, node, b, e}). + %% types -type join_type() :: init | join | replace | leave. -type join_order() :: non_neg_integer(). @@ -21,8 +24,9 @@ -type vector_clock() :: [clock()]. -type ping_node() :: node() | nil. --type part() :: integer(). --type ref_node_part() :: {reference(), node(), part()}. +-type part() :: #part{}. +-type fullmap() :: [part()]. +-type ref_part_map() :: {reference(), part()}. -type tref() :: reference(). -type np() :: {node(), part()}. --type np_acc() :: [{np(), any()}]. +-type beg_acc() :: [integer()]. 
-- cgit v1.2.3 From 63c0e5eac30acbb6ed8926dc5dfbf5157f416369 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 27 May 2010 10:52:51 -0400 Subject: replace #part with #shard --- include/membership.hrl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/membership.hrl b/include/membership.hrl index 45d94f8a..f42350f5 100644 --- a/include/membership.hrl +++ b/include/membership.hrl @@ -7,7 +7,7 @@ }). %% partition record --record(part, {dbname, node, b, e}). +-record(shard, {name, node, dbname, range}). %% types -type join_type() :: init | join | replace | leave. @@ -24,7 +24,7 @@ -type vector_clock() :: [clock()]. -type ping_node() :: node() | nil. --type part() :: #part{}. +-type part() :: #shard{}. -type fullmap() :: [part()]. -type ref_part_map() :: {reference(), part()}. -type tref() :: reference(). -- cgit v1.2.3 From cdec6e0fb82f862f7c7d0c712535a712cdc683d5 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Thu, 27 May 2010 11:41:03 -0400 Subject: allow for single-node cluster 'init' --- src/mem3.erl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index d5a96605..11c39ef7 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -386,13 +386,17 @@ gossip(#mem{args=Args} = NewState) -> -spec gossip(test(), mem_state()) -> mem_state(). 
gossip(undefined, #mem{nodes=StateNodes} = State) -> {_, Nodes, _} = lists:unzip3(StateNodes), - TargetNode = next_up_node(Nodes), - showroom_log:message(info, "membership: firing gossip from ~p to ~p", - [node(), TargetNode]), - case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of - ok -> State; - {new_state, NewState} -> NewState; - Error -> throw({unknown_gossip_response, Error}) + case next_up_node(Nodes) of + no_gossip_targets_available -> + State; % skip gossip, I'm the only node + TargetNode -> + showroom_log:message(info, "membership: firing gossip from ~p to ~p", + [node(), TargetNode]), + case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of + ok -> State; + {new_state, NewState} -> NewState; + Error -> throw({unknown_gossip_response, Error}) + end end; gossip(_,_) -> @@ -411,7 +415,7 @@ next_up_node(Node, Nodes, UpNodes) -> DownNodes = Nodes -- UpNodes, case List -- DownNodes of [Target|_] -> Target; - [] -> throw({error, no_gossip_targets_available}) + [] -> no_gossip_targets_available end. -- cgit v1.2.3 From 819db8cbda15dc8f78831a8705b3a2ad468343ef Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 27 May 2010 12:46:15 -0400 Subject: open_doc call in fabric --- include/membership.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/membership.hrl b/include/membership.hrl index f42350f5..98b47e2c 100644 --- a/include/membership.hrl +++ b/include/membership.hrl @@ -7,7 +7,7 @@ }). %% partition record --record(shard, {name, node, dbname, range}). +-record(shard, {name, node, dbname, range, ref}). %% types -type join_type() :: init | join | replace | leave. 
-- cgit v1.2.3 From 4c6b7c7c12ba03e5b50d7379ab14cb0ba0037965 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 28 May 2010 15:52:42 -0400 Subject: remove dynomite cruft --- include/chunk_size.hrl | 1 - include/common.hrl | 41 --- include/config.hrl | 24 -- include/dmerkle.hrl | 14 - include/test.hrl | 13 - src/bootstrap_manager.erl | 261 ---------------- src/bootstrap_receiver.erl | 121 -------- src/cluster_ops.erl | 264 ---------------- src/configuration.erl | 100 ------ src/dynomite_couch_api.erl | 140 --------- src/dynomite_couch_storage.erl | 41 --- src/lib_misc.erl | 235 -------------- src/mem_utils.erl | 129 -------- src/membership2.erl | 686 ----------------------------------------- src/node.erl | 39 --- src/replication.erl | 165 ---------- 16 files changed, 2274 deletions(-) delete mode 100644 include/chunk_size.hrl delete mode 100644 include/common.hrl delete mode 100644 include/config.hrl delete mode 100644 include/dmerkle.hrl delete mode 100644 include/test.hrl delete mode 100644 src/bootstrap_manager.erl delete mode 100644 src/bootstrap_receiver.erl delete mode 100644 src/cluster_ops.erl delete mode 100644 src/configuration.erl delete mode 100644 src/dynomite_couch_api.erl delete mode 100644 src/dynomite_couch_storage.erl delete mode 100644 src/lib_misc.erl delete mode 100644 src/mem_utils.erl delete mode 100644 src/membership2.erl delete mode 100644 src/node.erl delete mode 100644 src/replication.erl diff --git a/include/chunk_size.hrl b/include/chunk_size.hrl deleted file mode 100644 index f9906b5f..00000000 --- a/include/chunk_size.hrl +++ /dev/null @@ -1 +0,0 @@ --define(CHUNK_SIZE, 5120). diff --git a/include/common.hrl b/include/common.hrl deleted file mode 100644 index 2299950d..00000000 --- a/include/common.hrl +++ /dev/null @@ -1,41 +0,0 @@ - --include_lib("eunit/include/eunit.hrl"). - --define(fmt(Msg, Args), lists:flatten(io_lib:format(Msg, Args))). --define(infoFmt(Msg, Args), error_logger:info_msg(Msg, Args)). 
--define(infoMsg(Msg), error_logger:info_msg(Msg)). - - -%% from couch_db.hrl --ifndef(LOG_DEBUG). --define(LOG_DEBUG(Format, Args), - showroom_log:message(debug, Format, Args)). --endif. - --ifndef(LOG_INFO). --define(LOG_INFO(Format, Args), - showroom_log:message(info, Format, Args)). --endif. - --ifndef(LOG_ERROR). --define(LOG_ERROR(Format, Args), - showroom_log:message(error, Format, Args)). --endif. - -%% -define(PMAP(F,L), lists:map(F,L)). --define(PMAP(F,L), showroom_utils:pmap(F,L)). - - -%% -%% membership2 (in here for separate testing module) -%% - --define(VERSION,2). - --record(membership, {header=?VERSION, - node, - nodes, - partitions, - version, - fullmap - }). diff --git a/include/config.hrl b/include/config.hrl deleted file mode 100644 index 20983d26..00000000 --- a/include/config.hrl +++ /dev/null @@ -1,24 +0,0 @@ - --ifndef(CONFIG_HRL). --define(CONFIG_HRL, true). -%we don't want to turn protocol buffers on by default, since the library is not included -%it should be very easy for new users to start up an instance --record(config, {n=3, - r=1, - w=1, - q=6, - directory, - web_port, - text_port=11222, - storage_mod=dets_storage, - blocksize=4096, - thrift_port=9200, - pb_port=undefined, - buffered_writes=undefined, - cache=undefined, - cache_size=1048576, - hash_module=partitions, - meta=[] - }). - --endif. diff --git a/include/dmerkle.hrl b/include/dmerkle.hrl deleted file mode 100644 index b4fe2a08..00000000 --- a/include/dmerkle.hrl +++ /dev/null @@ -1,14 +0,0 @@ --define(DMERKLE_VERSION, 2). --define(STATIC_HEADER, 93). - --define(d_from_blocksize(BlockSize), trunc((BlockSize - 17)/16)). --define(pointers_from_blocksize(BlockSize), (lib_misc:ceiling(math:log(BlockSize)/math:log(2)) - 3)). --define(pointer_for_size(Size, BlockSize), (if Size =< 16 -> 1; Size =< BlockSize -> ?pointers_from_blocksize(Size); true -> last end)). --define(size_for_pointer(N), (2 bsl (N+2))). 
--define(headersize_from_blocksize(BlockSize), (?STATIC_HEADER + ?pointers_from_blocksize(BlockSize) * 8)). --define(aligned(Ptr, HeaderSize, BlockSize), (((Ptr - (HeaderSize)) rem BlockSize) == 0)). --define(block(Ptr, HeaderSize, BlockSize), ((Ptr - (HeaderSize)) div BlockSize)). - --record(node, {m=0, keys=[], children=[], offset=eof}). --record(leaf, {m=0, values=[], offset=eof}). --record(free, {offset,size=0,pointer=0}). diff --git a/include/test.hrl b/include/test.hrl deleted file mode 100644 index 38fb850f..00000000 --- a/include/test.hrl +++ /dev/null @@ -1,13 +0,0 @@ --define(TMP_DIR, "../../../tmp/lib"). - --define(TMP_FILE, fun(File) -> - filename:join(?TMP_DIR, File) - end). - -%% priv_dir() -> -%% Dir = filename:join([t:config(priv_dir), "data", atom_to_list(?MODULE), pid_to_list(self())]), -%% filelib:ensure_dir(filename:join([Dir, atom_to_list(?MODULE)])), -%% Dir. - -%% priv_file(File) -> -%% filename:join(priv_dir(), File). diff --git a/src/bootstrap_manager.erl b/src/bootstrap_manager.erl deleted file mode 100644 index f1303223..00000000 --- a/src/bootstrap_manager.erl +++ /dev/null @@ -1,261 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: bootstrap_manager.erl -%%% @author Cliff Moon <> [] -%%% @copyright 2009 Cliff Moon -%%% @doc This is the bootstrap manager for a cluster. -%%% -%%% @end -%%% -%%% @since 2009-07-29 by Cliff Moon -%%%------------------------------------------------------------------- --module(bootstrap_manager). --author('cliff@powerset.com'). --author('brad@cloudant.com'). - --behaviour(gen_server). - -%% API --export([start_bootstrap/3, end_bootstrap/1, - start_link/3, start/3, stop/0, - start_transfers/0, transfers/0]). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --record(state, {transfer_list, nodes, transfers, futurefullmap}). --record(transfer, {partition, receivers, rate=0, status=starting}). 
- --include("../include/config.hrl"). --include("../include/common.hrl"). - -%%==================================================================== -%% API -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec start_link() -> {ok,Pid} | ignore | {error,Error} -%% @doc Starts the server -%% @end -%%-------------------------------------------------------------------- -start_bootstrap(State=#membership{node=Node, nodes=Nodes}, - OldFullMap, NewFullMap) -> - case partitions:diff(OldFullMap, NewFullMap) of - [] -> - % no difference in pmaps - {NewFullMap, State#membership{fullmap=NewFullMap}}; - TransferList when is_list(TransferList) -> - ?LOG_DEBUG("~nBootstrap~nNode : ~p~nTransferList :~n~p~n", - [Node, partitions:pp_diff(TransferList)]), - case start_link(TransferList, Nodes, NewFullMap) of - {ok, _Pid} -> - start_transfers(); - Other -> throw(Other) - end, - - % bootstrap has some stuff to do (async), so just give the state - % passed in for now. end_bootstrap will be called with the resulting - % state when it completes - {OldFullMap, State}; - Other -> - % probably occurs b/c T (# of nodes) < N currently. - % more nodes joining should help avoid this error. - ?LOG_ERROR("no_bootstrap - Other: ~p", [Other]), - {NewFullMap, State#membership{fullmap=NewFullMap}} - end. - - -end_bootstrap(#state{futurefullmap=FutureFullMap}) -> - end_bootstrap(FutureFullMap); - -end_bootstrap(NewFullMap) -> - gen_server:call(membership, {newfullmap, NewFullMap}), - stop(). - - -start(TransferList, Nodes, FutureFullMap) -> - gen_server:start({global, bootstrap_manager}, ?MODULE, - [TransferList, Nodes, FutureFullMap], []). - - -start_link(TransferList, Nodes, FutureFullMap) -> - gen_server:start_link({global, bootstrap_manager}, ?MODULE, - [TransferList, Nodes, FutureFullMap], []). - - -stop() -> - gen_server:cast({global, bootstrap_manager}, stop). 
- - -start_transfers() -> - gen_server:cast({global, bootstrap_manager}, start_transfers). - - -transfers() -> - gen_server:call({global, bootstrap_manager}, transfers). - - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% @doc Initiates the server -%% @end -%%-------------------------------------------------------------------- -init([TransferList, Nodes, FutureFullMap]) -> - process_flag(trap_exit, true), - {ok, #state{transfer_list=TransferList,nodes=Nodes, - futurefullmap=FutureFullMap}}. - - -%%-------------------------------------------------------------------- -%% @spec -%% handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% @doc Handling call messages -%% @end -%%-------------------------------------------------------------------- -handle_call(average_transfer_rate, _From, - State=#state{transfers=Transfers}) -> - {Sum, Cardinality} = ets:foldl( - fun(#transfer{rate=Rate}, {Sum, Cardinality}) -> - {Sum+Rate,Cardinality+1} - end, {0, 0}, Transfers), - AverageRate = Sum / Cardinality, - {reply, AverageRate, State}; - -handle_call(aggregate_transfer_rate, _From, - State=#state{transfers=Transfers}) -> - Sum = ets:foldl(fun(#transfer{rate=Rate}, Sum) -> - Rate + Sum - end, 0, Transfers), - {reply, Sum, State}; - -handle_call(transfers, _From, - State=#state{transfers=Transfers}) -> - {reply, {ok, ets:tab2list(Transfers)}, State}; - -%% at least reply that this 'catch-all' was ignored -handle_call(_Request, _From, State) -> - {reply, ignored, State}. 
- - -%%-------------------------------------------------------------------- -%% @spec handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling cast messages -%% @end -%%-------------------------------------------------------------------- -handle_cast(stop, State) -> - {stop, normal, State}; - -handle_cast(start_transfers, - State=#state{transfer_list=TransferList}) -> - Transfers = start_transfers(TransferList, State), - {noreply, State#state{transfers=Transfers}}; - -handle_cast(_Msg, State) -> - {noreply, State}. - - -%%-------------------------------------------------------------------- -%% @spec handle_info(Info, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling all non call/cast messages -%% @end -%%-------------------------------------------------------------------- - -handle_info({receiver_done, FromNode, _ToNode, Partition, DbName, Receiver}, - State = #state{transfers=Transfers}) -> - %% TODO use bring_online & ToNode? instead of waiting until end & installing - %% NewFullMap into mem2 - - %% handle the old file - membership2:decommission_part(FromNode, Partition, DbName), - - %% remove from Transfers table - case ets:lookup(Transfers, Partition) of - [Transfer] = [#transfer{receivers=Receivers}] -> - NewReceivers = lists:delete(Receiver, Receivers), - if - length(NewReceivers) == 0 -> ets:delete(Transfers, Partition); - true -> ets:insert(Transfers, Transfer#transfer{receivers=NewReceivers}) - end; - _ -> ok - end, - case ets:first(Transfers) of - '$end_of_table' -> - end_bootstrap(State), - {noreply, State}; - _ -> {noreply, State} - end; - -handle_info(_Info, State) -> - {noreply, State}. - - -%%-------------------------------------------------------------------- -%% @spec terminate(Reason, State) -> void() -%% @doc This function is called by a gen_server when it is about to -%% terminate. 
It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -terminate(_Reason, _State) -> - ok. - - -%%-------------------------------------------------------------------- -%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} -%% @doc Convert process state when code is changed -%% @end -%%-------------------------------------------------------------------- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- -start_transfers([], State) -> - no_transfers, % no diff in pmaps, so no transfers - end_bootstrap(State); - -start_transfers(Diff, State=#state{nodes=Nodes}) -> - case showroom_db:all_databases("") of - {ok, AllDbs} when length(AllDbs) > 0 -> - start_transfers(Diff, Nodes, configuration:get_config(), AllDbs, - ets:new(transfers, [public, set, {keypos, 2}])); - {ok, []} -> end_bootstrap(State); % no databases, so bootstrap not needed - Other -> throw(Other) % problem getting list of dbs - end. - - -start_transfers([], _, _, _, Transfers) -> - Transfers; - -start_transfers([{FromNode, ToNode, Partition} | Diff], Nodes, Config, - AllDbs, Transfers) -> - membership2:take_offline(FromNode, Partition), - Receivers = lists:map( - fun(DbName) -> - {ok, Receiver} = - bootstrap_receiver:start_link(FromNode, ToNode, Partition, - DbName, 10000, self()), - Receiver - end, AllDbs), - % NOTE: by using AllDbs, we are omitting .deleted.couch files - ets:insert(Transfers, #transfer{partition=Partition, - receivers=Receivers}), - start_transfers(Diff, Nodes, Config, AllDbs, Transfers). 
diff --git a/src/bootstrap_receiver.erl b/src/bootstrap_receiver.erl deleted file mode 100644 index 3b4907cb..00000000 --- a/src/bootstrap_receiver.erl +++ /dev/null @@ -1,121 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: bootstrap_receiver.erl -%%% @author Brad Anderson -%%% @copyright 2009 Brad Anderson -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-09-22 by Brad Anderson -%%%------------------------------------------------------------------- --module(bootstrap_receiver). --author('brad@cloudant.com'). - --include("../include/config.hrl"). --include("../include/common.hrl"). - -%% API --export([start_link/6, loop/6, fetch_shard/5]). - - -%%==================================================================== -%% API -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec -%% @doc -%% @end -%%-------------------------------------------------------------------- -start_link(FromNode, ToNode, Partition, DbName, Timeout, Manager) -> - Pid = proc_lib:spawn_link(ToNode, bootstrap_receiver, loop, - [FromNode, Partition, DbName, Timeout, Manager, - self()]), - sync_wait(Pid, Timeout). - - -loop(FromNode, Partition, DbName, Timeout, Manager, Parent) -> - proc_lib:init_ack(Parent, {ok, self()}), - fetch_shard(FromNode, Partition, DbName, Timeout, Manager). 
- - -%% @doc run at "ToNode" via spawn_link -fetch_shard(FromNode, Partition, DbName, Timeout, Manager) -> - Directory = couch_config:get("couchdb", "database_dir"), - [_NodeName, Hostname] = string:tokens(atom_to_list(FromNode), "@"), - SrcFile = binary_to_list(partitions:shard_name(Partition, DbName)), - DestFile = showroom_utils:full_filename(Partition, DbName, Directory), - Authn = fetch_authn(), - Port = fetch_port(), - Url = lists:concat(["http://", Authn, Hostname, Port, "/", SrcFile, - ".couch"]), - Options = [{save_response_to_file, DestFile}, - {inactivity_timeout, Timeout}], - case filelib:ensure_dir(DestFile) of - ok -> ok; - {error, eexist} -> ok; % duh! - Other -> throw(Other) - end, - ?LOG_DEBUG("~n" - "Directory: ~p~n" - "Hostname : ~p~n" - "SrcFile : ~p~n" - "DestFile : ~p~n" - "Url : ~p~n" - "Options : ~p~n" - , [Directory, Hostname, SrcFile, DestFile, Url, Options]), - case ibrowse:send_req(Url, [], get, [], Options, infinity) of - {ok, "200", _Headers, Body} -> - ?LOG_DEBUG("~nBootstrap ibrowse req Body: ~p~n", [Body]), - Manager ! {receiver_done, FromNode, node(), Partition, DbName, - self()}; - Error -> - ?LOG_ERROR("~nBootstrap ibrowse req Error: ~p~n", [Error]), - throw(Error) - end. - - -%%==================================================================== -%% Internal functions -%%==================================================================== - - -%% from proc_lib.erl in otp r13b01 -sync_wait(Pid, Timeout) -> - receive - {ack, Pid, Return} -> - Return; - {'EXIT', Pid, Reason} -> - {error, Reason} - after Timeout -> - unlink(Pid), - exit(Pid, kill), - flush(Pid), - {error, timeout} - end. - - -flush(Pid) -> - receive - {'EXIT', Pid, _} -> - true - after 0 -> - true - end. - - -fetch_authn() -> - User = couch_config:get("shard_moving", "user", ""), - Pass = couch_config:get("shard_moving", "pass", ""), - if - length(User) > 0 andalso length(Pass) > 0 -> - lists:concat([User, ":", Pass, "@"]); - true -> "" - end. 
- - -fetch_port() -> - Port = couch_config:get("shard_moving", "port", "8080"), - if - Port =:= "80" -> ""; - true -> lists:concat([":", Port]) - end. diff --git a/src/cluster_ops.erl b/src/cluster_ops.erl deleted file mode 100644 index 72bba92f..00000000 --- a/src/cluster_ops.erl +++ /dev/null @@ -1,264 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: cluster_ops.erl -%%% @author Brad Anderson [http://cloudant.com] -%%% @copyright 2009 Brad Anderson -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-07-21 by Brad Anderson -%%%------------------------------------------------------------------- --module(cluster_ops). --author('brad@cloudant.com'). - -%% API --export([key_lookup/3, key_lookup/5, - all_parts/4, - some_parts/4, some_parts/5, - quorum_from_each_part/3]). - --include("../include/common.hrl"). --include("../include/config.hrl"). - --include("../include/profile.hrl"). - - -%%==================================================================== -%% API -%%==================================================================== - -%% @doc Get to the proper shard on N nodes by key lookup -%% -%% This fun uses quorum constants from config -key_lookup(Key, {M,F,A}, Access) -> - N = list_to_integer(couch_config:get("cluster", "n", "3")), - key_lookup(Key, {M,F,A}, Access, get_const(Access), N). 
- - -%% @doc Get to the proper shard on N nodes by key lookup -%% -%% This fun uses a provided quorum constant, possibly from request, -%% possibly from config -key_lookup(Key, {M,F,A}, Access, Const, N) -> - NodeParts = membership2:nodeparts_for_key(Key), - {ResolveFun, NotFoundFun} = case Access of - r -> {fun resolve_read/1, fun resolve_not_found/2}; - w -> {fun resolve_write/1, fun(_,_) -> {false, notused, []} end} - end, - MapFun = fun({Node,Part}) -> - try - rpc:call(Node, M, F, [[Part | A]]) - catch Class:Exception -> - {error, Class, Exception} - end - end, - {GoodReplies, Bad} = pcall(MapFun, NodeParts, N), - if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, - Good = lists:map(fun strip_ok/1, GoodReplies), - final_key_lookup(Good, Bad, N, Const, ResolveFun, NotFoundFun, Access). - - -%% @doc Do op on all shards (and maybe even replication partners) -all_parts({M,F,A}, Access, AndPartners, ResolveFun) -> - NodePartList = membership2:all_nodes_parts(AndPartners), - MapFun = fun({Node, Part}) -> - try - rpc:call(Node, M, F, [[Part | A]]) - catch Class:Exception -> - {error, Class, Exception} - end - end, - Replies = ?PMAP(MapFun, NodePartList), - {Good, Bad} = lists:partition(fun valid/1, Replies), - final_all_parts(Good, Bad, length(NodePartList), ResolveFun, Access). - - -%% @doc Do op on some shards, depending on list of keys sent in. -%% -%% This fun uses quorum constants from config -some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access) -> - some_parts(KeyFun, SeqsKVPairs, {M,F,A}, Access, get_const(Access)). - - -%% @doc Do op on some shards, depending on list of keys sent in. 
-%% -%% This fun uses a provided quorum constant, possibly from request, -%% possibly from config -some_parts(KeyFun, SeqsKVPairs, {M,F,A}, _Access, Const) -> - TaskFun = fun({{Node,Part}, Values}) -> - try - rpc:call(Node, M, F, [[Part | [Values | A]]]) - catch Class:Exception -> - {error, Class, Exception} - end - end, - - % get tasks per node that are part / values for that partition - DistTasks = get_dist_tasks(KeyFun, SeqsKVPairs), - - % With the distributed tasklist in hand, do the tasks per partition. - % For each partition, do the work on all nodes/parts. - TaskReplies = ?PMAP(TaskFun, DistTasks), - {GoodReplies, Bad} = lists:partition(fun valid/1, TaskReplies), - if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, - Good = lists:map(fun strip_ok/1, GoodReplies), - final_some_parts(Good, Bad, Const). - - -quorum_from_each_part({M,F,A}, Access, ResolveFun) -> - Const = get_const(Access), - {_, Parts} = lists:unzip(membership2:partitions()), - PartsMapFun = fun(Part) -> - Nodes = membership2:nodes_for_part(Part), - NodesMapFun = fun(Node) -> rpc:call(Node, M, F, [[Part | A]]) end, - {GoodReplies,BadReplies} = pcall(NodesMapFun, Nodes, Const), - Good1 = lists:map(fun strip_ok/1, GoodReplies), - Bad1 = case length(Good1) >= Const of - true -> []; - false -> BadReplies - end, - {Good1,Bad1} - end, - Results1 = ?PMAP(PartsMapFun, Parts), - {Good,Bad} = lists:foldl(fun({G,B}, {GAcc,BAcc}) -> - {lists:append(G,GAcc),lists:append(B,BAcc)} - end, {[],[]}, Results1), - if length(Bad) > 0 -> ?LOG_DEBUG("~nBad: ~p~n", [Bad]); true -> ok end, - final_quorum_from_each_part(Good, Bad, length(Parts), ResolveFun, Access). 
- - -%%-------------------------------------------------------------------- -%% Internal functions -%%-------------------------------------------------------------------- - -final_key_lookup(Good, Bad, N, Const, ResolveFun, NotFoundFun, Access) -> - {NotFound, Return, Reasons} = NotFoundFun(Bad, Const), - if - length(Good) >= Const -> {ok, ResolveFun(Good)}; - NotFound -> {ok, Return, Reasons}; - true -> error_message(Good, Bad, N, Const, Access) - end. - - -final_all_parts(Good, Bad, Total, ResolveFun, Access) -> - case length(Good) =:= Total of - true -> {ok, ResolveFun(Good)}; - _ -> error_message(Good, Bad, Total, Total, Access) - end. - - -final_some_parts(Good, _Bad, Const) -> - Good1 = lists:flatten(Good), - {Seqs, _} = lists:unzip(Good1), - {ResG,ResB} = - lists:foldl( - fun(Seq, {AccG,AccB}) -> - Vals = proplists:get_all_values(Seq, Good1), - case length(Vals) >= Const of - true -> {[{Seq, Vals}|AccG],AccB}; - _ -> {AccG, [{Seq, Vals}|AccB]} - end - end, {[],[]}, lists:usort(Seqs)), - case length(ResB) of - 0 -> {ok, ResG}; - _ -> {error, ResB} - end. - - -final_quorum_from_each_part(Good, Bad, Total, ResolveFun, Access) -> - case length(Good) =:= Total of - true -> {ok, ResolveFun(Good)}; - _ -> error_message(Good, Bad, Total, Total, Access) - end. - - -resolve_read([First|Responses]) -> - case First of - not_found -> not_found; - _ -> lists:foldr(fun vector_clock:resolve/2, First, Responses) - end. - - -resolve_write([First|Responses]) -> - case First of - not_found -> not_found; - _ -> lists:foldr(fun vector_clock:resolve/2, First, Responses) - end. 
- - -resolve_not_found(Bad, R) -> - {NotFoundCnt, DeletedCnt, OtherReasons} = - lists:foldl(fun({Error,Reason}, {NotFoundAcc, DeletedAcc, ReasonAcc}) -> - case {Error,Reason} of - {not_found, {_Clock, [missing|_Rest]}} -> - {NotFoundAcc+1, DeletedAcc, ReasonAcc}; - {not_found, {_Clock, [deleted|_Rest]}} -> - {NotFoundAcc, DeletedAcc+1, ReasonAcc}; - _ -> - {NotFoundAcc, DeletedAcc, [Reason|ReasonAcc]} - end - end, {0, 0, []}, Bad), - % TODO: is the comparison to R good here, or should it be N-R? - if - NotFoundCnt >= R -> {true, {not_found, missing}, OtherReasons}; - DeletedCnt >= R -> {true, {not_found, deleted}, OtherReasons}; - true -> {false, other, OtherReasons} - end. - - -error_message(Good, Bad, N, T, Access) -> - Msg = list_to_atom(lists:concat([atom_to_list(Access), "_quorum_not_met"])), - ?LOG_ERROR("~p~nSuccess on ~p of ~p servers. Needed ~p. Errors: ~w" - , [Msg, length(Good), N, T, Bad]), - [{error, Msg}, {good, Good}, {bad, Bad}]. - - -pcall(MapFun, Servers, Const) -> - Replies = lib_misc:pmap(MapFun, Servers, Const), - lists:partition(fun valid/1, Replies). - - -valid({ok, _}) -> true; -valid(ok) -> true; -valid(_) -> false. - - -strip_ok({ok, Val}) -> Val; -strip_ok(Val) -> Val. - - -%% @spec get_dist_tasks(KeyFun::function(), KVPairs::list()) -> -%% [{{Node::node(), Part::integer()}, SeqVals}] -%% Type - ordered | ?? -%% SeqVals - [{Seq, Val}] -%% @doc builds a distributed task list of nodes with a list of shard/values. -%% This looks like a dict structure -%% but is a list so we can use ?PMAP with the results -%% @end -get_dist_tasks(KeyFun, SeqsKVPairs) -> - NPSV = lists:flatmap(fun({_,KVPair} = Elem) -> - [{NP, Elem} || NP <- membership2:nodeparts_for_key(KeyFun(KVPair))] - end, SeqsKVPairs), - group_by_key(NPSV). 
- -group_by_key([]) -> - []; -group_by_key(List) -> - [{FirstK,FirstV} | Rest] = lists:keysort(1,List), - Acc0 = {FirstK, [FirstV], []}, - FoldFun = fun({K,V}, {K,Vs,Acc}) -> - {K, [V|Vs], Acc}; - ({NewKey,V}, {OldKey,Vs,Acc}) -> - {NewKey, [V], [{OldKey,Vs}|Acc]} - end, - {LastK, LastVs, Acc} = lists:foldl(FoldFun, Acc0, Rest), - [{LastK, LastVs} | Acc]. - -get_const(r) -> - list_to_integer(couch_config:get("cluster", "r", "2")); -get_const(w) -> - list_to_integer(couch_config:get("cluster", "w", "2")); -get_const(r1) -> - 1; -get_const(Other) -> - throw({bad_access_term, Other}). diff --git a/src/configuration.erl b/src/configuration.erl deleted file mode 100644 index db44e83c..00000000 --- a/src/configuration.erl +++ /dev/null @@ -1,100 +0,0 @@ -%%% -*- erlang-indent-level:2 -*- -%%%------------------------------------------------------------------- -%%% File: configuration.erl -%%% @author Cliff Moon -%%% @author Brad Anderson -%%% @copyright 2008 Cliff Moon -%%% @doc -%%% This module keeps Dynomite source relatively unchanged, but -%%% reads from couchdb config stuffs -%%% @end -%%% -%%% @since 2008-07-18 by Cliff Moon -%%%------------------------------------------------------------------- --module(configuration). --author('cliff@powerset.com'). --author('brad@cloudant.com'). - -%%-behaviour(gen_server). - -%% API --export([start_link/1, get_config/1, get_config/0, set_config/1, stop/0]). - --include_lib("eunit/include/eunit.hrl"). - --include("../include/config.hrl"). --include("../include/common.hrl"). - --define(SERVER, couch_config). --define(i2l(V), integer_to_list(V)). --define(l2i(V), list_to_integer(V)). - - -%% ----------------------------------------------------------------- -%% API -%% ----------------------------------------------------------------- - -%% @doc starts couch_config gen_server if it's not already started -start_link(DynomiteConfig) -> - couch_config_event:start_link(), - couch_config:start_link([]), - set_config(DynomiteConfig). 
- - -%% @doc get the config for a remote node -get_config(Node) -> - ClusterConfig = rpc:call(Node, couch_config, get, ["cluster"]), - Directory = rpc:call(Node, couch_config, get, ["couchdb", "database_dir"]), - couch2dynomite_config(ClusterConfig, Directory). - - -%% @doc get the config for the local node -get_config() -> - get_config(node()). - - -%% @doc given a Dynomite config record, put the values into the Couch config -set_config(DynomiteConfig) -> - dynomite2couch_config(DynomiteConfig). - - -%% @doc stop the config server (nothing to do until after couch_config refactor) -stop() -> - couch_config:stop(). - - -%% ----------------------------------------------------------------- -%% Internal functions -%% ----------------------------------------------------------------- - -%% @doc turn a couch config proplist into a dynomite configuration record -couch2dynomite_config(ClusterConfig, Directory) -> - Q = ?l2i(couch_util:get_value("q", ClusterConfig, "3")), - R = ?l2i(couch_util:get_value("r", ClusterConfig, "2")), - W = ?l2i(couch_util:get_value("w", ClusterConfig, "1")), - N = ?l2i(couch_util:get_value("n", ClusterConfig, "4")), - %% use couch's database_dir here, to avoid /tmp/data not existing - Webport = ?l2i(couch_util:get_value("webport", ClusterConfig, "8080")), - Meta = couch_util:get_value("meta", ClusterConfig, []), - StorageMod = couch_util:get_value("storage_mod", ClusterConfig, []), - #config{q=Q, r=R, w=W, n=N, directory=Directory, web_port=Webport, - meta=Meta, storage_mod=StorageMod}. 
- - -%% @doc workhorse for set_config/1 above -dynomite2couch_config(DynomiteConfig) -> - couch_config:set("cluster", "q", ?i2l(DynomiteConfig#config.q), false), - couch_config:set("cluster", "r", ?i2l(DynomiteConfig#config.r), false), - couch_config:set("cluster", "w", ?i2l(DynomiteConfig#config.w), false), - couch_config:set("cluster", "n", ?i2l(DynomiteConfig#config.n), false), - couch_config:set("couchdb", "database_dir", DynomiteConfig#config.directory, - false), - couch_config:set("cluster", "webport", - case DynomiteConfig#config.web_port of - undefined -> "8080"; - _ -> ?i2l(DynomiteConfig#config.web_port) - end, false), - couch_config:set("cluster", "meta", DynomiteConfig#config.meta, false), - couch_config:set("cluster", "storage_mod", - DynomiteConfig#config.storage_mod, false), - ok. diff --git a/src/dynomite_couch_api.erl b/src/dynomite_couch_api.erl deleted file mode 100644 index 554b84f6..00000000 --- a/src/dynomite_couch_api.erl +++ /dev/null @@ -1,140 +0,0 @@ -%% This is a Dynomite plugin for calling the CouchDB raw Erlang API -%% -%% Most calls will have come from any of the web endpoints to execute -%% these functions on the proper node for the key(s). - --module(dynomite_couch_api). --author('brad@cloudant.com'). - --export([create_db/1, delete_db/1, get/1, put/1, - bulk_docs/1, missing_revs/1, get_db_info/1, get_view_group_info/1, - ensure_full_commit/1 - ]). - --include("../../couch/src/couch_db.hrl"). --include("../include/common.hrl"). - - -%%-------------------------------------------------------------------- -%% @spec create_db([Part, DbName, Options]) -> {ok,Db} | {error,Error} -%% Description: Creates the database shard. -%%-------------------------------------------------------------------- -create_db([Part, DbName, Options]) -> - case couch_server:create(partitions:shard_name(Part, DbName), Options) of - {ok, Shard} -> - couch_db:close(Shard), - ok; - Error -> Error - end. 
- - -%%-------------------------------------------------------------------- -%% @spec delete_db([Part, DbName, Options]) -> {ok,deleted} | {error,Error} -%% Description: Deletes the database shard. -%%-------------------------------------------------------------------- -delete_db([Part, DbName, Options]) -> - couch_server:delete(partitions:shard_name(Part, DbName), Options). - - -get([Part, Db, DocId, Revs, Options]) -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {Status, Doc} = couch_api:open_doc(Shard, DocId, Revs, Options), - showroom_db:close_shard(Shard), - {Status, {[], [Doc]}}; - Error -> - Error - end. - - -put([Part, Db, Doc, Options]) -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {Status, NewRev} = couch_db:update_doc(Shard, Doc, Options), - showroom_db:close_shard(Shard), - {Status, [NewRev]}; - Error -> - Error - end. - - -bulk_docs([Part, SeqsDocs, Db, Options, Type]) -> - {Seqs, Docs} = lists:unzip(SeqsDocs), - case Docs of - [] -> {ok, []}; - _ -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {ok, Results1} = couch_db:update_docs(Shard, Docs, Options, Type), - showroom_db:close_shard(Shard), - Results = int_zip(Seqs, Results1), - {ok, Results}; - Error -> - Error - end - end. - - -missing_revs([Part, SeqsIdsRevs, Db]) -> - {_Seqs, IdsRevs} = lists:unzip(SeqsIdsRevs), - case IdsRevs of - [] -> {ok, []}; - _ -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {ok, Results1} = couch_db:get_missing_revs(Shard, IdsRevs), - showroom_db:close_shard(Shard), - {ok, Results1}; - Error -> - Error - end - end. - - -get_db_info([Part, Db]) -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {Status, Info} = couch_db:get_db_info(Shard), - showroom_db:close_shard(Shard), - {Status, {[], Info}}; - Error -> - Error - end. 
- -get_view_group_info([Part, Db, DesignId]) -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {ok, EmptyGroup} = showroom_view:build_skeleton_view_group(Db, DesignId), - <<"S", ShardName/binary>> = Shard#db.name, - {ok, Pid} = gen_server:call(couch_view, {get_group_server, - ShardName, EmptyGroup}), - {ok, Info} = couch_view_group:request_group_info(Pid), - showroom_db:close_shard(Shard), - {ok, {[], Info}}; - Error -> - Error - end. - - -ensure_full_commit([Part, Db]) -> - case showroom_db:open_shard(node(), Part, Db) of - {ok, Shard} -> - {Status, Info} = couch_db:ensure_full_commit(Shard), - showroom_db:close_shard(Shard), - {Status, {[], Info}}; - Error -> - Error - end. - - -%% ======================= -%% internal -%% ======================= - -int_zip(Seqs, Docs) when length(Seqs) == length(Docs) -> - lists:zip(Seqs, Docs); -int_zip(_Seqs, []) -> - []; -int_zip(Seqs, Docs) -> - ?debugFmt("~nWTF? int_zip~nSeqs: ~p~nDocs: ~p~n", [Seqs, Docs]), - []. diff --git a/src/dynomite_couch_storage.erl b/src/dynomite_couch_storage.erl deleted file mode 100644 index 4fd21b80..00000000 --- a/src/dynomite_couch_storage.erl +++ /dev/null @@ -1,41 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: dynomite_couch_storage.erl -%%% @author Brad Anderson -%%% @copyright 2009 Brad Anderson -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-07-14 -%%%------------------------------------------------------------------- --module(dynomite_couch_storage). --author('brad@cloudant.com'). - -%% API --export([name/1, open/2, close/1, create/2]). -%% , close/1, get/2, put/4, has_key/2, delete/2, fold/3 - --include_lib("../include/common.hrl"). - -%% -record(row, {key, context, values}). - -%%==================================================================== -%% API -%%==================================================================== - -name(Boundary) -> - showroom_utils:int_to_hexstr(Boundary). 
- -open(Directory, Name) -> -%% ?debugFmt("~nDirectory: ~p~nName : ~p~n", [Directory,Name]), - {ok, {Directory, Name}}. - -close(_Table) -> ok. - -create(_Directory, _Name) -> - ok. - - -%%==================================================================== -%% Internal functions -%%==================================================================== diff --git a/src/lib_misc.erl b/src/lib_misc.erl deleted file mode 100644 index f5449295..00000000 --- a/src/lib_misc.erl +++ /dev/null @@ -1,235 +0,0 @@ --module(lib_misc). - --define(OFFSET_BASIS, 2166136261). --define(FNV_PRIME, 16777619). - --export([rm_rf/1, pmap/3, succ/1, fast_acc/3, hash/1, hash/2, fnv/1, - nthdelete/2, zero_split/1, nthreplace/3, rand_str/1, position/2, - shuffle/1, floor/1, ceiling/1, time_to_epoch_int/1, - time_to_epoch_float/1, now_int/0, now_float/0, byte_size/1, listify/1, - reverse_bits/1]). - --include("../include/config.hrl"). --include("../include/profile.hrl"). - - -rm_rf(Name) when is_list(Name) -> - case filelib:is_dir(Name) of - false -> - file:delete(Name); - true -> - case file:list_dir(Name) of - {ok, Filenames} -> - lists:foreach(fun rm_rf/1, [ filename:join(Name, F) || F <- Filenames]), - file:del_dir(Name); - {error, Reason} -> error_logger:info_msg("rm_rf failed because ~p~n", [Reason]) - end - end. - -zero_split(Bin) -> - zero_split(0, Bin). - -zero_split(N, Bin) when N > erlang:byte_size(Bin) -> Bin; - -zero_split(N, Bin) -> - case Bin of - <<_:N/binary, 0:8, _/binary>> -> split_binary(Bin, N); - _ -> zero_split(N+1, Bin) - end. - -rand_str(N) -> - lists:map(fun(_I) -> - random:uniform(26) + $a - 1 - end, lists:seq(1,N)). - -nthreplace(N, E, List) -> - lists:sublist(List, N-1) ++ [E] ++ lists:nthtail(N, List). - -nthdelete(N, List) -> - nthdelete(N, List, []). 
- -nthdelete(0, List, Ret) -> - lists:reverse(Ret) ++ List; - -nthdelete(_, [], Ret) -> - lists:reverse(Ret); - -nthdelete(1, [_E|L], Ret) -> - nthdelete(0, L, Ret); - -nthdelete(N, [E|L], Ret) -> - nthdelete(N-1, L, [E|Ret]). - -floor(X) -> - T = erlang:trunc(X), - case (X - T) of - Neg when Neg < 0 -> T - 1; - Pos when Pos > 0 -> T; - _ -> T - end. - -ceiling(X) -> - T = erlang:trunc(X), - case (X - T) of - Neg when Neg < 0 -> T; - Pos when Pos > 0 -> T + 1; - _ -> T - end. - -succ([]) -> - []; - -succ(Str) -> - succ_int(lists:reverse(Str), []). - -succ_int([Char|Str], Acc) -> - if - Char >= $z -> succ_int(Str, [$a|Acc]); - true -> lists:reverse(lists:reverse([Char+1|Acc]) ++ Str) - end. - -fast_acc(_, Acc, 0) -> Acc; - -fast_acc(Fun, Acc, N) -> - fast_acc(Fun, Fun(Acc), N-1). - -shuffle(List) when is_list(List) -> - [ N || {_R,N} <- lists:keysort(1, [{random:uniform(),X} || X <- List]) ]. - -pmap(Fun, List, ReturnNum) -> - N = if - ReturnNum > length(List) -> length(List); - true -> ReturnNum - end, - SuperParent = self(), - SuperRef = erlang:make_ref(), - Ref = erlang:make_ref(), - %% we spawn an intermediary to collect the results - %% this is so that there will be no leaked messages sitting in our mailbox - Parent = spawn(fun() -> - L = gather(N, length(List), Ref, []), - SuperParent ! {SuperRef, pmap_sort(List, L)} - end), - Pids = [spawn(fun() -> - Parent ! {Ref, {Elem, (catch Fun(Elem))}} - end) || Elem <- List], - Ret = receive - {SuperRef, Ret1} -> Ret1 - end, - % i think we need to cleanup here. - lists:foreach(fun(P) -> exit(P, die) end, Pids), - Ret. - -pmap_sort(Original, Results) -> - pmap_sort([], Original, lists:reverse(Results)). - -% pmap_sort(Sorted, [], _) -> lists:reverse(Sorted); -pmap_sort(Sorted, _, []) -> lists:reverse(Sorted); -pmap_sort(Sorted, [E|Original], Results) -> - case lists:keytake(E, 1, Results) of - {value, {E, Val}, Rest} -> pmap_sort([Val|Sorted], Original, Rest); - false -> pmap_sort(Sorted, Original, Results) - end. 
- -gather(_, Max, _, L) when length(L) == Max -> L; -gather(0, _, _, L) -> L; -gather(N, Max, Ref, L) -> - receive - {Ref, {Elem, {not_found, Ret}}} -> gather(N, Max, Ref, [{Elem, {not_found, Ret}}|L]); - {Ref, {Elem, {badrpc, Ret}}} -> gather(N, Max, Ref, [{Elem, {badrpc, Ret}}|L]); - {Ref, {Elem, {'EXIT', Ret}}} -> gather(N, Max, Ref, [{Elem, {'EXIT', Ret}}|L]); - {Ref, Ret} -> gather(N-1, Max, Ref, [Ret|L]) - end. - -get_hash_module(#config{hash_module=HashModule}) -> - HashModule. - -hash(Term) -> - HashModule = get_hash_module(configuration:get_config()), - ?prof(hash), - R = HashModule:hash(Term), - ?forp(hash), - R. - -hash(Term, Seed) -> - HashModule = get_hash_module(configuration:get_config()), - ?prof(hash), - R = HashModule:hash(Term, Seed), - ?forp(hash), - R. - -%32 bit fnv. magic numbers ahoy -fnv(Term) when is_binary(Term) -> - fnv_int(?OFFSET_BASIS, 0, Term); - -fnv(Term) -> - fnv_int(?OFFSET_BASIS, 0, term_to_binary(Term)). - -fnv_int(Hash, ByteOffset, Bin) when erlang:byte_size(Bin) == ByteOffset -> - Hash; - -fnv_int(Hash, ByteOffset, Bin) -> - <<_:ByteOffset/binary, Octet:8, _/binary>> = Bin, - Xord = Hash bxor Octet, - fnv_int((Xord * ?FNV_PRIME) rem (2 bsl 31), ByteOffset+1, Bin). - -position(Predicate, List) when is_function(Predicate) -> - position(Predicate, List, 1); - -position(E, List) -> - position(E, List, 1). - -position(Predicate, [], _N) when is_function(Predicate) -> false; - -position(Predicate, [E|List], N) when is_function(Predicate) -> - case Predicate(E) of - true -> N; - false -> position(Predicate, List, N+1) - end; - -position(_, [], _) -> false; - -position(E, [E|_List], N) -> N; - -position(E, [_|List], N) -> position(E, List, N+1). - -now_int() -> - time_to_epoch_int(now()). - -now_float() -> - time_to_epoch_float(now()). - -time_to_epoch_int(Time) when is_integer(Time) or is_float(Time) -> - Time; - -time_to_epoch_int({Mega,Sec,_}) -> - Mega * 1000000 + Sec. 
- -time_to_epoch_float(Time) when is_integer(Time) or is_float(Time) -> - Time; - -time_to_epoch_float({Mega,Sec,Micro}) -> - Mega * 1000000 + Sec + Micro / 1000000. - -byte_size(List) when is_list(List) -> - lists:foldl(fun(El, Acc) -> Acc + lib_misc:byte_size(El) end, 0, List); - -byte_size(Term) -> - erlang:byte_size(Term). - -listify(List) when is_list(List) -> - List; - -listify(El) -> [El]. - -reverse_bits(V) when is_integer(V) -> - % swap odd and even bits - V1 = ((V bsr 1) band 16#55555555) bor (((V band 16#55555555) bsl 1) band 16#ffffffff), - % swap consecutive pairs - V2 = ((V1 bsr 2) band 16#33333333) bor (((V1 band 16#33333333) bsl 2) band 16#ffffffff), - % swap nibbles ... - V3 = ((V2 bsr 4) band 16#0F0F0F0F) bor (((V2 band 16#0F0F0F0F) bsl 4) band 16#ffffffff), - % swap bytes - V4 = ((V3 bsr 8) band 16#00FF00FF) bor (((V3 band 16#00FF00FF) bsl 8) band 16#ffffffff), - % swap 2-byte long pairs - ((V4 bsr 16) band 16#ffffffff) bor ((V4 bsl 16) band 16#ffffffff). diff --git a/src/mem_utils.erl b/src/mem_utils.erl deleted file mode 100644 index ffefd5cb..00000000 --- a/src/mem_utils.erl +++ /dev/null @@ -1,129 +0,0 @@ --module(mem_utils). - --export([fix_mappings/3, get_remote_fullmap/1, join_type/3, pmap_from_full/1, - nodeparts_up/1, remove_partition/3, use_persistent/2, - was_i_nodedown/2]). - --include("../include/common.hrl"). - -join_type(Node, Fullmap, Options) -> - case proplists:get_value(replace, Options) of - undefined -> - case lists:filter(fun({N,_P,_T}) -> N =:= Node end, Fullmap) of - [] -> new; - _ -> rejoin - end; - OldNode when is_atom(OldNode) -> - % not a particularly strong guard, but will have to do - {replace, OldNode}; - _ -> new - end. 
- - -%% @doc return a {PMap, Fullmap} tuple that has corrections for -%% down, rejoining, or replacing Node -fix_mappings(nodedown, Node, OldFullmap) -> - fix_mappings_fold(fun({N,P,T}, AccIn) -> - case {N,T} of - {Node, {nodedown, Type}} -> - % already marked as nodedown, so leave it - [{N,P, {nodedown, Type}} | AccIn]; - {Node, _} -> - % mark it as nodedown - [{N,P, {nodedown, T}} | AccIn]; - _ -> [{N,P,T} | AccIn] - end - end, [], OldFullmap); - -fix_mappings(rejoin, Node, OldFullmap) -> - fix_mappings_fold(fun({N,P,{nodedown,T}}, AccIn) when N =:= Node -> - [{N,P,T} | AccIn]; - (NPT, AccIn) -> [NPT | AccIn] - end, [], OldFullmap); - -fix_mappings(replace, {OldNode, NewNode}, OldFullmap) -> - fix_mappings_fold(fun({N,P,T}, AccIn) -> - case {N, T} of - {OldNode, {nodedown,T1}} -> [{NewNode,P,T1} | AccIn]; - {OldNode, _} -> [{NewNode,P,T} | AccIn]; - _ -> [{N,P,T} | AccIn] - end - end, [], OldFullmap). - - -fix_mappings_fold(Fun, Acc0, OldFullmap) -> - NewFullmap = lists:foldl(Fun, Acc0, OldFullmap), - NewPMap = pmap_from_full(NewFullmap), - {NewPMap, NewFullmap}. - - -%% @doc create a PMap (primary nodes only) from provided Fullmap -%% If a primary node is down, a partner will be supplied -pmap_from_full(Fullmap) -> - NodePartList = nodeparts_up(Fullmap), - lists:keysort(2,lists:foldl(fun({N,P,T}, AccIn) -> - case T of - primary -> [{N,P} | AccIn]; - {nodedown, primary} -> - NewNode = case lists:delete(N, - membership2:nodes_for_part(P, NodePartList)) of - [First|_] -> First; - [] -> N % wtf, are all partners down too? - end, - [{NewNode,P} | AccIn]; - _ -> AccIn - end - end, [], Fullmap)). - - -nodeparts_up(Fullmap) -> - lists:foldl(fun({_N,_P,{nodedown,_}}, AccIn) -> AccIn; - ({N,P,_T}, AccIn) -> [{N,P} | AccIn] - end, [], Fullmap). 
- - - -%% @doc if Node is in the Fullmap as {nodedown,_} return true -was_i_nodedown(Node, Fullmap) -> - lists:member(yes, lists:map(fun({N,_P,{nodedown,_T}}) -> - case N of - Node -> yes; - _ -> no - end; - (_) -> no - end, Fullmap)). - - -remove_partition(FullMap, Node, Partition) -> - case lists:filter( - fun({N,P,_Type}) -> N =:= Node andalso P =:= Partition end, - FullMap) of - [Elem|_] -> - lists:delete(Elem, FullMap); - Other -> - ?LOG_ERROR("~nNo partition to remove: ~p~n" - "Node: ~p~nPartition: ~p~n", [Other, Node, Partition]), - FullMap - end. - - -use_persistent(_PartnersPlus, undefined) -> - false; - -use_persistent(PartnersPlus, _PersistentParts) -> - % get a fullmap from a partner - % this may need rework for network partitions, as you could get a bad - % fullmap from another node that was partitioned w/ this one :\ - RemoteFullmap = get_remote_fullmap(PartnersPlus), - % return opposite of was_i_nodedown - not mem_utils:was_i_nodedown(node(), RemoteFullmap). - - -get_remote_fullmap([]) -> - []; % no remote fullmap available, so return empty list - -get_remote_fullmap([Node|Rest]) -> - case gen_server:call({membership, Node}, fullmap) of - {ok, Fullmap} -> Fullmap; - _ -> get_remote_fullmap(Rest) - end. diff --git a/src/membership2.erl b/src/membership2.erl deleted file mode 100644 index 4c4780c3..00000000 --- a/src/membership2.erl +++ /dev/null @@ -1,686 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: membership2.erl -%%% @author Cliff Moon [] -%%% @copyright 2009 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-05-04 by Cliff Moon -%%%------------------------------------------------------------------- --module(membership2). --author('cliff@powerset.com'). --author('brad@cloudant.com'). - --behaviour(gen_server). 
- -%% API --export([start_link/2, start_link/3, stop/1, check_nodes/0, - partitions/0, partition_for_key/1, fullmap/0, - all_nodes_parts/1, clock/0, - nodes/0, nodeparts_for_key/1, nodes_for_part/1, nodes_for_part/2, - nodes_for_shard/1, nodes_down/0, - parts_for_node/1, - take_offline/2, bring_online/2, - decommission_part/3, pp_fullmap/0, snafu/1, snafu/3]). - - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - -%% includes --include("../include/config.hrl"). --include("../include/common.hrl"). --include("../include/profile.hrl"). --include_lib("eunit/include/eunit.hrl"). - -%%==================================================================== -%% API -%%==================================================================== -%% @doc Starts the server -%% @end -%%-------------------------------------------------------------------- - -start_link(Node, Nodes) -> - start_link(Node, Nodes, []). - - -start_link(Node, Nodes, Args) -> - gen_server:start_link({local, membership}, ?MODULE, [Node, Nodes, Args], []). - - -stop(Server) -> - gen_server:cast(Server, stop). - - -%% @doc for when things have really gone south. Install a new state on all -%% nodes, given a filename, or node list, partition map, and fullmap. -%% @end -snafu(Filename) -> - NewState = case file:consult(Filename) of - {ok, [Terms]} -> - Terms; - Error -> - throw(Error) - end, - #membership{nodes=Nodes, partitions=PMap, fullmap=Fullmap} = NewState, - snafu(Nodes, PMap, Fullmap). - - -snafu(Nodes, PMap, Fullmap) -> - NewState = #membership{node=node(), nodes=Nodes, - partitions=PMap, fullmap=Fullmap, version=vector_clock:create(dbcore)}, - update_ets(ets_name(node()), NewState), - fire_gossip(node(), Nodes, NewState), - save(NewState). 
- - -check_nodes() -> - ErlangNodes = lists:usort([node() | erlang:nodes()]), - {ok, MemNodeList} = membership2:nodes(), - MemNodes = lists:usort(MemNodeList), - {PMapNodeList, _PMapPartList} = lists:unzip(partitions()), - PMapNodes = lists:usort(PMapNodeList), - case ErlangNodes =:= MemNodes andalso - ErlangNodes =:= PMapNodes andalso - MemNodes =:= PMapNodes of - true -> true; - _ -> - Msg = "membership: Node Lists do not match.~n" - "Erlang Nodes : ~p~n" - "Membership Nodes : ~p~n" - "PMap Nodes : ~p~n", - Lst = [ErlangNodes, MemNodes, PMapNodes], - showroom_log:message(error, Msg, Lst), - io:format(Msg, Lst), - false - end. - - -%% @doc retrieve the primary partition map. This is a list of partitions and -%% their corresponding primary node, no replication partner nodes. -partitions() -> - ets_pmap(). - - -%% @doc retrieve the full partition map, like above, but including replication -%% partner nodes. List should number 2^Q * N -fullmap() -> - lists:keysort(2, ets_fullmap()). - - -%% @doc pretty-print the full partition map (sorted by node, then part) -pp_fullmap() -> - lists:foreach( - fun({N,P}) -> - io:format("~-60s ~s~n", [N, showroom_utils:int_to_hexstr(P)]) - end, - lists:sort(membership2:all_nodes_parts(true))). - - -%% @doc get the current vector clock from membership state -clock() -> - gen_server:call(membership, clock). - - -%% @doc get the list of cluster nodes (according to membership module) -%% This may differ from erlang:nodes() -nodes() -> - gen_server:call(membership, nodes). - - -%% @doc get all the responsible nodes for a given partition, including -%% replication partner nodes -nodes_for_part(Part) -> - nodes_for_part(Part, all_nodes_parts(true)). - - -nodes_for_part(Part, NodePartList) -> - Filtered = lists:filter(fun({_N, P}) -> P =:= Part end, NodePartList), - {Nodes, _Parts} = lists:unzip(Filtered), - lists:usort(Nodes). 
- - -nodes_for_shard(ShardName) when is_binary(ShardName) -> - nodes_for_shard(binary_to_list(ShardName)); - -nodes_for_shard(ShardName) when is_list(ShardName) -> - HexPart = case string:rchr(ShardName, $_) + 1 of - 1 -> ShardName; - Last -> string:substr(ShardName, Last) - end, - Int = showroom_utils:hexstr_to_int(HexPart), - {_, Parts} = lists:unzip(membership2:partitions()), - nodes_for_part(partitions:int_to_partition(Int, Parts)). - - -%% @doc get all the responsible nodes and partitions for a given key, including -%% nodes/parts on replication partner nodes -nodeparts_for_key(Key) -> - int_node_parts_for_key(Key). - - -%% @doc get a list of all the nodes marked down in this node's fullmap -nodes_down() -> - Downs = lists:foldl(fun({N,_P,{nodedown, _T}}, AccIn) -> [N|AccIn]; - (_, AccIn) -> AccIn end, [], fullmap()), - lists:usort(Downs). - - -%% @doc return the partition responsible for the given Key -partition_for_key(Key) -> - Config = configuration:get_config(), - Hash = lib_misc:hash(Key), - partitions:hash_to_partition(Hash, Config#config.q). - - -%% @doc return the partitions that reside on a given node -parts_for_node(Node) -> - lists:sort(lists:foldl(fun({N,P,_Type}, AccIn) -> - case N of - Node -> [P | AccIn]; - _ -> AccIn - end - end, [], fullmap())). - - -%% @doc get all the nodes and partitions in the cluster. Depending on the -%% AllPartners param, you get only primary nodes or replication partner -%% nodes, as well. -%% No nodes/parts currently down are returned. -all_nodes_parts(false) -> - ets_pmap(); -all_nodes_parts(true) -> - mem_utils:nodeparts_up(ets_fullmap()). - - -%% @doc If a local storage server exists for this partition it will be taken -%% out of rotation until put back in. -%% @end -take_offline(Node, Partition) when Node =:= node() -> - gen_server:call(membership, {take_offline, Partition}); - -take_offline(Node, Partition)-> - gen_server:call({membership, Node}, {take_offline, Partition}). 
- - -%% @doc Brings a storage server that has been taken offline back online. -%% @end -bring_online(Node, Partition) -> - showroom_log:message(debug, "membership: bring_online Node: ~p Partition: ~p", - [Node, Partition]), - gen_server:call({membership, Node}, {bring_online, Partition}). - - -%% @doc cleans up the remaining .couch shard/partition file after it has been -%% moved to a new node. -decommission_part(Node, Part, DbName) -> - gen_server:cast({membership, Node}, {decommission, Part, DbName}). - - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% @doc Initiates the server -%% @end -%%-------------------------------------------------------------------- -init([Node, Nodes, Args]) -> - process_flag(trap_exit,true), - showroom_log:message(info, "membership: membership server starting...", []), - Options = lists:flatten(Args), - showroom_log:message(info, "membership: options ~p", [Options]), - net_kernel:monitor_nodes(true), - Config = configuration:get_config(), - PersistentState=#membership{partitions=PersistentParts} = load(Node), - PartnersPlus = replication:partners_plus(Node, Nodes), - State = - case mem_utils:use_persistent(PartnersPlus, PersistentParts) of - false -> - showroom_log:message(info, "membership: not using persisted state", []), - % didn't find persistent state on disk or this node was nodedown - % so we don't want to use persisted state - PartialNodes = lists:usort(Nodes), - {NewVersion, RemoteNodes, NewPMap1, NewFullMap1} = - join_to(Node, PartnersPlus, Options), - NewWorldNodes = lists:usort(PartialNodes ++ RemoteNodes), - NewPMap = case NewPMap1 of - [] -> partitions:create_partitions(Config#config.q, Node, - NewWorldNodes); - _ -> NewPMap1 - end, 
- NewFullMap = case NewFullMap1 of - [] -> make_all_nodes_parts(NewPMap); - _ -> NewFullMap1 - end, - #membership{ - node=Node, - nodes=NewWorldNodes, - partitions=lists:keysort(2,NewPMap), - % version=vector_clock:increment(dbcore, NewVersion), - version=NewVersion, - fullmap=NewFullMap}; - _ -> - % found persistent state on disk - showroom_log:message(info, "membership: using persisted state", []), - case Options of - [] -> ok; - _ -> - showroom_log:message(info, "membership: options ~p ignored.", [Options]) - end, - %% fire gossip even if state comes from disk - fire_gossip(Node, Nodes, PersistentState), - PersistentState - end, - save(State), - % ets table is an optimization for cluster_ops performance - Ets = ets:new(ets_name(Node), [public, set, named_table]), - update_ets(Ets, State), - {ok, State}. - - -%%-------------------------------------------------------------------- -%% @spec -%% handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% @doc Handling call messages -%% @end -%%-------------------------------------------------------------------- - -%% join -handle_call({join, JoiningNode, Options}, _From, - State = #membership{version=Version, node=Node, nodes=Nodes, - partitions=Partitions, fullmap=OldFullMap}) -> - JoinType = mem_utils:join_type(JoiningNode, OldFullMap, Options), - showroom_log:message(alert, "membership: node ~p wants to join, type '~p'", - [JoiningNode, JoinType]), - {PMap, NewFullmap} = case JoinType of - rejoin -> - mem_utils:fix_mappings(rejoin, JoiningNode, OldFullMap); - {replace, OldNode} -> - mem_utils:fix_mappings(replace, {OldNode, JoiningNode}, OldFullMap); - new -> - Hints = proplists:get_value(hints, Options), - PMap1 = case partitions:join(JoiningNode, Partitions, Hints) of - {ok, Table} -> Table; - {error, Error, _Table} -> throw({join_error, Error}) - end, - 
Fullmap1 = make_all_nodes_parts(PMap1), - {PMap1, Fullmap1} - end, - WorldNodes = lists:usort(Nodes ++ [JoiningNode]), - NewVersion = vector_clock:increment(dbcore, Version), - NewState1 = State#membership{nodes=WorldNodes, partitions=PMap, - version=NewVersion}, - {Fullmap, NewState2} = case proplists:get_value(bootstrap, Options) of - true -> - % join not complete until bootstrap finishes, - % so this NewState isn't the final (i.e. NewState1 will be installed) - showroom_log:message(info, "membership: bootstrap process starting", []), - bootstrap_manager:start_bootstrap(NewState1, OldFullMap, NewFullmap); - _ -> - % no bootstrap, so install NewFullmap now - showroom_log:message(info, "membership: no bootstrap", []), - {NewFullmap, NewState1#membership{fullmap=NewFullmap}} - end, - save(NewState2), - update_ets(ets_name(node()), NewState2), - notify(node_join, [JoiningNode]), - fire_gossip(Node, WorldNodes, NewState2), - % If we're bootstrapping, then the join is not complete. - % So return FullMap for now. 
bootstrap_manager:end_bootstrap will fix it - {reply, {ok, NewVersion, WorldNodes, PMap, Fullmap}, NewState2}; - -%% clock -handle_call(clock, _From, State = #membership{version=Version}) -> - {reply, Version, State}; - -%% state -handle_call(state, _From, State) -> - {reply, State, State}; - -%% newfullmap -handle_call({newfullmap, NewFullMap}, _From, - State = #membership{node=Node, nodes=Nodes, version=Version}) -> - NewVersion = vector_clock:increment(dbcore, Version), - NewState = State#membership{version=NewVersion, fullmap=NewFullMap}, - save(NewState), - update_ets(ets_name(node()), NewState), - fire_gossip(Node, Nodes, NewState), - {reply, installed, NewState}; - -%% partitions -handle_call(partitions, _From, State = #membership{partitions=Parts}) -> - {reply, {ok, Parts}, State}; - -%% fullmap -handle_call(fullmap, _From, State = #membership{fullmap=FullMap}) -> - {reply, {ok, FullMap}, State}; - -%% nodes -handle_call(nodes, _From, State = #membership{nodes=Nodes}) -> - {reply, {ok, Nodes}, State}; - -%% take_offline -handle_call({take_offline, Partition}, _From, - State = #membership{node=Node, nodes=Nodes, fullmap=OldFullMap}) -> - showroom_log:message(info, "membership: take_offline Node: ~p Partition: ~p", - [Node, Partition]), - NewFullMap = mem_utils:remove_partition(OldFullMap, Node, Partition), - NewState = State#membership{fullmap=NewFullMap}, - fire_gossip(Node, Nodes, NewState), - update_ets(ets_name(node()), NewState), - {reply, {offline, Node, Partition}, NewState}; - -%% at least reply that this 'catch-all' was ignored -handle_call(_Request, _From, State) -> - {reply, ignored, State}. 
- - -%%-------------------------------------------------------------------- -%% @spec handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling cast messages -%% @end -%%-------------------------------------------------------------------- - -handle_cast({gossip, RemoteState = #membership{node=RemoteNode}}, - LocalState = #membership{node=_Me}) -> - showroom_log:message(info, "membership: received gossip from ~p", - [RemoteNode]), - {MergeType, MergedState = #membership{nodes=_MergedNodes}} = - merge_state(RemoteState, LocalState), - case MergeType of - equal -> {noreply, MergedState}; - merged -> - showroom_log:message(info, "membership: merged new gossip", []), - % fire_gossip(Me, MergedNodes, MergedState), - update_ets(ets_name(node()), MergedState), - save(MergedState), - {noreply, MergedState} - end; - -% decommission -% renaming for now, until case 1245 can be completed -handle_cast({decommission, Part, DbName}, State) -> - {{Y,Mon,D}, {H,Min,S}} = calendar:universal_time(), - Directory = couch_config:get("couchdb", "database_dir"), - OrigFilename = showroom_utils:full_filename(Part, DbName, Directory), - Moved = lists:flatten(io_lib:format(".~w~2.10.0B~2.10.0B." ++ - "~2.10.0B~2.10.0B~2.10.0B.moved.couch", [Y,Mon,D,H,Min,S])), - % Note: this MovedFilename bit below gives weird results: - % ["/Users/brad/dev/erlang/dbcore/tmp/lib/x800000/test_800000", - % ".20091001.162640.moved.couch"] but list/string behavior handles it. - MovedFilename = lists:map(fun(E) -> binary_to_list(E) end, - re:replace(OrigFilename, "\.couch", Moved, [])), - ok = file:rename(OrigFilename, MovedFilename), - {noreply, State}. 
- - -%% @doc handle nodedown messages because we have -%% net_kernel:monitor_nodes(true) -handle_info({nodedown, Node}, - State = #membership{nodes=OldNodes, fullmap=OldFullmap, - version=OldVersion}) -> - showroom_log:message(alert, "membership: nodedown from ~p", [Node]), - case lists:member(Node, OldNodes) of - true -> - notify(nodedown, [Node]), - % clean up membership state - Nodes = lists:delete(Node, OldNodes), - {PMap, Fullmap} = mem_utils:fix_mappings(nodedown, Node, OldFullmap), - % Do we increment clock here? w/o gossip? - % This is happening near simultaneously on the other nodes, too :\ - % Only reason to increment is persisted clock on down node will be older - % when it returns - Version = vector_clock:increment(dbcore, OldVersion), - NewState = State#membership{nodes=Nodes, partitions=PMap, fullmap=Fullmap, - version=Version}, - update_ets(ets_name(node()), NewState), - save(NewState), - {noreply, NewState}; - _ -> {noreply, State} - end; - -%% @doc handle nodeup messages because we have -%% net_kernel:monitor_nodes(true) -handle_info({nodeup, Node}, State) -> - showroom_log:message(alert, "membership: nodeup Node: ~p", [Node]), - {noreply, State}; - -handle_info(Info, State) -> - showroom_log:message(info, "membership: handle_info Info: ~p", [Info]), - {noreply, State}. - -%%-------------------------------------------------------------------- -%% @spec terminate(Reason, State) -> void() -%% @doc This function is called by a gen_server when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -terminate(_Reason, _State) -> - ok. 
- -%% 0.5.6 to 0.5.7 -code_change(184380560337424323902805568963460261434, State, _Extra) -> - backup_old_config_file(), - % update State to the new version - {membership, _Hdr, Node, Nodes, PMap, Version} = State, - NewState = #membership{ - node = Node, - nodes = Nodes, - partitions = PMap, - version = Version, - fullmap = make_all_nodes_parts(PMap) - }, - save(NewState), - % also create new ets table - Ets = ets:new(ets_name(Node), [public, set, named_table]), - update_ets(Ets, NewState), - {ok, NewState}; - -%% 0.8.8 to 0.9.0 -code_change(239470595681156900105628017899543243419, State, _Extra) -> - net_kernel:monitor_nodes(true), - {ok, State}; - -code_change(OldVsn, State, _Extra) -> - io:format("Unknown Old Version!~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), - {ok, State}. - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- - -backup_old_config_file() -> - Config = configuration:get_config(), - FileName = filename:join([Config#config.directory, - lists:concat([node:name(node()), ".state"])]), - BackupName = filename:join([Config#config.directory, - lists:concat([node:name(node()), ".state.bak"])]), - file:copy(FileName, BackupName). - - -%% return State from membership file -load(Node) -> - Config = configuration:get_config(), - case file:consult(filename:join([Config#config.directory, - lists:concat([node:name(Node), ".state"])])) of - {error, Reason} -> - showroom_log:message(info, "membership: could not load state: ~p~n", - [Reason]), - #membership{nodes=[]}; - {ok, [Terms]} -> - Terms - end. - - -%% save the State to a file -save(State) -> - Config = configuration:get_config(), - Filename = filename:join([Config#config.directory, - lists:concat([node:name(State#membership.node), ".state"])]), - {ok, File} = file:open(Filename, [binary, write]), - io:format(File, "~w.~n", [State]), - file:close(File). 
- - -%% joining is bi-directional, as opposed to gossip which is unidirectional -%% we want to collect the list of known nodes to compute the partition map -%% which isn't necessarily the same as the list of running nodes -join_to(Node, Partners, Options) -> - join_to(Node, Partners, - {vector_clock:create(dbcore), [], [], []}, Options). - - -%% @doc join this node to one of its partners (or PartnersPlus if no partners -%% are available). -join_to(_, [], {Version, World, PMap, FullMap}, _Options) -> - {Version, World, PMap, FullMap}; - -join_to(Node, [Partner|Rest], {Version, World, PMap, FullMap}, Options) -> - case call_join(Partner, Node, Options) of - {ok, RemoteVersion, NewNodes, NewPMap, NewFullMap} -> - {vector_clock:merge(Version, RemoteVersion), - lists:usort(World ++ NewNodes), - NewPMap, - NewFullMap}; - Other -> - showroom_log:message(info, "membership: join_to Other: ~p~n", [Other]), - join_to(Node, Rest, {Version, World, PMap, FullMap}, Options) - end. - - -%% @doc make the join call to Remote node (usually a partner of Node) -call_join(Remote, Node, Options) -> - showroom_log:message(info, "membership: call_join From: ~p To: ~p", - [Node, Remote]), - catch gen_server:call({membership, node:name(Remote)}, - {join, Node, Options}). - - -merge_state(_RemoteState=#membership{version=RemoteVersion, nodes=RemoteNodes, - partitions=RemotePMap, - fullmap=RemoteFullMap}, - LocalState=#membership{version=LocalVersion, nodes=LocalNodes, - partitions=LocalPMap, - fullmap=LocalFullMap}) -> - case vector_clock:equals(RemoteVersion, LocalVersion) of - true -> - {equal, LocalState}; - false -> - % Note, we're matching MergedVersion from these funs. - % They should be the same. 
- {MergedVersion, MergedNodes} = - merge_nodes(RemoteVersion, RemoteNodes, LocalVersion, LocalNodes), - {MergedVersion, MergedPMap} = - merge_pmaps(RemoteVersion, RemotePMap, LocalVersion, LocalPMap), - {MergedVersion, MergedFullMap} = - merge_fullmaps(RemoteVersion, RemoteFullMap, - LocalVersion, LocalFullMap), - - % notify of arrivals & departures - Arrived = MergedNodes -- LocalNodes, - notify(node_join, Arrived), - % Departed = LocalNodes -- MergedNodes, - % notify(node_leave, Departed), - - {merged, LocalState#membership{version=MergedVersion, nodes=MergedNodes, - partitions=MergedPMap, - fullmap=MergedFullMap}} - end. - - -merge_nodes(RemoteVersion, RemoteNodes, LocalVersion, LocalNodes) -> - {MergedVersion, Merged} = vector_clock:resolve({RemoteVersion, RemoteNodes}, - {LocalVersion, LocalNodes}), - {MergedVersion, lists:usort(Merged)}. - - -merge_pmaps(RemoteVersion, RemotePMap, LocalVersion, LocalPMap) -> - {MergedVersion, Merged} = vector_clock:resolve({RemoteVersion, RemotePMap}, - {LocalVersion, LocalPMap}), - {MergedVersion, lists:ukeysort(2, Merged)}. - - -merge_fullmaps(RemoteVersion, RemoteFullMap, LocalVersion, LocalFullMap) -> - {MergedVersion, Merged} = vector_clock:resolve({RemoteVersion, RemoteFullMap}, - {LocalVersion, LocalFullMap}), - {MergedVersion, lists:usort(Merged)}. - - -notify(Type, Nodes) -> - lists:foreach(fun(Node) -> - gen_event:notify(membership_events, {Type, Node}) - end, Nodes). - - -%% @doc fires a gossip message (membership state) to partners nodes in the -%% cluster. -%% @end -fire_gossip(Me, WorldNodes, Gossip) -> - % GossipPartners = partners_plus(Me, WorldNodes), - % random experiment, gossip with all nodes, not just partners_plus - GossipPartners = lists:delete(Me, WorldNodes), - lists:foreach(fun(TargetNode) -> - showroom_log:message(info, "membership: firing gossip from ~p to ~p", - [Me, TargetNode]), - gen_server:cast({membership, TargetNode}, {gossip, Gossip}) - end, GossipPartners). 
- - -%% @doc construct a table with all partitions, with the primary node and all -%% replication partner nodes as well. -make_all_nodes_parts(PMap) -> - {Nodes, _Parts} = lists:unzip(PMap), - NodeParts = lists:flatmap( - fun({Node,Part}) -> - Partners = replication:partners(Node, lists:usort(Nodes)), - PartnerList = [{Partner, Part, partner} || Partner <- Partners], - [{Node, Part, primary} | PartnerList] - end, PMap), - NodeParts. - - -%% @doc for the given key, return a list of {Node,Part} tuples. Nodes are both -%% primary and replication partner nodes, and should number N. -int_node_parts_for_key(Key) -> - Config = configuration:get_config(), - Hash = lib_misc:hash(Key), - Part = partitions:hash_to_partition(Hash, Config#config.q), - NodePartList = all_nodes_parts(true), - lists:filter(fun({_N,P}) -> P =:= Part end, NodePartList). - - -%% ets table helper functions -ets_name(Node) -> - list_to_atom(lists:concat(["mem_", atom_to_list(Node)])). - - -update_ets(Table, #membership{partitions=PMap, fullmap=FullMap}) -> - ets:insert(Table, {pmap, PMap}), - ets:insert(Table, {fullmap, FullMap}), - ok. - - -ets_pmap() -> - [{pmap, PMap}] = ets:lookup(ets_name(node()), pmap), - PMap. - - -ets_fullmap() -> - [{fullmap, FullMap}] = ets:lookup(ets_name(node()), fullmap), - FullMap. diff --git a/src/node.erl b/src/node.erl deleted file mode 100644 index 9a9c82c1..00000000 --- a/src/node.erl +++ /dev/null @@ -1,39 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: node.erl -%%% @author Cliff Moon <> [] -%%% @copyright 2009 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-05-11 by Cliff Moon -%%%------------------------------------------------------------------- --module(node). --author('cliff@powerset.com'). - -%% API --export([name/1, attributes/1]). - --include("../include/common.hrl"). - -%% -ifdef(TEST). -%% -include("../etest/node_test.erl"). -%% -endif. 
- -%%==================================================================== -%% API -%%==================================================================== - -name(Name) when is_atom(Name) -> - Name; -name(Node) when is_tuple(Node) -> - element(1, Node); -name(Node) -> - Node. - -attributes(Name) when is_atom(Name) -> - []; -attributes(Node) when is_tuple(Node) -> - element(2, Node); -attributes(_) -> - []. diff --git a/src/replication.erl b/src/replication.erl deleted file mode 100644 index 96be0ad3..00000000 --- a/src/replication.erl +++ /dev/null @@ -1,165 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: replication.erl -%%% @author Brad Anderson [http://www.cloudant.com] -%%% @copyright 2009 Brad Anderson -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-06-14 by Brad Anderson -%%%------------------------------------------------------------------- --module(replication). --author('brad@cloudant.com'). - -%% API --export([partners/2, partners/3, partners_plus/2]). - --include_lib("eunit/include/eunit.hrl"). --include("../include/config.hrl"). --include("../include/common.hrl"). - - -%%==================================================================== -%% API -%%==================================================================== - -partners(Node, Nodes) -> - partners(Node, Nodes, configuration:get_config()). - - -%%-------------------------------------------------------------------- -%% @spec partners(Node::atom(), Nodes::list(), Config::config()) -> -%% list() -%% @doc returns the list of all replication partners for the specified node -%% @end -%%-------------------------------------------------------------------- -partners(Node, Nodes, Config) -> - N = Config#config.n, - Meta = Config#config.meta, - pick_partners(Meta, Node, Nodes, [], N - 1). - - -%% return a list of live/up Partners, and if all Partners are down, -%% walk the ring to get one other remote node and return it. 
-partners_plus(Node, Nodes) -> - Partners = partners(Node, Nodes), - PartnersDown = lists:subtract(Partners, erlang:nodes()), - PartnersUp = lists:subtract(Partners, PartnersDown), - case PartnersUp of - [] -> - TargetNodes = target_list(Node, Nodes), - NonPartners = lists:subtract(TargetNodes, - lists:flatten([Node, Partners])), - walk_ring(NonPartners); - _ -> - %% at least one partner is up, so gossip w/ them - PartnersUp - end. - - -%%==================================================================== -%% Internal functions -%%==================================================================== - -%% @spec pick_partners(proplist(), Node::dynomite_node(), [Node], [Node], -%% integer()) -> list() -%% @doc iterate through N-1 partner picks, returning the resulting list sorted -pick_partners(_Meta, Node, _Nodes, Acc, 0) -> - lists:sort(lists:delete(Node, Acc)); -pick_partners(Meta, Node, Nodes, Acc, Count) -> - Partner = pick_partner(Meta, Node, Nodes, Acc, 1), - NewNodes = lists:filter(fun(Elem) -> - case Elem of - no_partner_found -> false; - Partner -> false; - _ -> true - end - end, Nodes), - NewAcc = case Partner of - no_partner_found -> Acc; - _ -> [Partner|Acc] - end, - pick_partners(Meta, Node, NewNodes, NewAcc, Count-1). 
- - -%% @spec pick_partner(proplist(), Node::dynomite_node(), [Node], [Node], -%% integer()) -> Node::dynomite_node() -%% @doc pick a specific replication partner at the given level -pick_partner([], Node, Nodes, _Acc, 1) -> - %% handle the no metadata situation - %% Note: This clause must be before the Level > length(Meta) guarded clause - target_key(node:name(Node), lists:map(fun node:name/1, Nodes), roundrobin); - -pick_partner(Meta, _Node, _Nodes, Acc, Level) when Level > length(Meta) -> - Acc; - -pick_partner(Meta, Node, Nodes, Acc, Level) -> - MetaDict = meta_dict(Nodes, Level, dict:new()), - NodeKey = lists:sublist(node:attributes(Node), Level), - Keys = dict:fetch_keys(MetaDict), - {_MetaName, Strategy} = lists:nth(Level, Meta), - TargetKey = target_key(NodeKey, Keys, Strategy), - Candidates = dict:fetch(TargetKey, MetaDict), - case length(Candidates) of - 0 -> - %% didn't find a candidate - no_partner_found; - 1 -> - %% found only one candidate, return it - [Partner] = Candidates, - Partner; - _ -> - pick_partner(Meta, Node, Nodes, Acc, Level + 1) - end. - - -%% @doc construct a dict that holds the key of metadata values so far (up to -%% the current level, and dynomite_node() list as the value. This is used -%% to select a partner in pick_partner/5 -%% @end -meta_dict([], _Level, Dict) -> - Dict; - -meta_dict([Node|Rest], Level, Dict) -> - Key = lists:sublist(node:attributes(Node), Level), - DictNew = dict:append(Key, Node, Dict), - meta_dict(Rest, Level, DictNew). - - -%% @spec target_key(term(), list(), Strategy::atom()) -> term() -%% @doc given the key and keys, sort the list of keys based on stragety (i.e. -%% for roundrobin, sort them, put the NodeKey on the end of the list, and -%% then return the head of the list as the target. -%% @end -%% TODO: moar strategies other than roundrobin? 
-target_key(NodeKey, Keys, roundrobin) -> - SortedKeys = lists:sort(Keys), - TargetKey = case target_list(NodeKey, SortedKeys) of - [] -> no_partner_found; - [Key|_Rest] -> Key - end, - TargetKey. - - -%% @spec target_list(term(), list()) -> list() -%% @doc split the list of keys into 'lessthan NodeKey', NodeKey, and 'greaterthan -%% Nodekey' and then put the lessthan section on the end of the list -%% @end -target_list(_NodeKey, []) -> - []; -target_list(NodeKey, Keys) -> - {A, [NodeKey|B]} = lists:splitwith(fun(K) -> K /= NodeKey end, Keys), - lists:append([B, A, [NodeKey]]). - - -walk_ring([]) -> - %% TODO: should we be more forceful here and throw? not for now - showroom_log:message(info, - "~p:walk_ring/1 - could not find node for gossip", [?MODULE]), - []; - -walk_ring([Node|Rest]) -> - case lists:member(Node, erlang:nodes()) of - true -> [Node]; - _ -> walk_ring(Rest) - end. -- cgit v1.2.3 From 916871eb58001d8f261edeb838f6839dbc195303 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 28 May 2010 16:50:35 -0400 Subject: begin move of dynomite to membership --- ebin/dynomite.app | 29 ------- ebin/membership.app | 20 +++++ include/membership.hrl | 11 +++ src/dbs.erl | 46 +++++++++++ src/dbs_cache.erl | 86 ++++++++++++++++++++ src/dynomite.erl | 22 ----- src/dynomite_app.erl | 50 ------------ src/dynomite_prof.erl | 164 ------------------------------------- src/dynomite_sup.erl | 58 ------------- src/mem3.erl | 6 +- src/membership.erl | 15 ++++ src/membership_app.erl | 19 +++++ src/membership_sup.erl | 24 ++++++ src/partitions.erl | 217 +++++++++++++++++++++++++++++++++++++++++++++++++ 14 files changed, 439 insertions(+), 328 deletions(-) delete mode 100644 ebin/dynomite.app create mode 100644 ebin/membership.app create mode 100644 src/dbs.erl create mode 100644 src/dbs_cache.erl delete mode 100644 src/dynomite.erl delete mode 100644 src/dynomite_app.erl delete mode 100644 src/dynomite_prof.erl delete mode 100644 src/dynomite_sup.erl create mode 100644 
src/membership.erl create mode 100644 src/membership_app.erl create mode 100644 src/membership_sup.erl create mode 100644 src/partitions.erl diff --git a/ebin/dynomite.app b/ebin/dynomite.app deleted file mode 100644 index 634c09b2..00000000 --- a/ebin/dynomite.app +++ /dev/null @@ -1,29 +0,0 @@ -%% dynomite app resource file - -{application, dynomite, - [{description, "Dynomite Clustering System"}, - {mod, {dynomite_app, []}}, - {vsn, "0.9.5-cloudant"}, - {modules, - [ - bootstrap_manager, - bootstrap_receiver, - cluster_ops, - configuration, - dynomite, - dynomite_app, - dynomite_couch_api, - dynomite_couch_storage, - dynomite_prof, - dynomite_sup, - lib_misc, - mem3, - mem_utils, - membership2, - node, - replication, - vector_clock - ]}, - {registered, [membership]}, - {applications, [kernel, stdlib, sasl, crypto, mochiweb]} - ]}. diff --git a/ebin/membership.app b/ebin/membership.app new file mode 100644 index 00000000..82f0b299 --- /dev/null +++ b/ebin/membership.app @@ -0,0 +1,20 @@ +%% membership app resource file + +{application, membership, + [{description, "cluster membership"}, + {mod, {membership_app, []}}, + {vsn, "0.9.6"}, + {modules, + [ + dbs, + dbs_cache, + membership, + membership_app, + membership_sup, + mem3, + partitions, + vector_clock + ]}, + {registered, [membership]}, + {applications, [kernel, stdlib, sasl, crypto, mochiweb]} + ]}. diff --git a/include/membership.hrl b/include/membership.hrl index 98b47e2c..031c12c4 100644 --- a/include/membership.hrl +++ b/include/membership.hrl @@ -1,3 +1,14 @@ +-define(MEMBERSHIP, true). + +-ifndef(FABRIC). +-include("../../fabric/include/fabric.hrl"). +-endif. + +-ifndef(COUCH). +-include("../../couch/src/couch_db.hrl"). +-endif. + +-include_lib("eunit/include/eunit.hrl"). %% version 3 of membership state -record(mem, {header=3, diff --git a/src/dbs.erl b/src/dbs.erl new file mode 100644 index 00000000..b5e17b6a --- /dev/null +++ b/src/dbs.erl @@ -0,0 +1,46 @@ +-module(dbs). +-behaviour(supervisor). 
+ +-export([start_link/0, init/1, childspec/1, sup_upgrade_notify/2]). + +-include("membership.hrl"). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + {ok, MemNodes} = mem3:nodes(), + LiveNodes = nodes(), + ChildSpecs = [childspec(N) || N <- MemNodes, lists:member(N, LiveNodes)], + %gen_event:add_handler(membership_events, showroom_dbs_event, []), + {ok, {{one_for_one, 10, 8}, ChildSpecs}}. + +childspec(Node) -> + ?LOG_INFO("dbs repl ~p --> ~p starting", [node(), Node]), + PostBody = {[ + {<<"source">>, <<"dbs">>}, + {<<"target">>, {[{<<"node">>, Node}, {<<"name">>, <<"dbs">>}]}}, + {<<"continuous">>, true} + ]}, + Id = couch_util:to_hex(erlang:md5(term_to_binary([node(), Node]))), + MFA = {couch_rep, start_link, [Id, PostBody, #user_ctx{}]}, + {Node, MFA, permanent, 100, worker, [couch_rep]}. + +% from http://code.google.com/p/erlrc/wiki/ErlrcHowto +sup_upgrade_notify (_Old, _New) -> + {ok, {_, Specs}} = init([]), + + Old = sets:from_list( + [Name || {Name, _, _, _} <- supervisor:which_children(?MODULE)]), + New = sets:from_list([Name || {Name, _, _, _, _, _} <- Specs]), + Kill = sets:subtract(Old, New), + + sets:fold(fun(Id, ok) -> + supervisor:terminate_child(?MODULE, Id), + supervisor:delete_child(?MODULE, Id), + ok + end, + ok, + Kill), + [supervisor:start_child (?MODULE, Spec) || Spec <- Specs ], + ok. diff --git a/src/dbs_cache.erl b/src/dbs_cache.erl new file mode 100644 index 00000000..96319802 --- /dev/null +++ b/src/dbs_cache.erl @@ -0,0 +1,86 @@ +-module(dbs_cache). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0]). + +-include("membership.hrl"). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 
+ +init([]) -> + ets:new(partitions, [bag, protected, named_table, {keypos,#shard.dbname}]), + ets:new(memnodes, [bag, protected, named_table]), + cache_dbs(), + Self = self(), + couch_db_update_notifier:start_link(fun({updated, <<"dbs">>}) -> + Self ! rebuild_dbs_cache; + (_) -> ok end), + {ok, nil}. + +handle_call(_Msg, _From, State) -> + {reply, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(rebuild_dbs_cache, State) -> + receive rebuild_dbs_cache -> + handle_info(rebuild_dbs_cache, State) + after 0 -> ok end, + T0 = now(), + ?LOG_INFO("rebuilding dbs DB cache", []), + ets:delete_all_objects(partitions), + ets:delete_all_objects(memnodes), + cache_dbs(), + ?LOG_INFO("rebuild of dbs DB cache complete in ~p ms", + [round(timer:now_diff(now(),T0)/1000)]), + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +cache_dbs() -> + try couch_db:open(<<"dbs">>, []) of + {ok, Db} -> + Bt = Db#db.id_tree, + FoldFun = fun(#full_doc_info{id=Id, deleted=false} = FullDocInfo, _, _) -> + {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []), + {Props} = couch_doc:to_json_obj(Doc, []), + cache_map(Id, Props), + cache_nodes(Id, Props), + {ok, true}; + (_, _, _) -> + {ok, nil} + end, + couch_btree:foldl(Bt, FoldFun, nil), + couch_db:close(Db) + catch exit:{noproc,{gen_server,call,[couch_server|_]}} -> + timer:sleep(1000), + exit(couch_server_is_dead) + end. + +cache_map(Id, Props) -> + Map = couch_util:get_value(map, Props), + lists:foreach(fun({[{node,Node},{b,Beg},{e,End}]}) -> + Part = #shard{ + name = partitions:shard_name(Beg, Id), + dbname = Id, + node = Node, + range = [Beg,End] + }, + ets:insert(partitions, Part) + end, Map). + +cache_nodes(Id, Props) -> + Nodes = couch_util:get_value(nodes, Props), + lists:foreach(fun({[{order,Order},{node, Node},{options,Opts}]}) -> + ets:insert(memnodes, {Id, {Order, Node, Opts}}) + end, Nodes). 
+ +%{ok, ets:insert(dbs_cache, {Id, Props})}; diff --git a/src/dynomite.erl b/src/dynomite.erl deleted file mode 100644 index bb50986b..00000000 --- a/src/dynomite.erl +++ /dev/null @@ -1,22 +0,0 @@ -%%% @doc convenience start/stop functions for Dynomite -%%% --module(dynomite). --author('Brad Anderson '). - --export([start/0, stop/0, restart/0]). - - -%% @doc start Dynomite app with no args, for -s at the command-line -start() -> - application:start(dynomite). - - -%% @doc stops the Dynomite application -stop() -> - application:stop(dynomite). - - -%% @doc restart Dynomite app, with no args -restart() -> - stop(), - start(). diff --git a/src/dynomite_app.erl b/src/dynomite_app.erl deleted file mode 100644 index 4b520921..00000000 --- a/src/dynomite_app.erl +++ /dev/null @@ -1,50 +0,0 @@ --module(dynomite_app). --author('cliff@powerset.com'). --author('brad@cloudant.com'). - --behaviour(application). - --include("../include/config.hrl"). --include("../../couch/src/couch_db.hrl"). - -%% Application callbacks --export([start/2, stop/1]). - -%%==================================================================== -%% Application callbacks -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec start(Type, StartArgs) -> {ok, Pid} | -%% {ok, Pid, State} | -%% {error, Reason} -%% @doc This function is called whenever an application -%% is started using application:start/1,2, and should start the processes -%% of the application. If the application is structured according to the -%% OTP design principles as a supervision tree, this means starting the -%% top supervisor of the tree. -%% @end -%%-------------------------------------------------------------------- - - -%% @doc start required apps, join cluster, start dynomite supervisor -start(_Type, _StartArgs) -> - % start dynomite supervisor - dynomite_sup:start_link(). 
- - -%%-------------------------------------------------------------------- -%% @spec stop(State) -> void() -%% @doc This function is called whenever an application -%% has stopped. It is intended to be the opposite of Module:start/2 and -%% should do any necessary cleaning up. The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -stop({_, Sup}) -> - showroom_log:message(alert, "dynomite application stopped", []), - exit(Sup, normal), - ok. - - -%%==================================================================== -%% Internal functions -%%==================================================================== diff --git a/src/dynomite_prof.erl b/src/dynomite_prof.erl deleted file mode 100644 index 80c4b5b7..00000000 --- a/src/dynomite_prof.erl +++ /dev/null @@ -1,164 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: dynomite_prof.erl -%%% @author Cliff Moon <> [] -%%% @copyright 2009 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-02-15 by Cliff Moon -%%%------------------------------------------------------------------- --module(dynomite_prof). --author('cliff@powerset.com'). - --behaviour(gen_server). - -%% API --export([start_link/0, start_prof/1, stop_prof/1, stats/1, averages/0, balance_prof/0]). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --record(state, {ets,balance}). - --record(profile, {name, count, sum}). 
- -%%==================================================================== -%% API -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec start_link() -> {ok,Pid} | ignore | {error,Error} -%% @doc Starts the server -%% @end -%%-------------------------------------------------------------------- -start_link() -> - gen_server:start_link({local, dynomite_prof}, ?MODULE, [], []). - -stats(Id) -> - gen_server:call(dynomite_prof, {stats, Id}). - -balance_prof() -> - gen_server:cast(dynomite_prof, {balance, self(), lib_misc:now_float()}). - -start_prof(Id) -> - gen_server:cast(dynomite_prof, {start, self(), Id, lib_misc:now_float()}). - -stop_prof(Id) -> - gen_server:cast(dynomite_prof, {stop, self(), Id, lib_misc:now_float()}). - -averages() -> - gen_server:call(dynomite_prof, averages). - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% @doc Initiates the server -%% @end -%%-------------------------------------------------------------------- -init([]) -> - Tid = ets:new(profiling, [set, {keypos, 2}]), - Bal = ets:new(balance, [set]), - {ok, #state{ets=Tid, balance=Bal}}. 
- -%%-------------------------------------------------------------------- -%% @spec -%% handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% @doc Handling call messages -%% @end -%%-------------------------------------------------------------------- -handle_call({stats, Id}, _From, State = #state{ets=Ets}) -> - Reply = ets:lookup(Ets, Id), - {reply, Reply, State}; - -handle_call(table, _From, State = #state{ets=Ets}) -> - {reply, Ets, State}; - -handle_call(averages, _From, State = #state{ets=Ets,balance=Bal}) -> - Avgs = ets:foldl(fun(#profile{name=Name,count=Count,sum=Sum}, List) -> - [{Name, Sum/Count}|List] - end, [], Ets), - {_, MaxCount} = ets:foldl(fun - ({Pid, Count}, {_P, M}) when Count > M -> {Pid, Count}; - (_, {P, M}) -> {P, M} - end, {pid, 0}, Bal), - Balances = ets:foldl(fun({Pid, Count}, List) -> - [{Pid, Count / MaxCount} | List] - end, [], Bal), - {reply, [Balances, Avgs], State}. - -%%-------------------------------------------------------------------- -%% @spec handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling cast messages -%% @end -%%-------------------------------------------------------------------- -handle_cast({balance, Pid, _Time}, State = #state{balance=Ets}) -> - case ets:lookup(Ets, Pid) of - [] -> ets:insert(Ets, {Pid, 1}); - [{Pid, Count}] -> ets:insert(Ets, {Pid, Count+1}) - end, - {noreply, State}; - -handle_cast({start, Pid, Id, Time}, State = #state{ets=_Ets}) -> - put({Pid,Id}, Time), - {noreply, State}; - -handle_cast({stop, Pid, Id, Time}, State = #state{ets=Ets}) -> - case get({Pid, Id}) of - undefined -> ok; - OldTime -> - erase({Pid, Id}), - increment_time(Ets, Time-OldTime, Id) - end, - {noreply, State}. 
- -%%-------------------------------------------------------------------- -%% @spec handle_info(Info, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling all non call/cast messages -%% @end -%%-------------------------------------------------------------------- -handle_info(_Info, State) -> - {noreply, State}. - -%%-------------------------------------------------------------------- -%% @spec terminate(Reason, State) -> void() -%% @doc This function is called by a gen_server when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -terminate(_Reason, _State) -> - ok. - -%%-------------------------------------------------------------------- -%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} -%% @doc Convert process state when code is changed -%% @end -%%-------------------------------------------------------------------- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- -increment_time(Ets, Time, Id) -> - case ets:lookup(Ets, Id) of - [] -> ets:insert(Ets, #profile{name=Id,count=1,sum=Time}); - [#profile{name=Id,count=Count,sum=Sum}] -> ets:insert(Ets, #profile{name=Id,count=Count+1,sum=Sum+Time}) - end. diff --git a/src/dynomite_sup.erl b/src/dynomite_sup.erl deleted file mode 100644 index b60824ac..00000000 --- a/src/dynomite_sup.erl +++ /dev/null @@ -1,58 +0,0 @@ --module(dynomite_sup). --author('brad@cloudant.com'). - --behaviour(supervisor). - -%% API --export([start_link/0]). - -%% Supervisor callbacks --export([init/1]). - --include("../include/config.hrl"). - --define(SERVER, ?MODULE). 
- -%%==================================================================== -%% API functions -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec start_link() -> {ok,Pid} | ignore | {error,Error} -%% @doc Starts the supervisor -%% @end -%%-------------------------------------------------------------------- -start_link() -> - supervisor:start_link(?MODULE, []). - -%%==================================================================== -%% Supervisor callbacks -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, {SupFlags, [ChildSpec]}} | -%% ignore | -%% {error, Reason} -%% @doc Whenever a supervisor is started using -%% supervisor:start_link/[2,3], this function is called by the new process -%% to find out about restart strategy, maximum restart frequency and child -%% specifications. -%% @end -%%-------------------------------------------------------------------- -init(_Args) -> - Membership = {membership, - {mem3, start_link, []}, - permanent, - 1000, - worker, - [mem3]}, - MemEventMgr = {mem_event_manager, - {gen_event, start_link, [{local, membership_events}]}, - permanent, - 1000, - worker, - []}, - {ok, {{one_for_one,10,1}, [Membership, MemEventMgr]}}. - - -%%==================================================================== -%% Internal functions -%%==================================================================== diff --git a/src/mem3.erl b/src/mem3.erl index 11c39ef7..cbb7a8d5 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -22,9 +22,6 @@ -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). -export([join/3, clock/0, state/0, states/0, nodes/0, fullnodes/0, start_gossip/0]). -%-export([partitions/0, fullmap/0]). -%-export([nodes/0, nodes_for_part/1, nodes_for_part/2, all_nodes_parts/1]). -%-export([parts_for_node/1]). 
%% for testing more than anything else -export([merge_nodes/2, next_up_node/1, next_up_node/3]). @@ -34,8 +31,7 @@ terminate/2, code_change/3]). %% includes --include("../include/membership.hrl"). --include_lib("eunit/include/eunit.hrl"). +-include("membership.hrl"). -define(SERVER, membership). -define(STATE_FILE_PREFIX, "membership"). diff --git a/src/membership.erl b/src/membership.erl new file mode 100644 index 00000000..1e06e798 --- /dev/null +++ b/src/membership.erl @@ -0,0 +1,15 @@ +-module(membership). +-author('Brad Anderson '). + +-export([start/0, stop/0, restart/0]). + + +start() -> + application:start(membership). + +stop() -> + application:stop(membership). + +restart() -> + stop(), + start(). diff --git a/src/membership_app.erl b/src/membership_app.erl new file mode 100644 index 00000000..589a6f81 --- /dev/null +++ b/src/membership_app.erl @@ -0,0 +1,19 @@ +-module(membership_app). +-author('brad@cloudant.com'). + +-behaviour(application). + +-include("membership.hrl"). + +%% Application callbacks +-export([start/2, stop/1]). + +%% @doc start required apps, join cluster, start supervisor +start(_Type, _StartArgs) -> + % start dynomite supervisor + membership_sup:start_link(). + +stop({_, Sup}) -> + ?LOG_ALERT("dynomite application stopped", []), + exit(Sup, normal), + ok. diff --git a/src/membership_sup.erl b/src/membership_sup.erl new file mode 100644 index 00000000..81b6562a --- /dev/null +++ b/src/membership_sup.erl @@ -0,0 +1,24 @@ +-module(membership_sup). +-author('brad@cloudant.com'). + +-behaviour(supervisor). + +%% API +-export([start_link/0]). + +%% Supervisor callbacks +-export([init/1]). + +-define(SERVER, ?MODULE). + +start_link() -> + supervisor:start_link(?MODULE, []). + +init(_Args) -> + Membership = {membership, + {mem3, start_link, []}, + permanent, + 1000, + worker, + [mem3]}, + {ok, {{one_for_one,10,1}, [Membership]}}. 
diff --git a/src/partitions.erl b/src/partitions.erl new file mode 100644 index 00000000..ade8efe4 --- /dev/null +++ b/src/partitions.erl @@ -0,0 +1,217 @@ +-module(partitions). +-author('brad@cloudant.com'). + +%% API +-export([fullmap/2, fullmap/3, hash/1, install_fullmap/4]). +-export([for_key/2, all_parts/1]). +-export([shard_name/2]). + +-define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space + +-include("../../couch/src/couch_db.hrl"). +-include("../../dynomite/include/membership.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%%==================================================================== +%% API +%%==================================================================== + +%% @doc build a full partition map +fullmap(DbName, Options) -> + {ok, FullNodes} = mem3:fullnodes(), + {_, Nodes, _} = lists:unzip3(lists:keysort(1, FullNodes)), + fullmap(DbName, Nodes, Options). + +fullmap(DbName, Nodes, Options) -> + {N,Q} = db_init_constants(Options), + NewNodes = ordered_nodes(DbName, Nodes), + Pmap = pmap(Q, NewNodes), + int_fullmap(DbName, N, Pmap, NewNodes). + +%% @spec hash(term()) -> Digest::binary() +%% @doc uses SHA-1 as its hash +hash(Item) when is_binary(Item) -> + crypto:sha(Item); +hash(Item) -> + crypto:sha(term_to_binary(Item)). + +install_fullmap(DbName, Fullmap, FullNodes, Options) -> + {N,Q} = db_init_constants(Options), + Doc = {[{<<"_id">>,DbName}, + {map, jsonify(map, Fullmap)}, + {nodes, jsonify(nodes, FullNodes)}, + {n,N}, + {q,Q}]}, + write_db_doc(Doc). + +for_key(DbName, Key) -> + HashKey = hash_int(hash(Key)), + Head = #shard{ + name = '_', + node = '_', + dbname = DbName, + range = ['$1','$2'], + ref = '_' + }, + Conditions = [{'<', '$1', HashKey}, {'<', HashKey, '$2'}], + case ets:select(partitions, [{Head, Conditions, ['$_']}]) of + [] -> + erlang:error(database_does_not_exist); + Shards -> + Shards + end. + +all_parts(DbName) -> + ets:lookup(partitions, DbName). + +% %% @doc for the given key, return a list of {Node,Part} tuples. 
Nodes are both +% %% primary and replication partner nodes, and should number N. +% int_node_parts_for_key(Key) -> +% Config = configuration:get_config(), +% Hash = lib_misc:hash(Key), +% Part = partitions:hash_to_partition(Hash, Config#config.q), +% NodePartList = all_nodes_parts(true), +% lists:filter(fun({_N,P}) -> P =:= Part end, NodePartList). + +%%==================================================================== +%% Internal functions +%%==================================================================== + +%% @doc get cluster constants from options or config +db_init_constants(Options) -> + {const(n, Options), const(q, Options)}. + +%% @doc get individual constant +const(Const, Options) -> + ListResult = case couch_util:get_value(Const, Options) of + undefined -> couch_config:get("cluster", atom_to_list(Const)); + Val -> Val + end, + list_to_integer(ListResult). + +%% @doc hash the dbname, and return the corresponding node for seeding a ring +seednode(DbName, Nodes) -> + Hash = hash(DbName), + HashInt = hash_int(Hash), + Size = partition_range(length(Nodes)), + Factor = (HashInt div Size), + lists:nth(Factor+1, Nodes). + +%% @doc take the list of nodes, and rearrange it, starting with the node that +%% results from hashing the Term +ordered_nodes(Term, Nodes) -> + SeedNode = seednode(Term, Nodes), + {A, B} = lists:splitwith(fun(N) -> N /= SeedNode end, Nodes), + lists:append(B,A). + +%% @doc create a partition map +pmap(NumPartitions, Nodes) -> + Increment = ?RINGTOP div NumPartitions, + Parts = parts(?RINGTOP, Increment, 0, []), + make_map(Nodes, Nodes, Parts, []). + +%% @doc makes a {beg, end} list of partition ranges +%% last range may have an extra few values, because Increment is created +%% with Ringtop 'div' NumPartitions above. 
+parts(Top, _, Beg, Acc) when Beg > Top -> Acc; +parts(Top, Increment, Beg, Acc) -> + End = case Beg + 2*Increment of + Over when Over > Top -> Top; + _ -> Beg + Increment - 1 + end, + NewAcc = [{Beg, End} | Acc], + parts(Top, Increment, End+1, NewAcc). + +%% @doc create a full map, which is a pmap with N-1 replication partner nodes +%% added per partition +int_fullmap(DbName, N, Pmap, Nodes) -> + Full = lists:foldl(fun({Node,{B,E} = Part}, AccIn) -> + Primary = [#shard{dbname=DbName, node=Node, range=[B,E], + name=shard_name(B,DbName)}], + Partners = partners(DbName, N, Node, Nodes, Part), + lists:append([Primary, Partners, AccIn]) + end, [], Pmap), + lists:reverse(Full). + +partners(DbName, N, Node, Nodes, {Beg,End}) -> + {A, [Node|B]} = lists:splitwith(fun(Nd) -> Nd /= Node end, Nodes), + Nodes1 = lists:append(B,A), + Partners = lists:sublist(Nodes1, N-1), % N-1 replication partner nodes + lists:map(fun(Partner) -> + #shard{dbname=DbName, node=Partner, range=[Beg,End], + name=shard_name(Beg,DbName)} + end, Partners). + + +%% @doc turn hash into an integer +hash_int(Hash) when is_binary(Hash) -> + <> = Hash, + IndexAsInt; +hash_int(Hash) when is_integer(Hash) -> + Hash. + +%% @doc size of one partition in the ring +partition_range(Q) -> + trunc( ?RINGTOP / Q ). % SHA-1 space / Q + +%% @doc assign nodes to each of the partitions. When you run out of nodes, +%% start at the beginning of the node list again. +%% The provided node list starts with the seed node (seednode fun) +make_map(_,_,[], Acc) -> + lists:keysort(2,Acc); +make_map(AllNodes, [], Parts, Acc) -> + % start back at beginning of node list + make_map(AllNodes, AllNodes, Parts, Acc); +make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> + % add a node/part combo to the Acc + make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). 
+ +jsonify(map, Map) -> + lists:map(fun(#shard{node=Node, range=[Beg,End]}) -> + {[{node, Node}, {b, Beg}, {e, End}]} + end, Map); +jsonify(nodes, Nodes) -> + lists:map(fun({Order, Node, Options}) -> + {[{order, Order}, {node, Node}, {options, Options}]} + end, Nodes). + +write_db_doc(EDoc) -> + Doc = couch_doc:from_json_obj(EDoc), + {ok, Db} = couch_db:open(<<"dbs">>, []), + {ok, NewRev} = couch_db:update_doc(Db, Doc, []), + NewRev. + +shard_name(Part, DbName) when is_list(DbName) -> + shard_name(Part, ?l2b(DbName)); +shard_name(Part, DbName) -> + PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), + <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. + +% %% @doc given an int and a partition map from ets cache table, +% %% get the first part greater than Int. +% int_to_nps(_, [], _, Acc) -> Acc; +% int_to_nps(Int, [{_,{N,P}} | Rest], CurrentPart, NPAcc) -> +% case P > Int of +% true -> +% case P =/= CurrentPart of +% true -> NPAcc; +% _ -> +% NewAcc = [{N,P}|NPAcc], +% int_to_nps(Int, Rest, P, NewAcc) +% end; +% _ -> int_to_nps(Int, Rest, P, NPAcc) +% end. + + +% % get parts +% {_,NPs} = lists:unzip(Map), +% {_,AllParts} = lists:unzip(NPs), +% Parts = lists:usort(AllParts), +% % lookup part +% Rem = lists:dropwhile(fun(E) -> E < Int end, Parts), +% Part = case Rem of +% [] -> 0; % wrap-around-ring case (back to 0) +% [H|_T] -> H +% end, +% % get nodes/parts +% ok. -- cgit v1.2.3 From 2a9cc195ec53a5d15d2240d09dae59c007e6471f Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 28 May 2010 17:19:25 -0400 Subject: forgot to start the db children --- src/membership_sup.erl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/membership_sup.erl b/src/membership_sup.erl index 81b6562a..27e547ff 100644 --- a/src/membership_sup.erl +++ b/src/membership_sup.erl @@ -21,4 +21,18 @@ init(_Args) -> 1000, worker, [mem3]}, - {ok, {{one_for_one,10,1}, [Membership]}}. 
+ DbsRepl = + {dbs, + {dbs, start_link, []}, + permanent, + infinity, + supervisor, + [dbs]}, + DbsCache = + {dbs_cache, + {dbs_cache, start_link, []}, + permanent, + 1000, + worker, + [dbs_cache]}, + {ok, {{one_for_one,10,1}, [Membership, DbsRepl, DbsCache]}}. -- cgit v1.2.3 From 58a668a67081c1c3374605fcc9d98b13680454a3 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 2 Jun 2010 12:17:48 -0400 Subject: throw an error in all_parts if DB does not exist --- src/partitions.erl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/partitions.erl b/src/partitions.erl index ade8efe4..d64d405a 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -62,7 +62,12 @@ for_key(DbName, Key) -> end. all_parts(DbName) -> - ets:lookup(partitions, DbName). + case ets:lookup(partitions, DbName) of + [] -> + erlang:error(database_does_not_exist); + Else -> + Else + end. % %% @doc for the given key, return a list of {Node,Part} tuples. Nodes are both % %% primary and replication partner nodes, and should number N. -- cgit v1.2.3 From ec6a2999c98332ff90e2d0907eefc8bf5671c71d Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 7 Jun 2010 16:18:44 -0400 Subject: dbs database creation moved to membership app --- src/membership_app.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/membership_app.erl b/src/membership_app.erl index 589a6f81..7f050fe5 100644 --- a/src/membership_app.erl +++ b/src/membership_app.erl @@ -10,10 +10,11 @@ %% @doc start required apps, join cluster, start supervisor start(_Type, _StartArgs) -> - % start dynomite supervisor + couch_api:create_db(<<"dbs">>, []), % all nodes have local 'dbs' db + % start membership supervisor membership_sup:start_link(). stop({_, Sup}) -> - ?LOG_ALERT("dynomite application stopped", []), + ?LOG_ALERT("membership application stopped", []), exit(Sup, normal), ok. 
-- cgit v1.2.3 From 9f19e5450164e492bfe1ab57862725a1966cd231 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 8 Jun 2010 22:25:34 -0400 Subject: missed a lib_misc function that vector clocks used, during dynomite -> membership rename --- src/vector_clock.erl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/vector_clock.erl b/src/vector_clock.erl index 740d1520..0a422334 100644 --- a/src/vector_clock.erl +++ b/src/vector_clock.erl @@ -9,7 +9,7 @@ %% -include("etest/vector_clock_test.erl"). %% -endif. -create(NodeName) -> [{NodeName, lib_misc:now_float()}]. +create(NodeName) -> [{NodeName, now_float()}]. truncate(Clock) when length(Clock) > 10 -> lists:nthtail(length(Clock) - 10, lists:keysort(2, Clock)); @@ -17,13 +17,13 @@ truncate(Clock) when length(Clock) > 10 -> truncate(Clock) -> Clock. increment(NodeName, [{NodeName, _Version}|Clocks]) -> - [{NodeName, lib_misc:now_float()}|Clocks]; + [{NodeName, now_float()}|Clocks]; increment(NodeName, [NodeClock|Clocks]) -> [NodeClock|increment(NodeName, Clocks)]; increment(NodeName, []) -> - [{NodeName, lib_misc:now_float()}]. + [{NodeName, now_float()}]. resolve({ClockA, ValuesA}, {ClockB, ValuesB}) -> case compare(ClockA, ClockB) of @@ -98,3 +98,12 @@ equals(ClockA, ClockB) -> end, ClockB) end, ClockA), Equivalent and (length(ClockA) == length(ClockB)). + +now_float() -> + time_to_epoch_float(now()). + +time_to_epoch_float(Time) when is_integer(Time) or is_float(Time) -> + Time; + +time_to_epoch_float({Mega,Sec,Micro}) -> + Mega * 1000000 + Sec + Micro / 1000000. 
-- cgit v1.2.3 From 05332b0749393e7e644e5f7f76500b57bb70f7e3 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 9 Jun 2010 22:19:59 -0400 Subject: faster+simpler get_pingnode_state --- src/mem3.erl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index cbb7a8d5..29140471 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -321,10 +321,7 @@ install_new_state(#mem{args=Args} = State) -> get_pingnode_state(PingNode) -> - % ping the PingNode and get its state - pong = net_adm:ping(PingNode), - timer:sleep(1000), % let dist. erl get set up... sigh. - {ok, RemoteState} = rpc:call(PingNode, mem3, state, []), + {ok, RemoteState} = gen_server:call({?SERVER, PingNode}, state), RemoteState. -- cgit v1.2.3 From cd25fee1bfd046afe92c83df243ac69dfca2bffe Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 10 Jun 2010 09:22:01 -0400 Subject: more detailed report for mem3:states() --- src/mem3.erl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 29140471..55a8a541 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -77,14 +77,20 @@ state() -> gen_server:call(?SERVER, state). --spec states() -> {ok, [mem_state()]}. +%% @doc Detailed report of cluster-wide membership state. Queries the state +%% on all member nodes and builds a dictionary with unique states as the +%% key and the nodes holding that state as the value. Also reports member +%% nodes which fail to respond and nodes which are connected but are not +%% cluster members. Useful for debugging. +-spec states() -> [{mem_state() | bad_nodes | non_member_nodes, [node()]}]. states() -> {ok, Nodes} = mem3:nodes(), - case rpc:multicall(Nodes, ?MODULE, state, []) of - {States, []} -> {ok, lists:map(fun({ok,S}) -> S end, States)}; - {Good, Bad} -> {error, {[{good,Good},{bad,Bad}]}} - end. 
- + AllNodes = [node()|erlang:nodes()], + {Replies, BadNodes} = gen_server:multi_call(Nodes, ?SERVER, state), + Dict = lists:foldl(fun({Node, {ok,State}}, D) -> + orddict:append(State, Node, D) + end, orddict:new(), Replies), + [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. -spec start_gossip() -> ok. start_gossip() -> -- cgit v1.2.3 From 67ddab6a918f1844ec5d15ac5613f70ef763c356 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 10 Jun 2010 09:43:22 -0400 Subject: remove unused code --- src/partitions.erl | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/src/partitions.erl b/src/partitions.erl index d64d405a..cfc1d90b 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -53,6 +53,7 @@ for_key(DbName, Key) -> range = ['$1','$2'], ref = '_' }, + % TODO these conditions assume A < B, which we don't require Conditions = [{'<', '$1', HashKey}, {'<', HashKey, '$2'}], case ets:select(partitions, [{Head, Conditions, ['$_']}]) of [] -> @@ -69,15 +70,6 @@ all_parts(DbName) -> Else end. -% %% @doc for the given key, return a list of {Node,Part} tuples. Nodes are both -% %% primary and replication partner nodes, and should number N. -% int_node_parts_for_key(Key) -> -% Config = configuration:get_config(), -% Hash = lib_misc:hash(Key), -% Part = partitions:hash_to_partition(Hash, Config#config.q), -% NodePartList = all_nodes_parts(true), -% lists:filter(fun({_N,P}) -> P =:= Part end, NodePartList). - %%==================================================================== %% Internal functions %%==================================================================== @@ -191,32 +183,3 @@ shard_name(Part, DbName) when is_list(DbName) -> shard_name(Part, DbName) -> PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. - -% %% @doc given an int and a partition map from ets cache table, -% %% get the first part greater than Int. 
-% int_to_nps(_, [], _, Acc) -> Acc; -% int_to_nps(Int, [{_,{N,P}} | Rest], CurrentPart, NPAcc) -> -% case P > Int of -% true -> -% case P =/= CurrentPart of -% true -> NPAcc; -% _ -> -% NewAcc = [{N,P}|NPAcc], -% int_to_nps(Int, Rest, P, NewAcc) -% end; -% _ -> int_to_nps(Int, Rest, P, NPAcc) -% end. - - -% % get parts -% {_,NPs} = lists:unzip(Map), -% {_,AllParts} = lists:unzip(NPs), -% Parts = lists:usort(AllParts), -% % lookup part -% Rem = lists:dropwhile(fun(E) -> E < Int end, Parts), -% Part = case Rem of -% [] -> 0; % wrap-around-ring case (back to 0) -% [H|_T] -> H -% end, -% % get nodes/parts -% ok. -- cgit v1.2.3 From 4928ce68b31b7fd5ce3fcf1d4ebc4a786fd045eb Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 10 Jun 2010 09:45:28 -0400 Subject: remove hash_int(), just convert hash() to int inline --- src/partitions.erl | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/partitions.erl b/src/partitions.erl index cfc1d90b..0a20d195 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -45,7 +45,7 @@ install_fullmap(DbName, Fullmap, FullNodes, Options) -> write_db_doc(Doc). for_key(DbName, Key) -> - HashKey = hash_int(hash(Key)), + <> = hash(Key), Head = #shard{ name = '_', node = '_', @@ -88,8 +88,7 @@ const(Const, Options) -> %% @doc hash the dbname, and return the corresponding node for seeding a ring seednode(DbName, Nodes) -> - Hash = hash(DbName), - HashInt = hash_int(Hash), + <> = hash(DbName), Size = partition_range(length(Nodes)), Factor = (HashInt div Size), lists:nth(Factor+1, Nodes). @@ -139,14 +138,6 @@ partners(DbName, N, Node, Nodes, {Beg,End}) -> name=shard_name(Beg,DbName)} end, Partners). - -%% @doc turn hash into an integer -hash_int(Hash) when is_binary(Hash) -> - <> = Hash, - IndexAsInt; -hash_int(Hash) when is_integer(Hash) -> - Hash. - %% @doc size of one partition in the ring partition_range(Q) -> trunc( ?RINGTOP / Q ). 
% SHA-1 space / Q -- cgit v1.2.3 From 9d4e5646f1387d310b91d9cc40cbbe7db96aa33b Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 10 Jun 2010 11:41:26 -0400 Subject: mem3:nodes() does the trick here --- src/partitions.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/partitions.erl b/src/partitions.erl index 0a20d195..09337561 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -18,8 +18,7 @@ %% @doc build a full partition map fullmap(DbName, Options) -> - {ok, FullNodes} = mem3:fullnodes(), - {_, Nodes, _} = lists:unzip3(lists:keysort(1, FullNodes)), + {ok, Nodes} = mem3:nodes(), fullmap(DbName, Nodes, Options). fullmap(DbName, Nodes, Options) -> -- cgit v1.2.3 From 6195e39d61c991a463377106310d7db971415928 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 14 Jun 2010 11:47:01 -0400 Subject: better handling of repeated create_db attempts. BugzID 10300 --- src/partitions.erl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/partitions.erl b/src/partitions.erl index 09337561..4fba0011 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -163,10 +163,24 @@ jsonify(nodes, Nodes) -> end, Nodes). write_db_doc(EDoc) -> - Doc = couch_doc:from_json_obj(EDoc), {ok, Db} = couch_db:open(<<"dbs">>, []), - {ok, NewRev} = couch_db:update_doc(Db, Doc, []), - NewRev. + try + update_db_doc(Db, couch_doc:from_json_obj(EDoc)) + catch {conflict, _} -> + ?LOG_ERROR("conflict writing db doc, must be a race", []) + after + couch_db:close(Db) + end. + +update_db_doc(Db, #doc{id=Id, body=Body} = Doc) -> + case couch_db:open_doc(Db, Id, []) of + {not_found, _} -> + {ok, _} = couch_db:update_doc(Db, Doc, []); + {ok, #doc{body=Body}} -> + ok; + {ok, OldDoc} -> + {ok, _} = couch_db:update_doc(Db, OldDoc#doc{body=Body}, []) + end. 
shard_name(Part, DbName) when is_list(DbName) -> shard_name(Part, ?l2b(DbName)); -- cgit v1.2.3 From f26ad32b18809d81c9c01d6b007736eb0299d5f6 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 14 Jun 2010 16:06:05 -0400 Subject: atoms are not ejson keys, use bins for http compatibility --- src/dbs_cache.erl | 17 +++++++++++------ src/partitions.erl | 16 ++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/dbs_cache.erl b/src/dbs_cache.erl index 96319802..f4faab3a 100644 --- a/src/dbs_cache.erl +++ b/src/dbs_cache.erl @@ -66,21 +66,26 @@ cache_dbs() -> end. cache_map(Id, Props) -> - Map = couch_util:get_value(map, Props), - lists:foreach(fun({[{node,Node},{b,Beg},{e,End}]}) -> + Map = couch_util:get_value(<<"map">>, Props), + lists:foreach(fun({[{<<"node">>,Node},{<<"b">>,Beg},{<<"e">>,End}]}) -> Part = #shard{ name = partitions:shard_name(Beg, Id), dbname = Id, - node = Node, + node = to_atom(Node), range = [Beg,End] }, ets:insert(partitions, Part) end, Map). cache_nodes(Id, Props) -> - Nodes = couch_util:get_value(nodes, Props), - lists:foreach(fun({[{order,Order},{node, Node},{options,Opts}]}) -> - ets:insert(memnodes, {Id, {Order, Node, Opts}}) + Nodes = couch_util:get_value(<<"nodes">>, Props), + lists:foreach(fun({[{<<"order">>,Order},{<<"node">>, Node},{<<"options">>,Opts}]}) -> + ets:insert(memnodes, {Id, {Order, to_atom(Node), Opts}}) end, Nodes). +to_atom(Node) when is_binary(Node) -> + list_to_atom(binary_to_list(Node)); +to_atom(Node) when is_atom(Node) -> + Node. 
+ %{ok, ets:insert(dbs_cache, {Id, Props})}; diff --git a/src/partitions.erl b/src/partitions.erl index 4fba0011..3e024264 100644 --- a/src/partitions.erl +++ b/src/partitions.erl @@ -37,10 +37,10 @@ hash(Item) -> install_fullmap(DbName, Fullmap, FullNodes, Options) -> {N,Q} = db_init_constants(Options), Doc = {[{<<"_id">>,DbName}, - {map, jsonify(map, Fullmap)}, - {nodes, jsonify(nodes, FullNodes)}, - {n,N}, - {q,Q}]}, + {<<"map">>, jsonify(<<"map">>, Fullmap)}, + {<<"nodes">>, jsonify(<<"nodes">>, FullNodes)}, + {<<"n">>,N}, + {<<"q">>,Q}]}, write_db_doc(Doc). for_key(DbName, Key) -> @@ -153,13 +153,13 @@ make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> % add a node/part combo to the Acc make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). -jsonify(map, Map) -> +jsonify(<<"map">>, Map) -> lists:map(fun(#shard{node=Node, range=[Beg,End]}) -> - {[{node, Node}, {b, Beg}, {e, End}]} + {[{<<"node">>, Node}, {<<"b">>, Beg}, {<<"e">>, End}]} end, Map); -jsonify(nodes, Nodes) -> +jsonify(<<"nodes">>, Nodes) -> lists:map(fun({Order, Node, Options}) -> - {[{order, Order}, {node, Node}, {options, Options}]} + {[{<<"order">>, Order}, {<<"node">>, Node}, {<<"options">>, Options}]} end, Nodes). write_db_doc(EDoc) -> -- cgit v1.2.3 From 8759d8d56b4214cb209563dabbfeb298e0afd21d Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 15 Jun 2010 08:51:31 -0400 Subject: skip non-membership docs in dbs.couch --- src/dbs_cache.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dbs_cache.erl b/src/dbs_cache.erl index f4faab3a..1afb873b 100644 --- a/src/dbs_cache.erl +++ b/src/dbs_cache.erl @@ -66,7 +66,7 @@ cache_dbs() -> end. cache_map(Id, Props) -> - Map = couch_util:get_value(<<"map">>, Props), + Map = couch_util:get_value(<<"map">>, Props, []), lists:foreach(fun({[{<<"node">>,Node},{<<"b">>,Beg},{<<"e">>,End}]}) -> Part = #shard{ name = partitions:shard_name(Beg, Id), @@ -78,7 +78,7 @@ cache_map(Id, Props) -> end, Map). 
cache_nodes(Id, Props) -> - Nodes = couch_util:get_value(<<"nodes">>, Props), + Nodes = couch_util:get_value(<<"nodes">>, Props, []), lists:foreach(fun({[{<<"order">>,Order},{<<"node">>, Node},{<<"options">>,Opts}]}) -> ets:insert(memnodes, {Id, {Order, to_atom(Node), Opts}}) end, Nodes). -- cgit v1.2.3 From 86720c8aad8d8329e8c029a0f9c5fa90d9e04adf Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 15 Jun 2010 10:23:38 -0400 Subject: add membership events back into mem3, and add dbs handlers --- ebin/membership.app | 1 + src/dbs.erl | 2 +- src/mem3.erl | 12 +++++++++++- src/membership_sup.erl | 8 +++++++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/ebin/membership.app b/ebin/membership.app index 82f0b299..4773b636 100644 --- a/ebin/membership.app +++ b/ebin/membership.app @@ -8,6 +8,7 @@ [ dbs, dbs_cache, + dbs_event, membership, membership_app, membership_sup, diff --git a/src/dbs.erl b/src/dbs.erl index b5e17b6a..345788ef 100644 --- a/src/dbs.erl +++ b/src/dbs.erl @@ -12,7 +12,7 @@ init([]) -> {ok, MemNodes} = mem3:nodes(), LiveNodes = nodes(), ChildSpecs = [childspec(N) || N <- MemNodes, lists:member(N, LiveNodes)], - %gen_event:add_handler(membership_events, showroom_dbs_event, []), + gen_event:add_handler(membership_events, dbs_event, []), {ok, {{one_for_one, 10, 8}, ChildSpecs}}. 
childspec(Node) -> diff --git a/src/mem3.erl b/src/mem3.erl index 55a8a541..d715e657 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -202,12 +202,14 @@ handle_cast(Msg, State) -> %% net_kernel:monitor_nodes(true) handle_info({nodedown, Node}, State) -> showroom_log:message(alert, "membership: nodedown ~p", [Node]), + notify(nodedown, [Node]), {noreply, State}; %% @doc handle nodeup messages because we have %% net_kernel:monitor_nodes(true) handle_info({nodeup, Node}, State) -> showroom_log:message(alert, "membership: nodeup ~p", [Node]), + notify(nodeup, [Node]), {noreply, State}; %% ignored info @@ -296,10 +298,12 @@ handle_join(replace, {OldNode, NewOpts}, PingNode, _State) -> OldState = #mem{nodes=OldNodes} = get_pingnode_state(PingNode), {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), + notify(node_leave, [OldNode]), int_join([], OldState#mem{nodes=NewNodes}); % leave -handle_join(leave, [_OldNode | _], _PingNode, _State) -> +handle_join(leave, [OldNode | _], _PingNode, _State) -> % TODO implement me + notify(node_leave, [OldNode]), ok; handle_join(JoinType, _, PingNode, _) -> @@ -311,6 +315,7 @@ handle_join(JoinType, _, PingNode, _) -> int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> check_pos(Pos, N, Nodes), + notify(node_join, [N]), [New|AccIn] end, Nodes, ExtNodes), NewNodes1 = lists:sort(NewNodes), @@ -530,3 +535,8 @@ compare_state_with_rest(#mem{clock=Clock} = _State, States) -> length(BadResults) == 0 -> match; true -> {bad_state_match, node(), BadResults} end. + +notify(Type, Nodes) -> + lists:foreach(fun(Node) -> + gen_event:notify(membership_events, {Type, Node}) + end, Nodes). 
diff --git a/src/membership_sup.erl b/src/membership_sup.erl index 27e547ff..f203924d 100644 --- a/src/membership_sup.erl +++ b/src/membership_sup.erl @@ -21,6 +21,12 @@ init(_Args) -> 1000, worker, [mem3]}, + MemEventMgr = {mem_event_manager, + {gen_event, start_link, [{local, membership_events}]}, + permanent, + 1000, + worker, + []}, DbsRepl = {dbs, {dbs, start_link, []}, @@ -35,4 +41,4 @@ init(_Args) -> 1000, worker, [dbs_cache]}, - {ok, {{one_for_one,10,1}, [Membership, DbsRepl, DbsCache]}}. + {ok, {{one_for_one,10,1}, [Membership, MemEventMgr, DbsRepl, DbsCache]}}. -- cgit v1.2.3 From 113581d1227d70c6b7b3447f7305695993d638c4 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Thu, 17 Jun 2010 10:17:25 -0400 Subject: patch up bugzid 10333, a cast on nodeup now sends gossip as well --- include/membership.hrl | 1 + src/mem3.erl | 66 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/include/membership.hrl b/include/membership.hrl index 031c12c4..a1e6f822 100644 --- a/include/membership.hrl +++ b/include/membership.hrl @@ -34,6 +34,7 @@ -type clock() :: {node(), epoch()}. -type vector_clock() :: [clock()]. -type ping_node() :: node() | nil. +-type gossip_fun() :: call | cast. -type part() :: #shard{}. -type fullmap() :: [part()]. diff --git a/src/mem3.erl b/src/mem3.erl index d715e657..7ae7627c 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -188,6 +188,15 @@ handle_call(Msg, _From, State) -> {reply, ignored, State}. 
+%% gossip +handle_cast({gossip, RemoteState}, LocalState) -> + State = case handle_gossip(none, RemoteState, LocalState) of + {reply, ok, NewState} -> NewState; + {reply, {new_state, NewState}, _} -> NewState; + {noreply, NewState} -> NewState + end, + {noreply, State}; + %% stop handle_cast(stop, State) -> {stop, normal, State}; @@ -202,14 +211,15 @@ handle_cast(Msg, State) -> %% net_kernel:monitor_nodes(true) handle_info({nodedown, Node}, State) -> showroom_log:message(alert, "membership: nodedown ~p", [Node]), - notify(nodedown, [Node]), + notify(nodedown, [Node], State), {noreply, State}; %% @doc handle nodeup messages because we have %% net_kernel:monitor_nodes(true) handle_info({nodeup, Node}, State) -> showroom_log:message(alert, "membership: nodeup ~p", [Node]), - notify(nodeup, [Node]), + notify(nodeup, [Node], State), + gossip_cast(State), {noreply, State}; %% ignored info @@ -294,16 +304,16 @@ handle_join(replace, OldNode, PingNode, State) when is_atom(OldNode) -> handle_join(replace, {OldNode, []}, PingNode, State); handle_join(replace, [OldNode | _], PingNode, State) -> handle_join(replace, {OldNode, []}, PingNode, State); -handle_join(replace, {OldNode, NewOpts}, PingNode, _State) -> +handle_join(replace, {OldNode, NewOpts}, PingNode, State) -> OldState = #mem{nodes=OldNodes} = get_pingnode_state(PingNode), {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), - notify(node_leave, [OldNode]), + notify(node_leave, [OldNode], State), int_join([], OldState#mem{nodes=NewNodes}); % leave -handle_join(leave, [OldNode | _], _PingNode, _State) -> +handle_join(leave, [OldNode | _], _PingNode, State) -> % TODO implement me - notify(node_leave, [OldNode]), + notify(node_leave, [OldNode], State), ok; handle_join(JoinType, _, PingNode, _) -> @@ -315,7 +325,7 @@ handle_join(JoinType, _, PingNode, _) -> int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> NewNodes = 
lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> check_pos(Pos, N, Nodes), - notify(node_join, [N]), + notify(node_join, [N], State), [New|AccIn] end, Nodes, ExtNodes), NewNodes1 = lists:sort(NewNodes), @@ -328,7 +338,7 @@ int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> install_new_state(#mem{args=Args} = State) -> Test = get_test(Args), save_state_file(Test, State), - gossip(Test, State). + gossip(call, Test, State). get_pingnode_state(PingNode) -> @@ -346,6 +356,8 @@ handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, less -> % remote node needs updating {reply, {new_state, LocalState}, LocalState}; + greater when From == none-> + {noreply, install_new_state(RemoteState)}; greater -> % local node needs updating gen_server:reply(From, ok), % reply to sender first @@ -357,7 +369,10 @@ handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, "RemoteState : ~p~nLocalState : ~p~n" , [RemoteState, LocalState]), MergedState = merge_states(RemoteState, LocalState), - gen_server:reply(From, {new_state, MergedState}), % reply to sender + if From =/= none -> + % reply to sender + gen_server:reply(From, {new_state, MergedState}) + end, {noreply, install_new_state(MergedState)} end. @@ -384,11 +399,18 @@ merge_nodes(Remote, Local) -> gossip(#mem{args=Args} = NewState) -> Test = get_test(Args), - gossip(Test, NewState). + gossip(call, Test, NewState). --spec gossip(test(), mem_state()) -> mem_state(). -gossip(undefined, #mem{nodes=StateNodes} = State) -> +gossip_cast(#mem{nodes=[]}) -> ok; +gossip_cast(#mem{args=Args} = NewState) -> + Test = get_test(Args), + gossip(cast, Test, NewState). + + +-spec gossip(gossip_fun(), test(), mem_state()) -> mem_state(). 
+gossip(_, _, #mem{nodes=[]}) -> ok; +gossip(Fun, undefined, #mem{nodes=StateNodes} = State) -> {_, Nodes, _} = lists:unzip3(StateNodes), case next_up_node(Nodes) of no_gossip_targets_available -> @@ -396,21 +418,20 @@ gossip(undefined, #mem{nodes=StateNodes} = State) -> TargetNode -> showroom_log:message(info, "membership: firing gossip from ~p to ~p", [node(), TargetNode]), - case gen_server:call({?SERVER, TargetNode}, {gossip, State}) of + case gen_server:Fun({?SERVER, TargetNode}, {gossip, State}) of ok -> State; {new_state, NewState} -> NewState; Error -> throw({unknown_gossip_response, Error}) end end; -gossip(_,_) -> +gossip(_,_,_) -> % testing, so don't gossip ok. next_up_node(Nodes) -> - Node = node(), - next_up_node(Node, Nodes, up_nodes()). + next_up_node(node(), Nodes, up_nodes()). next_up_node(Node, Nodes, UpNodes) -> @@ -536,7 +557,12 @@ compare_state_with_rest(#mem{clock=Clock} = _State, States) -> true -> {bad_state_match, node(), BadResults} end. -notify(Type, Nodes) -> - lists:foreach(fun(Node) -> - gen_event:notify(membership_events, {Type, Node}) - end, Nodes). +notify(Type, Nodes, #mem{nodes=MemNodesList} = _State) -> + {_,MemNodes,_} = lists:unzip3(lists:keysort(1, MemNodesList)), + lists:foreach(fun(Node) -> + case lists:member(Node, MemNodes) orelse Type == nodedown of + true -> + gen_event:notify(membership_events, {Type, Node}); + _ -> ok % node not in cluster + end + end, Nodes). 
-- cgit v1.2.3 From e0acb18d4565c59e7b88eea494a3ee9009546c22 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Wed, 30 Jun 2010 12:25:32 -0400 Subject: add distinct membership http req handler to membership app, not showroom --- ebin/membership.app | 1 + 1 file changed, 1 insertion(+) diff --git a/ebin/membership.app b/ebin/membership.app index 4773b636..522e833c 100644 --- a/ebin/membership.app +++ b/ebin/membership.app @@ -11,6 +11,7 @@ dbs_event, membership, membership_app, + membership_httpd, membership_sup, mem3, partitions, -- cgit v1.2.3 From 8a09581aa2252f53047fa0e9e95591eaae4556c9 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 15:36:54 -0400 Subject: clean up membership application callback mod --- src/membership_app.erl | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/membership_app.erl b/src/membership_app.erl index 7f050fe5..df0f4fee 100644 --- a/src/membership_app.erl +++ b/src/membership_app.erl @@ -1,20 +1,11 @@ -module(membership_app). --author('brad@cloudant.com'). - -behaviour(application). - --include("membership.hrl"). - -%% Application callbacks -export([start/2, stop/1]). -%% @doc start required apps, join cluster, start supervisor -start(_Type, _StartArgs) -> - couch_api:create_db(<<"dbs">>, []), % all nodes have local 'dbs' db - % start membership supervisor +start(_Type, []) -> + DbName = couch_config:get("membership", "db", "dbs"), + couch_server:create(list_to_binary(DbName), []), membership_sup:start_link(). -stop({_, Sup}) -> - ?LOG_ALERT("membership application stopped", []), - exit(Sup, normal), +stop([]) -> ok. -- cgit v1.2.3 From 48c8fde34591f782be7af77575eaa02dab8659b3 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 16:23:38 -0400 Subject: standardize mem3 naming. 
app is horribly broken for now --- ebin/dynomite.appup | 6 - ebin/mem3.app | 25 +++ ebin/membership.app | 22 -- include/mem3.hrl | 44 ++++ include/membership.hrl | 44 ---- include/profile.hrl | 9 - src/dbs.erl | 46 ---- src/dbs_cache.erl | 91 -------- src/mem3.erl | 569 +------------------------------------------------ src/mem3_app.erl | 11 + src/mem3_cache.erl | 91 ++++++++ src/mem3_event.erl | 74 +++++++ src/mem3_httpd.erl | 77 +++++++ src/mem3_server.erl | 568 ++++++++++++++++++++++++++++++++++++++++++++++++ src/mem3_sup.erl | 22 ++ src/mem3_sync.erl | 46 ++++ src/mem3_util.erl | 187 ++++++++++++++++ src/mem3_vclock.erl | 109 ++++++++++ src/membership.erl | 15 -- src/membership_app.erl | 11 - src/membership_sup.erl | 44 ---- src/partitions.erl | 189 ---------------- src/vector_clock.erl | 109 ---------- 23 files changed, 1262 insertions(+), 1147 deletions(-) delete mode 100644 ebin/dynomite.appup create mode 100644 ebin/mem3.app delete mode 100644 ebin/membership.app create mode 100644 include/mem3.hrl delete mode 100644 include/membership.hrl delete mode 100644 include/profile.hrl delete mode 100644 src/dbs.erl delete mode 100644 src/dbs_cache.erl create mode 100644 src/mem3_app.erl create mode 100644 src/mem3_cache.erl create mode 100644 src/mem3_event.erl create mode 100644 src/mem3_httpd.erl create mode 100644 src/mem3_server.erl create mode 100644 src/mem3_sup.erl create mode 100644 src/mem3_sync.erl create mode 100644 src/mem3_util.erl create mode 100644 src/mem3_vclock.erl delete mode 100644 src/membership.erl delete mode 100644 src/membership_app.erl delete mode 100644 src/membership_sup.erl delete mode 100644 src/partitions.erl delete mode 100644 src/vector_clock.erl diff --git a/ebin/dynomite.appup b/ebin/dynomite.appup deleted file mode 100644 index c88a78bd..00000000 --- a/ebin/dynomite.appup +++ /dev/null @@ -1,6 +0,0 @@ -{"0.9.5-cloudant", [{"0.9.4-cloudant", [ - {apply, {supervisor, terminate_child, [showroom_sup, dynomite_sup]}}, - 
{restart_application, dynomite}, - {apply, {supervisor, delete_child, [showroom_sup, dynomite_sup]}}, - {update, showroom_sup, supervisor} -]}],[{"0.9.4-cloudant",[]}]}. diff --git a/ebin/mem3.app b/ebin/mem3.app new file mode 100644 index 00000000..1b04f5a1 --- /dev/null +++ b/ebin/mem3.app @@ -0,0 +1,25 @@ +{application, mem3, [ + {description, "CouchDB Cluster Membership"}, + {mod, {mem3_app, []}}, + {vsn, "0.9.6"}, + {modules, [ + mem3, + mem3_app, + mem3_cache, + mem3_event, + mem3_httpd, + mem3_server, + mem3_sup, + mem3_sync, + mem3_util, + mem3_vclock + ]}, + {registered, [ + mem3_cache, + mem3_event, + mem3_server, + mem3_sync, + mem3_sup + ]}, + {applications, [kernel, stdlib, sasl, crypto, mochiweb, couch]} +]}. diff --git a/ebin/membership.app b/ebin/membership.app deleted file mode 100644 index 522e833c..00000000 --- a/ebin/membership.app +++ /dev/null @@ -1,22 +0,0 @@ -%% membership app resource file - -{application, membership, - [{description, "cluster membership"}, - {mod, {membership_app, []}}, - {vsn, "0.9.6"}, - {modules, - [ - dbs, - dbs_cache, - dbs_event, - membership, - membership_app, - membership_httpd, - membership_sup, - mem3, - partitions, - vector_clock - ]}, - {registered, [membership]}, - {applications, [kernel, stdlib, sasl, crypto, mochiweb]} - ]}. diff --git a/include/mem3.hrl b/include/mem3.hrl new file mode 100644 index 00000000..a1e6f822 --- /dev/null +++ b/include/mem3.hrl @@ -0,0 +1,44 @@ +-define(MEMBERSHIP, true). + +-ifndef(FABRIC). +-include("../../fabric/include/fabric.hrl"). +-endif. + +-ifndef(COUCH). +-include("../../couch/src/couch_db.hrl"). +-endif. + +-include_lib("eunit/include/eunit.hrl"). + +%% version 3 of membership state +-record(mem, {header=3, + nodes=[], + clock=[], + args + }). + +%% partition record +-record(shard, {name, node, dbname, range, ref}). + +%% types +-type join_type() :: init | join | replace | leave. +-type join_order() :: non_neg_integer(). +-type options() :: list(). 
+-type mem_node() :: {join_order(), node(), options()}. +-type mem_node_list() :: [mem_node()]. +-type arg_options() :: {test, boolean()}. +-type args() :: [] | [arg_options()]. +-type mem_state() :: #mem{}. +-type test() :: undefined | node(). +-type epoch() :: float(). +-type clock() :: {node(), epoch()}. +-type vector_clock() :: [clock()]. +-type ping_node() :: node() | nil. +-type gossip_fun() :: call | cast. + +-type part() :: #shard{}. +-type fullmap() :: [part()]. +-type ref_part_map() :: {reference(), part()}. +-type tref() :: reference(). +-type np() :: {node(), part()}. +-type beg_acc() :: [integer()]. diff --git a/include/membership.hrl b/include/membership.hrl deleted file mode 100644 index a1e6f822..00000000 --- a/include/membership.hrl +++ /dev/null @@ -1,44 +0,0 @@ --define(MEMBERSHIP, true). - --ifndef(FABRIC). --include("../../fabric/include/fabric.hrl"). --endif. - --ifndef(COUCH). --include("../../couch/src/couch_db.hrl"). --endif. - --include_lib("eunit/include/eunit.hrl"). - -%% version 3 of membership state --record(mem, {header=3, - nodes=[], - clock=[], - args - }). - -%% partition record --record(shard, {name, node, dbname, range, ref}). - -%% types --type join_type() :: init | join | replace | leave. --type join_order() :: non_neg_integer(). --type options() :: list(). --type mem_node() :: {join_order(), node(), options()}. --type mem_node_list() :: [mem_node()]. --type arg_options() :: {test, boolean()}. --type args() :: [] | [arg_options()]. --type mem_state() :: #mem{}. --type test() :: undefined | node(). --type epoch() :: float(). --type clock() :: {node(), epoch()}. --type vector_clock() :: [clock()]. --type ping_node() :: node() | nil. --type gossip_fun() :: call | cast. - --type part() :: #shard{}. --type fullmap() :: [part()]. --type ref_part_map() :: {reference(), part()}. --type tref() :: reference(). --type np() :: {node(), part()}. --type beg_acc() :: [integer()]. 
diff --git a/include/profile.hrl b/include/profile.hrl deleted file mode 100644 index 2ffd8009..00000000 --- a/include/profile.hrl +++ /dev/null @@ -1,9 +0,0 @@ --ifdef(PROF). --define(balance_prof, dynomite_prof:balance_prof()). --define(prof(Label), dynomite_prof:start_prof(Label)). --define(forp(Label), dynomite_prof:stop_prof(Label)). --else. --define(prof(Label), true). --define(forp(Label), true). --define(balance_prof, true). --endif. diff --git a/src/dbs.erl b/src/dbs.erl deleted file mode 100644 index 345788ef..00000000 --- a/src/dbs.erl +++ /dev/null @@ -1,46 +0,0 @@ --module(dbs). --behaviour(supervisor). - --export([start_link/0, init/1, childspec/1, sup_upgrade_notify/2]). - --include("membership.hrl"). - -start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). - -init([]) -> - {ok, MemNodes} = mem3:nodes(), - LiveNodes = nodes(), - ChildSpecs = [childspec(N) || N <- MemNodes, lists:member(N, LiveNodes)], - gen_event:add_handler(membership_events, dbs_event, []), - {ok, {{one_for_one, 10, 8}, ChildSpecs}}. - -childspec(Node) -> - ?LOG_INFO("dbs repl ~p --> ~p starting", [node(), Node]), - PostBody = {[ - {<<"source">>, <<"dbs">>}, - {<<"target">>, {[{<<"node">>, Node}, {<<"name">>, <<"dbs">>}]}}, - {<<"continuous">>, true} - ]}, - Id = couch_util:to_hex(erlang:md5(term_to_binary([node(), Node]))), - MFA = {couch_rep, start_link, [Id, PostBody, #user_ctx{}]}, - {Node, MFA, permanent, 100, worker, [couch_rep]}. - -% from http://code.google.com/p/erlrc/wiki/ErlrcHowto -sup_upgrade_notify (_Old, _New) -> - {ok, {_, Specs}} = init([]), - - Old = sets:from_list( - [Name || {Name, _, _, _} <- supervisor:which_children(?MODULE)]), - New = sets:from_list([Name || {Name, _, _, _, _, _} <- Specs]), - Kill = sets:subtract(Old, New), - - sets:fold(fun(Id, ok) -> - supervisor:terminate_child(?MODULE, Id), - supervisor:delete_child(?MODULE, Id), - ok - end, - ok, - Kill), - [supervisor:start_child (?MODULE, Spec) || Spec <- Specs ], - ok. 
diff --git a/src/dbs_cache.erl b/src/dbs_cache.erl deleted file mode 100644 index 1afb873b..00000000 --- a/src/dbs_cache.erl +++ /dev/null @@ -1,91 +0,0 @@ --module(dbs_cache). --behaviour(gen_server). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3]). - --export([start_link/0]). - --include("membership.hrl"). - -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - -init([]) -> - ets:new(partitions, [bag, protected, named_table, {keypos,#shard.dbname}]), - ets:new(memnodes, [bag, protected, named_table]), - cache_dbs(), - Self = self(), - couch_db_update_notifier:start_link(fun({updated, <<"dbs">>}) -> - Self ! rebuild_dbs_cache; - (_) -> ok end), - {ok, nil}. - -handle_call(_Msg, _From, State) -> - {reply, ok, State}. - -handle_cast(_Msg, State) -> - {noreply, State}. - -handle_info(rebuild_dbs_cache, State) -> - receive rebuild_dbs_cache -> - handle_info(rebuild_dbs_cache, State) - after 0 -> ok end, - T0 = now(), - ?LOG_INFO("rebuilding dbs DB cache", []), - ets:delete_all_objects(partitions), - ets:delete_all_objects(memnodes), - cache_dbs(), - ?LOG_INFO("rebuild of dbs DB cache complete in ~p ms", - [round(timer:now_diff(now(),T0)/1000)]), - {noreply, State}. - -terminate(_Reason, _State) -> - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -cache_dbs() -> - try couch_db:open(<<"dbs">>, []) of - {ok, Db} -> - Bt = Db#db.id_tree, - FoldFun = fun(#full_doc_info{id=Id, deleted=false} = FullDocInfo, _, _) -> - {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []), - {Props} = couch_doc:to_json_obj(Doc, []), - cache_map(Id, Props), - cache_nodes(Id, Props), - {ok, true}; - (_, _, _) -> - {ok, nil} - end, - couch_btree:foldl(Bt, FoldFun, nil), - couch_db:close(Db) - catch exit:{noproc,{gen_server,call,[couch_server|_]}} -> - timer:sleep(1000), - exit(couch_server_is_dead) - end. 
- -cache_map(Id, Props) -> - Map = couch_util:get_value(<<"map">>, Props, []), - lists:foreach(fun({[{<<"node">>,Node},{<<"b">>,Beg},{<<"e">>,End}]}) -> - Part = #shard{ - name = partitions:shard_name(Beg, Id), - dbname = Id, - node = to_atom(Node), - range = [Beg,End] - }, - ets:insert(partitions, Part) - end, Map). - -cache_nodes(Id, Props) -> - Nodes = couch_util:get_value(<<"nodes">>, Props, []), - lists:foreach(fun({[{<<"order">>,Order},{<<"node">>, Node},{<<"options">>,Opts}]}) -> - ets:insert(memnodes, {Id, {Order, to_atom(Node), Opts}}) - end, Nodes). - -to_atom(Node) when is_binary(Node) -> - list_to_atom(binary_to_list(Node)); -to_atom(Node) when is_atom(Node) -> - Node. - -%{ok, ets:insert(dbs_cache, {Id, Props})}; diff --git a/src/mem3.erl b/src/mem3.erl index 7ae7627c..2b8f0188 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -1,568 +1,15 @@ -%%% membership module -%%% -%%% State of the gen_server is a #mem record -%%% -%%% Nodes and Gossip are the same thing, and are a list of three-tuples like: -%%% -%%% [ {Pos,NodeName,Options} | _ ] -%%% -%%% Position is 1-based incrementing in order of node joining -%%% -%%% Options is a proplist, with [{hints, [Part1|_]}] denoting that the node -%%% is responsible for the extra partitions too. -%%% -%%% TODO: dialyzer type specs -%%% -module(mem3). --author('brad@cloudant.com'). +-author('Brad Anderson '). --behaviour(gen_server). +-export([start/0, stop/0, restart/0]). -%% API --export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/3, clock/0, state/0, states/0, nodes/0, fullnodes/0, - start_gossip/0]). -%% for testing more than anything else --export([merge_nodes/2, next_up_node/1, next_up_node/3]). +start() -> + application:start(mem3). -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - -%% includes --include("membership.hrl"). - --define(SERVER, membership). --define(STATE_FILE_PREFIX, "membership"). 
- - -%%==================================================================== -%% API -%%==================================================================== - --spec start_link() -> {ok, pid()}. -start_link() -> - start_link([]). - - --spec start_link(args()) -> {ok, pid()}. -start_link(Args) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, Args, []). - - --spec stop() -> ok. stop() -> - stop(?MODULE). - - --spec stop(atom()) -> ok. -stop(Server) -> - gen_server:cast(Server, stop). - - --spec join(join_type(), mem_node_list() | {node(), options()}, node() | nil) -> - ok. -join(JoinType, Payload, PingNode) -> - gen_server:call(?SERVER, {join, JoinType, Payload, PingNode}). - - --spec clock() -> vector_clock(). -clock() -> - gen_server:call(?SERVER, clock). - - --spec state() -> mem_state(). -state() -> - gen_server:call(?SERVER, state). - - -%% @doc Detailed report of cluster-wide membership state. Queries the state -%% on all member nodes and builds a dictionary with unique states as the -%% key and the nodes holding that state as the value. Also reports member -%% nodes which fail to respond and nodes which are connected but are not -%% cluster members. Useful for debugging. --spec states() -> [{mem_state() | bad_nodes | non_member_nodes, [node()]}]. -states() -> - {ok, Nodes} = mem3:nodes(), - AllNodes = [node()|erlang:nodes()], - {Replies, BadNodes} = gen_server:multi_call(Nodes, ?SERVER, state), - Dict = lists:foldl(fun({Node, {ok,State}}, D) -> - orddict:append(State, Node, D) - end, orddict:new(), Replies), - [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. - --spec start_gossip() -> ok. -start_gossip() -> - gen_server:call(?SERVER, start_gossip). - - --spec reset() -> ok | not_reset. -reset() -> - gen_server:call(?SERVER, reset). 
- - -%% @doc get the list of cluster nodes (according to membership module) -%% This may differ from erlang:nodes() -%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) --spec nodes() -> {ok, [node()]}. -nodes() -> - gen_server:call(?SERVER, nodes). - - -%% @doc get the list of cluster nodes (according to membership module) -%% This may differ from erlang:nodes() -%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) --spec fullnodes() -> {ok, [mem_node()]}. -fullnodes() -> - gen_server:call(?SERVER, fullnodes). - - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%% start up membership server --spec init(args()) -> {ok, mem_state()}. -init(Args) -> - process_flag(trap_exit,true), - Test = get_test(Args), - OldState = read_latest_state_file(Test), - showroom_log:message(info, "membership: membership server starting...", []), - net_kernel:monitor_nodes(true), - State = handle_init(Test, OldState), - {ok, State#mem{args=Args}}. 
- - -%% new node(s) joining to this node -handle_call({join, JoinType, ExtNodes, PingNode}, _From, State) -> - try - case handle_join(JoinType, ExtNodes, PingNode, State) of - {ok, NewState} -> {reply, ok, NewState}; - Other -> {reply, Other, State} - end - catch _:Error -> - showroom_log:message(error, "~p", [Error]), - {reply, Error, State} - end; - -%% clock -handle_call(clock, _From, #mem{clock=Clock} = State) -> - {reply, {ok, Clock}, State}; - -%% state -handle_call(state, _From, State) -> - {reply, {ok, State}, State}; - -%% reset - but only if we're in test mode -handle_call(reset, _From, #mem{args=Args} = State) -> - Test = get_test(Args), - case Test of - undefined -> {reply, not_reset, State}; - _ -> {reply, ok, int_reset(Test, State)} - end; - -%% nodes -handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> - {_,NodeList,_} = lists:unzip3(lists:keysort(1, Nodes)), - {reply, {ok, NodeList}, State}; - -%% fullnodes -handle_call(fullnodes, _From, #mem{nodes=Nodes} = State) -> - {reply, {ok, Nodes}, State}; - -%% gossip -handle_call({gossip, RemoteState}, {Pid,_Tag} = From, LocalState) -> - showroom_log:message(info, "membership: received gossip from ~p", - [erlang:node(Pid)]), - handle_gossip(From, RemoteState, LocalState); - -% start_gossip -handle_call(start_gossip, _From, State) -> - NewState = gossip(State), - {reply, ok, NewState}; - -%% ignored call -handle_call(Msg, _From, State) -> - showroom_log:message(info, "membership: ignored call: ~p", [Msg]), - {reply, ignored, State}. - - -%% gossip -handle_cast({gossip, RemoteState}, LocalState) -> - State = case handle_gossip(none, RemoteState, LocalState) of - {reply, ok, NewState} -> NewState; - {reply, {new_state, NewState}, _} -> NewState; - {noreply, NewState} -> NewState - end, - {noreply, State}; - -%% stop -handle_cast(stop, State) -> - {stop, normal, State}; - -%% ignored cast -handle_cast(Msg, State) -> - showroom_log:message(info, "membership: ignored cast: ~p", [Msg]), - {noreply, State}. 
- - -%% @doc handle nodedown messages because we have -%% net_kernel:monitor_nodes(true) -handle_info({nodedown, Node}, State) -> - showroom_log:message(alert, "membership: nodedown ~p", [Node]), - notify(nodedown, [Node], State), - {noreply, State}; - -%% @doc handle nodeup messages because we have -%% net_kernel:monitor_nodes(true) -handle_info({nodeup, Node}, State) -> - showroom_log:message(alert, "membership: nodeup ~p", [Node]), - notify(nodeup, [Node], State), - gossip_cast(State), - {noreply, State}; - -%% ignored info -handle_info(Info, State) -> - showroom_log:message(info, "membership: ignored info: ~p", [Info]), - {noreply, State}. - - -% terminate -terminate(_Reason, _State) -> - ok. - - -% ignored code change -code_change(OldVsn, State, _Extra) -> - io:format("Unknown Old Version~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), - {ok, State}. - - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- - -%% @doc if Args has config use it, otherwise call configuration module -%% most times Args will have config during testing runs -%get_config(Args) -> -% case proplists:get_value(config, Args) of -% undefined -> configuration:get_config(); -% Any -> Any -% end. - - -get_test(Args) -> - proplists:get_value(test, Args). - - -%% @doc handle_init starts a node -%% Most of the time, this puts the node in a single-node cluster setup, -%% But, we could be automatically rejoining a cluster after some downtime. -%% See handle_join for initing, joining, leaving a cluster, or replacing a -%% node. -%% @end -handle_init(Test, nil) -> - int_reset(Test); - -handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> - % there's an old state, let's try to rejoin automatically - % but only if we can compare our old state to other available - % nodes and get a match... 
otherwise get a human involved - {_, NodeList, _} = lists:unzip3(Nodes), - ping_all_yall(NodeList), - {RemoteStates, _BadNodes} = get_remote_states(NodeList), - Test = get_test(Args), - case compare_state_with_rest(OldState, RemoteStates) of - match -> - showroom_log:message(info, "membership: rejoined successfully", []), - OldState; - Other -> - showroom_log:message(error, "membership: rejoin failed: ~p", [Other]), - int_reset(Test) - end. - - -%% @doc handle join activities, return {ok,NewState} --spec handle_join(join_type(), [mem_node()], ping_node(), mem_state()) -> - {ok, mem_state()}. -% init -handle_join(init, ExtNodes, nil, State) -> - {_,Nodes,_} = lists:unzip3(ExtNodes), - ping_all_yall(Nodes), - int_join(ExtNodes, State); -% join -handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> - NewState = case get_test(Args) of - undefined -> get_pingnode_state(PingNode); - _ -> State % testing, so meh - end, - % now use this info to join the ring - int_join(ExtNodes, NewState); -% replace -handle_join(replace, OldNode, PingNode, State) when is_atom(OldNode) -> - handle_join(replace, {OldNode, []}, PingNode, State); -handle_join(replace, [OldNode | _], PingNode, State) -> - handle_join(replace, {OldNode, []}, PingNode, State); -handle_join(replace, {OldNode, NewOpts}, PingNode, State) -> - OldState = #mem{nodes=OldNodes} = get_pingnode_state(PingNode), - {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), - NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), - notify(node_leave, [OldNode], State), - int_join([], OldState#mem{nodes=NewNodes}); -% leave -handle_join(leave, [OldNode | _], _PingNode, State) -> - % TODO implement me - notify(node_leave, [OldNode], State), - ok; - -handle_join(JoinType, _, PingNode, _) -> - showroom_log:message(info, "membership: unknown join type: ~p " - "for ping node: ~p", [JoinType, PingNode]), - {error, unknown_join_type}. 
- -%% @doc common operations for all join types -int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> - NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> - check_pos(Pos, N, Nodes), - notify(node_join, [N], State), - [New|AccIn] - end, Nodes, ExtNodes), - NewNodes1 = lists:sort(NewNodes), - NewClock = vector_clock:increment(node(), Clock), - NewState = State#mem{nodes=NewNodes1, clock=NewClock}, - install_new_state(NewState), - {ok, NewState}. - - -install_new_state(#mem{args=Args} = State) -> - Test = get_test(Args), - save_state_file(Test, State), - gossip(call, Test, State). - - -get_pingnode_state(PingNode) -> - {ok, RemoteState} = gen_server:call({?SERVER, PingNode}, state), - RemoteState. - - -%% @doc handle the gossip messages -%% We're not using vector_clock:resolve b/c we need custom merge strategy -handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, - LocalState=#mem{clock=LocalClock}) -> - case vector_clock:compare(RemoteClock, LocalClock) of - equal -> - {reply, ok, LocalState}; - less -> - % remote node needs updating - {reply, {new_state, LocalState}, LocalState}; - greater when From == none-> - {noreply, install_new_state(RemoteState)}; - greater -> - % local node needs updating - gen_server:reply(From, ok), % reply to sender first - {noreply, install_new_state(RemoteState)}; - concurrent -> - % ick, so let's resolve and merge states - showroom_log:message(info, - "membership: Concurrent Clocks~n" - "RemoteState : ~p~nLocalState : ~p~n" - , [RemoteState, LocalState]), - MergedState = merge_states(RemoteState, LocalState), - if From =/= none -> - % reply to sender - gen_server:reply(From, {new_state, MergedState}) - end, - {noreply, install_new_state(MergedState)} - end. 
- - -merge_states(#mem{clock=RemoteClock, nodes=RemoteNodes} = _RemoteState, - #mem{clock=LocalClock, nodes=LocalNodes} = LocalState) -> - MergedClock = vector_clock:merge(RemoteClock, LocalClock), - MergedNodes = merge_nodes(RemoteNodes, LocalNodes), - LocalState#mem{clock=MergedClock, nodes=MergedNodes}. - - -%% this will give one of the lists back, deterministically -merge_nodes(Remote, Local) -> - % get rid of the initial 0 node if it's still there, and sort - Remote1 = lists:usort(lists:keydelete(0,1,Remote)), - Local1 = lists:usort(lists:keydelete(0,1,Local)), - % handle empty lists as well as other cases - case {Remote1, Local1} of - {[], L} -> L; - {R, []} -> R; - _ -> erlang:min(Remote1, Local1) - end. - - -gossip(#mem{args=Args} = NewState) -> - Test = get_test(Args), - gossip(call, Test, NewState). - - -gossip_cast(#mem{nodes=[]}) -> ok; -gossip_cast(#mem{args=Args} = NewState) -> - Test = get_test(Args), - gossip(cast, Test, NewState). - - --spec gossip(gossip_fun(), test(), mem_state()) -> mem_state(). -gossip(_, _, #mem{nodes=[]}) -> ok; -gossip(Fun, undefined, #mem{nodes=StateNodes} = State) -> - {_, Nodes, _} = lists:unzip3(StateNodes), - case next_up_node(Nodes) of - no_gossip_targets_available -> - State; % skip gossip, I'm the only node - TargetNode -> - showroom_log:message(info, "membership: firing gossip from ~p to ~p", - [node(), TargetNode]), - case gen_server:Fun({?SERVER, TargetNode}, {gossip, State}) of - ok -> State; - {new_state, NewState} -> NewState; - Error -> throw({unknown_gossip_response, Error}) - end - end; - -gossip(_,_,_) -> - % testing, so don't gossip - ok. - - -next_up_node(Nodes) -> - next_up_node(node(), Nodes, up_nodes()). - - -next_up_node(Node, Nodes, UpNodes) -> - {A, [Node|B]} = lists:splitwith(fun(N) -> N /= Node end, Nodes), - List = lists:append(B, A), % be sure to eliminate Node - DownNodes = Nodes -- UpNodes, - case List -- DownNodes of - [Target|_] -> Target; - [] -> no_gossip_targets_available - end. 
- - -up_nodes() -> - % TODO: implement cache (fb 9704 & 9449) - erlang:nodes(). - - -%% @doc find the latest state file on disk -find_latest_state_filename() -> - Dir = couch_config:get("couchdb", "database_dir"), - case file:list_dir(Dir) of - {ok, Filenames} -> - Timestamps = [list_to_integer(TS) || {?STATE_FILE_PREFIX, TS} <- - [list_to_tuple(string:tokens(FN, ".")) || FN <- Filenames]], - SortedTimestamps = lists:reverse(lists:sort(Timestamps)), - case SortedTimestamps of - [Latest | _] -> - {ok, Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ - integer_to_list(Latest)}; - _ -> - throw({error, mem_state_file_not_found}) - end; - {error, Reason} -> - throw({error, Reason}) - end. - - -%% (Test, Config) -read_latest_state_file(undefined) -> - try - {ok, File} = find_latest_state_filename(), - case file:consult(File) of - {ok, [#mem{}=State]} -> State; - _Else -> - throw({error, bad_mem_state_file}) - end - catch _:Error -> - showroom_log:message(info, "membership: ~p", [Error]), - nil - end; -read_latest_state_file(_) -> - nil. - - -%% @doc save the state file to disk, with current timestamp. -%% thx to riak_ring_manager:do_write_ringfile/1 --spec save_state_file(test(), mem_state()) -> ok. -save_state_file(undefined, State) -> - Dir = couch_config:get("couchdb", "database_dir"), - {{Year, Month, Day},{Hour, Minute, Second}} = calendar:universal_time(), - TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - [Year, Month, Day, Hour, Minute, Second]), - FN = Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ TS, - ok = filelib:ensure_dir(FN), - {ok, File} = file:open(FN, [binary, write]), - io:format(File, "~w.~n", [State]), - file:close(File); - -save_state_file(_,_) -> ok. 
% don't save if testing - - -check_pos(Pos, Node, Nodes) -> - Found = lists:keyfind(Pos, 1, Nodes), - case Found of - false -> ok; - _ -> - {_,OldNode,_} = Found, - if - OldNode =:= Node -> - Msg = "node_exists_at_position_" ++ integer_to_list(Pos), - throw({error, list_to_binary(Msg)}); - true -> - Msg = "position_exists_" ++ integer_to_list(Pos), - throw({error, list_to_binary(Msg)}) - end - end. - - -int_reset(Test) -> - int_reset(Test, #mem{}). - - -int_reset(_Test, State) -> - State#mem{nodes=[], clock=[]}. - - -ping_all_yall(Nodes) -> - lists:foreach(fun(Node) -> - net_adm:ping(Node) - end, Nodes), - timer:sleep(500). % sigh. - - -get_remote_states(NodeList) -> - NodeList1 = lists:delete(node(), NodeList), - {States1, BadNodes} = rpc:multicall(NodeList1, mem3, state, [], 5000), - {_Status, States2} = lists:unzip(States1), - NodeList2 = NodeList1 -- BadNodes, - {lists:zip(NodeList2,States2), BadNodes}. - - -%% @doc compare state with states based on vector clock -%% return match | {bad_state_match, Node, NodesThatDontMatch} -compare_state_with_rest(#mem{clock=Clock} = _State, States) -> - Results = lists:map(fun({Node, #mem{clock=Clock1}}) -> - {vector_clock:equals(Clock, Clock1), Node} - end, States), - BadResults = lists:foldl(fun({true, _N}, AccIn) -> AccIn; - ({false, N}, AccIn) -> [N | AccIn] - end, [], Results), - if - length(BadResults) == 0 -> match; - true -> {bad_state_match, node(), BadResults} - end. + application:stop(mem3). -notify(Type, Nodes, #mem{nodes=MemNodesList} = _State) -> - {_,MemNodes,_} = lists:unzip3(lists:keysort(1, MemNodesList)), - lists:foreach(fun(Node) -> - case lists:member(Node, MemNodes) orelse Type == nodedown of - true -> - gen_event:notify(membership_events, {Type, Node}); - _ -> ok % node not in cluster - end - end, Nodes). +restart() -> + stop(), + start(). 
diff --git a/src/mem3_app.erl b/src/mem3_app.erl new file mode 100644 index 00000000..70bf1cf9 --- /dev/null +++ b/src/mem3_app.erl @@ -0,0 +1,11 @@ +-module(mem3_app). +-behaviour(application). +-export([start/2, stop/1]). + +start(_Type, []) -> + DbName = couch_config:get("mem3", "db", "dbs"), + couch_server:create(list_to_binary(DbName), []), + mem3_sup:start_link(). + +stop([]) -> + ok. diff --git a/src/mem3_cache.erl b/src/mem3_cache.erl new file mode 100644 index 00000000..8f5c372a --- /dev/null +++ b/src/mem3_cache.erl @@ -0,0 +1,91 @@ +-module(mem3_cache). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0]). + +-include("mem3.hrl"). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + ets:new(partitions, [bag, protected, named_table, {keypos,#shard.dbname}]), + ets:new(memnodes, [bag, protected, named_table]), + cache_dbs(), + Self = self(), + couch_db_update_notifier:start_link(fun({updated, <<"dbs">>}) -> + Self ! rebuild_dbs_cache; + (_) -> ok end), + {ok, nil}. + +handle_call(_Msg, _From, State) -> + {reply, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(rebuild_dbs_cache, State) -> + receive rebuild_dbs_cache -> + handle_info(rebuild_dbs_cache, State) + after 0 -> ok end, + T0 = now(), + ?LOG_INFO("rebuilding dbs DB cache", []), + ets:delete_all_objects(partitions), + ets:delete_all_objects(memnodes), + cache_dbs(), + ?LOG_INFO("rebuild of dbs DB cache complete in ~p ms", + [round(timer:now_diff(now(),T0)/1000)]), + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
+ +cache_dbs() -> + try couch_db:open(<<"dbs">>, []) of + {ok, Db} -> + Bt = Db#db.id_tree, + FoldFun = fun(#full_doc_info{id=Id, deleted=false} = FullDocInfo, _, _) -> + {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []), + {Props} = couch_doc:to_json_obj(Doc, []), + cache_map(Id, Props), + cache_nodes(Id, Props), + {ok, true}; + (_, _, _) -> + {ok, nil} + end, + couch_btree:foldl(Bt, FoldFun, nil), + couch_db:close(Db) + catch exit:{noproc,{gen_server,call,[couch_server|_]}} -> + timer:sleep(1000), + exit(couch_server_is_dead) + end. + +cache_map(Id, Props) -> + Map = couch_util:get_value(<<"map">>, Props, []), + lists:foreach(fun({[{<<"node">>,Node},{<<"b">>,Beg},{<<"e">>,End}]}) -> + Part = #shard{ + name = partitions:shard_name(Beg, Id), + dbname = Id, + node = to_atom(Node), + range = [Beg,End] + }, + ets:insert(partitions, Part) + end, Map). + +cache_nodes(Id, Props) -> + Nodes = couch_util:get_value(<<"nodes">>, Props, []), + lists:foreach(fun({[{<<"order">>,Order},{<<"node">>, Node},{<<"options">>,Opts}]}) -> + ets:insert(memnodes, {Id, {Order, to_atom(Node), Opts}}) + end, Nodes). + +to_atom(Node) when is_binary(Node) -> + list_to_atom(binary_to_list(Node)); +to_atom(Node) when is_atom(Node) -> + Node. + +%{ok, ets:insert(dbs_cache, {Id, Props})}; diff --git a/src/mem3_event.erl b/src/mem3_event.erl new file mode 100644 index 00000000..59156adc --- /dev/null +++ b/src/mem3_event.erl @@ -0,0 +1,74 @@ +-module(mem3_event). + +-behaviour(gen_event). + +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). + +-include("mem3.hrl"). + +init([]) -> + {ok, []}. 
+ +handle_event({node_join, Node}, State) -> + start_repl({node_join, Node}, State); + +handle_event({nodeup, Node}, State) -> + start_repl({nodeup, Node}, State); + +handle_event({node_leave, Node}, State) -> + stop_repl({node_leave, Node}, State); + +handle_event({nodedown, Node}, State) -> + stop_repl({nodedown, Node}, State); + +handle_event(Event, State) -> + ?LOG_ERROR("unexpected event in dbs handler ~p", [Event]), + {ok, State}. + +handle_call(Request, State) -> + ?LOG_ERROR("unexpected call in dbs handler ~p", [Request]), + {ok, ok, State}. + +handle_info(Info, State) -> + ?LOG_ERROR("unexpected msg in dbs handler ~p", [Info]), + {ok, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% +%% internal +%% + +start_repl({Reason, Node}, State) -> + ChildSpec = dbs:childspec(Node), + case supervisor:start_child(dbs, ChildSpec) of + {ok, _} -> + ok; + {error, {already_started, _Child}} -> + ok; + {error, running} -> + ok; + {error, already_present} -> + case supervisor:restart_child(dbs, ChildSpec) of + {ok, _} -> + ok; + {error, running} -> + ok; + {error, Reason} -> + ?LOG_ERROR("dbs repl restart failed ~p", [Reason]) + end; + {error, Reason} -> + ?LOG_ERROR("dbs repl start failed ~p", [Reason]) + end, + {ok, State}. + +stop_repl({Reason, Node}, State) -> + ?LOG_INFO("dbs repl ~p --> ~p terminating (~p)", [node(), Node, Reason]), + supervisor:terminate_child(dbs, Node), + supervisor:delete_child(dbs, Node), + {ok, State}. diff --git a/src/mem3_httpd.erl b/src/mem3_httpd.erl new file mode 100644 index 00000000..2b29b488 --- /dev/null +++ b/src/mem3_httpd.erl @@ -0,0 +1,77 @@ +-module(mem3_httpd). + +-export([handle_membership_req/1]). + +%% includes +-include("mem3.hrl"). 
+ + +handle_membership_req(#httpd{method='GET', + path_parts=[<<"_membership">>]} = Req) -> + {ok,ClusterNodes} = try mem3:nodes() + catch _:_ -> {ok,[]} end, + couch_httpd:send_json(Req, {[ + {all_nodes, lists:sort([node()|nodes()])}, + {cluster_nodes, lists:sort(ClusterNodes)} + ]}); + +handle_membership_req(#httpd{method='POST', + path_parts=[<<"_membership">>]} = Req) -> + {JsonProps} = couch_httpd:json_body_obj(Req), + Method = couch_util:get_value(<<"method">>, JsonProps), + Params = couch_util:get_value(<<"params">>, JsonProps), + Id = couch_util:get_value(<<"id">>, JsonProps), + {Result, Error} = membership_dispatch(Method, Params), + couch_httpd:send_json(Req, {[ + {result, Result}, + {error, Error}, + {id, Id} + ]}). + +%% +%% internal +%% +membership_dispatch(<<"replace">>, Params) -> + OldNode = get_oldnode(Params), + NewNodeOpts = get_value_json(<<"newnode_options">>, Params, []), + PingNode = get_pingnode(Params), + send_join(replace, {OldNode, NewNodeOpts}, PingNode); +membership_dispatch(TypeBin, Params) -> + Type = list_to_atom(?b2l(TypeBin)), + NodeList = get_value_json(<<"nodes">>, Params, []), + Nodes = lists:map(fun({List}) -> node_info(List) end, NodeList), + PingNode = get_pingnode(Params), + send_join(Type, Nodes, PingNode). + +get_pingnode(Params) -> + PingNodeBin = get_value_json(<<"pingnode">>, Params, <<"nil">>), + list_to_atom(?b2l(PingNodeBin)). + +get_oldnode(Params) -> + NodeBin = get_value_json(<<"oldnode">>, Params, undefined), + NodeList = ?b2l(NodeBin), + list_to_atom(NodeList). + +%% @doc send join command to mem module +send_join(Type, Payload, PingNode) -> + case mem3:join(Type, Payload, PingNode) of + ok -> {ok, null}; + {error, Error} -> {Type, Error}; + Other -> + ?LOG_ERROR("membership dispatch error ~p", [Other]), + {Type, unknown_error} + end. 
+ +node_info(List) -> + Order = couch_util:get_value(<<"order">>, List), + Node1 = couch_util:get_value(<<"node">>, List), + Node2 = list_to_atom(?b2l(Node1)), + Options = couch_util:get_value(<<"options">>, List), + {Order, Node2, Options}. + +get_value_json(_,[], Default) -> Default; +get_value_json(Key, [JsonProp|Rest], Default) -> + case JsonProp of + {[{Key, Value}]} -> Value; + _ -> get_value_json(Key, Rest, Default) + end. diff --git a/src/mem3_server.erl b/src/mem3_server.erl new file mode 100644 index 00000000..863e752f --- /dev/null +++ b/src/mem3_server.erl @@ -0,0 +1,568 @@ +%%% membership module +%%% +%%% State of the gen_server is a #mem record +%%% +%%% Nodes and Gossip are the same thing, and are a list of three-tuples like: +%%% +%%% [ {Pos,NodeName,Options} | _ ] +%%% +%%% Position is 1-based incrementing in order of node joining +%%% +%%% Options is a proplist, with [{hints, [Part1|_]}] denoting that the node +%%% is responsible for the extra partitions too. +%%% +%%% TODO: dialyzer type specs +%%% +-module(mem3_server). +-author('brad@cloudant.com'). + +-behaviour(gen_server). + +%% API +-export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). +-export([join/3, clock/0, state/0, states/0, nodes/0, fullnodes/0, + start_gossip/0]). + +%% for testing more than anything else +-export([merge_nodes/2, next_up_node/1, next_up_node/3]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% includes +-include("mem3.hrl"). + +-define(SERVER, membership). +-define(STATE_FILE_PREFIX, "membership"). + + +%%==================================================================== +%% API +%%==================================================================== + +-spec start_link() -> {ok, pid()}. +start_link() -> + start_link([]). + + +-spec start_link(args()) -> {ok, pid()}. +start_link(Args) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, Args, []). + + +-spec stop() -> ok. 
+stop() -> + stop(?MODULE). + + +-spec stop(atom()) -> ok. +stop(Server) -> + gen_server:cast(Server, stop). + + +-spec join(join_type(), mem_node_list() | {node(), options()}, node() | nil) -> + ok. +join(JoinType, Payload, PingNode) -> + gen_server:call(?SERVER, {join, JoinType, Payload, PingNode}). + + +-spec clock() -> vector_clock(). +clock() -> + gen_server:call(?SERVER, clock). + + +-spec state() -> mem_state(). +state() -> + gen_server:call(?SERVER, state). + + +%% @doc Detailed report of cluster-wide membership state. Queries the state +%% on all member nodes and builds a dictionary with unique states as the +%% key and the nodes holding that state as the value. Also reports member +%% nodes which fail to respond and nodes which are connected but are not +%% cluster members. Useful for debugging. +-spec states() -> [{mem_state() | bad_nodes | non_member_nodes, [node()]}]. +states() -> + {ok, Nodes} = mem3:nodes(), + AllNodes = [node()|erlang:nodes()], + {Replies, BadNodes} = gen_server:multi_call(Nodes, ?SERVER, state), + Dict = lists:foldl(fun({Node, {ok,State}}, D) -> + orddict:append(State, Node, D) + end, orddict:new(), Replies), + [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. + +-spec start_gossip() -> ok. +start_gossip() -> + gen_server:call(?SERVER, start_gossip). + + +-spec reset() -> ok | not_reset. +reset() -> + gen_server:call(?SERVER, reset). + + +%% @doc get the list of cluster nodes (according to membership module) +%% This may differ from erlang:nodes() +%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) +-spec nodes() -> {ok, [node()]}. +nodes() -> + gen_server:call(?SERVER, nodes). + + +%% @doc get the list of cluster nodes (according to membership module) +%% This may differ from erlang:nodes() +%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) +-spec fullnodes() -> {ok, [mem_node()]}. +fullnodes() -> + gen_server:call(?SERVER, fullnodes). 
+ + +%%==================================================================== +%% gen_server callbacks +%%==================================================================== + +%% start up membership server +-spec init(args()) -> {ok, mem_state()}. +init(Args) -> + process_flag(trap_exit,true), + Test = get_test(Args), + OldState = read_latest_state_file(Test), + showroom_log:message(info, "membership: membership server starting...", []), + net_kernel:monitor_nodes(true), + State = handle_init(Test, OldState), + {ok, State#mem{args=Args}}. + + +%% new node(s) joining to this node +handle_call({join, JoinType, ExtNodes, PingNode}, _From, State) -> + try + case handle_join(JoinType, ExtNodes, PingNode, State) of + {ok, NewState} -> {reply, ok, NewState}; + Other -> {reply, Other, State} + end + catch _:Error -> + showroom_log:message(error, "~p", [Error]), + {reply, Error, State} + end; + +%% clock +handle_call(clock, _From, #mem{clock=Clock} = State) -> + {reply, {ok, Clock}, State}; + +%% state +handle_call(state, _From, State) -> + {reply, {ok, State}, State}; + +%% reset - but only if we're in test mode +handle_call(reset, _From, #mem{args=Args} = State) -> + Test = get_test(Args), + case Test of + undefined -> {reply, not_reset, State}; + _ -> {reply, ok, int_reset(Test, State)} + end; + +%% nodes +handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> + {_,NodeList,_} = lists:unzip3(lists:keysort(1, Nodes)), + {reply, {ok, NodeList}, State}; + +%% fullnodes +handle_call(fullnodes, _From, #mem{nodes=Nodes} = State) -> + {reply, {ok, Nodes}, State}; + +%% gossip +handle_call({gossip, RemoteState}, {Pid,_Tag} = From, LocalState) -> + showroom_log:message(info, "membership: received gossip from ~p", + [erlang:node(Pid)]), + handle_gossip(From, RemoteState, LocalState); + +% start_gossip +handle_call(start_gossip, _From, State) -> + NewState = gossip(State), + {reply, ok, NewState}; + +%% ignored call +handle_call(Msg, _From, State) -> + showroom_log:message(info, 
"membership: ignored call: ~p", [Msg]), + {reply, ignored, State}. + + +%% gossip +handle_cast({gossip, RemoteState}, LocalState) -> + State = case handle_gossip(none, RemoteState, LocalState) of + {reply, ok, NewState} -> NewState; + {reply, {new_state, NewState}, _} -> NewState; + {noreply, NewState} -> NewState + end, + {noreply, State}; + +%% stop +handle_cast(stop, State) -> + {stop, normal, State}; + +%% ignored cast +handle_cast(Msg, State) -> + showroom_log:message(info, "membership: ignored cast: ~p", [Msg]), + {noreply, State}. + + +%% @doc handle nodedown messages because we have +%% net_kernel:monitor_nodes(true) +handle_info({nodedown, Node}, State) -> + showroom_log:message(alert, "membership: nodedown ~p", [Node]), + notify(nodedown, [Node], State), + {noreply, State}; + +%% @doc handle nodeup messages because we have +%% net_kernel:monitor_nodes(true) +handle_info({nodeup, Node}, State) -> + showroom_log:message(alert, "membership: nodeup ~p", [Node]), + notify(nodeup, [Node], State), + gossip_cast(State), + {noreply, State}; + +%% ignored info +handle_info(Info, State) -> + showroom_log:message(info, "membership: ignored info: ~p", [Info]), + {noreply, State}. + + +% terminate +terminate(_Reason, _State) -> + ok. + + +% ignored code change +code_change(OldVsn, State, _Extra) -> + io:format("Unknown Old Version~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), + {ok, State}. + + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +%% @doc if Args has config use it, otherwise call configuration module +%% most times Args will have config during testing runs +%get_config(Args) -> +% case proplists:get_value(config, Args) of +% undefined -> configuration:get_config(); +% Any -> Any +% end. + + +get_test(Args) -> + proplists:get_value(test, Args). 
+ + +%% @doc handle_init starts a node +%% Most of the time, this puts the node in a single-node cluster setup, +%% But, we could be automatically rejoining a cluster after some downtime. +%% See handle_join for initing, joining, leaving a cluster, or replacing a +%% node. +%% @end +handle_init(Test, nil) -> + int_reset(Test); + +handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> + % there's an old state, let's try to rejoin automatically + % but only if we can compare our old state to other available + % nodes and get a match... otherwise get a human involved + {_, NodeList, _} = lists:unzip3(Nodes), + ping_all_yall(NodeList), + {RemoteStates, _BadNodes} = get_remote_states(NodeList), + Test = get_test(Args), + case compare_state_with_rest(OldState, RemoteStates) of + match -> + showroom_log:message(info, "membership: rejoined successfully", []), + OldState; + Other -> + showroom_log:message(error, "membership: rejoin failed: ~p", [Other]), + int_reset(Test) + end. + + +%% @doc handle join activities, return {ok,NewState} +-spec handle_join(join_type(), [mem_node()], ping_node(), mem_state()) -> + {ok, mem_state()}. 
+% init +handle_join(init, ExtNodes, nil, State) -> + {_,Nodes,_} = lists:unzip3(ExtNodes), + ping_all_yall(Nodes), + int_join(ExtNodes, State); +% join +handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> + NewState = case get_test(Args) of + undefined -> get_pingnode_state(PingNode); + _ -> State % testing, so meh + end, + % now use this info to join the ring + int_join(ExtNodes, NewState); +% replace +handle_join(replace, OldNode, PingNode, State) when is_atom(OldNode) -> + handle_join(replace, {OldNode, []}, PingNode, State); +handle_join(replace, [OldNode | _], PingNode, State) -> + handle_join(replace, {OldNode, []}, PingNode, State); +handle_join(replace, {OldNode, NewOpts}, PingNode, State) -> + OldState = #mem{nodes=OldNodes} = get_pingnode_state(PingNode), + {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), + NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), + notify(node_leave, [OldNode], State), + int_join([], OldState#mem{nodes=NewNodes}); +% leave +handle_join(leave, [OldNode | _], _PingNode, State) -> + % TODO implement me + notify(node_leave, [OldNode], State), + ok; + +handle_join(JoinType, _, PingNode, _) -> + showroom_log:message(info, "membership: unknown join type: ~p " + "for ping node: ~p", [JoinType, PingNode]), + {error, unknown_join_type}. + +%% @doc common operations for all join types +int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> + NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> + check_pos(Pos, N, Nodes), + notify(node_join, [N], State), + [New|AccIn] + end, Nodes, ExtNodes), + NewNodes1 = lists:sort(NewNodes), + NewClock = vector_clock:increment(node(), Clock), + NewState = State#mem{nodes=NewNodes1, clock=NewClock}, + install_new_state(NewState), + {ok, NewState}. + + +install_new_state(#mem{args=Args} = State) -> + Test = get_test(Args), + save_state_file(Test, State), + gossip(call, Test, State). 
+ + +get_pingnode_state(PingNode) -> + {ok, RemoteState} = gen_server:call({?SERVER, PingNode}, state), + RemoteState. + + +%% @doc handle the gossip messages +%% We're not using vector_clock:resolve b/c we need custom merge strategy +handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, + LocalState=#mem{clock=LocalClock}) -> + case vector_clock:compare(RemoteClock, LocalClock) of + equal -> + {reply, ok, LocalState}; + less -> + % remote node needs updating + {reply, {new_state, LocalState}, LocalState}; + greater when From == none-> + {noreply, install_new_state(RemoteState)}; + greater -> + % local node needs updating + gen_server:reply(From, ok), % reply to sender first + {noreply, install_new_state(RemoteState)}; + concurrent -> + % ick, so let's resolve and merge states + showroom_log:message(info, + "membership: Concurrent Clocks~n" + "RemoteState : ~p~nLocalState : ~p~n" + , [RemoteState, LocalState]), + MergedState = merge_states(RemoteState, LocalState), + if From =/= none -> + % reply to sender + gen_server:reply(From, {new_state, MergedState}) + end, + {noreply, install_new_state(MergedState)} + end. + + +merge_states(#mem{clock=RemoteClock, nodes=RemoteNodes} = _RemoteState, + #mem{clock=LocalClock, nodes=LocalNodes} = LocalState) -> + MergedClock = vector_clock:merge(RemoteClock, LocalClock), + MergedNodes = merge_nodes(RemoteNodes, LocalNodes), + LocalState#mem{clock=MergedClock, nodes=MergedNodes}. + + +%% this will give one of the lists back, deterministically +merge_nodes(Remote, Local) -> + % get rid of the initial 0 node if it's still there, and sort + Remote1 = lists:usort(lists:keydelete(0,1,Remote)), + Local1 = lists:usort(lists:keydelete(0,1,Local)), + % handle empty lists as well as other cases + case {Remote1, Local1} of + {[], L} -> L; + {R, []} -> R; + _ -> erlang:min(Remote1, Local1) + end. + + +gossip(#mem{args=Args} = NewState) -> + Test = get_test(Args), + gossip(call, Test, NewState). 
+ + +gossip_cast(#mem{nodes=[]}) -> ok; +gossip_cast(#mem{args=Args} = NewState) -> + Test = get_test(Args), + gossip(cast, Test, NewState). + + +-spec gossip(gossip_fun(), test(), mem_state()) -> mem_state(). +gossip(_, _, #mem{nodes=[]}) -> ok; +gossip(Fun, undefined, #mem{nodes=StateNodes} = State) -> + {_, Nodes, _} = lists:unzip3(StateNodes), + case next_up_node(Nodes) of + no_gossip_targets_available -> + State; % skip gossip, I'm the only node + TargetNode -> + showroom_log:message(info, "membership: firing gossip from ~p to ~p", + [node(), TargetNode]), + case gen_server:Fun({?SERVER, TargetNode}, {gossip, State}) of + ok -> State; + {new_state, NewState} -> NewState; + Error -> throw({unknown_gossip_response, Error}) + end + end; + +gossip(_,_,_) -> + % testing, so don't gossip + ok. + + +next_up_node(Nodes) -> + next_up_node(node(), Nodes, up_nodes()). + + +next_up_node(Node, Nodes, UpNodes) -> + {A, [Node|B]} = lists:splitwith(fun(N) -> N /= Node end, Nodes), + List = lists:append(B, A), % be sure to eliminate Node + DownNodes = Nodes -- UpNodes, + case List -- DownNodes of + [Target|_] -> Target; + [] -> no_gossip_targets_available + end. + + +up_nodes() -> + % TODO: implement cache (fb 9704 & 9449) + erlang:nodes(). + + +%% @doc find the latest state file on disk +find_latest_state_filename() -> + Dir = couch_config:get("couchdb", "database_dir"), + case file:list_dir(Dir) of + {ok, Filenames} -> + Timestamps = [list_to_integer(TS) || {?STATE_FILE_PREFIX, TS} <- + [list_to_tuple(string:tokens(FN, ".")) || FN <- Filenames]], + SortedTimestamps = lists:reverse(lists:sort(Timestamps)), + case SortedTimestamps of + [Latest | _] -> + {ok, Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ + integer_to_list(Latest)}; + _ -> + throw({error, mem_state_file_not_found}) + end; + {error, Reason} -> + throw({error, Reason}) + end. 
+ + +%% (Test, Config) +read_latest_state_file(undefined) -> + try + {ok, File} = find_latest_state_filename(), + case file:consult(File) of + {ok, [#mem{}=State]} -> State; + _Else -> + throw({error, bad_mem_state_file}) + end + catch _:Error -> + showroom_log:message(info, "membership: ~p", [Error]), + nil + end; +read_latest_state_file(_) -> + nil. + + +%% @doc save the state file to disk, with current timestamp. +%% thx to riak_ring_manager:do_write_ringfile/1 +-spec save_state_file(test(), mem_state()) -> ok. +save_state_file(undefined, State) -> + Dir = couch_config:get("couchdb", "database_dir"), + {{Year, Month, Day},{Hour, Minute, Second}} = calendar:universal_time(), + TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", + [Year, Month, Day, Hour, Minute, Second]), + FN = Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ TS, + ok = filelib:ensure_dir(FN), + {ok, File} = file:open(FN, [binary, write]), + io:format(File, "~w.~n", [State]), + file:close(File); + +save_state_file(_,_) -> ok. % don't save if testing + + +check_pos(Pos, Node, Nodes) -> + Found = lists:keyfind(Pos, 1, Nodes), + case Found of + false -> ok; + _ -> + {_,OldNode,_} = Found, + if + OldNode =:= Node -> + Msg = "node_exists_at_position_" ++ integer_to_list(Pos), + throw({error, list_to_binary(Msg)}); + true -> + Msg = "position_exists_" ++ integer_to_list(Pos), + throw({error, list_to_binary(Msg)}) + end + end. + + +int_reset(Test) -> + int_reset(Test, #mem{}). + + +int_reset(_Test, State) -> + State#mem{nodes=[], clock=[]}. + + +ping_all_yall(Nodes) -> + lists:foreach(fun(Node) -> + net_adm:ping(Node) + end, Nodes), + timer:sleep(500). % sigh. + + +get_remote_states(NodeList) -> + NodeList1 = lists:delete(node(), NodeList), + {States1, BadNodes} = rpc:multicall(NodeList1, mem3, state, [], 5000), + {_Status, States2} = lists:unzip(States1), + NodeList2 = NodeList1 -- BadNodes, + {lists:zip(NodeList2,States2), BadNodes}. 
+ + +%% @doc compare state with states based on vector clock +%% return match | {bad_state_match, Node, NodesThatDontMatch} +compare_state_with_rest(#mem{clock=Clock} = _State, States) -> + Results = lists:map(fun({Node, #mem{clock=Clock1}}) -> + {vector_clock:equals(Clock, Clock1), Node} + end, States), + BadResults = lists:foldl(fun({true, _N}, AccIn) -> AccIn; + ({false, N}, AccIn) -> [N | AccIn] + end, [], Results), + if + length(BadResults) == 0 -> match; + true -> {bad_state_match, node(), BadResults} + end. + +notify(Type, Nodes, #mem{nodes=MemNodesList} = _State) -> + {_,MemNodes,_} = lists:unzip3(lists:keysort(1, MemNodesList)), + lists:foreach(fun(Node) -> + case lists:member(Node, MemNodes) orelse Type == nodedown of + true -> + gen_event:notify(membership_events, {Type, Node}); + _ -> ok % node not in cluster + end + end, Nodes). diff --git a/src/mem3_sup.erl b/src/mem3_sup.erl new file mode 100644 index 00000000..122e68d7 --- /dev/null +++ b/src/mem3_sup.erl @@ -0,0 +1,22 @@ +-module(mem3_sup). +-behaviour(supervisor). +-export([start_link/0, init/1]). + +start_link() -> + supervisor:start_link(?MODULE, []). + +init(_Args) -> + Children = [ + child(mem3_server), + child(mem3_event), + child(mem3_sync), + child(mem3_cache) + ], + {ok, {{one_for_one,10,1}, Children}}. + +child(mem3_event) -> + MFA = {gen_event, start_link, [{local,mem3_event}]}, + {mem3_event, MFA, permanent, 1000, worker, dynamic}; +child(Child) -> + {Child, {Child, start_link, []}, permanent, 1000, worker, [Child]}. + \ No newline at end of file diff --git a/src/mem3_sync.erl b/src/mem3_sync.erl new file mode 100644 index 00000000..d50514d9 --- /dev/null +++ b/src/mem3_sync.erl @@ -0,0 +1,46 @@ +-module(mem3_sync). +-behaviour(supervisor). + +-export([start_link/0, init/1, childspec/1, sup_upgrade_notify/2]). + +-include("mem3.hrl"). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). 
+ +init([]) -> + {ok, MemNodes} = mem3:nodes(), + LiveNodes = nodes(), + ChildSpecs = [childspec(N) || N <- MemNodes, lists:member(N, LiveNodes)], + gen_event:add_handler(membership_events, dbs_event, []), + {ok, {{one_for_one, 10, 8}, ChildSpecs}}. + +childspec(Node) -> + ?LOG_INFO("dbs repl ~p --> ~p starting", [node(), Node]), + PostBody = {[ + {<<"source">>, <<"dbs">>}, + {<<"target">>, {[{<<"node">>, Node}, {<<"name">>, <<"dbs">>}]}}, + {<<"continuous">>, true} + ]}, + Id = couch_util:to_hex(erlang:md5(term_to_binary([node(), Node]))), + MFA = {couch_rep, start_link, [Id, PostBody, #user_ctx{}]}, + {Node, MFA, permanent, 100, worker, [couch_rep]}. + +% from http://code.google.com/p/erlrc/wiki/ErlrcHowto +sup_upgrade_notify (_Old, _New) -> + {ok, {_, Specs}} = init([]), + + Old = sets:from_list( + [Name || {Name, _, _, _} <- supervisor:which_children(?MODULE)]), + New = sets:from_list([Name || {Name, _, _, _, _, _} <- Specs]), + Kill = sets:subtract(Old, New), + + sets:fold(fun(Id, ok) -> + supervisor:terminate_child(?MODULE, Id), + supervisor:delete_child(?MODULE, Id), + ok + end, + ok, + Kill), + [supervisor:start_child (?MODULE, Spec) || Spec <- Specs ], + ok. diff --git a/src/mem3_util.erl b/src/mem3_util.erl new file mode 100644 index 00000000..f6c94748 --- /dev/null +++ b/src/mem3_util.erl @@ -0,0 +1,187 @@ +-module(mem3_util). +-author('brad@cloudant.com'). + +%% API +-export([fullmap/2, fullmap/3, hash/1, install_fullmap/4]). +-export([for_key/2, all_parts/1]). +-export([shard_name/2]). + +-define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space + +-include("mem3.hrl"). + +%%==================================================================== +%% API +%%==================================================================== + +%% @doc build a full partition map +fullmap(DbName, Options) -> + {ok, Nodes} = mem3:nodes(), + fullmap(DbName, Nodes, Options). 
+
+fullmap(DbName, Nodes, Options) ->
+ {N,Q} = db_init_constants(Options),
+ NewNodes = ordered_nodes(DbName, Nodes),
+ Pmap = pmap(Q, NewNodes),
+ int_fullmap(DbName, N, Pmap, NewNodes).
+
+%% @spec hash(term()) -> Digest::binary()
+%% @doc uses SHA-1 as its hash
+hash(Item) when is_binary(Item) ->
+ crypto:sha(Item);
+hash(Item) ->
+ crypto:sha(term_to_binary(Item)).
+
+install_fullmap(DbName, Fullmap, FullNodes, Options) ->
+ {N,Q} = db_init_constants(Options),
+ Doc = {[{<<"_id">>,DbName},
+ {<<"map">>, jsonify(<<"map">>, Fullmap)},
+ {<<"nodes">>, jsonify(<<"nodes">>, FullNodes)},
+ {<<"n">>,N},
+ {<<"q">>,Q}]},
+ write_db_doc(Doc).
+
+for_key(DbName, Key) ->
+ <<HashKey:160/integer>> = hash(Key),
+ Head = #shard{
+ name = '_',
+ node = '_',
+ dbname = DbName,
+ range = ['$1','$2'],
+ ref = '_'
+ },
+ % TODO these conditions assume A < B, which we don't require
+ Conditions = [{'<', '$1', HashKey}, {'<', HashKey, '$2'}],
+ case ets:select(partitions, [{Head, Conditions, ['$_']}]) of
+ [] ->
+ erlang:error(database_does_not_exist);
+ Shards ->
+ Shards
+ end.
+
+all_parts(DbName) ->
+ case ets:lookup(partitions, DbName) of
+ [] ->
+ erlang:error(database_does_not_exist);
+ Else ->
+ Else
+ end.
+
+%%====================================================================
+%% Internal functions
+%%====================================================================
+
+%% @doc get cluster constants from options or config
+db_init_constants(Options) ->
+ {const(n, Options), const(q, Options)}.
+
+%% @doc get individual constant
+const(Const, Options) ->
+ ListResult = case couch_util:get_value(Const, Options) of
+ undefined -> couch_config:get("cluster", atom_to_list(Const));
+ Val -> Val
+ end,
+ list_to_integer(ListResult).
+
+%% @doc hash the dbname, and return the corresponding node for seeding a ring
+seednode(DbName, Nodes) ->
+ <<HashInt:160/integer>> = hash(DbName),
+ Size = partition_range(length(Nodes)),
+ Factor = (HashInt div Size),
+ lists:nth(Factor+1, Nodes).
+ +%% @doc take the list of nodes, and rearrange it, starting with the node that +%% results from hashing the Term +ordered_nodes(Term, Nodes) -> + SeedNode = seednode(Term, Nodes), + {A, B} = lists:splitwith(fun(N) -> N /= SeedNode end, Nodes), + lists:append(B,A). + +%% @doc create a partition map +pmap(NumPartitions, Nodes) -> + Increment = ?RINGTOP div NumPartitions, + Parts = parts(?RINGTOP, Increment, 0, []), + make_map(Nodes, Nodes, Parts, []). + +%% @doc makes a {beg, end} list of partition ranges +%% last range may have an extra few values, because Increment is created +%% with Ringtop 'div' NumPartitions above. +parts(Top, _, Beg, Acc) when Beg > Top -> Acc; +parts(Top, Increment, Beg, Acc) -> + End = case Beg + 2*Increment of + Over when Over > Top -> Top; + _ -> Beg + Increment - 1 + end, + NewAcc = [{Beg, End} | Acc], + parts(Top, Increment, End+1, NewAcc). + +%% @doc create a full map, which is a pmap with N-1 replication partner nodes +%% added per partition +int_fullmap(DbName, N, Pmap, Nodes) -> + Full = lists:foldl(fun({Node,{B,E} = Part}, AccIn) -> + Primary = [#shard{dbname=DbName, node=Node, range=[B,E], + name=shard_name(B,DbName)}], + Partners = partners(DbName, N, Node, Nodes, Part), + lists:append([Primary, Partners, AccIn]) + end, [], Pmap), + lists:reverse(Full). + +partners(DbName, N, Node, Nodes, {Beg,End}) -> + {A, [Node|B]} = lists:splitwith(fun(Nd) -> Nd /= Node end, Nodes), + Nodes1 = lists:append(B,A), + Partners = lists:sublist(Nodes1, N-1), % N-1 replication partner nodes + lists:map(fun(Partner) -> + #shard{dbname=DbName, node=Partner, range=[Beg,End], + name=shard_name(Beg,DbName)} + end, Partners). + +%% @doc size of one partition in the ring +partition_range(Q) -> + trunc( ?RINGTOP / Q ). % SHA-1 space / Q + +%% @doc assign nodes to each of the partitions. When you run out of nodes, +%% start at the beginning of the node list again. 
+%% The provided node list starts with the seed node (seednode fun) +make_map(_,_,[], Acc) -> + lists:keysort(2,Acc); +make_map(AllNodes, [], Parts, Acc) -> + % start back at beginning of node list + make_map(AllNodes, AllNodes, Parts, Acc); +make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> + % add a node/part combo to the Acc + make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). + +jsonify(<<"map">>, Map) -> + lists:map(fun(#shard{node=Node, range=[Beg,End]}) -> + {[{<<"node">>, Node}, {<<"b">>, Beg}, {<<"e">>, End}]} + end, Map); +jsonify(<<"nodes">>, Nodes) -> + lists:map(fun({Order, Node, Options}) -> + {[{<<"order">>, Order}, {<<"node">>, Node}, {<<"options">>, Options}]} + end, Nodes). + +write_db_doc(EDoc) -> + {ok, Db} = couch_db:open(<<"dbs">>, []), + try + update_db_doc(Db, couch_doc:from_json_obj(EDoc)) + catch {conflict, _} -> + ?LOG_ERROR("conflict writing db doc, must be a race", []) + after + couch_db:close(Db) + end. + +update_db_doc(Db, #doc{id=Id, body=Body} = Doc) -> + case couch_db:open_doc(Db, Id, []) of + {not_found, _} -> + {ok, _} = couch_db:update_doc(Db, Doc, []); + {ok, #doc{body=Body}} -> + ok; + {ok, OldDoc} -> + {ok, _} = couch_db:update_doc(Db, OldDoc#doc{body=Body}, []) + end. + +shard_name(Part, DbName) when is_list(DbName) -> + shard_name(Part, ?l2b(DbName)); +shard_name(Part, DbName) -> + PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), + <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. diff --git a/src/mem3_vclock.erl b/src/mem3_vclock.erl new file mode 100644 index 00000000..a48da43c --- /dev/null +++ b/src/mem3_vclock.erl @@ -0,0 +1,109 @@ +%%% @author Cliff Moon [] +%%% @copyright 2008 Cliff Moon + +-module (mem3_vclock). +-export ([create/1, truncate/1, increment/2, compare/2, resolve/2, merge/2, + equals/2]). + +%% -ifdef(TEST). +%% -include("etest/vector_clock_test.erl"). +%% -endif. + +create(NodeName) -> [{NodeName, now_float()}]. 
+ +truncate(Clock) when length(Clock) > 10 -> + lists:nthtail(length(Clock) - 10, lists:keysort(2, Clock)); + +truncate(Clock) -> Clock. + +increment(NodeName, [{NodeName, _Version}|Clocks]) -> + [{NodeName, now_float()}|Clocks]; + +increment(NodeName, [NodeClock|Clocks]) -> + [NodeClock|increment(NodeName, Clocks)]; + +increment(NodeName, []) -> + [{NodeName, now_float()}]. + +resolve({ClockA, ValuesA}, {ClockB, ValuesB}) -> + case compare(ClockA, ClockB) of + less -> {ClockB, ValuesB}; + greater -> {ClockA, ValuesA}; + equal -> {ClockA, ValuesA}; + concurrent -> + showroom_log:message(info, + "~nConcurrent Clocks~n" + "ClockA : ~p~nClockB : ~p~n" + "ValuesA: ~p~nValuesB: ~p~n" + , [ClockA, ClockB, ValuesA, ValuesB]), + {merge(ClockA,ClockB), ValuesA ++ ValuesB} + end; +resolve(not_found, {Clock, Values}) -> + {Clock, Values}; +resolve({Clock, Values}, not_found) -> + {Clock, Values}. + +merge(ClockA, ClockB) -> + merge([], ClockA, ClockB). + +merge(Merged, [], ClockB) -> lists:keysort(1, Merged ++ ClockB); + +merge(Merged, ClockA, []) -> lists:keysort(1, Merged ++ ClockA); + +merge(Merged, [{NodeA, VersionA}|ClockA], ClockB) -> + case lists:keytake(NodeA, 1, ClockB) of + {value, {NodeA, VersionB}, TrunkClockB} when VersionA > VersionB -> + merge([{NodeA,VersionA}|Merged],ClockA,TrunkClockB); + {value, {NodeA, VersionB}, TrunkClockB} -> + merge([{NodeA,VersionB}|Merged],ClockA,TrunkClockB); + false -> + merge([{NodeA,VersionA}|Merged],ClockA,ClockB) + end. + +compare(ClockA, ClockB) -> + AltB = less_than(ClockA, ClockB), + if AltB -> less; true -> + BltA = less_than(ClockB, ClockA), + if BltA -> greater; true -> + AeqB = equals(ClockA, ClockB), + if AeqB -> equal; true -> concurrent end + end + end. 
+ +%% ClockA is less than ClockB if and only if ClockA[z] <= ClockB[z] for all +%% instances z and there exists an index z' such that ClockA[z'] < ClockB[z'] +less_than(ClockA, ClockB) -> + ForAll = lists:all(fun({Node, VersionA}) -> + case lists:keysearch(Node, 1, ClockB) of + {value, {_NodeB, VersionB}} -> VersionA =< VersionB; + false -> false + end + end, ClockA), + Exists = lists:any(fun({NodeA, VersionA}) -> + case lists:keysearch(NodeA, 1, ClockB) of + {value, {_NodeB, VersionB}} -> VersionA /= VersionB; + false -> true + end + end, ClockA), + %length takes care of the case when clockA is shorter than B + ForAll and (Exists or (length(ClockA) < length(ClockB))). + +equals(ClockA, ClockB) -> + Equivalent = lists:all(fun({NodeA, VersionA}) -> + lists:any(fun(NodeClockB) -> + case NodeClockB of + {NodeA, VersionA} -> true; + _ -> false + end + end, ClockB) + end, ClockA), + Equivalent and (length(ClockA) == length(ClockB)). + +now_float() -> + time_to_epoch_float(now()). + +time_to_epoch_float(Time) when is_integer(Time) or is_float(Time) -> + Time; + +time_to_epoch_float({Mega,Sec,Micro}) -> + Mega * 1000000 + Sec + Micro / 1000000. diff --git a/src/membership.erl b/src/membership.erl deleted file mode 100644 index 1e06e798..00000000 --- a/src/membership.erl +++ /dev/null @@ -1,15 +0,0 @@ --module(membership). --author('Brad Anderson '). - --export([start/0, stop/0, restart/0]). - - -start() -> - application:start(membership). - -stop() -> - application:stop(membership). - -restart() -> - stop(), - start(). diff --git a/src/membership_app.erl b/src/membership_app.erl deleted file mode 100644 index df0f4fee..00000000 --- a/src/membership_app.erl +++ /dev/null @@ -1,11 +0,0 @@ --module(membership_app). --behaviour(application). --export([start/2, stop/1]). - -start(_Type, []) -> - DbName = couch_config:get("membership", "db", "dbs"), - couch_server:create(list_to_binary(DbName), []), - membership_sup:start_link(). - -stop([]) -> - ok. 
diff --git a/src/membership_sup.erl b/src/membership_sup.erl deleted file mode 100644 index f203924d..00000000 --- a/src/membership_sup.erl +++ /dev/null @@ -1,44 +0,0 @@ --module(membership_sup). --author('brad@cloudant.com'). - --behaviour(supervisor). - -%% API --export([start_link/0]). - -%% Supervisor callbacks --export([init/1]). - --define(SERVER, ?MODULE). - -start_link() -> - supervisor:start_link(?MODULE, []). - -init(_Args) -> - Membership = {membership, - {mem3, start_link, []}, - permanent, - 1000, - worker, - [mem3]}, - MemEventMgr = {mem_event_manager, - {gen_event, start_link, [{local, membership_events}]}, - permanent, - 1000, - worker, - []}, - DbsRepl = - {dbs, - {dbs, start_link, []}, - permanent, - infinity, - supervisor, - [dbs]}, - DbsCache = - {dbs_cache, - {dbs_cache, start_link, []}, - permanent, - 1000, - worker, - [dbs_cache]}, - {ok, {{one_for_one,10,1}, [Membership, MemEventMgr, DbsRepl, DbsCache]}}. diff --git a/src/partitions.erl b/src/partitions.erl deleted file mode 100644 index 3e024264..00000000 --- a/src/partitions.erl +++ /dev/null @@ -1,189 +0,0 @@ --module(partitions). --author('brad@cloudant.com'). - -%% API --export([fullmap/2, fullmap/3, hash/1, install_fullmap/4]). --export([for_key/2, all_parts/1]). --export([shard_name/2]). - --define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space - --include("../../couch/src/couch_db.hrl"). --include("../../dynomite/include/membership.hrl"). --include_lib("eunit/include/eunit.hrl"). - -%%==================================================================== -%% API -%%==================================================================== - -%% @doc build a full partition map -fullmap(DbName, Options) -> - {ok, Nodes} = mem3:nodes(), - fullmap(DbName, Nodes, Options). - -fullmap(DbName, Nodes, Options) -> - {N,Q} = db_init_constants(Options), - NewNodes = ordered_nodes(DbName, Nodes), - Pmap = pmap(Q, NewNodes), - int_fullmap(DbName, N, Pmap, NewNodes). 
-
-%% @spec hash(term()) -> Digest::binary()
-%% @doc uses SHA-1 as its hash
-hash(Item) when is_binary(Item) ->
- crypto:sha(Item);
-hash(Item) ->
- crypto:sha(term_to_binary(Item)).
-
-install_fullmap(DbName, Fullmap, FullNodes, Options) ->
- {N,Q} = db_init_constants(Options),
- Doc = {[{<<"_id">>,DbName},
- {<<"map">>, jsonify(<<"map">>, Fullmap)},
- {<<"nodes">>, jsonify(<<"nodes">>, FullNodes)},
- {<<"n">>,N},
- {<<"q">>,Q}]},
- write_db_doc(Doc).
-
-for_key(DbName, Key) ->
- <<HashKey:160/integer>> = hash(Key),
- Head = #shard{
- name = '_',
- node = '_',
- dbname = DbName,
- range = ['$1','$2'],
- ref = '_'
- },
- % TODO these conditions assume A < B, which we don't require
- Conditions = [{'<', '$1', HashKey}, {'<', HashKey, '$2'}],
- case ets:select(partitions, [{Head, Conditions, ['$_']}]) of
- [] ->
- erlang:error(database_does_not_exist);
- Shards ->
- Shards
- end.
-
-all_parts(DbName) ->
- case ets:lookup(partitions, DbName) of
- [] ->
- erlang:error(database_does_not_exist);
- Else ->
- Else
- end.
-
-%%====================================================================
-%% Internal functions
-%%====================================================================
-
-%% @doc get cluster constants from options or config
-db_init_constants(Options) ->
- {const(n, Options), const(q, Options)}.
-
-%% @doc get individual constant
-const(Const, Options) ->
- ListResult = case couch_util:get_value(Const, Options) of
- undefined -> couch_config:get("cluster", atom_to_list(Const));
- Val -> Val
- end,
- list_to_integer(ListResult).
-
-%% @doc hash the dbname, and return the corresponding node for seeding a ring
-seednode(DbName, Nodes) ->
- <<HashInt:160/integer>> = hash(DbName),
- Size = partition_range(length(Nodes)),
- Factor = (HashInt div Size),
- lists:nth(Factor+1, Nodes).
- -%% @doc take the list of nodes, and rearrange it, starting with the node that -%% results from hashing the Term -ordered_nodes(Term, Nodes) -> - SeedNode = seednode(Term, Nodes), - {A, B} = lists:splitwith(fun(N) -> N /= SeedNode end, Nodes), - lists:append(B,A). - -%% @doc create a partition map -pmap(NumPartitions, Nodes) -> - Increment = ?RINGTOP div NumPartitions, - Parts = parts(?RINGTOP, Increment, 0, []), - make_map(Nodes, Nodes, Parts, []). - -%% @doc makes a {beg, end} list of partition ranges -%% last range may have an extra few values, because Increment is created -%% with Ringtop 'div' NumPartitions above. -parts(Top, _, Beg, Acc) when Beg > Top -> Acc; -parts(Top, Increment, Beg, Acc) -> - End = case Beg + 2*Increment of - Over when Over > Top -> Top; - _ -> Beg + Increment - 1 - end, - NewAcc = [{Beg, End} | Acc], - parts(Top, Increment, End+1, NewAcc). - -%% @doc create a full map, which is a pmap with N-1 replication partner nodes -%% added per partition -int_fullmap(DbName, N, Pmap, Nodes) -> - Full = lists:foldl(fun({Node,{B,E} = Part}, AccIn) -> - Primary = [#shard{dbname=DbName, node=Node, range=[B,E], - name=shard_name(B,DbName)}], - Partners = partners(DbName, N, Node, Nodes, Part), - lists:append([Primary, Partners, AccIn]) - end, [], Pmap), - lists:reverse(Full). - -partners(DbName, N, Node, Nodes, {Beg,End}) -> - {A, [Node|B]} = lists:splitwith(fun(Nd) -> Nd /= Node end, Nodes), - Nodes1 = lists:append(B,A), - Partners = lists:sublist(Nodes1, N-1), % N-1 replication partner nodes - lists:map(fun(Partner) -> - #shard{dbname=DbName, node=Partner, range=[Beg,End], - name=shard_name(Beg,DbName)} - end, Partners). - -%% @doc size of one partition in the ring -partition_range(Q) -> - trunc( ?RINGTOP / Q ). % SHA-1 space / Q - -%% @doc assign nodes to each of the partitions. When you run out of nodes, -%% start at the beginning of the node list again. 
-%% The provided node list starts with the seed node (seednode fun) -make_map(_,_,[], Acc) -> - lists:keysort(2,Acc); -make_map(AllNodes, [], Parts, Acc) -> - % start back at beginning of node list - make_map(AllNodes, AllNodes, Parts, Acc); -make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> - % add a node/part combo to the Acc - make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). - -jsonify(<<"map">>, Map) -> - lists:map(fun(#shard{node=Node, range=[Beg,End]}) -> - {[{<<"node">>, Node}, {<<"b">>, Beg}, {<<"e">>, End}]} - end, Map); -jsonify(<<"nodes">>, Nodes) -> - lists:map(fun({Order, Node, Options}) -> - {[{<<"order">>, Order}, {<<"node">>, Node}, {<<"options">>, Options}]} - end, Nodes). - -write_db_doc(EDoc) -> - {ok, Db} = couch_db:open(<<"dbs">>, []), - try - update_db_doc(Db, couch_doc:from_json_obj(EDoc)) - catch {conflict, _} -> - ?LOG_ERROR("conflict writing db doc, must be a race", []) - after - couch_db:close(Db) - end. - -update_db_doc(Db, #doc{id=Id, body=Body} = Doc) -> - case couch_db:open_doc(Db, Id, []) of - {not_found, _} -> - {ok, _} = couch_db:update_doc(Db, Doc, []); - {ok, #doc{body=Body}} -> - ok; - {ok, OldDoc} -> - {ok, _} = couch_db:update_doc(Db, OldDoc#doc{body=Body}, []) - end. - -shard_name(Part, DbName) when is_list(DbName) -> - shard_name(Part, ?l2b(DbName)); -shard_name(Part, DbName) -> - PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), - <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. diff --git a/src/vector_clock.erl b/src/vector_clock.erl deleted file mode 100644 index 0a422334..00000000 --- a/src/vector_clock.erl +++ /dev/null @@ -1,109 +0,0 @@ -%%% @author Cliff Moon [] -%%% @copyright 2008 Cliff Moon - --module (vector_clock). --export ([create/1, truncate/1, increment/2, compare/2, resolve/2, merge/2, - equals/2]). - -%% -ifdef(TEST). -%% -include("etest/vector_clock_test.erl"). -%% -endif. - -create(NodeName) -> [{NodeName, now_float()}]. 
- -truncate(Clock) when length(Clock) > 10 -> - lists:nthtail(length(Clock) - 10, lists:keysort(2, Clock)); - -truncate(Clock) -> Clock. - -increment(NodeName, [{NodeName, _Version}|Clocks]) -> - [{NodeName, now_float()}|Clocks]; - -increment(NodeName, [NodeClock|Clocks]) -> - [NodeClock|increment(NodeName, Clocks)]; - -increment(NodeName, []) -> - [{NodeName, now_float()}]. - -resolve({ClockA, ValuesA}, {ClockB, ValuesB}) -> - case compare(ClockA, ClockB) of - less -> {ClockB, ValuesB}; - greater -> {ClockA, ValuesA}; - equal -> {ClockA, ValuesA}; - concurrent -> - showroom_log:message(info, - "~nConcurrent Clocks~n" - "ClockA : ~p~nClockB : ~p~n" - "ValuesA: ~p~nValuesB: ~p~n" - , [ClockA, ClockB, ValuesA, ValuesB]), - {merge(ClockA,ClockB), ValuesA ++ ValuesB} - end; -resolve(not_found, {Clock, Values}) -> - {Clock, Values}; -resolve({Clock, Values}, not_found) -> - {Clock, Values}. - -merge(ClockA, ClockB) -> - merge([], ClockA, ClockB). - -merge(Merged, [], ClockB) -> lists:keysort(1, Merged ++ ClockB); - -merge(Merged, ClockA, []) -> lists:keysort(1, Merged ++ ClockA); - -merge(Merged, [{NodeA, VersionA}|ClockA], ClockB) -> - case lists:keytake(NodeA, 1, ClockB) of - {value, {NodeA, VersionB}, TrunkClockB} when VersionA > VersionB -> - merge([{NodeA,VersionA}|Merged],ClockA,TrunkClockB); - {value, {NodeA, VersionB}, TrunkClockB} -> - merge([{NodeA,VersionB}|Merged],ClockA,TrunkClockB); - false -> - merge([{NodeA,VersionA}|Merged],ClockA,ClockB) - end. - -compare(ClockA, ClockB) -> - AltB = less_than(ClockA, ClockB), - if AltB -> less; true -> - BltA = less_than(ClockB, ClockA), - if BltA -> greater; true -> - AeqB = equals(ClockA, ClockB), - if AeqB -> equal; true -> concurrent end - end - end. 
- -%% ClockA is less than ClockB if and only if ClockA[z] <= ClockB[z] for all -%% instances z and there exists an index z' such that ClockA[z'] < ClockB[z'] -less_than(ClockA, ClockB) -> - ForAll = lists:all(fun({Node, VersionA}) -> - case lists:keysearch(Node, 1, ClockB) of - {value, {_NodeB, VersionB}} -> VersionA =< VersionB; - false -> false - end - end, ClockA), - Exists = lists:any(fun({NodeA, VersionA}) -> - case lists:keysearch(NodeA, 1, ClockB) of - {value, {_NodeB, VersionB}} -> VersionA /= VersionB; - false -> true - end - end, ClockA), - %length takes care of the case when clockA is shorter than B - ForAll and (Exists or (length(ClockA) < length(ClockB))). - -equals(ClockA, ClockB) -> - Equivalent = lists:all(fun({NodeA, VersionA}) -> - lists:any(fun(NodeClockB) -> - case NodeClockB of - {NodeA, VersionA} -> true; - _ -> false - end - end, ClockB) - end, ClockA), - Equivalent and (length(ClockA) == length(ClockB)). - -now_float() -> - time_to_epoch_float(now()). - -time_to_epoch_float(Time) when is_integer(Time) or is_float(Time) -> - Time; - -time_to_epoch_float({Mega,Sec,Micro}) -> - Mega * 1000000 + Sec + Micro / 1000000. -- cgit v1.2.3 From 301e2345a51355fedb60f1668996ce43964f0801 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 16:26:53 -0400 Subject: update to new vclock module name --- src/mem3_server.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mem3_server.erl b/src/mem3_server.erl index 863e752f..addcb591 100644 --- a/src/mem3_server.erl +++ b/src/mem3_server.erl @@ -329,7 +329,7 @@ int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> [New|AccIn] end, Nodes, ExtNodes), NewNodes1 = lists:sort(NewNodes), - NewClock = vector_clock:increment(node(), Clock), + NewClock = mem3_vclock:increment(node(), Clock), NewState = State#mem{nodes=NewNodes1, clock=NewClock}, install_new_state(NewState), {ok, NewState}. 
@@ -347,10 +347,10 @@ get_pingnode_state(PingNode) -> %% @doc handle the gossip messages -%% We're not using vector_clock:resolve b/c we need custom merge strategy +%% We're not using mem3_vclock:resolve b/c we need custom merge strategy handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, LocalState=#mem{clock=LocalClock}) -> - case vector_clock:compare(RemoteClock, LocalClock) of + case mem3_vclock:compare(RemoteClock, LocalClock) of equal -> {reply, ok, LocalState}; less -> @@ -379,7 +379,7 @@ handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, merge_states(#mem{clock=RemoteClock, nodes=RemoteNodes} = _RemoteState, #mem{clock=LocalClock, nodes=LocalNodes} = LocalState) -> - MergedClock = vector_clock:merge(RemoteClock, LocalClock), + MergedClock = mem3_vclock:merge(RemoteClock, LocalClock), MergedNodes = merge_nodes(RemoteNodes, LocalNodes), LocalState#mem{clock=MergedClock, nodes=MergedNodes}. @@ -547,7 +547,7 @@ get_remote_states(NodeList) -> %% return match | {bad_state_match, Node, NodesThatDontMatch} compare_state_with_rest(#mem{clock=Clock} = _State, States) -> Results = lists:map(fun({Node, #mem{clock=Clock1}}) -> - {vector_clock:equals(Clock, Clock1), Node} + {mem3_vclock:equals(Clock, Clock1), Node} end, States), BadResults = lists:foldl(fun({true, _N}, AccIn) -> AccIn; ({false, N}, AccIn) -> [N | AccIn] -- cgit v1.2.3 From b3172298ca9445c83f045e2c9a9ae1335a2c0841 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 16:31:06 -0400 Subject: remove unused/obsolete test code --- test/cluster_ops_test.erl | 83 ------------ test/mem2_code_change.erl | 12 -- test/mem3_cache_bench.erl | 29 ----- test/mem_utils_test.erl | 97 -------------- test/membership2_test.erl | 126 ------------------ test/mock.erl | 322 ---------------------------------------------- test/mock_genserver.erl | 209 ------------------------------ test/replication_test.erl | 89 ------------- test/stub.erl | 168 ------------------------ 9 files changed, 1135 
deletions(-) delete mode 100644 test/cluster_ops_test.erl delete mode 100644 test/mem2_code_change.erl delete mode 100644 test/mem3_cache_bench.erl delete mode 100644 test/mem_utils_test.erl delete mode 100644 test/membership2_test.erl delete mode 100644 test/mock.erl delete mode 100644 test/mock_genserver.erl delete mode 100644 test/replication_test.erl delete mode 100644 test/stub.erl diff --git a/test/cluster_ops_test.erl b/test/cluster_ops_test.erl deleted file mode 100644 index 7bc8fdeb..00000000 --- a/test/cluster_ops_test.erl +++ /dev/null @@ -1,83 +0,0 @@ --module(cluster_ops_test). - --include("../../couch/src/couch_db.hrl"). --include_lib("eunit/include/eunit.hrl"). - - -% read_quorum_test() -> -% % we need to be running a cluster here... -% % not sure how to start things up for unit tests - -% % but we're testing reads when a node is missing a doc, so disable internal -% % replication - a bit harsh if anything else is here, but hey, it's a test -% rpc:multicall(showroom, stop, []), -% rpc:multicall(supervisor, terminate_child, -% [couch_primary_services, couch_replication_supervisor]), -% rpc:multicall(supervisor, delete_child, -% [couch_primary_services, couch_replication_supervisor]), - -% % create db -% DbName = <<"cluster_ops_test">>, -% showroom_db:delete_db(DbName, []), -% {Status, #db{name=DbName}} = showroom_db:create_db(DbName, []), -% ?assertEqual(ok, Status), - -% % open db -% {ok, Db} = showroom_db:open_db(DbName, []), - -% % make a test doc -% Key = <<"a">>, -% Json = {[{<<"_id">>,Key}]}, -% Doc = couch_doc:from_json_obj(Json), -% Clock = vector_clock:create(node()), -% NewDoc = Doc#doc{clock=Clock}, - -% % insert a doc in two shards out of three -% % TODO: we need N=3, need to fix that at db create time Options above -% % (fb 1001) -% {M,F,A} = {dynomite_couch_api, put,[Db, NewDoc, []]}, -% CorrectNodeParts = membership2:nodeparts_for_key(Key), -% [{MissingNode, MissingPart} | BadNodeParts] = CorrectNodeParts, -% MapFun = fun({Node,Part}) 
-> -% rpc:call(Node, M, F, [[Part | A]]) -% end, -% {Good, Bad} = pcall(MapFun, BadNodeParts, 2), -% ?assertEqual(2, length(Good)), -% ?assertEqual([], Bad), - -% % make sure it's notfound on the MissingNode -% MissingNodeGet = rpc:call(MissingNode, dynomite_couch_api, get, -% [[MissingPart, Db, Key, nil, []]]), -% ?assertEqual({not_found, {[], [missing]}}, MissingNodeGet), - -% JsonDoc = {[{<<"_id">>,<<"a">>}, -% {<<"_rev">>, -% <<"1-967a00dff5e02add41819138abb3284d">>}]}, - -% % r=3 should fail -% {r_quorum_not_met, {[{message, _M}, {good, G}, {bad, B}]}} = -% showroom_doc:open_doc(Db, Key, nil, [{r, "3"}]), -% ?assertEqual([JsonDoc,JsonDoc], G), -% ?assertEqual([{not_found, missing}], B), - -% % r=2 should never fail (run it many times to make sure) -% do_opens({Db, Key, nil, [{r, "2"}]}, 20), - -% ok. - - -% pcall(MapFun, Servers, Const) -> -% Replies = lib_misc:pmap(MapFun, Servers, Const), -% lists:partition(fun valid/1, Replies). - - -% valid({ok, _}) -> true; -% valid(ok) -> true; -% valid(_) -> false. - - -% do_opens(_,0) -> ok; -% do_opens({Db, DocId, Refs, Options} = Payload, Times) -> -% {Status, _Doc} = showroom_doc:open_doc(Db, DocId, Refs, Options), -% ?assertEqual(ok, Status), -% do_opens(Payload, Times-1). diff --git a/test/mem2_code_change.erl b/test/mem2_code_change.erl deleted file mode 100644 index 3b0c73fb..00000000 --- a/test/mem2_code_change.erl +++ /dev/null @@ -1,12 +0,0 @@ --module(mem2_code_change). - --export([run/0]). - -run() -> - Pid = whereis(membership), - OldVsn = "0.7.1-cloudant", - Extra = "", - - sys:suspend(Pid), - sys:change_code(Pid, membership2, OldVsn, Extra), - sys:resume(Pid). diff --git a/test/mem3_cache_bench.erl b/test/mem3_cache_bench.erl deleted file mode 100644 index a78f029d..00000000 --- a/test/mem3_cache_bench.erl +++ /dev/null @@ -1,29 +0,0 @@ --module (mem3_cache_bench). - --export ([doit/1]). - --include("../include/config.hrl"). 
- - -init() -> - Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", - storage_mod="dynomite_couch_storage"}, - {ok, _Pid} = mem3:start_link([{test,true}, {config, Config}]), - mem3:join(first, [{1, a, []}, {2, b, []}]). - - -doit(Reps) -> - init(), - Begin = erlang:now(), - process(Reps), - % seconds to run the test - Time = timer:now_diff(erlang:now(), Begin)/1000000, - mem3:stop(), - Time. - - -process(0) -> - ok; -process(M) -> - mem3:fullmap(), - process(M-1). diff --git a/test/mem_utils_test.erl b/test/mem_utils_test.erl deleted file mode 100644 index b884d94e..00000000 --- a/test/mem_utils_test.erl +++ /dev/null @@ -1,97 +0,0 @@ --module(mem_utils_test). - --include_lib("eunit/include/eunit.hrl"). - - -join_type_test() -> - Options = [{replace,node3}], - ?assertEqual({replace,node3}, mem_utils:join_type(dummy,dummy,Options)). - - -pmap_from_full_test() -> - ?assertEqual([{n1,0},{n2,1},{n3,2},{n4,3}], - mem_utils:pmap_from_full(t_fullmap(0))). - - -fix_mappings_nodedown_test() -> - {PMap0, Fullmap0} = mem_utils:fix_mappings(nodedown, n3, t_fullmap(0)), - % with n3 down, n1 takes over - ?assertEqual([{n1,0},{n2,1},{n1,2},{n4,3}], PMap0), - ?assertEqual(t_fullmap(1), lists:sort(Fullmap0)). - - -fix_mappings_rejoin_test() -> - {PMap0, Fullmap0} = mem_utils:fix_mappings(nodedown, n3, t_fullmap(0)), - % with n3 down, n1 takes over - ?assertEqual([{n1,0},{n2,1},{n1,2},{n4,3}], PMap0), - ?assertEqual(t_fullmap(1), lists:sort(Fullmap0)), - % now have n3 rejoin - {PMap1, Fullmap1} = mem_utils:fix_mappings(rejoin, n3, Fullmap0), - ?assertEqual([{n1,0},{n2,1},{n3,2},{n4,3}], PMap1), - ?assertEqual(lists:sort(t_fullmap(0)), lists:sort(Fullmap1)). 
- - -fix_mappings_replace_test() -> - {PMap0, Fullmap0} = mem_utils:fix_mappings(nodedown, n3, t_fullmap(0)), - % with n3 down, n1 takes over - ?assertEqual([{n1,0},{n2,1},{n1,2},{n4,3}], PMap0), - ?assertEqual(t_fullmap(1), lists:sort(Fullmap0)), - % now replace n3 with n5 - {PMap2, Fullmap2} = mem_utils:fix_mappings(replace, {n3,n5}, Fullmap0), - ?assertEqual([{n1,0},{n2,1},{n5,2},{n4,3}], PMap2), - ?assertEqual(lists:sort(t_fullmap(2)), lists:sort(Fullmap2)). - - -fix_mappings_already_down_test() -> - {_PMap0, Fullmap0} = mem_utils:fix_mappings(nodedown, n3, t_fullmap(1)), - ?assertEqual(t_fullmap(1), lists:sort(Fullmap0)). - - -was_i_nodedown_test() -> - ?assertEqual(true, mem_utils:was_i_nodedown(n3, t_fullmap(1))), - ?assertEqual(false, mem_utils:was_i_nodedown(n3, t_fullmap(0))). - - -%% test helper funs - -t_fullmap(0) -> % four node, four part fullmap (unsorted) - [{n1,0,primary}, - {n2,0,partner}, - {n3,0,partner}, - {n2,1,primary}, - {n3,1,partner}, - {n4,1,partner}, - {n3,2,primary}, - {n4,2,partner}, - {n1,2,partner}, - {n4,3,primary}, - {n1,3,partner}, - {n2,3,partner}]; -t_fullmap(1) -> % like (0) above, but n3 is down (sorted) - [{n1,0,primary}, - {n1,2,partner}, - {n1,3,partner}, - {n2,0,partner}, - {n2,1,primary}, - {n2,3,partner}, - {n3,0,{nodedown,partner}}, - {n3,1,{nodedown,partner}}, - {n3,2,{nodedown,primary}}, - {n4,1,partner}, - {n4,2,partner}, - {n4,3,primary}]; -t_fullmap(2) -> % like (0) above, but n3 is replaced w/ n5 (unsorted) - [{n1,0,primary}, - {n2,0,partner}, - {n5,0,partner}, - {n2,1,primary}, - {n5,1,partner}, - {n4,1,partner}, - {n5,2,primary}, - {n4,2,partner}, - {n1,2,partner}, - {n4,3,primary}, - {n1,3,partner}, - {n2,3,partner}]; -t_fullmap(_Huh) -> - huh. diff --git a/test/membership2_test.erl b/test/membership2_test.erl deleted file mode 100644 index ed804cc2..00000000 --- a/test/membership2_test.erl +++ /dev/null @@ -1,126 +0,0 @@ -%%% -*- erlang-indent-level:2 -*- --module(membership2_test). 
--author('cliff@powerset.com'). --author('brad@cloudant.com'). - --include("../include/config.hrl"). --include("../include/common.hrl"). --include("../include/test.hrl"). - --include_lib("eunit/include/eunit.hrl"). - -% singular_startup_sequence_test() -> -% %% configuration:start_link(#config{n=1,r=1,w=1,q=6,directory=?TMP_DIR}), -% {ok, _} = mock:mock(configuration), -% mock:expects(configuration, get_config, fun(_Args) -> true end, -% #config{n=1,r=1,w=1,q=6,directory=?TMP_DIR}, 3), -% {ok, _} = mock:mock(replication), -% mock:expects(replication, partners, fun({_, [a], _}) -> true end, []), -% mock:expects(replication, partners_plus, fun({a, [a]}) -> true end, []), -% {ok, M} = membership2:start_link(a, [a]), -% State = gen_server:call(M, state), -% ?assertEqual(a, State#membership.node), -% ?assertEqual([a], State#membership.nodes), -% mock:verify_and_stop(replication), -% membership2:stop(M), -% %% configuration:stop(), -% mock:verify_and_stop(configuration), -% ?assertMatch({ok, [[a]]}, file:consult(?TMP_FILE("a.world"))), -% file:delete(?TMP_FILE("a.world")). - -% -define(NODEA, {a, ["d", "1", "4"]}). -% -define(NODEB, {b, ["e", "3", "1"]}). -% -define(NODEC, {c, ["f", "1", "2"]}). -% -define(NODES, [?NODEA, ?NODEB, ?NODEC]). 
- -% multi_startup_sequence_test() -> -% {ok, _} = mock:mock(configuration), -% mock:expects(configuration, get_config, fun(_Args) -> true end, -% (#config{n=3,r=1,w=1,q=6,directory=?TMP_DIR}), 3), -% {ok, _} = mock:mock(replication), -% VersionOne = vector_clock:create(make_ref()), -% Pid1 = make_ref(), -% VersionTwo = vector_clock:create(make_ref()), -% Pid2 = make_ref(), -% mock:expects(replication, partners, fun({_, ?NODES, _}) -> true end, [?NODEB, ?NODEC]), -% {ok, _} = stub:stub(membership2, call_join, fun(?NODEB, ?NODEA) -> -% {VersionOne, ?NODES, [{1,Pid1}]}; -% (?NODEC, ?NODEA) -> -% {VersionTwo, ?NODES, [{2,Pid2}]} -% end, 2), -% ?debugMsg("proxied"), -% ?debugFmt("check process code: ~p", [erlang:check_process_code(self(), membership2)]), -% {ok, M} = membership2:start_link(?NODEA, ?NODES), -% State = gen_server:call(M, state), -% ?assertEqual(?NODEA, State#membership.node), -% ?assertEqual(?NODES, State#membership.nodes), -% % Servers = State#membership.servers, -% % ?assertMatch([{1,Pid1},{2,Pid2}], membership2:servers_to_list(Servers)), -% ?assertEqual(greater, vector_clock:compare(State#membership.version, VersionOne)), -% ?assertEqual(greater, vector_clock:compare(State#membership.version, VersionTwo)), -% mock:verify_and_stop(replication), -% membership2:stop(M), -% mock:verify_and_stop(configuration), -% ?assertMatch({ok, [?NODES]}, file:consult(?TMP_FILE("a.world"))), -% file:delete(?TMP_FILE("a.world")). 
- -% startup_and_first_servers_for_key_test() -> -% configuration:start_link(#config{n=1,r=1,w=1,q=6,directory=?TMP_DIR}), -% {ok, _} = mock:mock(replication), -% mock:expects(replication, partners, fun({_, [a], _}) -> true end, []), -% {ok, M} = membership2:start_link(a, [a]), -% _State = gen_server:call(M, state), -% ?assertEqual([], membership2:servers_for_key("blah")), -% mock:verify_and_stop(replication), -% membership2:stop(M), -% configuration:stop(), -% ?assertMatch({ok, [[a]]}, file:consult(?TMP_FILE("a.world"))), -% file:delete(?TMP_FILE("a.world")). - -% startup_and_register_test() -> -% configuration:start_link(#config{n=1,r=1,w=1,q=0,directory=?TMP_DIR}), -% {ok, _} = mock:mock(replication), -% mock:expects(replication, partners, fun({_, [?NODEA], _}) -> true end, [], 3), -% {ok, M} = membership2:start_link(?NODEA, [?NODEA]), -% SServer1 = make_server(), -% SServer2 = make_server(), -% membership2:register(1, SServer1), -% membership2:register(1, SServer2), -% ?assertEqual([SServer1, SServer2], membership2:servers_for_key("blah")), -% mock:verify_and_stop(replication), -% membership2:stop(M), -% configuration:stop(), -% SServer1 ! stop, -% SServer2 ! stop, -% file:delete(?TMP_FILE("a.world")). - -% handle_local_server_outage_test() -> -% configuration:start_link(#config{n=1,r=1,w=1,q=0,directory=?TMP_DIR}), -% {ok, _} = mock:mock(replication), -% mock:expects(replication, partners, fun({_, [?NODEA], _}) -> true end, [], 4), -% {ok, M} = membership2:start_link(?NODEA, [?NODEA]), -% SServer1 = make_server(), -% SServer2 = make_server(), -% membership2:register(1, SServer1), -% membership2:register(1, SServer2), -% SServer1 ! stop, -% timer:sleep(1), -% ?assertEqual([SServer2], membership2:servers_for_key("blah")), -% mock:verify_and_stop(replication), -% membership2:stop(M), -% configuration:stop(), -% SServer2 ! stop, -% file:delete(?TMP_FILE("a.world")). 
- -% full_gossip_test() -> -% configuration:start_link(#config{n=1,r=1,w=1,q=2,directory=priv_dir()}), -% {ok, _} = mock:mock(replication), -% mock:expects(replication, partners, fun({_, ?NODES, _}) -> true end, [?NODEB, ?NODEC],4), - - -% make_server() -> -% spawn(fun() -> -% receive -% stop -> ok -% end -% end). diff --git a/test/mock.erl b/test/mock.erl deleted file mode 100644 index 2ecbf4f7..00000000 --- a/test/mock.erl +++ /dev/null @@ -1,322 +0,0 @@ -%%% -*- erlang-indent-level:2 -*- -%%%------------------------------------------------------------------- -%%% File: mock.erl -%%% @author Cliff Moon <> [] -%%% @copyright 2009 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-01-04 by Cliff Moon -%%%------------------------------------------------------------------- --module(mock). --author('cliff@powerset.com'). - -%% API --export([mock/1, proxy_call/2, proxy_call/3, expects/4, expects/5, - verify_and_stop/1, verify/1, stub_proxy_call/3, stop/1]). - --include_lib("eunit/include/eunit.hrl"). --include("../include/common.hrl"). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --record(mockstate, {old_code, module, expectations=[]}). - -%%==================================================================== -%% API -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec mock(Module::atom()) -> {ok,Mock::record()} | ignore | {error,Error} -%% @doc Starts the server -%% @end -%%-------------------------------------------------------------------- -mock(Module) -> - case gen_server:start_link({local, mod_to_name(Module)}, mock, Module, []) of - {ok, Pid} -> {ok, Pid}; - {error, Reason} -> {error, Reason} - end. 
- -%% @spec proxy_call(Module::atom(), Function::atom()) -> term() -%% @doc Proxies a call to the mock server for Module without arguments -%% @end -proxy_call(Module, Function) -> - gen_server:call(mod_to_name(Module), {proxy_call, Function, {}}). - -%% @spec proxy_call(Module::atom(), Function::atom(), Args::tuple()) -> term() -%% @doc Proxies a call to the mock server for Module with arguments -%% @end -proxy_call(Module, Function, Args) -> - gen_server:call(mod_to_name(Module), {proxy_call, Function, Args}). - -stub_proxy_call(Module, Function, Args) -> - RegName = list_to_atom(lists:concat([Module, "_", Function, "_stub"])), - Ref = make_ref(), - RegName ! {Ref, self(), Args}, - ?debugFmt("sending {~p,~p,~p}", [Ref, self(), Args]), - receive - {Ref, Answer} -> Answer - end. - -%% @spec expects(Module::atom(), -%% Function::atom(), -%% Args::function(), -%% Ret::function() | term() ) -> term() - -%% Times:: {at_least, integer()} | never | {no_more_than, integer()} | integer()) -> term() - -%% @doc Sets the expectation that Function of Module will be called during a -%% test with Args. Args should be a fun predicate that will return true or -%% false whether or not the argument list matches. The argument list of the -%% function is passed in as a tuple. Ret is either a value to return or a fun -%% of arity 2 to be evaluated in response to a proxied call. The first argument -%% is the actual args from the call, the second is the call count starting -%% with 1. -expects(Module, Function, Args, Ret) -> - gen_server:call(mod_to_name(Module), {expects, Function, Args, Ret, 1}). - -expects(Module, Function, Args, Ret, Times) -> - gen_server:call(mod_to_name(Module), {expects, Function, Args, Ret, Times}). - -%% stub(Module, Function, Args, Ret) -> -%% gen_server:call(mod_to_name(Module), {stub, Function, Args, Ret}). - -verify_and_stop(Module) -> - verify(Module), - stop(Module). - -verify(Module) -> - ?assertEqual(ok, gen_server:call(mod_to_name(Module), verify)). 
- -stop(Module) -> - gen_server:cast(mod_to_name(Module), stop), - timer:sleep(10). - - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% @doc Initiates the server -%% @end -%%-------------------------------------------------------------------- -init(Module) -> - case code:get_object_code(Module) of - {Module, Bin, Filename} -> - case replace_code(Module) of - ok -> {ok, #mockstate{module=Module,old_code={Module, Bin, Filename}}}; - {error, Reason} -> {stop, Reason} - end; - error -> {stop, ?fmt("Could not get object code for module ~p", [Module])} - end. - -%%-------------------------------------------------------------------- -%% @spec -%% handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% @doc Handling call messages -%% @end -%%-------------------------------------------------------------------- -handle_call({proxy_call, Function, Args}, _From, - State = #mockstate{module=Mod,expectations=Expects}) -> - case match_expectation(Function, Args, Expects) of - {matched, ReturnTerm, NewExpects} -> - {reply, ReturnTerm, State#mockstate{expectations=NewExpects}}; - unmatched -> - {stop, ?fmt("got unexpected call to ~p:~p", [Mod,Function])} - end; - -handle_call({expects, Function, Args, Ret, Times}, _From, - State = #mockstate{expectations=Expects}) -> - {reply, ok, State#mockstate{ - expectations=add_expectation(Function, Args, Ret, Times, Expects)}}; - -handle_call(verify, _From, State = #mockstate{expectations=Expects,module=Mod}) -> - ?infoFmt("verifying ~p~n", [Mod]), - if - length(Expects) > 0 -> - {reply, 
{mismatch, format_missing_expectations(Expects, Mod)}, State}; - true -> {reply, ok, State} - end. - -%%-------------------------------------------------------------------- -%% @spec handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling cast messages -%% @end -%%-------------------------------------------------------------------- -handle_cast(stop, State) -> - timer:sleep(10), - {stop, normal, State}. - -%%-------------------------------------------------------------------- -%% @spec handle_info(Info, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling all non call/cast messages -%% @end -%%-------------------------------------------------------------------- -handle_info(_Info, State) -> - {noreply, State}. - -%%-------------------------------------------------------------------- -%% @spec terminate(Reason, State) -> void() -%% @doc This function is called by a gen_server when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -terminate(_Reason, #mockstate{old_code={Module, Binary, Filename}}) -> - code:purge(Module), - code:delete(Module), - code:load_binary(Module, Filename, Binary), - timer:sleep(10). - -%%-------------------------------------------------------------------- -%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} -%% @doc Convert process state when code is changed -%% @end -%%-------------------------------------------------------------------- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. 
- -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- -format_missing_expectations(Expects, Mod) -> - format_missing_expectations(Expects, Mod, []). - -format_missing_expectations([], _, Msgs) -> - lists:reverse(Msgs); - -format_missing_expectations([{Function, _Args, _Ret, Times, Called}|Expects], Mod, Msgs) -> - Msgs1 = [?fmt("expected ~p:~p to be called ~p times but was called ~p", [Mod,Function,Times,Called])|Msgs], - format_missing_expectations(Expects, Mod, Msgs1). - -add_expectation(Function, Args, Ret, Times, Expects) -> - Expects ++ [{Function, Args, Ret, Times, 0}]. - -match_expectation(Function, Args, Expectations) -> - match_expectation(Function, Args, Expectations, []). - -match_expectation(_Function, _Args, [], _Rest) -> - unmatched; - -match_expectation(Function, Args, [{Function, Matcher, Ret, MaxTimes, Invoked}|Expects], Rest) -> - case Matcher(Args) of - true -> - ReturnTerm = prepare_return(Args, Ret, Invoked+1), - if - Invoked + 1 >= MaxTimes -> {matched, ReturnTerm, lists:reverse(Rest) ++ Expects}; - true -> {matched, ReturnTerm, lists:reverse(Rest) ++ [{Function, Matcher, Ret, MaxTimes, Invoked+1}] ++ Expects} - end; - false -> match_expectation(Function, Args, Expects, [{Function,Matcher,Ret,MaxTimes,Invoked}|Rest]) - end; - -match_expectation(Function, Args, [Expect|Expects], Rest) -> - match_expectation(Function, Args, Expects, [Expect|Rest]). - -prepare_return(Args, Ret, Invoked) when is_function(Ret) -> - Ret(Args, Invoked); - -prepare_return(_Args, Ret, _Invoked) -> - Ret. 
- -replace_code(Module) -> - Info = Module:module_info(), - Exports = get_exports(Info), - unload_code(Module), - NewFunctions = generate_functions(Module, Exports), - Forms = [ - {attribute,1,module,Module}, - {attribute,2,export,Exports} - ] ++ NewFunctions, - case compile:forms(Forms, [binary]) of - {ok, Module, Binary} -> case code:load_binary(Module, atom_to_list(Module) ++ ".erl", Binary) of - {module, Module} -> ok; - {error, Reason} -> {error, Reason} - end; - error -> {error, "An undefined error happened when compiling."}; - {error, Errors, Warnings} -> {error, Errors ++ Warnings} - end. - -unload_code(Module) -> - code:purge(Module), - code:delete(Module). - -get_exports(Info) -> - get_exports(Info, []). - -get_exports(Info, Acc) -> - case lists:keytake(exports, 1, Info) of - {value, {exports, Exports}, ModInfo} -> - get_exports(ModInfo, Acc ++ lists:filter(fun({module_info, _}) -> false; (_) -> true end, Exports)); - _ -> Acc - end. - -%% stub_function_loop(Fun) -> -%% receive -%% {Ref, Pid, Args} -> -%% ?debugFmt("received {~p,~p,~p}", [Ref, Pid, Args]), -%% Ret = (catch Fun(Args) ), -%% ?debugFmt("sending {~p,~p}", [Ref,Ret]), -%% Pid ! {Ref, Ret}, -%% stub_function_loop(Fun) -%% end. - -% Function -> {function, Lineno, Name, Arity, [Clauses]} -% Clause -> {clause, Lineno, [Variables], [Guards], [Expressions]} -% Variable -> {var, Line, Name} -% -generate_functions(Module, Exports) -> - generate_functions(Module, Exports, []). - -generate_functions(_Module, [], FunctionForms) -> - lists:reverse(FunctionForms); - -generate_functions(Module, [{Name,Arity}|Exports], FunctionForms) -> - generate_functions(Module, Exports, [generate_function(Module, Name, Arity)|FunctionForms]). - -generate_function(Module, Name, Arity) -> - {function, 1, Name, Arity, [{clause, 1, generate_variables(Arity), [], generate_expression(mock, proxy_call, Module, Name, Arity)}]}. 
- -generate_variables(0) -> []; -generate_variables(Arity) -> - lists:map(fun(N) -> - {var, 1, list_to_atom(lists:concat(['Arg', N]))} - end, lists:seq(1, Arity)). - -generate_expression(M, F, Module, Name, 0) -> - [{call,1,{remote,1,{atom,1,M},{atom,1,F}}, [{atom,1,Module}, {atom,1,Name}]}]; -generate_expression(M, F, Module, Name, Arity) -> - [{call,1,{remote,1,{atom,1,M},{atom,1,F}}, [{atom,1,Module}, {atom,1,Name}, {tuple,1,lists:map(fun(N) -> - {var, 1, list_to_atom(lists:concat(['Arg', N]))} - end, lists:seq(1, Arity))}]}]. - -mod_to_name(Module) -> - list_to_atom(lists:concat([mock_, Module])). - -%% replace_function(FF, Forms) -> -%% replace_function(FF, Forms, []). - -%% replace_function(FF, [], Ret) -> -%% [FF|lists:reverse(Ret)]; - -%% replace_function({function,_,Name,Arity,Clauses}, [{function,Line,Name,Arity,_}|Forms], Ret) -> -%% lists:reverse(Ret) ++ [{function,Line,Name,Arity,Clauses}|Forms]; - -%% replace_function(FF, [FD|Forms], Ret) -> -%% replace_function(FF, Forms, [FD|Ret]). diff --git a/test/mock_genserver.erl b/test/mock_genserver.erl deleted file mode 100644 index cde41ff5..00000000 --- a/test/mock_genserver.erl +++ /dev/null @@ -1,209 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: mock_genserver.erl -%%% @author Cliff Moon <> [] -%%% @copyright 2009 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-01-02 by Cliff Moon -%%%------------------------------------------------------------------- --module(mock_genserver). --author('cliff@powerset.com'). - --behaviour(gen_server). - --include_lib("eunit/include/eunit.hrl"). - -%% API --export([start_link/1, stub_call/3, expects_call/3, expects_call/4, stop/1]). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --record(state, {call_stubs=[], call_expects=[], cast_expectations, info_expectations}). 
- -%%==================================================================== -%% API -%%==================================================================== -%%-------------------------------------------------------------------- -%% @spec start_link(Reference::atom()) -> {ok,Pid} | ignore | {error,Error} -%% @doc Starts the server -%% @end -%%-------------------------------------------------------------------- -start_link(Reference) -> - gen_server:start_link(Reference, ?MODULE, [], []). - -stub_call(Server, Sym, Fun) when is_function(Fun) -> - gen_server:call(Server, {mock_stub_call, Sym, Fun}). - -expects_call(Server, Args, Fun) when is_function(Fun) -> - gen_server:call(Server, {mock_expects_call, Args, Fun}). - -expects_call(Server, Args, Fun, Times) when is_function(Fun) -> - gen_server:call(Server, {mock_expects_call, Args, Fun, Times}). - -stop(Server) -> - gen_server:call(Server, mock_stop). - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% @doc Initiates the server -%% @end -%%-------------------------------------------------------------------- -init([]) -> - {ok, #state{}}. 
- -%%-------------------------------------------------------------------- -%% @spec -%% handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% @doc Handling call messages -%% @end -%%-------------------------------------------------------------------- -handle_call({mock_stub_call, Sym, Fun}, _From, State = #state{call_stubs=Stubs}) -> - {reply, ok, State#state{call_stubs=[{Sym, Fun}|Stubs]}}; - -handle_call({mock_expects_call, Args, Fun}, _From, State = #state{call_expects=Expects}) -> - {reply, ok, State#state{call_expects=add_expectation(Args, Fun, at_least_once, Expects)}}; - -handle_call({mock_expects_call, Args, Fun, Times}, _From, State = #state{call_expects=Expects}) -> - {reply, ok, State#state{call_expects=add_expectation(Args, Fun, Times, Expects)}}; - -handle_call(mock_stop, _From, State) -> - {stop, normal, ok, State}; - -handle_call(Request, _From, State = #state{call_stubs=Stubs,call_expects=Expects}) -> - % expectations have a higher priority - case find_expectation(Request, Expects) of - {found, {_, Fun, Time}, NewExpects} -> {reply, Fun(Request, Time), State#state{call_expects=NewExpects}}; - not_found -> % look for a stub - case find_stub(Request, Stubs) of - {found, {_, Fun}} -> {reply, Fun(Request), State}; - not_found -> - {stop, {unexpected_call, Request}, State} - end - end. - -%%-------------------------------------------------------------------- -%% @spec handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling cast messages -%% @end -%%-------------------------------------------------------------------- -handle_cast(_Msg, State) -> - {noreply, State}. 
- -%%-------------------------------------------------------------------- -%% @spec handle_info(Info, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling all non call/cast messages -%% @end -%%-------------------------------------------------------------------- -handle_info(_Info, State) -> - {noreply, State}. - -%%-------------------------------------------------------------------- -%% @spec terminate(Reason, State) -> void() -%% @doc This function is called by a gen_server when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -terminate(_Reason, _State) -> - ok. - -%%-------------------------------------------------------------------- -%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} -%% @doc Convert process state when code is changed -%% @end -%%-------------------------------------------------------------------- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- - - -add_expectation(Args, Fun, Times, Expects) -> - Expects ++ [{Args, Fun, Times}]. - -find_expectation(Request, Expects) -> - find_expectation(Request, Expects, []). 
- -find_expectation(_Request, [], _Rest) -> - not_found; - -find_expectation(Request, [{Args, Fun, Times}|Expects], Rest) -> - MatchFun = generate_match_fun(Args), - case MatchFun(Request) of - true -> - if - Times == at_least_once -> {found, {Args, Fun, Times}, lists:reverse(Rest) ++ [{Args, Fun, Times}] ++ Expects}; - Times == 1 -> {found, {Args, Fun, Times}, lists:reverse(Rest) ++ Expects}; - true -> {found, {Args, Fun, Times}, lists:reverse(Rest) ++ [{Args, Fun, Times-1}] ++ Expects} - end; - false -> find_expectation(Request, Expects, [{Args, Fun, Times}|Rest]) - end. - -find_stub(Request, Stub) when is_tuple(Request) -> - Sym = element(1, Request), - find_stub(Sym, Stub); - -find_stub(_Sym, []) -> - not_found; - -find_stub(Sym, _Stubs) when not is_atom(Sym) -> - not_found; - -find_stub(Sym, [{Sym, Fun}|_Stubs]) -> - {found, {Sym, Fun}}; - -find_stub(Sym, [_Stub|Stubs]) -> - find_stub(Sym, Stubs). - -generate_match_fun(Args) when is_tuple(Args) -> - generate_match_fun(tuple_to_list(Args)); - -generate_match_fun(Args) when not is_list(Args) -> - generate_match_fun([Args]); - -generate_match_fun(Args) when is_list(Args) -> - Src = generate_match_fun("fun({", Args), - {ok, Tokens, _} = erl_scan:string(Src), - {ok, [Form]} = erl_parse:parse_exprs(Tokens), - {value, Fun, _} = erl_eval:expr(Form, erl_eval:new_bindings()), - Fun. - -generate_match_fun(Src, []) -> - Src ++ "}) -> true; (_) -> false end."; - -% unbound atom means you don't care about an arg -generate_match_fun(Src, [unbound|Args]) -> - if - length(Args) > 0 -> generate_match_fun(Src ++ "_,", Args); - true -> generate_match_fun(Src ++ "_", Args) - end; - -generate_match_fun(Src, [Bound|Args]) -> - Term = lists:flatten(io_lib:format("~w", [Bound])), - if - length(Args) > 0 -> generate_match_fun(Src ++ Term ++ ",", Args); - true -> generate_match_fun(Src ++ Term, Args) - end. 
diff --git a/test/replication_test.erl b/test/replication_test.erl deleted file mode 100644 index 095e1b44..00000000 --- a/test/replication_test.erl +++ /dev/null @@ -1,89 +0,0 @@ -%%% -*- erlang-indent-level:2 -*- --module(replication_test). --author('brad@cloudant.com'). - --include("../include/config.hrl"). --include("../include/test.hrl"). - --include_lib("eunit/include/eunit.hrl"). - --define(NODEA, {a, ["d", "1", "4"]}). --define(NODEB, {b, ["e", "3", "1"]}). --define(NODEC, {c, ["f", "1", "2"]}). --define(NODED, {d, ["e", "1", "2"]}). --define(NODEE, {e, ["e", "2", "2"]}). --define(NODES, [?NODEA, ?NODEB, ?NODEC, ?NODED, ?NODEE]). - -%% TODO: give this some effigy love, mock configuration up all of these -%% different ways. - -metadata_level_1_test() -> - configuration:start_link(#config{n=3,r=1,w=1,q=6, - directory=?TMP_DIR, - meta=[{datacenter,roundrobin}, - {rack, roundrobin}, - {slot, roundrobin} - ]}), - Partners = replication:partners(?NODEA, - [?NODEA, ?NODEB, ?NODEC], - configuration:get_config()), - ?assertEqual([?NODEB, ?NODEC], Partners), - configuration:stop(). - - -metadata_level_2_test() -> - configuration:start_link(#config{n=3,r=1,w=1,q=6, - directory=?TMP_DIR, - meta=[{datacenter,roundrobin}, - {rack, roundrobin}, - {slot, roundrobin} - ]}), - Partners = replication:partners(?NODEA, - ?NODES, - configuration:get_config()), - ?assertEqual([?NODED,?NODEE], Partners), - configuration:stop(). - - -no_metadata_test() -> - configuration:start_link(#config{n=2,r=1,w=1,q=6, - directory=?TMP_DIR, - meta=[]}), - Partners = replication:partners(a, - [a,b,c,d], - configuration:get_config()), - ?assertEqual([b], Partners), - configuration:stop(). 
- - -wrap_test() -> - configuration:start_link(#config{n=3,r=1,w=1,q=6, - directory=?TMP_DIR, - meta=[]}), - Wrap1Partners = replication:partners(c, - [a,b,c,d], - configuration:get_config()), - ?assertEqual([a,d], Wrap1Partners), - Wrap2Partners = replication:partners(d, - [a,b,c,d], - configuration:get_config()), - ?assertEqual([a,b], Wrap2Partners), - configuration:stop(). - - -self_test() -> - configuration:start_link(#config{n=3,r=1,w=1,q=6, - directory=?TMP_DIR, - meta=[]}), - Partners = replication:partners(a, [a], - configuration:get_config()), - ?assertEqual([], Partners), - configuration:stop(). - - -remove_self_test() -> - configuration:start_link( - #config{n=4,r=1,w=1,q=6, directory=?TMP_DIR, meta=[]}), - Partners = replication:partners(a, [a,b], configuration:get_config()), - ?assertEqual([b], Partners), - configuration:stop(). diff --git a/test/stub.erl b/test/stub.erl deleted file mode 100644 index 2a6173b5..00000000 --- a/test/stub.erl +++ /dev/null @@ -1,168 +0,0 @@ -%%%------------------------------------------------------------------- -%%% File: stub.erl -%%% @author Cliff Moon <> [] -%%% @copyright 2009 Cliff Moon -%%% @doc -%%% -%%% @end -%%% -%%% @since 2009-05-10 by Cliff Moon -%%%------------------------------------------------------------------- --module(stub). --author('cliff@powerset.com'). - --behaviour(gen_server). - -%% API --export([stub/3, stub/4, proxy_call/3]). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - --include_lib("eunit/include/eunit.hrl"). --include("../include/common.hrl"). - --record(state, {old_code, module, stub, times}). - -%%==================================================================== -%% API -%%==================================================================== - -stub(Module, Function, Fun) -> - stub(Module, Function, Fun, 1). 
- -stub(Module, Function, Fun, Times) when is_function(Fun) -> - gen_server:start({local, name(Module, Function)}, ?MODULE, [Module, Function, Fun, Times], []). - -proxy_call(_, Name, Args) -> - {Times, Reply} = gen_server:call(Name, {proxy_call, Args}), - if - Times =< 0 -> gen_server:cast(Name, stop); - true -> ok - end, - Reply. - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%%-------------------------------------------------------------------- -%% @spec init(Args) -> {ok, State} | -%% {ok, State, Timeout} | -%% ignore | -%% {stop, Reason} -%% @doc Initiates the server -%% @end -%%-------------------------------------------------------------------- -init([Module, Function, Fun, Times]) -> - case code:get_object_code(Module) of - {Module, Bin, Filename} -> - ?debugMsg("stubbing"), - stub_function(Module, Function, arity(Fun)), - {ok, #state{module=Module,old_code={Module,Bin,Filename},times=Times,stub=Fun}}; - error -> {stop, ?fmt("Could not get object code for module ~p", [Module])} - end. - -%%-------------------------------------------------------------------- -%% @spec -%% handle_call(Request, From, State) -> {reply, Reply, State} | -%% {reply, Reply, State, Timeout} | -%% {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, Reply, State} | -%% {stop, Reason, State} -%% @doc Handling call messages -%% @end -%%-------------------------------------------------------------------- -handle_call({proxy_call, Args}, _From, State = #state{stub=Fun, times=Times}) -> - Reply = apply(Fun, tuple_to_list(Args)), - {reply, {Times-1, Reply}, State#state{times=Times-1}}. 
- -%%-------------------------------------------------------------------- -%% @spec handle_cast(Msg, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling cast messages -%% @end -%%-------------------------------------------------------------------- -handle_cast(stop, State) -> - sleep:timer(10), - {stop, normal, State}. - -%%-------------------------------------------------------------------- -%% @spec handle_info(Info, State) -> {noreply, State} | -%% {noreply, State, Timeout} | -%% {stop, Reason, State} -%% @doc Handling all non call/cast messages -%% @end -%%-------------------------------------------------------------------- -handle_info(_Info, State) -> - {noreply, State}. - -%%-------------------------------------------------------------------- -%% @spec terminate(Reason, State) -> void() -%% @doc This function is called by a gen_server when it is about to -%% terminate. It should be the opposite of Module:init/1 and do any necessary -%% cleaning up. When it returns, the gen_server terminates with Reason. -%% The return value is ignored. -%% @end -%%-------------------------------------------------------------------- -terminate(_Reason, #state{old_code={_Module,_Bin,_Filename}}) -> - ok. - -%%-------------------------------------------------------------------- -%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} -%% @doc Convert process state when code is changed -%% @end -%%-------------------------------------------------------------------- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- -name(Module, Function) -> - list_to_atom(lists:concat([Module, Function, "stub"])). 
- -stub_function(Module, Function, Arity) -> - {_, Bin, _} = code:get_object_code(Module), - {ok, {Module,[{abstract_code,{raw_abstract_v1,Forms}}]}} = beam_lib:chunks(Bin, [abstract_code]), - ?debugMsg("replacing function"), - StubbedForms = replace_function(Module, Function, Arity, Forms), - case compile:forms(StubbedForms, [binary]) of - {ok, Module, Binary} -> code:load_binary(Module, atom_to_list(Module) ++ ".erl", Binary); - Other -> Other - end. - -arity(Fun) when is_function(Fun) -> - Props = erlang:fun_info(Fun), - proplists:get_value(arity, Props). - -replace_function(Module, Function, Arity, Forms) -> - replace_function(Module, Function, Arity, Forms, []). - -replace_function(_Module, _Function, _Arity, [], Acc) -> - lists:reverse(Acc); -replace_function(Module, Function, Arity, [{function, Line, Function, Arity, _Clauses}|Forms], Acc) -> - lists:reverse(Acc) ++ [{function, Line, Function, Arity, [ - {clause, - Line, - generate_variables(Arity), - [], - generate_expression(stub,proxy_call,Module,name(Module,Function),Arity)}]}] ++ Forms; -replace_function(Module, Function, Arity, [Form|Forms], Acc) -> - replace_function(Module, Function, Arity, Forms, [Form|Acc]). - -generate_variables(0) -> []; -generate_variables(Arity) -> - lists:map(fun(N) -> - {var, 1, list_to_atom(lists:concat(['Arg', N]))} - end, lists:seq(1, Arity)). - -generate_expression(M, F, Module, Name, 0) -> - [{call,1,{remote,1,{atom,1,M},{atom,1,F}}, [{atom,1,Module}, {atom,1,Name}]}]; -generate_expression(M, F, Module, Name, Arity) -> - [{call,1,{remote,1,{atom,1,M},{atom,1,F}}, [{atom,1,Module}, {atom,1,Name}, {tuple,1,lists:map(fun(N) -> - {var, 1, list_to_atom(lists:concat(['Arg', N]))} - end, lists:seq(1, Arity))}]}]. 
-- cgit v1.2.3 From dcc02f9b3ef2083f89c54ea3e9a8ad7cccf791d3 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 16:41:47 -0400 Subject: better naming for event manager and sync event handler --- ebin/mem3.app | 4 +-- src/mem3_event.erl | 74 ------------------------------------------------- src/mem3_sup.erl | 9 +++--- src/mem3_sync_event.erl | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 81 deletions(-) delete mode 100644 src/mem3_event.erl create mode 100644 src/mem3_sync_event.erl diff --git a/ebin/mem3.app b/ebin/mem3.app index 1b04f5a1..d0caaeec 100644 --- a/ebin/mem3.app +++ b/ebin/mem3.app @@ -6,17 +6,17 @@ mem3, mem3_app, mem3_cache, - mem3_event, mem3_httpd, mem3_server, mem3_sup, mem3_sync, + mem3_sync_event, mem3_util, mem3_vclock ]}, {registered, [ mem3_cache, - mem3_event, + mem3_events, mem3_server, mem3_sync, mem3_sup diff --git a/src/mem3_event.erl b/src/mem3_event.erl deleted file mode 100644 index 59156adc..00000000 --- a/src/mem3_event.erl +++ /dev/null @@ -1,74 +0,0 @@ --module(mem3_event). - --behaviour(gen_event). - --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, - code_change/3]). - --include("mem3.hrl"). - -init([]) -> - {ok, []}. - -handle_event({node_join, Node}, State) -> - start_repl({node_join, Node}, State); - -handle_event({nodeup, Node}, State) -> - start_repl({nodeup, Node}, State); - -handle_event({node_leave, Node}, State) -> - stop_repl({node_leave, Node}, State); - -handle_event({nodedown, Node}, State) -> - stop_repl({nodedown, Node}, State); - -handle_event(Event, State) -> - ?LOG_ERROR("unexpected event in dbs handler ~p", [Event]), - {ok, State}. - -handle_call(Request, State) -> - ?LOG_ERROR("unexpected call in dbs handler ~p", [Request]), - {ok, ok, State}. - -handle_info(Info, State) -> - ?LOG_ERROR("unexpected msg in dbs handler ~p", [Info]), - {ok, State}. - -terminate(_Reason, _State) -> - ok. 
- -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -%% -%% internal -%% - -start_repl({Reason, Node}, State) -> - ChildSpec = dbs:childspec(Node), - case supervisor:start_child(dbs, ChildSpec) of - {ok, _} -> - ok; - {error, {already_started, _Child}} -> - ok; - {error, running} -> - ok; - {error, already_present} -> - case supervisor:restart_child(dbs, ChildSpec) of - {ok, _} -> - ok; - {error, running} -> - ok; - {error, Reason} -> - ?LOG_ERROR("dbs repl restart failed ~p", [Reason]) - end; - {error, Reason} -> - ?LOG_ERROR("dbs repl start failed ~p", [Reason]) - end, - {ok, State}. - -stop_repl({Reason, Node}, State) -> - ?LOG_INFO("dbs repl ~p --> ~p terminating (~p)", [node(), Node, Reason]), - supervisor:terminate_child(dbs, Node), - supervisor:delete_child(dbs, Node), - {ok, State}. diff --git a/src/mem3_sup.erl b/src/mem3_sup.erl index 122e68d7..0a9f66d0 100644 --- a/src/mem3_sup.erl +++ b/src/mem3_sup.erl @@ -8,15 +8,14 @@ start_link() -> init(_Args) -> Children = [ child(mem3_server), - child(mem3_event), + child(mem3_events), child(mem3_sync), child(mem3_cache) ], {ok, {{one_for_one,10,1}, Children}}. -child(mem3_event) -> - MFA = {gen_event, start_link, [{local,mem3_event}]}, - {mem3_event, MFA, permanent, 1000, worker, dynamic}; +child(mem3_events) -> + MFA = {gen_event, start_link, [{local, mem3_events}]}, + {mem3_events, MFA, permanent, 1000, worker, dynamic}; child(Child) -> {Child, {Child, start_link, []}, permanent, 1000, worker, [Child]}. - \ No newline at end of file diff --git a/src/mem3_sync_event.erl b/src/mem3_sync_event.erl new file mode 100644 index 00000000..1a360bde --- /dev/null +++ b/src/mem3_sync_event.erl @@ -0,0 +1,74 @@ +-module(mem3_sync_event). + +-behaviour(gen_event). + +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). + +-include("mem3.hrl"). + +init([]) -> + {ok, []}. 
+ +handle_event({node_join, Node}, State) -> + start_repl({node_join, Node}, State); + +handle_event({nodeup, Node}, State) -> + start_repl({nodeup, Node}, State); + +handle_event({node_leave, Node}, State) -> + stop_repl({node_leave, Node}, State); + +handle_event({nodedown, Node}, State) -> + stop_repl({nodedown, Node}, State); + +handle_event(Event, State) -> + ?LOG_ERROR("unexpected event in dbs handler ~p", [Event]), + {ok, State}. + +handle_call(Request, State) -> + ?LOG_ERROR("unexpected call in dbs handler ~p", [Request]), + {ok, ok, State}. + +handle_info(Info, State) -> + ?LOG_ERROR("unexpected msg in dbs handler ~p", [Info]), + {ok, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% +%% internal +%% + +start_repl({Reason, Node}, State) -> + ChildSpec = dbs:childspec(Node), + case supervisor:start_child(dbs, ChildSpec) of + {ok, _} -> + ok; + {error, {already_started, _Child}} -> + ok; + {error, running} -> + ok; + {error, already_present} -> + case supervisor:restart_child(dbs, ChildSpec) of + {ok, _} -> + ok; + {error, running} -> + ok; + {error, Reason} -> + ?LOG_ERROR("dbs repl restart failed ~p", [Reason]) + end; + {error, Reason} -> + ?LOG_ERROR("dbs repl start failed ~p", [Reason]) + end, + {ok, State}. + +stop_repl({Reason, Node}, State) -> + ?LOG_INFO("dbs repl ~p --> ~p terminating (~p)", [node(), Node, Reason]), + supervisor:terminate_child(dbs, Node), + supervisor:delete_child(dbs, Node), + {ok, State}. -- cgit v1.2.3 From ebfbbb43cddaf592687b53caa0ed42cd58b2d36d Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 16:43:49 -0400 Subject: update test_suite --- test/test_suite.erl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/test_suite.erl b/test/test_suite.erl index 255ed5a9..22416c5d 100644 --- a/test/test_suite.erl +++ b/test/test_suite.erl @@ -3,8 +3,7 @@ -include_lib("eunit/include/eunit.hrl"). 
all_test_() -> - [{module, mem_utils_test}, - {module, membership2_test}, - {module, partitions_test}, - {module, replication_test} - ]. + [ + {module, mem3_test}, + {module, partitions_test} + ]. -- cgit v1.2.3 From 4b74111cb63e5f5b250d0e617512e1221a8c509d Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 30 Jun 2010 17:02:41 -0400 Subject: simpler event handler for sync server --- src/mem3_sync_event.erl | 63 +++++++------------------------------------------ 1 file changed, 9 insertions(+), 54 deletions(-) diff --git a/src/mem3_sync_event.erl b/src/mem3_sync_event.erl index 1a360bde..55f3840c 100644 --- a/src/mem3_sync_event.erl +++ b/src/mem3_sync_event.erl @@ -1,37 +1,25 @@ -module(mem3_sync_event). - -behaviour(gen_event). -export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). --include("mem3.hrl"). - -init([]) -> - {ok, []}. - -handle_event({node_join, Node}, State) -> - start_repl({node_join, Node}, State); +init(_) -> + {ok, nil}. -handle_event({nodeup, Node}, State) -> - start_repl({nodeup, Node}, State); +handle_event({Up, Node}, State) when Up == nodeup; Up == node_join -> + mem3_sync:add_node(Node); -handle_event({node_leave, Node}, State) -> - stop_repl({node_leave, Node}, State); +handle_event({Down, Node}, State) when Down == nodedown; Down == node_leave -> + mem3_sync:remove_node(Node); -handle_event({nodedown, Node}, State) -> - stop_repl({nodedown, Node}, State); - -handle_event(Event, State) -> - ?LOG_ERROR("unexpected event in dbs handler ~p", [Event]), +handle_event(_Event, State) -> {ok, State}. -handle_call(Request, State) -> - ?LOG_ERROR("unexpected call in dbs handler ~p", [Request]), +handle_call(_Request, State) -> {ok, ok, State}. -handle_info(Info, State) -> - ?LOG_ERROR("unexpected msg in dbs handler ~p", [Info]), +handle_info(_Info, State) -> {ok, State}. terminate(_Reason, _State) -> @@ -39,36 +27,3 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. 
- -%% -%% internal -%% - -start_repl({Reason, Node}, State) -> - ChildSpec = dbs:childspec(Node), - case supervisor:start_child(dbs, ChildSpec) of - {ok, _} -> - ok; - {error, {already_started, _Child}} -> - ok; - {error, running} -> - ok; - {error, already_present} -> - case supervisor:restart_child(dbs, ChildSpec) of - {ok, _} -> - ok; - {error, running} -> - ok; - {error, Reason} -> - ?LOG_ERROR("dbs repl restart failed ~p", [Reason]) - end; - {error, Reason} -> - ?LOG_ERROR("dbs repl start failed ~p", [Reason]) - end, - {ok, State}. - -stop_repl({Reason, Node}, State) -> - ?LOG_INFO("dbs repl ~p --> ~p terminating (~p)", [node(), Node, Reason]), - supervisor:terminate_child(dbs, Node), - supervisor:delete_child(dbs, Node), - {ok, State}. -- cgit v1.2.3 From 217fcd205cc3c29ceac4d28763ba74e6adecf1f3 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 1 Jul 2010 09:46:04 -0400 Subject: move states/0 to top-level module --- src/mem3.erl | 20 +++++++++++++++++++- src/mem3_server.erl | 18 +----------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 2b8f0188..4f7c6ade 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -1,8 +1,11 @@ -module(mem3). -author('Brad Anderson '). --export([start/0, stop/0, restart/0]). +-export([start/0, stop/0, restart/0, state/0]). +-include("mem3.hrl"). + +-define(SERVER, mem3_server). start() -> application:start(mem3). @@ -13,3 +16,18 @@ stop() -> restart() -> stop(), start(). + +%% @doc Detailed report of cluster-wide membership state. Queries the state +%% on all member nodes and builds a dictionary with unique states as the +%% key and the nodes holding that state as the value. Also reports member +%% nodes which fail to respond and nodes which are connected but are not +%% cluster members. Useful for debugging. +-spec state() -> [{mem_state() | bad_nodes | non_member_nodes, [node()]}]. 
+state() -> + {ok, Nodes} = mem3:nodes(), + AllNodes = erlang:nodes([this, visible]), + {Replies, BadNodes} = gen_server:multi_call(Nodes, ?SERVER, state), + Dict = lists:foldl(fun({Node, {ok,State}}, D) -> + orddict:append(State, Node, D) + end, orddict:new(), Replies), + [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. \ No newline at end of file diff --git a/src/mem3_server.erl b/src/mem3_server.erl index addcb591..0d76344d 100644 --- a/src/mem3_server.erl +++ b/src/mem3_server.erl @@ -20,7 +20,7 @@ %% API -export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/3, clock/0, state/0, states/0, nodes/0, fullnodes/0, +-export([join/3, clock/0, state/0, nodes/0, fullnodes/0, start_gossip/0]). %% for testing more than anything else @@ -76,22 +76,6 @@ clock() -> state() -> gen_server:call(?SERVER, state). - -%% @doc Detailed report of cluster-wide membership state. Queries the state -%% on all member nodes and builds a dictionary with unique states as the -%% key and the nodes holding that state as the value. Also reports member -%% nodes which fail to respond and nodes which are connected but are not -%% cluster members. Useful for debugging. --spec states() -> [{mem_state() | bad_nodes | non_member_nodes, [node()]}]. -states() -> - {ok, Nodes} = mem3:nodes(), - AllNodes = [node()|erlang:nodes()], - {Replies, BadNodes} = gen_server:multi_call(Nodes, ?SERVER, state), - Dict = lists:foldl(fun({Node, {ok,State}}, D) -> - orddict:append(State, Node, D) - end, orddict:new(), Replies), - [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. - -spec start_gossip() -> ok. start_gossip() -> gen_server:call(?SERVER, start_gossip). 
-- cgit v1.2.3 From 68a6934ae52b5876054a525411ef69523b6b9a03 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 1 Jul 2010 11:15:04 -0400 Subject: rewrite mem3_cache to use continuous _changes feed --- src/mem3_cache.erl | 132 ++++++++++++++++++++++++++++------------------------- 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/src/mem3_cache.erl b/src/mem3_cache.erl index 8f5c372a..532a023a 100644 --- a/src/mem3_cache.erl +++ b/src/mem3_cache.erl @@ -5,87 +5,97 @@ -export([start_link/0]). +-record(state, {changes_pid}). + -include("mem3.hrl"). start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). init([]) -> - ets:new(partitions, [bag, protected, named_table, {keypos,#shard.dbname}]), - ets:new(memnodes, [bag, protected, named_table]), - cache_dbs(), - Self = self(), - couch_db_update_notifier:start_link(fun({updated, <<"dbs">>}) -> - Self ! rebuild_dbs_cache; - (_) -> ok end), - {ok, nil}. + ets:new(partitions, [bag, public, named_table, {keypos,#shard.dbname}]), + {Pid, _} = spawn_monitor(fun() -> listen_for_changes(0) end), + {ok, #state{changes_pid = Pid}}. -handle_call(_Msg, _From, State) -> - {reply, ok, State}. +handle_call(_Call, _From, State) -> + {noreply, State}. handle_cast(_Msg, State) -> {noreply, State}. 
-handle_info(rebuild_dbs_cache, State) -> - receive rebuild_dbs_cache -> - handle_info(rebuild_dbs_cache, State) - after 0 -> ok end, - T0 = now(), - ?LOG_INFO("rebuilding dbs DB cache", []), - ets:delete_all_objects(partitions), - ets:delete_all_objects(memnodes), - cache_dbs(), - ?LOG_INFO("rebuild of dbs DB cache complete in ~p ms", - [round(timer:now_diff(now(),T0)/1000)]), +handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> + ?LOG_INFO("~p changes listener died ~p", [?MODULE, Reason]), + Seq = case Reason of {seq, EndSeq} -> EndSeq; _ -> 0 end, + timer:send_after(5000, {start_listener, Seq}), + {noreply, State}; +handle_info({start_listener, Seq}, State) -> + {NewPid, _} = spawn_monitor(fun() -> listen_for_changes(Seq) end), + {noreply, State#state{changes_pid=NewPid}}; +handle_info(_Msg, State) -> {noreply, State}. -terminate(_Reason, _State) -> +terminate(_Reason, #state{changes_pid=Pid}) -> + exit(Pid, kill), ok. code_change(_OldVsn, State, _Extra) -> {ok, State}. -cache_dbs() -> - try couch_db:open(<<"dbs">>, []) of +%% internal functions + +listen_for_changes(Since) -> + DbName = ?l2b(couch_config:get("mem3", "db", "dbs")), + {ok, Db} = ensure_exists(DbName), + Args = #changes_args{ + feed = "continuous", + since = Since, + heartbeat = true, + include_docs = true + }, + ChangesFun = couch_changes:handle_changes(Args, nil, Db), + ChangesFun(fun changes_callback/2). 
+ +ensure_exists(DbName) -> + Options = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}], + case couch_db:open(DbName, Options) of {ok, Db} -> - Bt = Db#db.id_tree, - FoldFun = fun(#full_doc_info{id=Id, deleted=false} = FullDocInfo, _, _) -> - {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []), - {Props} = couch_doc:to_json_obj(Doc, []), - cache_map(Id, Props), - cache_nodes(Id, Props), - {ok, true}; - (_, _, _) -> - {ok, nil} - end, - couch_btree:foldl(Bt, FoldFun, nil), - couch_db:close(Db) - catch exit:{noproc,{gen_server,call,[couch_server|_]}} -> - timer:sleep(1000), - exit(couch_server_is_dead) + {ok, Db}; + _ -> + couch_server:create(DbName, Options) end. -cache_map(Id, Props) -> - Map = couch_util:get_value(<<"map">>, Props, []), - lists:foreach(fun({[{<<"node">>,Node},{<<"b">>,Beg},{<<"e">>,End}]}) -> - Part = #shard{ - name = partitions:shard_name(Beg, Id), - dbname = Id, - node = to_atom(Node), - range = [Beg,End] - }, - ets:insert(partitions, Part) - end, Map). - -cache_nodes(Id, Props) -> - Nodes = couch_util:get_value(<<"nodes">>, Props, []), - lists:foreach(fun({[{<<"order">>,Order},{<<"node">>, Node},{<<"options">>,Opts}]}) -> - ets:insert(memnodes, {Id, {Order, to_atom(Node), Opts}}) - end, Nodes). +changes_callback(start, _) -> + {ok, nil}; +changes_callback({stop, EndSeq}, _) -> + exit({seq, EndSeq}); +changes_callback({change, {Change}, _}, _) -> + DbName = couch_util:get_value(id, Change), + case couch_util:get_value(deleted, Change, false) of + true -> + ets:delete(partitions, DbName); + false -> + case couch_util:get_value(doc, Change) of + {error, Reason} -> + ?LOG_ERROR("missing partition table for ~s: ~p", [DbName, Reason]); + {Doc} -> + ets:delete(partitions, DbName), + cache_partition_table(DbName, Doc) + end + end, + {ok, couch_util:get_value(seq, Change)}; +changes_callback(timeout, _) -> + {ok, nil}. 
+ +cache_partition_table(DbName, Doc) -> + ets:insert(partitions, lists:map(fun({Map}) -> + Begin = couch_util:get_value(<<"b">>, Map), + #shard{ + name = mem3_util:shard_name(Begin, DbName), + dbname = DbName, + node = to_atom(couch_util:get_value(<<"node">>, Map)), + range = [Begin, couch_util:get_value(<<"e">>, Map)] + } + end, couch_util:get_value(<<"map">>, Doc, {[]}))). to_atom(Node) when is_binary(Node) -> - list_to_atom(binary_to_list(Node)); -to_atom(Node) when is_atom(Node) -> - Node. - -%{ok, ets:insert(dbs_cache, {Id, Props})}; + list_to_atom(binary_to_list(Node)). -- cgit v1.2.3 From 5f7ff55f41acd820b5aea51fbff6dc3b61a0222c Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 1 Jul 2010 11:44:51 -0400 Subject: converter from Doc to #shard{} is more general --- src/mem3_cache.erl | 16 +--------------- src/mem3_util.erl | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/mem3_cache.erl b/src/mem3_cache.erl index 532a023a..b17db1a2 100644 --- a/src/mem3_cache.erl +++ b/src/mem3_cache.erl @@ -79,23 +79,9 @@ changes_callback({change, {Change}, _}, _) -> ?LOG_ERROR("missing partition table for ~s: ~p", [DbName, Reason]); {Doc} -> ets:delete(partitions, DbName), - cache_partition_table(DbName, Doc) + ets:insert(partitions, mem3_util:build_shards(DbName, Doc)) end end, {ok, couch_util:get_value(seq, Change)}; changes_callback(timeout, _) -> {ok, nil}. - -cache_partition_table(DbName, Doc) -> - ets:insert(partitions, lists:map(fun({Map}) -> - Begin = couch_util:get_value(<<"b">>, Map), - #shard{ - name = mem3_util:shard_name(Begin, DbName), - dbname = DbName, - node = to_atom(couch_util:get_value(<<"node">>, Map)), - range = [Begin, couch_util:get_value(<<"e">>, Map)] - } - end, couch_util:get_value(<<"map">>, Doc, {[]}))). - -to_atom(Node) when is_binary(Node) -> - list_to_atom(binary_to_list(Node)). 
diff --git a/src/mem3_util.erl b/src/mem3_util.erl index f6c94748..476742b7 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -4,7 +4,7 @@ %% API -export([fullmap/2, fullmap/3, hash/1, install_fullmap/4]). -export([for_key/2, all_parts/1]). --export([shard_name/2]). +-export([shard_name/2, build_shards/2]). -define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space @@ -185,3 +185,17 @@ shard_name(Part, DbName) when is_list(DbName) -> shard_name(Part, DbName) -> PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. + +build_shards(DbName, DocProps) -> + lists:map(fun({Map}) -> + Begin = couch_util:get_value(<<"b">>, Map), + #shard{ + name = mem3_util:shard_name(Begin, DbName), + dbname = DbName, + node = to_atom(couch_util:get_value(<<"node">>, Map)), + range = [Begin, couch_util:get_value(<<"e">>, Map)] + } + end, couch_util:get_value(<<"map">>, DocProps, {[]})). + +to_atom(Node) when is_binary(Node) -> + list_to_atom(binary_to_list(Node)). -- cgit v1.2.3 From 570649ec3d6c66be8d7900b655cdc1d31ca8fe27 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Fri, 2 Jul 2010 03:12:23 -0400 Subject: use name_shard/1 instead of obsolete shard_name/2 --- src/mem3_nodes.erl | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 src/mem3_nodes.erl diff --git a/src/mem3_nodes.erl b/src/mem3_nodes.erl new file mode 100644 index 00000000..9b3f32f3 --- /dev/null +++ b/src/mem3_nodes.erl @@ -0,0 +1,117 @@ +-module(mem3_nodes). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0, get_nodelist/0]). + +-include("mem3.hrl"). + +-record(state, {changes_pid, update_seq, nodes}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +get_nodelist() -> + gen_server:call(?MODULE, get_nodelist). 
+ +init([]) -> + {Nodes, UpdateSeq} = initialize_nodelist(), + {Pid, _} = spawn_monitor(fun() -> listen_for_changes(UpdateSeq) end), + {ok, #state{changes_pid = Pid, update_seq = UpdateSeq, nodes = Nodes}}. + +handle_call(get_nodelist, _From, State) -> + {reply, State#state.nodes, State}; +handle_call({add_node, Node}, _From, #state{nodes=Nodes} = State) -> + gen_event:notify(mem3_events, {add_node, Node}), + {reply, ok, State#state{nodes = lists:umerge([Node], Nodes)}}; +handle_call({remove_node, Node}, _From, #state{nodes=Nodes} = State) -> + gen_event:notify(mem3_events, {remove_node, Node}), + {reply, ok, State#state{nodes = lists:delete(Node, Nodes)}}; +handle_call(_Call, _From, State) -> + {noreply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> + ?LOG_INFO("~p changes listener died ~p", [?MODULE, Reason]), + StartSeq = State#state.update_seq, + Seq = case Reason of {seq, EndSeq} -> EndSeq; _ -> StartSeq end, + timer:send_after(5000, start_listener), + {noreply, State#state{update_seq = Seq}}; +handle_info(start_listener, #state{update_seq = Seq} = State) -> + {NewPid, _} = spawn_monitor(fun() -> listen_for_changes(Seq) end), + {noreply, State#state{changes_pid=NewPid}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% internal functions + +initialize_nodelist() -> + DbName = couch_config:get("mem3", "nodedb", "nodes"), + {ok, Db} = ensure_exists(DbName), + {ok, _, Nodes0} = couch_btree:fold(Db#db.id_tree, fun first_fold/3, [], []), + % add self if not already present + case lists:member(node(), Nodes0) of + true -> + Nodes = Nodes0; + false -> + Doc = #doc{id = couch_util:to_binary(node())}, + {ok, _} = couch_db:update_doc(Db, Doc, []), + Nodes = [node() | Nodes0] + end, + couch_db:close(Db), + {lists:sort(Nodes), Db#db.update_seq}. 
+ +first_fold(#full_doc_info{id = <<"_design/", _/binary>>}, _, Acc) -> + {ok, Acc}; +first_fold(#full_doc_info{id=Id}, _, Acc) -> + {ok, [mem3_util:to_atom(Id) | Acc]}. + +listen_for_changes(Since) -> + DbName = ?l2b(couch_config:get("mem3", "nodedb", "nodes")), + {ok, Db} = ensure_exists(DbName), + Args = #changes_args{ + feed = "continuous", + since = Since, + heartbeat = true, + include_docs = true + }, + ChangesFun = couch_changes:handle_changes(Args, nil, Db), + ChangesFun(fun changes_callback/2). + +ensure_exists(DbName) when is_list(DbName) -> + ensure_exists(list_to_binary(DbName)); +ensure_exists(DbName) -> + Options = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}], + case couch_db:open(DbName, Options) of + {ok, Db} -> + {ok, Db}; + _ -> + couch_server:create(DbName, Options) + end. + +changes_callback(start, _) -> + {ok, nil}; +changes_callback({stop, EndSeq}, _) -> + exit({seq, EndSeq}); +changes_callback({change, {Change}, _}, _) -> + Node = couch_util:get_value(<<"id">>, Change), + case Node of <<"_design/", _/binary>> -> ok; _ -> + case couch_util:get_value(deleted, Change, false) of + false -> + gen_server:call(?MODULE, {add_node, mem3_util:to_atom(Node)}); + true -> + gen_server:call(?MODULE, {remove_node, mem3_util:to_atom(Node)}) + end + end, + {ok, couch_util:get_value(<<"seq">>, Change)}; +changes_callback(timeout, _) -> + {ok, nil}. 
-- cgit v1.2.3 From 40c669d1864c4c9eb788240dd4edc533d8a352f2 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Fri, 2 Jul 2010 03:12:45 -0400 Subject: mega refactoring of mem3 --- ebin/mem3.app | 7 +- include/mem3.hrl | 8 - src/mem3.erl | 56 ++++- src/mem3_app.erl | 2 - src/mem3_cache.erl | 4 +- src/mem3_server.erl | 552 ------------------------------------------------ src/mem3_sup.erl | 4 +- src/mem3_sync.erl | 242 +++++++++++++++++---- src/mem3_sync_event.erl | 25 ++- src/mem3_util.erl | 253 ++++++++-------------- src/mem3_vclock.erl | 109 ---------- 11 files changed, 368 insertions(+), 894 deletions(-) delete mode 100644 src/mem3_server.erl delete mode 100644 src/mem3_vclock.erl diff --git a/ebin/mem3.app b/ebin/mem3.app index d0caaeec..05d50748 100644 --- a/ebin/mem3.app +++ b/ebin/mem3.app @@ -7,17 +7,16 @@ mem3_app, mem3_cache, mem3_httpd, - mem3_server, + mem3_nodes, mem3_sup, mem3_sync, mem3_sync_event, - mem3_util, - mem3_vclock + mem3_util ]}, {registered, [ mem3_cache, mem3_events, - mem3_server, + mem3_nodes, mem3_sync, mem3_sup ]}, diff --git a/include/mem3.hrl b/include/mem3.hrl index a1e6f822..533056f9 100644 --- a/include/mem3.hrl +++ b/include/mem3.hrl @@ -10,13 +10,6 @@ -include_lib("eunit/include/eunit.hrl"). -%% version 3 of membership state --record(mem, {header=3, - nodes=[], - clock=[], - args - }). - %% partition record -record(shard, {name, node, dbname, range, ref}). @@ -28,7 +21,6 @@ -type mem_node_list() :: [mem_node()]. -type arg_options() :: {test, boolean()}. -type args() :: [] | [arg_options()]. --type mem_state() :: #mem{}. -type test() :: undefined | node(). -type epoch() :: float(). -type clock() :: {node(), epoch()}. diff --git a/src/mem3.erl b/src/mem3.erl index 4f7c6ade..e6ee5bf8 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -1,7 +1,7 @@ -module(mem3). --author('Brad Anderson '). --export([start/0, stop/0, restart/0, state/0]). +-export([start/0, stop/0, restart/0, state/0, nodes/0, shards/1, shards/2, + choose_shards/2]). 
-include("mem3.hrl"). @@ -22,7 +22,7 @@ restart() -> %% key and the nodes holding that state as the value. Also reports member %% nodes which fail to respond and nodes which are connected but are not %% cluster members. Useful for debugging. --spec state() -> [{mem_state() | bad_nodes | non_member_nodes, [node()]}]. +-spec state() -> [{any | bad_nodes | non_member_nodes, [node()]}]. state() -> {ok, Nodes} = mem3:nodes(), AllNodes = erlang:nodes([this, visible]), @@ -30,4 +30,52 @@ state() -> Dict = lists:foldl(fun({Node, {ok,State}}, D) -> orddict:append(State, Node, D) end, orddict:new(), Replies), - [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. \ No newline at end of file + [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. + +-spec nodes() -> [node()]. +nodes() -> + mem3_nodes:get_nodelist(). + +-spec shards(DbName::binary()) -> [#shard{}]. +shards(DbName) -> + case ets:lookup(partitions, DbName) of + [] -> + % TODO fall back to checking dbs.couch directly + erlang:error(database_does_not_exist); + Else -> + Else + end. + +-spec shards(DbName::binary(), DocId::binary()) -> [#shard{}]. +shards(DbName, DocId) -> + HashKey = mem3_util:hash(DocId), + Head = #shard{ + name = '_', + node = '_', + dbname = DbName, + range = ['$1','$2'], + ref = '_' + }, + % TODO these conditions assume A < B, which we don't require + Conditions = [{'<', '$1', HashKey}, {'=<', HashKey, '$2'}], + case ets:select(partitions, [{Head, Conditions, ['$_']}]) of + [] -> + % TODO fall back to checking dbs.couch directly + erlang:error(database_does_not_exist); + Shards -> + Shards + end. 
+ +choose_shards(DbName, Options) -> + try shards(DbName) + catch error:database_does_not_exist -> + Nodes = mem3:nodes(), + NodeCount = length(Nodes), + N = mem3_util:n_val(couch_util:get_value(n, Options), NodeCount), + Q = mem3_util:to_integer(couch_util:get_value(q, Options, + couch_config:get("cluster", "q", "8"))), + % rotate to a random entry in the nodelist for even distribution + {A, B} = lists:split(crypto:rand_uniform(1,length(Nodes)+1), Nodes), + RotatedNodes = B ++ A, + mem3_util:create_partition_map(DbName, N, Q, RotatedNodes) + end. diff --git a/src/mem3_app.erl b/src/mem3_app.erl index 70bf1cf9..88cd1ea1 100644 --- a/src/mem3_app.erl +++ b/src/mem3_app.erl @@ -3,8 +3,6 @@ -export([start/2, stop/1]). start(_Type, []) -> - DbName = couch_config:get("mem3", "db", "dbs"), - couch_server:create(list_to_binary(DbName), []), mem3_sup:start_link(). stop([]) -> diff --git a/src/mem3_cache.erl b/src/mem3_cache.erl index b17db1a2..1d1bbe9b 100644 --- a/src/mem3_cache.erl +++ b/src/mem3_cache.erl @@ -69,7 +69,7 @@ changes_callback(start, _) -> changes_callback({stop, EndSeq}, _) -> exit({seq, EndSeq}); changes_callback({change, {Change}, _}, _) -> - DbName = couch_util:get_value(id, Change), + DbName = couch_util:get_value(<<"id">>, Change), case couch_util:get_value(deleted, Change, false) of true -> ets:delete(partitions, DbName); @@ -82,6 +82,6 @@ changes_callback({change, {Change}, _}, _) -> ets:insert(partitions, mem3_util:build_shards(DbName, Doc)) end end, - {ok, couch_util:get_value(seq, Change)}; + {ok, couch_util:get_value(<<"seq">>, Change)}; changes_callback(timeout, _) -> {ok, nil}. 
diff --git a/src/mem3_server.erl b/src/mem3_server.erl deleted file mode 100644 index 0d76344d..00000000 --- a/src/mem3_server.erl +++ /dev/null @@ -1,552 +0,0 @@ -%%% membership module -%%% -%%% State of the gen_server is a #mem record -%%% -%%% Nodes and Gossip are the same thing, and are a list of three-tuples like: -%%% -%%% [ {Pos,NodeName,Options} | _ ] -%%% -%%% Position is 1-based incrementing in order of node joining -%%% -%%% Options is a proplist, with [{hints, [Part1|_]}] denoting that the node -%%% is responsible for the extra partitions too. -%%% -%%% TODO: dialyzer type specs -%%% --module(mem3_server). --author('brad@cloudant.com'). - --behaviour(gen_server). - -%% API --export([start_link/0, start_link/1, stop/0, stop/1, reset/0]). --export([join/3, clock/0, state/0, nodes/0, fullnodes/0, - start_gossip/0]). - -%% for testing more than anything else --export([merge_nodes/2, next_up_node/1, next_up_node/3]). - -%% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). - -%% includes --include("mem3.hrl"). - --define(SERVER, membership). --define(STATE_FILE_PREFIX, "membership"). - - -%%==================================================================== -%% API -%%==================================================================== - --spec start_link() -> {ok, pid()}. -start_link() -> - start_link([]). - - --spec start_link(args()) -> {ok, pid()}. -start_link(Args) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, Args, []). - - --spec stop() -> ok. -stop() -> - stop(?MODULE). - - --spec stop(atom()) -> ok. -stop(Server) -> - gen_server:cast(Server, stop). - - --spec join(join_type(), mem_node_list() | {node(), options()}, node() | nil) -> - ok. -join(JoinType, Payload, PingNode) -> - gen_server:call(?SERVER, {join, JoinType, Payload, PingNode}). - - --spec clock() -> vector_clock(). -clock() -> - gen_server:call(?SERVER, clock). - - --spec state() -> mem_state(). 
-state() -> - gen_server:call(?SERVER, state). - --spec start_gossip() -> ok. -start_gossip() -> - gen_server:call(?SERVER, start_gossip). - - --spec reset() -> ok | not_reset. -reset() -> - gen_server:call(?SERVER, reset). - - -%% @doc get the list of cluster nodes (according to membership module) -%% This may differ from erlang:nodes() -%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) --spec nodes() -> {ok, [node()]}. -nodes() -> - gen_server:call(?SERVER, nodes). - - -%% @doc get the list of cluster nodes (according to membership module) -%% This may differ from erlang:nodes() -%% Guaranteed to be in order of State's node list (1st elem in 3-tuple) --spec fullnodes() -> {ok, [mem_node()]}. -fullnodes() -> - gen_server:call(?SERVER, fullnodes). - - -%%==================================================================== -%% gen_server callbacks -%%==================================================================== - -%% start up membership server --spec init(args()) -> {ok, mem_state()}. -init(Args) -> - process_flag(trap_exit,true), - Test = get_test(Args), - OldState = read_latest_state_file(Test), - showroom_log:message(info, "membership: membership server starting...", []), - net_kernel:monitor_nodes(true), - State = handle_init(Test, OldState), - {ok, State#mem{args=Args}}. 
- - -%% new node(s) joining to this node -handle_call({join, JoinType, ExtNodes, PingNode}, _From, State) -> - try - case handle_join(JoinType, ExtNodes, PingNode, State) of - {ok, NewState} -> {reply, ok, NewState}; - Other -> {reply, Other, State} - end - catch _:Error -> - showroom_log:message(error, "~p", [Error]), - {reply, Error, State} - end; - -%% clock -handle_call(clock, _From, #mem{clock=Clock} = State) -> - {reply, {ok, Clock}, State}; - -%% state -handle_call(state, _From, State) -> - {reply, {ok, State}, State}; - -%% reset - but only if we're in test mode -handle_call(reset, _From, #mem{args=Args} = State) -> - Test = get_test(Args), - case Test of - undefined -> {reply, not_reset, State}; - _ -> {reply, ok, int_reset(Test, State)} - end; - -%% nodes -handle_call(nodes, _From, #mem{nodes=Nodes} = State) -> - {_,NodeList,_} = lists:unzip3(lists:keysort(1, Nodes)), - {reply, {ok, NodeList}, State}; - -%% fullnodes -handle_call(fullnodes, _From, #mem{nodes=Nodes} = State) -> - {reply, {ok, Nodes}, State}; - -%% gossip -handle_call({gossip, RemoteState}, {Pid,_Tag} = From, LocalState) -> - showroom_log:message(info, "membership: received gossip from ~p", - [erlang:node(Pid)]), - handle_gossip(From, RemoteState, LocalState); - -% start_gossip -handle_call(start_gossip, _From, State) -> - NewState = gossip(State), - {reply, ok, NewState}; - -%% ignored call -handle_call(Msg, _From, State) -> - showroom_log:message(info, "membership: ignored call: ~p", [Msg]), - {reply, ignored, State}. - - -%% gossip -handle_cast({gossip, RemoteState}, LocalState) -> - State = case handle_gossip(none, RemoteState, LocalState) of - {reply, ok, NewState} -> NewState; - {reply, {new_state, NewState}, _} -> NewState; - {noreply, NewState} -> NewState - end, - {noreply, State}; - -%% stop -handle_cast(stop, State) -> - {stop, normal, State}; - -%% ignored cast -handle_cast(Msg, State) -> - showroom_log:message(info, "membership: ignored cast: ~p", [Msg]), - {noreply, State}. 
- - -%% @doc handle nodedown messages because we have -%% net_kernel:monitor_nodes(true) -handle_info({nodedown, Node}, State) -> - showroom_log:message(alert, "membership: nodedown ~p", [Node]), - notify(nodedown, [Node], State), - {noreply, State}; - -%% @doc handle nodeup messages because we have -%% net_kernel:monitor_nodes(true) -handle_info({nodeup, Node}, State) -> - showroom_log:message(alert, "membership: nodeup ~p", [Node]), - notify(nodeup, [Node], State), - gossip_cast(State), - {noreply, State}; - -%% ignored info -handle_info(Info, State) -> - showroom_log:message(info, "membership: ignored info: ~p", [Info]), - {noreply, State}. - - -% terminate -terminate(_Reason, _State) -> - ok. - - -% ignored code change -code_change(OldVsn, State, _Extra) -> - io:format("Unknown Old Version~nOldVsn: ~p~nState : ~p~n", [OldVsn, State]), - {ok, State}. - - -%%-------------------------------------------------------------------- -%%% Internal functions -%%-------------------------------------------------------------------- - -%% @doc if Args has config use it, otherwise call configuration module -%% most times Args will have config during testing runs -%get_config(Args) -> -% case proplists:get_value(config, Args) of -% undefined -> configuration:get_config(); -% Any -> Any -% end. - - -get_test(Args) -> - proplists:get_value(test, Args). - - -%% @doc handle_init starts a node -%% Most of the time, this puts the node in a single-node cluster setup, -%% But, we could be automatically rejoining a cluster after some downtime. -%% See handle_join for initing, joining, leaving a cluster, or replacing a -%% node. -%% @end -handle_init(Test, nil) -> - int_reset(Test); - -handle_init(_Test, #mem{nodes=Nodes, args=Args} = OldState) -> - % there's an old state, let's try to rejoin automatically - % but only if we can compare our old state to other available - % nodes and get a match... 
otherwise get a human involved - {_, NodeList, _} = lists:unzip3(Nodes), - ping_all_yall(NodeList), - {RemoteStates, _BadNodes} = get_remote_states(NodeList), - Test = get_test(Args), - case compare_state_with_rest(OldState, RemoteStates) of - match -> - showroom_log:message(info, "membership: rejoined successfully", []), - OldState; - Other -> - showroom_log:message(error, "membership: rejoin failed: ~p", [Other]), - int_reset(Test) - end. - - -%% @doc handle join activities, return {ok,NewState} --spec handle_join(join_type(), [mem_node()], ping_node(), mem_state()) -> - {ok, mem_state()}. -% init -handle_join(init, ExtNodes, nil, State) -> - {_,Nodes,_} = lists:unzip3(ExtNodes), - ping_all_yall(Nodes), - int_join(ExtNodes, State); -% join -handle_join(join, ExtNodes, PingNode, #mem{args=Args} = State) -> - NewState = case get_test(Args) of - undefined -> get_pingnode_state(PingNode); - _ -> State % testing, so meh - end, - % now use this info to join the ring - int_join(ExtNodes, NewState); -% replace -handle_join(replace, OldNode, PingNode, State) when is_atom(OldNode) -> - handle_join(replace, {OldNode, []}, PingNode, State); -handle_join(replace, [OldNode | _], PingNode, State) -> - handle_join(replace, {OldNode, []}, PingNode, State); -handle_join(replace, {OldNode, NewOpts}, PingNode, State) -> - OldState = #mem{nodes=OldNodes} = get_pingnode_state(PingNode), - {Order, OldNode, _OldOpts} = lists:keyfind(OldNode, 2, OldNodes), - NewNodes = lists:keyreplace(OldNode, 2, OldNodes, {Order, node(), NewOpts}), - notify(node_leave, [OldNode], State), - int_join([], OldState#mem{nodes=NewNodes}); -% leave -handle_join(leave, [OldNode | _], _PingNode, State) -> - % TODO implement me - notify(node_leave, [OldNode], State), - ok; - -handle_join(JoinType, _, PingNode, _) -> - showroom_log:message(info, "membership: unknown join type: ~p " - "for ping node: ~p", [JoinType, PingNode]), - {error, unknown_join_type}. 
- -%% @doc common operations for all join types -int_join(ExtNodes, #mem{nodes=Nodes, clock=Clock} = State) -> - NewNodes = lists:foldl(fun({Pos, N, _Options}=New, AccIn) -> - check_pos(Pos, N, Nodes), - notify(node_join, [N], State), - [New|AccIn] - end, Nodes, ExtNodes), - NewNodes1 = lists:sort(NewNodes), - NewClock = mem3_vclock:increment(node(), Clock), - NewState = State#mem{nodes=NewNodes1, clock=NewClock}, - install_new_state(NewState), - {ok, NewState}. - - -install_new_state(#mem{args=Args} = State) -> - Test = get_test(Args), - save_state_file(Test, State), - gossip(call, Test, State). - - -get_pingnode_state(PingNode) -> - {ok, RemoteState} = gen_server:call({?SERVER, PingNode}, state), - RemoteState. - - -%% @doc handle the gossip messages -%% We're not using mem3_vclock:resolve b/c we need custom merge strategy -handle_gossip(From, RemoteState=#mem{clock=RemoteClock}, - LocalState=#mem{clock=LocalClock}) -> - case mem3_vclock:compare(RemoteClock, LocalClock) of - equal -> - {reply, ok, LocalState}; - less -> - % remote node needs updating - {reply, {new_state, LocalState}, LocalState}; - greater when From == none-> - {noreply, install_new_state(RemoteState)}; - greater -> - % local node needs updating - gen_server:reply(From, ok), % reply to sender first - {noreply, install_new_state(RemoteState)}; - concurrent -> - % ick, so let's resolve and merge states - showroom_log:message(info, - "membership: Concurrent Clocks~n" - "RemoteState : ~p~nLocalState : ~p~n" - , [RemoteState, LocalState]), - MergedState = merge_states(RemoteState, LocalState), - if From =/= none -> - % reply to sender - gen_server:reply(From, {new_state, MergedState}) - end, - {noreply, install_new_state(MergedState)} - end. 
- - -merge_states(#mem{clock=RemoteClock, nodes=RemoteNodes} = _RemoteState, - #mem{clock=LocalClock, nodes=LocalNodes} = LocalState) -> - MergedClock = mem3_vclock:merge(RemoteClock, LocalClock), - MergedNodes = merge_nodes(RemoteNodes, LocalNodes), - LocalState#mem{clock=MergedClock, nodes=MergedNodes}. - - -%% this will give one of the lists back, deterministically -merge_nodes(Remote, Local) -> - % get rid of the initial 0 node if it's still there, and sort - Remote1 = lists:usort(lists:keydelete(0,1,Remote)), - Local1 = lists:usort(lists:keydelete(0,1,Local)), - % handle empty lists as well as other cases - case {Remote1, Local1} of - {[], L} -> L; - {R, []} -> R; - _ -> erlang:min(Remote1, Local1) - end. - - -gossip(#mem{args=Args} = NewState) -> - Test = get_test(Args), - gossip(call, Test, NewState). - - -gossip_cast(#mem{nodes=[]}) -> ok; -gossip_cast(#mem{args=Args} = NewState) -> - Test = get_test(Args), - gossip(cast, Test, NewState). - - --spec gossip(gossip_fun(), test(), mem_state()) -> mem_state(). -gossip(_, _, #mem{nodes=[]}) -> ok; -gossip(Fun, undefined, #mem{nodes=StateNodes} = State) -> - {_, Nodes, _} = lists:unzip3(StateNodes), - case next_up_node(Nodes) of - no_gossip_targets_available -> - State; % skip gossip, I'm the only node - TargetNode -> - showroom_log:message(info, "membership: firing gossip from ~p to ~p", - [node(), TargetNode]), - case gen_server:Fun({?SERVER, TargetNode}, {gossip, State}) of - ok -> State; - {new_state, NewState} -> NewState; - Error -> throw({unknown_gossip_response, Error}) - end - end; - -gossip(_,_,_) -> - % testing, so don't gossip - ok. - - -next_up_node(Nodes) -> - next_up_node(node(), Nodes, up_nodes()). - - -next_up_node(Node, Nodes, UpNodes) -> - {A, [Node|B]} = lists:splitwith(fun(N) -> N /= Node end, Nodes), - List = lists:append(B, A), % be sure to eliminate Node - DownNodes = Nodes -- UpNodes, - case List -- DownNodes of - [Target|_] -> Target; - [] -> no_gossip_targets_available - end. 
- - -up_nodes() -> - % TODO: implement cache (fb 9704 & 9449) - erlang:nodes(). - - -%% @doc find the latest state file on disk -find_latest_state_filename() -> - Dir = couch_config:get("couchdb", "database_dir"), - case file:list_dir(Dir) of - {ok, Filenames} -> - Timestamps = [list_to_integer(TS) || {?STATE_FILE_PREFIX, TS} <- - [list_to_tuple(string:tokens(FN, ".")) || FN <- Filenames]], - SortedTimestamps = lists:reverse(lists:sort(Timestamps)), - case SortedTimestamps of - [Latest | _] -> - {ok, Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ - integer_to_list(Latest)}; - _ -> - throw({error, mem_state_file_not_found}) - end; - {error, Reason} -> - throw({error, Reason}) - end. - - -%% (Test, Config) -read_latest_state_file(undefined) -> - try - {ok, File} = find_latest_state_filename(), - case file:consult(File) of - {ok, [#mem{}=State]} -> State; - _Else -> - throw({error, bad_mem_state_file}) - end - catch _:Error -> - showroom_log:message(info, "membership: ~p", [Error]), - nil - end; -read_latest_state_file(_) -> - nil. - - -%% @doc save the state file to disk, with current timestamp. -%% thx to riak_ring_manager:do_write_ringfile/1 --spec save_state_file(test(), mem_state()) -> ok. -save_state_file(undefined, State) -> - Dir = couch_config:get("couchdb", "database_dir"), - {{Year, Month, Day},{Hour, Minute, Second}} = calendar:universal_time(), - TS = io_lib:format("~B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B", - [Year, Month, Day, Hour, Minute, Second]), - FN = Dir ++ "/" ++ ?STATE_FILE_PREFIX ++ "." ++ TS, - ok = filelib:ensure_dir(FN), - {ok, File} = file:open(FN, [binary, write]), - io:format(File, "~w.~n", [State]), - file:close(File); - -save_state_file(_,_) -> ok. 
% don't save if testing - - -check_pos(Pos, Node, Nodes) -> - Found = lists:keyfind(Pos, 1, Nodes), - case Found of - false -> ok; - _ -> - {_,OldNode,_} = Found, - if - OldNode =:= Node -> - Msg = "node_exists_at_position_" ++ integer_to_list(Pos), - throw({error, list_to_binary(Msg)}); - true -> - Msg = "position_exists_" ++ integer_to_list(Pos), - throw({error, list_to_binary(Msg)}) - end - end. - - -int_reset(Test) -> - int_reset(Test, #mem{}). - - -int_reset(_Test, State) -> - State#mem{nodes=[], clock=[]}. - - -ping_all_yall(Nodes) -> - lists:foreach(fun(Node) -> - net_adm:ping(Node) - end, Nodes), - timer:sleep(500). % sigh. - - -get_remote_states(NodeList) -> - NodeList1 = lists:delete(node(), NodeList), - {States1, BadNodes} = rpc:multicall(NodeList1, mem3, state, [], 5000), - {_Status, States2} = lists:unzip(States1), - NodeList2 = NodeList1 -- BadNodes, - {lists:zip(NodeList2,States2), BadNodes}. - - -%% @doc compare state with states based on vector clock -%% return match | {bad_state_match, Node, NodesThatDontMatch} -compare_state_with_rest(#mem{clock=Clock} = _State, States) -> - Results = lists:map(fun({Node, #mem{clock=Clock1}}) -> - {mem3_vclock:equals(Clock, Clock1), Node} - end, States), - BadResults = lists:foldl(fun({true, _N}, AccIn) -> AccIn; - ({false, N}, AccIn) -> [N | AccIn] - end, [], Results), - if - length(BadResults) == 0 -> match; - true -> {bad_state_match, node(), BadResults} - end. - -notify(Type, Nodes, #mem{nodes=MemNodesList} = _State) -> - {_,MemNodes,_} = lists:unzip3(lists:keysort(1, MemNodesList)), - lists:foreach(fun(Node) -> - case lists:member(Node, MemNodes) orelse Type == nodedown of - true -> - gen_event:notify(membership_events, {Type, Node}); - _ -> ok % node not in cluster - end - end, Nodes). 
diff --git a/src/mem3_sup.erl b/src/mem3_sup.erl index 0a9f66d0..353216d4 100644 --- a/src/mem3_sup.erl +++ b/src/mem3_sup.erl @@ -7,10 +7,10 @@ start_link() -> init(_Args) -> Children = [ - child(mem3_server), child(mem3_events), child(mem3_sync), - child(mem3_cache) + child(mem3_cache), + child(mem3_nodes) ], {ok, {{one_for_one,10,1}, Children}}. diff --git a/src/mem3_sync.erl b/src/mem3_sync.erl index d50514d9..0f402834 100644 --- a/src/mem3_sync.erl +++ b/src/mem3_sync.erl @@ -1,46 +1,214 @@ -module(mem3_sync). --behaviour(supervisor). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). --export([start_link/0, init/1, childspec/1, sup_upgrade_notify/2]). +-export([start_link/0, get_active/0, get_queue/0, push/2, remove_node/1]). --include("mem3.hrl"). +-include_lib("../../couch/src/couch_db.hrl"). + +-record(state, { + active = [], + count = 0, + limit, + dict = dict:new(), + waiting = [], + update_notifier +}). start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +get_active() -> + gen_server:call(?MODULE, get_active). + +get_queue() -> + gen_server:call(?MODULE, get_queue). + +push(Db, Node) -> + gen_server:cast(?MODULE, {push, Db, Node}). + +remove_node(Node) -> + gen_server:cast(?MODULE, {remove_node, Node}). init([]) -> - {ok, MemNodes} = mem3:nodes(), - LiveNodes = nodes(), - ChildSpecs = [childspec(N) || N <- MemNodes, lists:member(N, LiveNodes)], - gen_event:add_handler(membership_events, dbs_event, []), - {ok, {{one_for_one, 10, 8}, ChildSpecs}}. 
- -childspec(Node) -> - ?LOG_INFO("dbs repl ~p --> ~p starting", [node(), Node]), + process_flag(trap_exit, true), + Concurrency = couch_config:get("mem3", "sync_concurrency", "10"), + gen_event:add_handler(mem3_events, mem3_sync_event, []), + {ok, Pid} = start_update_notifier(), + spawn(fun initial_sync/0), + {ok, #state{limit = list_to_integer(Concurrency), update_notifier=Pid}}. + +handle_call(get_active, _From, State) -> + {reply, State#state.active, State}; + +handle_call(get_queue, _From, State) -> + {reply, State#state.waiting, State}. + +handle_cast({push, DbName, Node}, #state{count=Count, limit=Limit} = State) + when Count >= Limit -> + {noreply, add_to_queue(State, DbName, Node)}; + +handle_cast({push, DbName, Node}, State) -> + #state{active = L, count = C} = State, + case is_running(DbName, Node, L) of + true -> + {noreply, add_to_queue(State, DbName, Node)}; + false -> + Pid = start_push_replication(DbName, Node), + {noreply, State#state{active=[{DbName, Node, Pid}|L], count=C+1}} + end; + +handle_cast({remove_node, Node}, State) -> + Waiting = [{S,N} || {S,N} <- State#state.waiting, N =/= Node], + Dict = lists:foldl(fun(DbName,D) -> dict:erase({DbName,Node}, D) end, + State#state.dict, [S || {S,N} <- Waiting, N =:= Node]), + {noreply, State#state{dict = Dict, waiting = Waiting}}. 
+ +handle_info({'EXIT', Pid, _}, #state{update_notifier=Pid} = State) -> + {ok, NewPid} = start_update_notifier(), + {noreply, State#state{update_notifier=NewPid}}; + +handle_info({'EXIT', Active, normal}, State) -> + handle_replication_exit(State, Active); + +handle_info({'EXIT', Active, Reason}, State) -> + case lists:keyfind(Active, 3, State#state.active) of + {OldDbName, OldNode, _} -> + ?LOG_ERROR("~p replication ~s -> ~p died:~n~p", [?MODULE, OldDbName, + OldNode, Reason]), + timer:apply_after(5000, ?MODULE, push, [OldDbName, OldNode]); + false -> ok end, + handle_replication_exit(State, Active); + +handle_info(Msg, State) -> + ?LOG_ERROR("unexpected msg at replication manager ~p", [Msg]), + {noreply, State}. + +terminate(_Reason, State) -> + [exit(Pid, shutdown) || {_,_,Pid} <- State#state.active], + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_replication_exit(#state{waiting=[]} = State, Pid) -> + NewActive = lists:keydelete(Pid, 3, State#state.active), + {noreply, State#state{active=NewActive, count=length(NewActive)}}; +handle_replication_exit(State, Pid) -> + #state{active=Active, limit=Limit, dict=D, waiting=Waiting} = State, + Active1 = lists:keydelete(Pid, 3, Active), + Count = length(Active1), + NewState = if Count < Limit -> + case next_replication(Active1, Waiting) of + nil -> % all waiting replications are also active + State#state{active = Active1, count = Count}; + {DbName, Node, StillWaiting} -> + NewPid = start_push_replication(DbName, Node), + State#state{ + active = [{DbName, Node, NewPid} | Active1], + count = Count+1, + dict = dict:erase({DbName,Node}, D), + waiting = StillWaiting + } + end; + true -> + State#state{active = Active1, count=Count} + end, + {noreply, NewState}. 
+ +start_push_replication(DbName, Node) -> PostBody = {[ - {<<"source">>, <<"dbs">>}, - {<<"target">>, {[{<<"node">>, Node}, {<<"name">>, <<"dbs">>}]}}, - {<<"continuous">>, true} + {<<"source">>, DbName}, + {<<"target">>, {[{<<"node">>, Node}, {<<"name">>, DbName}]}}, + {<<"continuous">>, false}, + {<<"async">>, true} ]}, - Id = couch_util:to_hex(erlang:md5(term_to_binary([node(), Node]))), - MFA = {couch_rep, start_link, [Id, PostBody, #user_ctx{}]}, - {Node, MFA, permanent, 100, worker, [couch_rep]}. - -% from http://code.google.com/p/erlrc/wiki/ErlrcHowto -sup_upgrade_notify (_Old, _New) -> - {ok, {_, Specs}} = init([]), - - Old = sets:from_list( - [Name || {Name, _, _, _} <- supervisor:which_children(?MODULE)]), - New = sets:from_list([Name || {Name, _, _, _, _, _} <- Specs]), - Kill = sets:subtract(Old, New), - - sets:fold(fun(Id, ok) -> - supervisor:terminate_child(?MODULE, Id), - supervisor:delete_child(?MODULE, Id), - ok - end, - ok, - Kill), - [supervisor:start_child (?MODULE, Spec) || Spec <- Specs ], - ok. 
+ ?LOG_INFO("starting ~s -> ~p internal replication", [DbName, Node]), + UserCtx = #user_ctx{name = <<"replicator">>, roles = [<<"_admin">>]}, + case (catch couch_rep:replicate(PostBody, UserCtx)) of + Pid when is_pid(Pid) -> + link(Pid), + Pid; + {db_not_found, _Msg} -> + case couch_api:open_db(DbName, []) of + {ok, Db} -> + % source exists, let's (re)create the target + couch_api:close_db(Db), + case rpc:call(Node, couch_api, create_db, [DbName, []]) of + {ok, Target} -> + ?LOG_INFO("~p successfully created ~s on ~p", [?MODULE, DbName, + Node]), + couch_api:close_db(Target), + start_push_replication(DbName, Node); + file_exists -> + start_push_replication(DbName, Node); + Error -> + ?LOG_ERROR("~p couldn't create ~s on ~p because ~p", + [?MODULE, DbName, Node, Error]), + exit(shutdown) + end; + {not_found, no_db_file} -> + % source is gone, so this is a hack to skip it + ?LOG_INFO("~p tried to push ~s to ~p but it was already deleted", + [?MODULE, DbName, Node]), + spawn_link(fun() -> ok end) + end; + {node_not_connected, _} -> + % we'll get this one when the node rejoins + ?LOG_ERROR("~p exiting because ~p is not connected", [?MODULE, Node]), + spawn_link(fun() -> ok end); + CatchAll -> + ?LOG_INFO("~p strange error ~p", [?MODULE, CatchAll]), + case lists:member(Node, nodes()) of + true -> + timer:apply_after(5000, ?MODULE, push, [DbName, Node]); + false -> + ok + end, + spawn_link(fun() -> ok end) + end. + +add_to_queue(State, DbName, Node) -> + #state{dict=D, waiting=Waiting} = State, + case dict:is_key({DbName, Node}, D) of + true -> + State; + false -> + ?LOG_DEBUG("adding ~s -> ~p to internal queue", [DbName, Node]), + State#state{ + dict = dict:store({DbName,Node}, ok, D), + waiting = Waiting ++ [{DbName,Node}] + } + end. 
+ +initial_sync() -> + Db1 = ?l2b(couch_config:get("mem3", "node_db", "nodes")), + Db2 = ?l2b(couch_config:get("mem3", "shard_db", "dbs")), + Nodes = mem3:nodes(), + Live = nodes(), + [[push(Db, N) || Db <- [Db1,Db2]] || N <- Nodes, lists:member(N, Live)]. + +start_update_notifier() -> + Db1 = ?l2b(couch_config:get("mem3", "node_db", "nodes")), + Db2 = ?l2b(couch_config:get("mem3", "shard_db", "dbs")), + couch_db_update_notifier:start_link(fun + ({updated, Db}) when Db == Db1; Db == Db2 -> + Nodes = mem3:nodes(), + Live = nodes(), + [?MODULE:push(Db, N) || N <- Nodes, lists:member(N, Live)]; + (_) -> ok end). + +%% @doc Finds the next {DbName,Node} pair in the list of waiting replications +%% which does not correspond to an already running replication +-spec next_replication(list(), list()) -> {binary(),node(),list()} | nil. +next_replication(Active, Waiting) -> + case lists:splitwith(fun({S,N}) -> is_running(S,N,Active) end, Waiting) of + {_, []} -> + nil; + {Running, [{DbName,Node}|Rest]} -> + {DbName, Node, Running ++ Rest} + end. + +is_running(DbName, Node, ActiveList) -> + [] =/= [true || {S,N,_} <- ActiveList, S=:=DbName, N=:=Node]. diff --git a/src/mem3_sync_event.erl b/src/mem3_sync_event.erl index 55f3840c..45fcb8aa 100644 --- a/src/mem3_sync_event.erl +++ b/src/mem3_sync_event.erl @@ -7,11 +7,26 @@ init(_) -> {ok, nil}. 
-handle_event({Up, Node}, State) when Up == nodeup; Up == node_join -> - mem3_sync:add_node(Node); - -handle_event({Down, Node}, State) when Down == nodedown; Down == node_leave -> - mem3_sync:remove_node(Node); +handle_event({add_node, Node}, State) -> + Db1 = list_to_binary(couch_config:get("mem3", "node_db", "nodes")), + Db2 = list_to_binary(couch_config:get("mem3", "shard_db", "dbs")), + [mem3_sync:push(Db, Node) || Db <- [Db1, Db2]], + {ok, State}; + +handle_event({nodeup, Node}, State) -> + case lists:member(Node, mem3:nodes()) of + true -> + Db1 = list_to_binary(couch_config:get("mem3", "node_db", "nodes")), + Db2 = list_to_binary(couch_config:get("mem3", "shard_db", "dbs")), + [mem3_sync:push(Db, Node) || Db <- [Db1, Db2]]; + false -> + ok + end, + {ok, State}; + +handle_event({Down, Node}, State) when Down == nodedown; Down == remove_node -> + mem3_sync:remove_node(Node), + {ok, State}; handle_event(_Event, State) -> {ok, State}. diff --git a/src/mem3_util.erl b/src/mem3_util.erl index 476742b7..b05faa15 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -1,170 +1,52 @@ -module(mem3_util). -author('brad@cloudant.com'). -%% API --export([fullmap/2, fullmap/3, hash/1, install_fullmap/4]). --export([for_key/2, all_parts/1]). --export([shard_name/2, build_shards/2]). +-export([hash/1, name_shard/1, create_partition_map/4, build_shards/2, + n_val/2, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1]). --define(RINGTOP, trunc(math:pow(2,160))). % SHA-1 space +-define(RINGTOP, 2 bsl 31). % CRC32 space -include("mem3.hrl"). -%%==================================================================== -%% API -%%==================================================================== - -%% @doc build a full partition map -fullmap(DbName, Options) -> - {ok, Nodes} = mem3:nodes(), - fullmap(DbName, Nodes, Options). 
- -fullmap(DbName, Nodes, Options) -> - {N,Q} = db_init_constants(Options), - NewNodes = ordered_nodes(DbName, Nodes), - Pmap = pmap(Q, NewNodes), - int_fullmap(DbName, N, Pmap, NewNodes). - -%% @spec hash(term()) -> Digest::binary() -%% @doc uses SHA-1 as its hash hash(Item) when is_binary(Item) -> - crypto:sha(Item); + erlang:crc32(Item); hash(Item) -> - crypto:sha(term_to_binary(Item)). - -install_fullmap(DbName, Fullmap, FullNodes, Options) -> - {N,Q} = db_init_constants(Options), - Doc = {[{<<"_id">>,DbName}, - {<<"map">>, jsonify(<<"map">>, Fullmap)}, - {<<"nodes">>, jsonify(<<"nodes">>, FullNodes)}, - {<<"n">>,N}, - {<<"q">>,Q}]}, - write_db_doc(Doc). - -for_key(DbName, Key) -> - <> = hash(Key), - Head = #shard{ - name = '_', - node = '_', - dbname = DbName, - range = ['$1','$2'], - ref = '_' - }, - % TODO these conditions assume A < B, which we don't require - Conditions = [{'<', '$1', HashKey}, {'<', HashKey, '$2'}], - case ets:select(partitions, [{Head, Conditions, ['$_']}]) of - [] -> - erlang:error(database_does_not_exist); - Shards -> - Shards - end. - -all_parts(DbName) -> - case ets:lookup(partitions, DbName) of - [] -> - erlang:error(database_does_not_exist); - Else -> - Else - end. - -%%==================================================================== -%% Internal functions -%%==================================================================== + erlang:crc32(term_to_binary(Item)). + +name_shard(#shard{dbname = DbName, range=[B,E]} = Shard) -> + Name = ["shards/", couch_util:to_hex(<>), "-", + couch_util:to_hex(<>), "/", DbName], + Shard#shard{name = ?l2b(Name)}. + +create_partition_map(DbName, N, Q, Nodes) -> + UniqueShards = make_key_ranges((?RINGTOP) div Q, 0, []), + Shards0 = lists:flatten([lists:duplicate(N, S) || S <- UniqueShards]), + Shards1 = attach_nodes(Shards0, [], Nodes, []), + [name_shard(S#shard{dbname=DbName}) || S <- Shards1]. 
+ +make_key_ranges(_, CurrentPos, Acc) when CurrentPos >= ?RINGTOP -> + Acc; +make_key_ranges(Increment, Start, Acc) -> + case Start + 2*Increment of + X when X > ?RINGTOP -> + End = ?RINGTOP - 1; + _ -> + End = Start + Increment - 1 + end, + make_key_ranges(Increment, End+1, [#shard{range=[Start, End]} | Acc]). -%% @doc get cluster constants from options or config -db_init_constants(Options) -> - {const(n, Options), const(q, Options)}. +attach_nodes([], Acc, _, _) -> + lists:reverse(Acc); +attach_nodes(Shards, Acc, [], UsedNodes) -> + attach_nodes(Shards, Acc, lists:reverse(UsedNodes), []); +attach_nodes([S | Rest], Acc, [Node | Nodes], UsedNodes) -> + attach_nodes(Rest, [S#shard{node=Node} | Acc], Nodes, [Node | UsedNodes]). -%% @doc get individual constant -const(Const, Options) -> - ListResult = case couch_util:get_value(Const, Options) of - undefined -> couch_config:get("cluster", atom_to_list(Const)); - Val -> Val - end, - list_to_integer(ListResult). - -%% @doc hash the dbname, and return the corresponding node for seeding a ring -seednode(DbName, Nodes) -> - <> = hash(DbName), - Size = partition_range(length(Nodes)), - Factor = (HashInt div Size), - lists:nth(Factor+1, Nodes). - -%% @doc take the list of nodes, and rearrange it, starting with the node that -%% results from hashing the Term -ordered_nodes(Term, Nodes) -> - SeedNode = seednode(Term, Nodes), - {A, B} = lists:splitwith(fun(N) -> N /= SeedNode end, Nodes), - lists:append(B,A). - -%% @doc create a partition map -pmap(NumPartitions, Nodes) -> - Increment = ?RINGTOP div NumPartitions, - Parts = parts(?RINGTOP, Increment, 0, []), - make_map(Nodes, Nodes, Parts, []). - -%% @doc makes a {beg, end} list of partition ranges -%% last range may have an extra few values, because Increment is created -%% with Ringtop 'div' NumPartitions above. 
-parts(Top, _, Beg, Acc) when Beg > Top -> Acc; -parts(Top, Increment, Beg, Acc) -> - End = case Beg + 2*Increment of - Over when Over > Top -> Top; - _ -> Beg + Increment - 1 - end, - NewAcc = [{Beg, End} | Acc], - parts(Top, Increment, End+1, NewAcc). - -%% @doc create a full map, which is a pmap with N-1 replication partner nodes -%% added per partition -int_fullmap(DbName, N, Pmap, Nodes) -> - Full = lists:foldl(fun({Node,{B,E} = Part}, AccIn) -> - Primary = [#shard{dbname=DbName, node=Node, range=[B,E], - name=shard_name(B,DbName)}], - Partners = partners(DbName, N, Node, Nodes, Part), - lists:append([Primary, Partners, AccIn]) - end, [], Pmap), - lists:reverse(Full). - -partners(DbName, N, Node, Nodes, {Beg,End}) -> - {A, [Node|B]} = lists:splitwith(fun(Nd) -> Nd /= Node end, Nodes), - Nodes1 = lists:append(B,A), - Partners = lists:sublist(Nodes1, N-1), % N-1 replication partner nodes - lists:map(fun(Partner) -> - #shard{dbname=DbName, node=Partner, range=[Beg,End], - name=shard_name(Beg,DbName)} - end, Partners). - -%% @doc size of one partition in the ring -partition_range(Q) -> - trunc( ?RINGTOP / Q ). % SHA-1 space / Q - -%% @doc assign nodes to each of the partitions. When you run out of nodes, -%% start at the beginning of the node list again. -%% The provided node list starts with the seed node (seednode fun) -make_map(_,_,[], Acc) -> - lists:keysort(2,Acc); -make_map(AllNodes, [], Parts, Acc) -> - % start back at beginning of node list - make_map(AllNodes, AllNodes, Parts, Acc); -make_map(AllNodes, [Node|RestNodes], [Part|RestParts], Acc) -> - % add a node/part combo to the Acc - make_map(AllNodes, RestNodes, RestParts, [{Node,Part}|Acc]). - -jsonify(<<"map">>, Map) -> - lists:map(fun(#shard{node=Node, range=[Beg,End]}) -> - {[{<<"node">>, Node}, {<<"b">>, Beg}, {<<"e">>, End}]} - end, Map); -jsonify(<<"nodes">>, Nodes) -> - lists:map(fun({Order, Node, Options}) -> - {[{<<"order">>, Order}, {<<"node">>, Node}, {<<"options">>, Options}]} - end, Nodes). 
- -write_db_doc(EDoc) -> +write_db_doc(Doc) -> {ok, Db} = couch_db:open(<<"dbs">>, []), try - update_db_doc(Db, couch_doc:from_json_obj(EDoc)) - catch {conflict, _} -> + update_db_doc(Db, Doc) + catch conflict -> ?LOG_ERROR("conflict writing db doc, must be a race", []) after couch_db:close(Db) @@ -180,22 +62,55 @@ update_db_doc(Db, #doc{id=Id, body=Body} = Doc) -> {ok, _} = couch_db:update_doc(Db, OldDoc#doc{body=Body}, []) end. -shard_name(Part, DbName) when is_list(DbName) -> - shard_name(Part, ?l2b(DbName)); -shard_name(Part, DbName) -> - PartHex = ?l2b(showroom_utils:int_to_hexstr(Part)), - <<"x", PartHex/binary, "/", DbName/binary, "_", PartHex/binary>>. +delete_db_doc(DocId) -> + {ok, Db} = couch_db:open(<<"dbs">>, []), + try + delete_db_doc(Db, DocId) + catch conflict -> + ok + after + couch_db:close(Db) + end. + +delete_db_doc(Db, DocId) -> + case couch_db:open_doc(Db, DocId, []) of + {not_found, _} -> + ok; + {ok, OldDoc} -> + {ok, _} = couch_db:update_doc(Db, OldDoc#doc{deleted=true}, []) + end. build_shards(DbName, DocProps) -> - lists:map(fun({Map}) -> - Begin = couch_util:get_value(<<"b">>, Map), - #shard{ - name = mem3_util:shard_name(Begin, DbName), - dbname = DbName, - node = to_atom(couch_util:get_value(<<"node">>, Map)), - range = [Begin, couch_util:get_value(<<"e">>, Map)] - } - end, couch_util:get_value(<<"map">>, DocProps, {[]})). + {ByNode} = couch_util:get_value(<<"by_node">>, DocProps, {[]}), + lists:flatmap(fun({Node, Ranges}) -> + lists:map(fun(Range) -> + [B,E] = string:tokens(?b2l(Range), "-"), + Beg = httpd_util:hexlist_to_integer(B), + End = httpd_util:hexlist_to_integer(E), + name_shard(#shard{ + dbname = DbName, + node = to_atom(Node), + range = [Beg, End] + }) + end, Ranges) + end, ByNode). to_atom(Node) when is_binary(Node) -> - list_to_atom(binary_to_list(Node)). + list_to_atom(binary_to_list(Node)); +to_atom(Node) when is_atom(Node) -> + Node. 
+ +to_integer(N) when is_integer(N) -> + N; +to_integer(N) when is_binary(N) -> + list_to_integer(binary_to_list(N)); +to_integer(N) when is_list(N) -> + list_to_integer(N). + +n_val(undefined, NodeCount) -> + n_val(list_to_integer(couch_config:get("cluster", "n", "3")), NodeCount); +n_val(N, NodeCount) when N > NodeCount -> + ?LOG_ERROR("Request to create N=~p DB but only ~p node(s)", [N, NodeCount]), + NodeCount; +n_val(N, _) -> + N. diff --git a/src/mem3_vclock.erl b/src/mem3_vclock.erl deleted file mode 100644 index a48da43c..00000000 --- a/src/mem3_vclock.erl +++ /dev/null @@ -1,109 +0,0 @@ -%%% @author Cliff Moon [] -%%% @copyright 2008 Cliff Moon - --module (mem3_vclock). --export ([create/1, truncate/1, increment/2, compare/2, resolve/2, merge/2, - equals/2]). - -%% -ifdef(TEST). -%% -include("etest/vector_clock_test.erl"). -%% -endif. - -create(NodeName) -> [{NodeName, now_float()}]. - -truncate(Clock) when length(Clock) > 10 -> - lists:nthtail(length(Clock) - 10, lists:keysort(2, Clock)); - -truncate(Clock) -> Clock. - -increment(NodeName, [{NodeName, _Version}|Clocks]) -> - [{NodeName, now_float()}|Clocks]; - -increment(NodeName, [NodeClock|Clocks]) -> - [NodeClock|increment(NodeName, Clocks)]; - -increment(NodeName, []) -> - [{NodeName, now_float()}]. - -resolve({ClockA, ValuesA}, {ClockB, ValuesB}) -> - case compare(ClockA, ClockB) of - less -> {ClockB, ValuesB}; - greater -> {ClockA, ValuesA}; - equal -> {ClockA, ValuesA}; - concurrent -> - showroom_log:message(info, - "~nConcurrent Clocks~n" - "ClockA : ~p~nClockB : ~p~n" - "ValuesA: ~p~nValuesB: ~p~n" - , [ClockA, ClockB, ValuesA, ValuesB]), - {merge(ClockA,ClockB), ValuesA ++ ValuesB} - end; -resolve(not_found, {Clock, Values}) -> - {Clock, Values}; -resolve({Clock, Values}, not_found) -> - {Clock, Values}. - -merge(ClockA, ClockB) -> - merge([], ClockA, ClockB). 
- -merge(Merged, [], ClockB) -> lists:keysort(1, Merged ++ ClockB); - -merge(Merged, ClockA, []) -> lists:keysort(1, Merged ++ ClockA); - -merge(Merged, [{NodeA, VersionA}|ClockA], ClockB) -> - case lists:keytake(NodeA, 1, ClockB) of - {value, {NodeA, VersionB}, TrunkClockB} when VersionA > VersionB -> - merge([{NodeA,VersionA}|Merged],ClockA,TrunkClockB); - {value, {NodeA, VersionB}, TrunkClockB} -> - merge([{NodeA,VersionB}|Merged],ClockA,TrunkClockB); - false -> - merge([{NodeA,VersionA}|Merged],ClockA,ClockB) - end. - -compare(ClockA, ClockB) -> - AltB = less_than(ClockA, ClockB), - if AltB -> less; true -> - BltA = less_than(ClockB, ClockA), - if BltA -> greater; true -> - AeqB = equals(ClockA, ClockB), - if AeqB -> equal; true -> concurrent end - end - end. - -%% ClockA is less than ClockB if and only if ClockA[z] <= ClockB[z] for all -%% instances z and there exists an index z' such that ClockA[z'] < ClockB[z'] -less_than(ClockA, ClockB) -> - ForAll = lists:all(fun({Node, VersionA}) -> - case lists:keysearch(Node, 1, ClockB) of - {value, {_NodeB, VersionB}} -> VersionA =< VersionB; - false -> false - end - end, ClockA), - Exists = lists:any(fun({NodeA, VersionA}) -> - case lists:keysearch(NodeA, 1, ClockB) of - {value, {_NodeB, VersionB}} -> VersionA /= VersionB; - false -> true - end - end, ClockA), - %length takes care of the case when clockA is shorter than B - ForAll and (Exists or (length(ClockA) < length(ClockB))). - -equals(ClockA, ClockB) -> - Equivalent = lists:all(fun({NodeA, VersionA}) -> - lists:any(fun(NodeClockB) -> - case NodeClockB of - {NodeA, VersionA} -> true; - _ -> false - end - end, ClockB) - end, ClockA), - Equivalent and (length(ClockA) == length(ClockB)). - -now_float() -> - time_to_epoch_float(now()). - -time_to_epoch_float(Time) when is_integer(Time) or is_float(Time) -> - Time; - -time_to_epoch_float({Mega,Sec,Micro}) -> - Mega * 1000000 + Sec + Micro / 1000000. 
-- cgit v1.2.3 From c02b2f9115d8d5a6bd48e9eaf644950bda14e29d Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Fri, 2 Jul 2010 09:44:22 -0400 Subject: fix debugging funs and clean up API module --- src/mem3.erl | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index e6ee5bf8..5116f008 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -1,12 +1,11 @@ -module(mem3). --export([start/0, stop/0, restart/0, state/0, nodes/0, shards/1, shards/2, +-export([start/0, stop/0, restart/0, nodes/0, shards/1, shards/2, choose_shards/2]). +-export([compare_nodelists/0, compare_shards/1]). -include("mem3.hrl"). --define(SERVER, mem3_server). - start() -> application:start(mem3). @@ -22,21 +21,36 @@ restart() -> %% key and the nodes holding that state as the value. Also reports member %% nodes which fail to respond and nodes which are connected but are not %% cluster members. Useful for debugging. --spec state() -> [{any | bad_nodes | non_member_nodes, [node()]}]. -state() -> - {ok, Nodes} = mem3:nodes(), +-spec compare_nodelists() -> [{{cluster_nodes, [node()]} | bad_nodes + | non_member_nodes, [node()]}]. +compare_nodelists() -> + Nodes = mem3:nodes(), AllNodes = erlang:nodes([this, visible]), - {Replies, BadNodes} = gen_server:multi_call(Nodes, ?SERVER, state), - Dict = lists:foldl(fun({Node, {ok,State}}, D) -> - orddict:append(State, Node, D) + {Replies, BadNodes} = gen_server:multi_call(Nodes, mem3_nodes, get_nodelist), + Dict = lists:foldl(fun({Node, Nodelist}, D) -> + orddict:append({cluster_nodes, Nodelist}, Node, D) end, orddict:new(), Replies), [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. +-spec compare_shards(DbName::iolist()) -> [{bad_nodes | [#shard{}], [node()]}]. 
+compare_shards(DbName) when is_list(DbName) -> + compare_shards(list_to_binary(DbName)); +compare_shards(DbName) -> + Nodes = mem3:nodes(), + {Replies, BadNodes} = rpc:multicall(mem3, shards, [DbName]), + GoodNodes = [N || N <- Nodes, not lists:member(N, BadNodes)], + Dict = lists:foldl(fun({Shards, Node}, D) -> + orddict:append(Shards, Node, D) + end, orddict:new(), lists:zip(Replies, GoodNodes)), + [{bad_nodes, BadNodes} | Dict]. + -spec nodes() -> [node()]. nodes() -> mem3_nodes:get_nodelist(). --spec shards(DbName::binary()) -> [#shard{}]. +-spec shards(DbName::iolist()) -> [#shard{}]. +shards(DbName) when is_list(DbName) -> + shards(list_to_binary(DbName)); shards(DbName) -> case ets:lookup(partitions, DbName) of [] -> @@ -46,7 +60,11 @@ shards(DbName) -> Else end. --spec shards(DbName::binary(), DocId::binary()) -> [#shard{}]. +-spec shards(DbName::iolist(), DocId::binary()) -> [#shard{}]. +shards(DbName, DocId) when is_list(DbName) -> + shards(list_to_binary(DbName), DocId); +shards(DbName, DocId) when is_list(DocId) -> + shards(DbName, list_to_binary(DocId)); shards(DbName, DocId) -> HashKey = mem3_util:hash(DocId), Head = #shard{ @@ -66,6 +84,9 @@ shards(DbName, DocId) -> Shards end. +-spec choose_shards(DbName::iolist(), Options::list()) -> [#shard{}]. 
+choose_shards(DbName, Options) when is_list(DbName) -> + choose_shards(list_to_binary(DbName), Options); choose_shards(DbName, Options) -> try shards(DbName) catch error:database_does_not_exist -> -- cgit v1.2.3 From 56bdbc73f05e5450ff9c36f8e369f2399d113641 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Fri, 2 Jul 2010 10:23:14 -0400 Subject: failover to reading dbs.couch directly when ets is MIA --- src/mem3.erl | 17 +++++++++-------- src/mem3_util.erl | 21 ++++++++++++++++++++- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/mem3.erl b/src/mem3.erl index 5116f008..5610f085 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -52,12 +52,13 @@ nodes() -> shards(DbName) when is_list(DbName) -> shards(list_to_binary(DbName)); shards(DbName) -> - case ets:lookup(partitions, DbName) of + try ets:lookup(partitions, DbName) of [] -> - % TODO fall back to checking dbs.couch directly - erlang:error(database_does_not_exist); + mem3_util:load_shards_from_disk(DbName); Else -> Else + catch error:badarg -> + mem3_util:load_shards_from_disk(DbName) end. -spec shards(DbName::iolist(), DocId::binary()) -> [#shard{}]. @@ -74,14 +75,14 @@ shards(DbName, DocId) -> range = ['$1','$2'], ref = '_' }, - % TODO these conditions assume A < B, which we don't require Conditions = [{'<', '$1', HashKey}, {'=<', HashKey, '$2'}], - case ets:select(partitions, [{Head, Conditions, ['$_']}]) of + try ets:select(partitions, [{Head, Conditions, ['$_']}]) of [] -> - % TODO fall back to checking dbs.couch directly - erlang:error(database_does_not_exist); + mem3_util:load_shards_from_disk(DbName, DocId); Shards -> Shards + catch error:badarg -> + mem3_util:load_shards_from_disk(DbName, DocId) end. -spec choose_shards(DbName::iolist(), Options::list()) -> [#shard{}]. 
@@ -89,7 +90,7 @@ choose_shards(DbName, Options) when is_list(DbName) -> choose_shards(list_to_binary(DbName), Options); choose_shards(DbName, Options) -> try shards(DbName) - catch error:database_does_not_exist -> + catch error:E when E==database_does_not_exist; E==badarg -> Nodes = mem3:nodes(), NodeCount = length(Nodes), N = mem3_util:n_val(couch_util:get_value(n, Options), NodeCount), diff --git a/src/mem3_util.erl b/src/mem3_util.erl index b05faa15..e58bbd4c 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -2,7 +2,8 @@ -author('brad@cloudant.com'). -export([hash/1, name_shard/1, create_partition_map/4, build_shards/2, - n_val/2, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1]). + n_val/2, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, + load_shards_from_disk/1, load_shards_from_disk/2]). -define(RINGTOP, 2 bsl 31). % CRC32 space @@ -114,3 +115,21 @@ n_val(N, NodeCount) when N > NodeCount -> NodeCount; n_val(N, _) -> N. + +load_shards_from_disk(DbName) when is_binary(DbName) -> + {ok, Db} = couch_db:open(<<"dbs">>, []), + try load_shards_from_db(Db, DbName) after couch_db:close(Db) end. + +load_shards_from_db(#db{} = ShardDb, DbName) -> + case couch_db:open_doc(ShardDb, DbName, []) of + {ok, #doc{body = {Props}}} -> + ?LOG_INFO("dbs cache miss for ~s", [DbName]), + build_shards(DbName, Props); + {not_found, _} -> + erlang:error(database_does_not_exist) + end. + +load_shards_from_disk(DbName, DocId)-> + Shards = load_shards_from_disk(DbName), + HashKey = hash(DocId), + [S || #shard{range = [B,E]} = S <- Shards, B < HashKey, HashKey =< E]. 
-- cgit v1.2.3 From 7de28cb172b47cd33acf17527689126784e1c42e Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Fri, 2 Jul 2010 10:23:56 -0400 Subject: restart mem3_cache if ets table errors out --- src/mem3_cache.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mem3_cache.erl b/src/mem3_cache.erl index 1d1bbe9b..38cc57ef 100644 --- a/src/mem3_cache.erl +++ b/src/mem3_cache.erl @@ -23,6 +23,10 @@ handle_call(_Call, _From, State) -> handle_cast(_Msg, State) -> {noreply, State}. +handle_info({'DOWN', _, _, Pid, {badarg, [{ets,delete,[partitions,_]}|_]}}, + #state{changes_pid=Pid} = State) -> + % fatal error, somebody deleted our ets table + {stop, ets_table_error, State}; handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> ?LOG_INFO("~p changes listener died ~p", [?MODULE, Reason]), Seq = case Reason of {seq, EndSeq} -> EndSeq; _ -> 0 end, -- cgit v1.2.3 From a1800ffdd01f549959277e894a0fe07a05ef88f3 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Fri, 2 Jul 2010 14:20:37 -0400 Subject: no authors in src files --- src/mem3_util.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mem3_util.erl b/src/mem3_util.erl index e58bbd4c..7c31ce8e 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -1,5 +1,4 @@ -module(mem3_util). --author('brad@cloudant.com'). -export([hash/1, name_shard/1, create_partition_map/4, build_shards/2, n_val/2, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, -- cgit v1.2.3 From f3c820d21573a93bc8b792ef555a578d8db2f0b3 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Sat, 3 Jul 2010 09:37:51 -0400 Subject: forgot to register the mem3 top-level supervisor --- src/mem3_sup.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem3_sup.erl b/src/mem3_sup.erl index 353216d4..58d0bbf5 100644 --- a/src/mem3_sup.erl +++ b/src/mem3_sup.erl @@ -3,7 +3,7 @@ -export([start_link/0, init/1]). start_link() -> - supervisor:start_link(?MODULE, []). 
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []). init(_Args) -> Children = [ -- cgit v1.2.3 From 971ed3daf11c51319b1bfa0eef73fbed2b2cc653 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Sat, 3 Jul 2010 11:18:42 -0400 Subject: cast to int, otherwise "1" > 2 --- src/mem3_util.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mem3_util.erl b/src/mem3_util.erl index 7c31ce8e..aa43fd88 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -108,8 +108,10 @@ to_integer(N) when is_list(N) -> list_to_integer(N). n_val(undefined, NodeCount) -> - n_val(list_to_integer(couch_config:get("cluster", "n", "3")), NodeCount); -n_val(N, NodeCount) when N > NodeCount -> + n_val(couch_config:get("cluster", "n", "3"), NodeCount); +n_val(N, NodeCount) when is_list(N) -> + n_val(list_to_integer(N), NodeCount); +n_val(N, NodeCount) when is_integer(NodeCount), N > NodeCount -> ?LOG_ERROR("Request to create N=~p DB but only ~p node(s)", [N, NodeCount]), NodeCount; n_val(N, _) -> -- cgit v1.2.3 From 6860b77ba628911a658fe3b4cb49529c293968e9 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 5 Jul 2010 15:34:02 -0400 Subject: update include path for couch_db.hrl --- include/mem3.hrl | 13 +------------ src/mem3_sync.erl | 2 +- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/mem3.hrl b/include/mem3.hrl index 533056f9..346d3e3e 100644 --- a/include/mem3.hrl +++ b/include/mem3.hrl @@ -1,16 +1,5 @@ --define(MEMBERSHIP, true). +-include_lib("couch/include/couch_db.hrl"). --ifndef(FABRIC). --include("../../fabric/include/fabric.hrl"). --endif. - --ifndef(COUCH). --include("../../couch/src/couch_db.hrl"). --endif. - --include_lib("eunit/include/eunit.hrl"). - -%% partition record -record(shard, {name, node, dbname, range, ref}). 
%% types diff --git a/src/mem3_sync.erl b/src/mem3_sync.erl index 0f402834..f011b008 100644 --- a/src/mem3_sync.erl +++ b/src/mem3_sync.erl @@ -5,7 +5,7 @@ -export([start_link/0, get_active/0, get_queue/0, push/2, remove_node/1]). --include_lib("../../couch/src/couch_db.hrl"). +-include("mem3.hrl"). -record(state, { active = [], -- cgit v1.2.3 From f03a4121b8f22997108ba75b0e130f3731d76c48 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 5 Jul 2010 15:49:43 -0400 Subject: more cleanup of the includes --- include/mem3.hrl | 2 -- src/mem3_cache.erl | 1 + src/mem3_httpd.erl | 1 + src/mem3_nodes.erl | 1 + src/mem3_sync.erl | 1 + src/mem3_util.erl | 1 + 6 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mem3.hrl b/include/mem3.hrl index 346d3e3e..f6584f19 100644 --- a/include/mem3.hrl +++ b/include/mem3.hrl @@ -1,5 +1,3 @@ --include_lib("couch/include/couch_db.hrl"). - -record(shard, {name, node, dbname, range, ref}). %% types diff --git a/src/mem3_cache.erl b/src/mem3_cache.erl index 38cc57ef..2a29ca4c 100644 --- a/src/mem3_cache.erl +++ b/src/mem3_cache.erl @@ -8,6 +8,7 @@ -record(state, {changes_pid}). -include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). diff --git a/src/mem3_httpd.erl b/src/mem3_httpd.erl index 2b29b488..4b4f8496 100644 --- a/src/mem3_httpd.erl +++ b/src/mem3_httpd.erl @@ -4,6 +4,7 @@ %% includes -include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). handle_membership_req(#httpd{method='GET', diff --git a/src/mem3_nodes.erl b/src/mem3_nodes.erl index 9b3f32f3..0214ed03 100644 --- a/src/mem3_nodes.erl +++ b/src/mem3_nodes.erl @@ -6,6 +6,7 @@ -export([start_link/0, get_nodelist/0]). -include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). -record(state, {changes_pid, update_seq, nodes}). 
diff --git a/src/mem3_sync.erl b/src/mem3_sync.erl index f011b008..140675f7 100644 --- a/src/mem3_sync.erl +++ b/src/mem3_sync.erl @@ -6,6 +6,7 @@ -export([start_link/0, get_active/0, get_queue/0, push/2, remove_node/1]). -include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). -record(state, { active = [], diff --git a/src/mem3_util.erl b/src/mem3_util.erl index aa43fd88..f6fad76a 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -7,6 +7,7 @@ -define(RINGTOP, 2 bsl 31). % CRC32 space -include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). hash(Item) when is_binary(Item) -> erlang:crc32(Item); -- cgit v1.2.3 From 5f4f2cf10dfb7e7d27c3207103a2c2e630880af0 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 5 Jul 2010 15:57:28 -0400 Subject: update .app to 1.0 --- ebin/mem3.app | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ebin/mem3.app b/ebin/mem3.app index 05d50748..4d0cf69b 100644 --- a/ebin/mem3.app +++ b/ebin/mem3.app @@ -1,7 +1,7 @@ {application, mem3, [ {description, "CouchDB Cluster Membership"}, {mod, {mem3_app, []}}, - {vsn, "0.9.6"}, + {vsn, "1.0"}, {modules, [ mem3, mem3_app, -- cgit v1.2.3 From 15820b8ec6aa24fe0283879de85cebec17686868 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Sat, 10 Jul 2010 15:47:33 -0400 Subject: remove couch_api module --- src/mem3_sync.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mem3_sync.erl b/src/mem3_sync.erl index 140675f7..d3b3ea51 100644 --- a/src/mem3_sync.erl +++ b/src/mem3_sync.erl @@ -132,15 +132,15 @@ start_push_replication(DbName, Node) -> link(Pid), Pid; {db_not_found, _Msg} -> - case couch_api:open_db(DbName, []) of + case couch_db:open(DbName, []) of {ok, Db} -> % source exists, let's (re)create the target - couch_api:close_db(Db), + couch_db:close(Db), case rpc:call(Node, couch_api, create_db, [DbName, []]) of {ok, Target} -> ?LOG_INFO("~p successfully created ~s on ~p", [?MODULE, DbName, Node]), - 
couch_api:close_db(Target), + couch_db:close(Target), start_push_replication(DbName, Node); file_exists -> start_push_replication(DbName, Node); -- cgit v1.2.3 From 6db30f462e5c77df2a4fb56cd793ecb5243ecdff Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Sat, 10 Jul 2010 15:50:17 -0400 Subject: thank you dialyzer --- include/mem3.hrl | 9 ++++++++- src/mem3.erl | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/mem3.hrl b/include/mem3.hrl index f6584f19..b9359b44 100644 --- a/include/mem3.hrl +++ b/include/mem3.hrl @@ -1,4 +1,11 @@ --record(shard, {name, node, dbname, range, ref}). +% type specification hacked to suppress dialyzer warning re: match spec +-record(shard, { + name :: binary() | '_', + node :: node() | '_', + dbname :: binary(), + range :: [non_neg_integer() | '$1' | '$2'], + ref :: reference() | 'undefined' | '_' +}). %% types -type join_type() :: init | join | replace | leave. diff --git a/src/mem3.erl b/src/mem3.erl index 5610f085..1485c7fe 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -32,7 +32,7 @@ compare_nodelists() -> end, orddict:new(), Replies), [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. --spec compare_shards(DbName::iolist()) -> [{bad_nodes | [#shard{}], [node()]}]. +-spec compare_shards(DbName::iodata()) -> [{bad_nodes | [#shard{}], [node()]}]. compare_shards(DbName) when is_list(DbName) -> compare_shards(list_to_binary(DbName)); compare_shards(DbName) -> @@ -48,7 +48,7 @@ compare_shards(DbName) -> nodes() -> mem3_nodes:get_nodelist(). --spec shards(DbName::iolist()) -> [#shard{}]. +-spec shards(DbName::iodata()) -> [#shard{}]. shards(DbName) when is_list(DbName) -> shards(list_to_binary(DbName)); shards(DbName) -> @@ -61,7 +61,7 @@ shards(DbName) -> mem3_util:load_shards_from_disk(DbName) end. --spec shards(DbName::iolist(), DocId::binary()) -> [#shard{}]. +-spec shards(DbName::iodata(), DocId::binary()) -> [#shard{}]. 
shards(DbName, DocId) when is_list(DbName) -> shards(list_to_binary(DbName), DocId); shards(DbName, DocId) when is_list(DocId) -> @@ -85,7 +85,7 @@ shards(DbName, DocId) -> mem3_util:load_shards_from_disk(DbName, DocId) end. --spec choose_shards(DbName::iolist(), Options::list()) -> [#shard{}]. +-spec choose_shards(DbName::iodata(), Options::list()) -> [#shard{}]. choose_shards(DbName, Options) when is_list(DbName) -> choose_shards(list_to_binary(DbName), Options); choose_shards(DbName, Options) -> -- cgit v1.2.3 From 49de47fb50998ee4d266ad046d3b9c437fe64393 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Tue, 13 Jul 2010 12:25:19 -0400 Subject: add _membership handler to cluster http layer, and get good node list --- src/mem3_httpd.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem3_httpd.erl b/src/mem3_httpd.erl index 4b4f8496..3609de24 100644 --- a/src/mem3_httpd.erl +++ b/src/mem3_httpd.erl @@ -9,7 +9,7 @@ handle_membership_req(#httpd{method='GET', path_parts=[<<"_membership">>]} = Req) -> - {ok,ClusterNodes} = try mem3:nodes() + ClusterNodes = try mem3:nodes() catch _:_ -> {ok,[]} end, couch_httpd:send_json(Req, {[ {all_nodes, lists:sort([node()|nodes()])}, -- cgit v1.2.3 From d17def4f873f3d333c5b16242535b68ccbb6d926 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Thu, 15 Jul 2010 18:01:55 -0400 Subject: don't use deleted nodes --- src/mem3_nodes.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mem3_nodes.erl b/src/mem3_nodes.erl index 0214ed03..6cbf3d9a 100644 --- a/src/mem3_nodes.erl +++ b/src/mem3_nodes.erl @@ -73,6 +73,8 @@ initialize_nodelist() -> first_fold(#full_doc_info{id = <<"_design/", _/binary>>}, _, Acc) -> {ok, Acc}; +first_fold(#full_doc_info{deleted=true}, _, Acc) -> + {ok, Acc}; first_fold(#full_doc_info{id=Id}, _, Acc) -> {ok, [mem3_util:to_atom(Id) | Acc]}. 
-- cgit v1.2.3 From 14f739cf46798d40a060e7280080f1a368c5093f Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Mon, 19 Jul 2010 15:47:16 -0400 Subject: add partitions endpoint, updated for mem3 - implements bugzid 10536 --- src/mem3_httpd.erl | 69 +++++++++++------------------------------------------- 1 file changed, 14 insertions(+), 55 deletions(-) diff --git a/src/mem3_httpd.erl b/src/mem3_httpd.erl index 3609de24..958e9ef5 100644 --- a/src/mem3_httpd.erl +++ b/src/mem3_httpd.erl @@ -8,71 +8,30 @@ handle_membership_req(#httpd{method='GET', - path_parts=[<<"_membership">>]} = Req) -> + path_parts=[<<"_membership">>]} = Req) -> ClusterNodes = try mem3:nodes() catch _:_ -> {ok,[]} end, couch_httpd:send_json(Req, {[ {all_nodes, lists:sort([node()|nodes()])}, {cluster_nodes, lists:sort(ClusterNodes)} ]}); - -handle_membership_req(#httpd{method='POST', - path_parts=[<<"_membership">>]} = Req) -> - {JsonProps} = couch_httpd:json_body_obj(Req), - Method = couch_util:get_value(<<"method">>, JsonProps), - Params = couch_util:get_value(<<"params">>, JsonProps), - Id = couch_util:get_value(<<"id">>, JsonProps), - {Result, Error} = membership_dispatch(Method, Params), +handle_membership_req(#httpd{method='GET', + path_parts=[<<"_membership">>, <<"parts">>, DbName]} = Req) -> + ClusterNodes = try mem3:nodes() + catch _:_ -> {ok,[]} end, + Shards = mem3:shards(DbName), + JsonShards = json_shards(Shards, []), couch_httpd:send_json(Req, {[ - {result, Result}, - {error, Error}, - {id, Id} + {all_nodes, lists:sort([node()|nodes()])}, + {cluster_nodes, lists:sort(ClusterNodes)}, + {partitions, JsonShards} ]}). 
 %%
 %% internal
 %%
 
-membership_dispatch(<<"replace">>, Params) ->
-    OldNode = get_oldnode(Params),
-    NewNodeOpts = get_value_json(<<"newnode_options">>, Params, []),
-    PingNode = get_pingnode(Params),
-    send_join(replace, {OldNode, NewNodeOpts}, PingNode);
-membership_dispatch(TypeBin, Params) ->
-    Type = list_to_atom(?b2l(TypeBin)),
-    NodeList = get_value_json(<<"nodes">>, Params, []),
-    Nodes = lists:map(fun({List}) -> node_info(List) end, NodeList),
-    PingNode = get_pingnode(Params),
-    send_join(Type, Nodes, PingNode).
-
-get_pingnode(Params) ->
-    PingNodeBin = get_value_json(<<"pingnode">>, Params, <<"nil">>),
-    list_to_atom(?b2l(PingNodeBin)).
-
-get_oldnode(Params) ->
-    NodeBin = get_value_json(<<"oldnode">>, Params, undefined),
-    NodeList = ?b2l(NodeBin),
-    list_to_atom(NodeList).
-
-%% @doc send join command to mem module
-send_join(Type, Payload, PingNode) ->
-    case mem3:join(Type, Payload, PingNode) of
-    ok -> {ok, null};
-    {error, Error} -> {Type, Error};
-    Other ->
-        ?LOG_ERROR("membership dispatch error ~p", [Other]),
-        {Type, unknown_error}
-    end.
-
-node_info(List) ->
-    Order = couch_util:get_value(<<"order">>, List),
-    Node1 = couch_util:get_value(<<"node">>, List),
-    Node2 = list_to_atom(?b2l(Node1)),
-    Options = couch_util:get_value(<<"options">>, List),
-    {Order, Node2, Options}.
-
-get_value_json(_,[], Default) -> Default;
-get_value_json(Key, [JsonProp|Rest], Default) ->
-    case JsonProp of
-    {[{Key, Value}]} -> Value;
-    _ -> get_value_json(Key, Rest, Default)
-    end.
+json_shards([], Acc) -> {lists:sort(Acc)};
+json_shards([#shard{node=Node, range=[B,_E]} | Rest], AccIn) ->
+    HexBeg = couch_util:to_hex(<<B:32/integer>>),
+    json_shards(Rest, [{HexBeg,[Node]}|AccIn]).
-- 
cgit v1.2.3


From aceb96703ef4df50d080a9f9acd11a46c4db9fcb Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@cloudant.com>
Date: Mon, 19 Jul 2010 16:39:42 -0400
Subject: make _membership/parts/dbname work for N>1, dumbass

---
 src/mem3_httpd.erl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mem3_httpd.erl b/src/mem3_httpd.erl
index 958e9ef5..cbfaea95 100644
--- a/src/mem3_httpd.erl
+++ b/src/mem3_httpd.erl
@@ -20,7 +20,7 @@ handle_membership_req(#httpd{method='GET',
     ClusterNodes = try mem3:nodes()
     catch _:_ -> {ok,[]} end,
     Shards = mem3:shards(DbName),
-    JsonShards = json_shards(Shards, []),
+    JsonShards = json_shards(Shards, dict:new()),
     couch_httpd:send_json(Req, {[
         {all_nodes, lists:sort([node()|nodes()])},
         {cluster_nodes, lists:sort(ClusterNodes)},
@@ -31,7 +31,9 @@ handle_membership_req(#httpd{method='GET',
 %% internal
 %%
 
-json_shards([], Acc) -> {lists:sort(Acc)};
+json_shards([], AccIn) ->
+    List = dict:to_list(AccIn),
+    {lists:sort(List)};
 json_shards([#shard{node=Node, range=[B,_E]} | Rest], AccIn) ->
     HexBeg = couch_util:to_hex(<<B:32/integer>>),
-    json_shards(Rest, [{HexBeg,[Node]}|AccIn]).
+    json_shards(Rest, dict:append(HexBeg, Node, AccIn)).
-- cgit v1.2.3 From 57f813bbeba80db2e5f87b717f26387dbaa5eb29 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Mon, 26 Jul 2010 20:54:26 -0400 Subject: appups for 1.2.1 --- ebin/mem3.app | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ebin/mem3.app b/ebin/mem3.app index 4d0cf69b..0613ab26 100644 --- a/ebin/mem3.app +++ b/ebin/mem3.app @@ -1,7 +1,7 @@ {application, mem3, [ {description, "CouchDB Cluster Membership"}, {mod, {mem3_app, []}}, - {vsn, "1.0"}, + {vsn, "1.0.1"}, {modules, [ mem3, mem3_app, -- cgit v1.2.3 From 0ce0b7588e7a28c490c40491d561d7d9464eb7c5 Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Tue, 27 Jul 2010 11:52:55 -0400 Subject: add the appups for real --- ebin/mem3.appup | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 ebin/mem3.appup diff --git a/ebin/mem3.appup b/ebin/mem3.appup new file mode 100644 index 00000000..6e9ebe71 --- /dev/null +++ b/ebin/mem3.appup @@ -0,0 +1,3 @@ +{"1.0.1",[{"1.0",[ + {load_module, mem3_httpd} +]}],[{"1.0",[]}]}. 
-- cgit v1.2.3 From e660dd48e78f1a368010bbcf4455b576dd59d7c1 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Thu, 5 Aug 2010 13:41:53 -0400 Subject: n_val tests for mem3_util --- src/mem3_util.erl | 2 + test/01-config-default.ini | 2 + test/mem3_test.erl | 164 --------------------------------------------- test/mem3_util_test.erl | 140 ++++++++++++++++++++++++++++++++++++++ test/partitions_test.erl | 53 --------------- test/test_suite.erl | 9 --- 6 files changed, 144 insertions(+), 226 deletions(-) create mode 100644 test/01-config-default.ini delete mode 100644 test/mem3_test.erl create mode 100644 test/mem3_util_test.erl delete mode 100644 test/partitions_test.erl delete mode 100644 test/test_suite.erl diff --git a/src/mem3_util.erl b/src/mem3_util.erl index f6fad76a..2ed84db6 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -115,6 +115,8 @@ n_val(N, NodeCount) when is_list(N) -> n_val(N, NodeCount) when is_integer(NodeCount), N > NodeCount -> ?LOG_ERROR("Request to create N=~p DB but only ~p node(s)", [N, NodeCount]), NodeCount; +n_val(N, _) when N < 1 -> + 1; n_val(N, _) -> N. diff --git a/test/01-config-default.ini b/test/01-config-default.ini new file mode 100644 index 00000000..757f7830 --- /dev/null +++ b/test/01-config-default.ini @@ -0,0 +1,2 @@ +[cluster] +n=3 diff --git a/test/mem3_test.erl b/test/mem3_test.erl deleted file mode 100644 index 01d80eb0..00000000 --- a/test/mem3_test.erl +++ /dev/null @@ -1,164 +0,0 @@ --module(mem3_test). - --include("../include/common.hrl"). --include("../include/config.hrl"). --include_lib("eunit/include/eunit.hrl"). - -%% version 3 of membership state --record(mem, {header=3, - nodes=[], - clock=[], - args - }). - --define(TEST_NODE_NAME, a). --define(HINT_C1, 365375409332725729550921208179070754913983135744). --define(HINT_C2, 1096126227998177188652763624537212264741949407232). 
--define(PARTS_FOR_D1, [365375409332725729550921208179070754913983135744, - 548063113999088594326381812268606132370974703616, - 730750818665451459101842416358141509827966271488, - 913438523331814323877303020447676887284957839360, - 1096126227998177188652763624537212264741949407232, - 1278813932664540053428224228626747642198940975104]). --define(x40, 365375409332725729550921208179070754913983135744). --define(x60, 548063113999088594326381812268606132370974703616). - -%% TEST SETUP - -all_tests_test_() -> - {"membership3 tests", - [ - {setup, - fun test_setup/0, - fun test_teardown/1, - fun(Pid) -> - {with, Pid, - [ - fun init/1, - fun clock/1, - fun join_init/1, - fun join_init_with_hints/1, - fun join_new_node/1, - fun join_two_new_nodes/1, - fun join_with_wrong_order/1 - ]} - end} - ] - }. - - -test_setup() -> - % Config = #config{n=3,r=2,w=2,q=3,directory="/srv/db", - % storage_mod="dynomite_couch_storage"}, - {ok, Pid} = mem3:start_link([{test,?TEST_NODE_NAME}]), - Pid. - - -test_teardown(Pid) -> - exit(Pid, shutdown). - - -%% TESTS - -init(_Pid) -> - {ok, #mem{args=Args}} = mem3:state(), - Test = proplists:get_value(test, Args), - ?assertEqual(?TEST_NODE_NAME, Test). - - -clock(_Pid) -> - {ok, Clock} = mem3:clock(), - ?assertMatch([], Clock). - - -join_init(_Pid) -> - mem3:reset(), - mem3:join(init, [{1, a, []}, {2, b, []}], nil), - {ok, Nodes} = mem3:nodes(), - ?assertEqual(2, length(Nodes)), - ok. - - -join_init_with_hints(_Pid) -> - mem3:reset(), - mem3:join(init, [{1, a, []}, - {2, b, []}, - {3, c, [{hints, [?HINT_C1, ?HINT_C2]}]}, - {4, d, []}, - {5, e, []}], - nil), - {ok, Nodes} = mem3:nodes(), - ?assertEqual(5, length(Nodes)), - %?debugFmt("~nFullmap: ~p~n", [Fullmap]), -% ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C1)), -% ?assertEqual([c,d,e], mem3:nodes_for_part(?HINT_C2)), - ok. 
- - -join_new_node(_Pid) -> - mem3:reset(), - mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, []}], nil), - {ok, Nodes1} = mem3:nodes(), - ?assertEqual(3, length(Nodes1)), - mem3:join(join, [{4, d, []}], a), - {ok, Nodes2} = mem3:nodes(), - ?assertEqual(4, length(Nodes2)), - ok. - - -join_two_new_nodes(_Pid) -> - mem3:reset(), - mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, []}], nil), - {ok, Nodes1} = mem3:nodes(), - ?assertEqual(3, length(Nodes1)), - Res = mem3:join(join, [{4, d, []}, {5, e, []}], b), - ?assertEqual(ok, Res), - {ok, Nodes2} = mem3:nodes(), - ?assertEqual(5, length(Nodes2)), - %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), - ok. - - -join_with_wrong_order(_Pid) -> - mem3:reset(), - mem3:join(init, [{1, a, []}, {2, b, []}, {3, c, []}], nil), -% ?assertEqual([], mem3:parts_for_node(d)), - %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), - Res = mem3:join(join, [{3, d, []}], c), - ?assertEqual({error, <<"position_exists_3">>}, Res), - %?debugFmt("~nFullmap: ~p~n", [mem3:fullmap()]), - ok. - - -%% -%% tests without running gen_server -%% -merge_nodes_test() -> - A = [{1,a1,[]},{2,a2,[]},{3,a3,[]}], - B = [{1,a1,[]},{2,a2,[]},{3,b3,[]}], - ?assertEqual(A, mem3:merge_nodes(A,B)), - ?assertEqual(mem3:merge_nodes(A,B), mem3:merge_nodes(B,A)), - C = [{1,c1,[]},{2,c2,[]},{3,c3,[]}], - ?assertEqual(A, mem3:merge_nodes(A,C)), - ?assertEqual(A, mem3:merge_nodes(C,A)), - ok. - - -merge_nodes_with_init_nodelist_test() -> - A = [{1,a1,[]},{2,a2,[]},{3,a3,[]}], - B = [{0, b, []}], - ?assertEqual(A, mem3:merge_nodes(A,B)), - ?assertEqual(mem3:merge_nodes(A,B), mem3:merge_nodes(B,A)), - ok. 
- - -next_up_nodes_test() -> - Nodes = [a,b,c,d], - UpNodes = [a,b,d], - ?assertEqual(b, mem3:next_up_node(a,Nodes,UpNodes)), - ?assertEqual(d, mem3:next_up_node(b,Nodes,UpNodes)), - ?assertEqual(a, mem3:next_up_node(d,Nodes,UpNodes)), - ?assertThrow({error, no_gossip_targets_available}, - mem3:next_up_node(a,[a,b,c],[])), - ?assertEqual(b, mem3:next_up_node(a,[a,b],[a,b])), - ok. diff --git a/test/mem3_util_test.erl b/test/mem3_util_test.erl new file mode 100644 index 00000000..0f6d24be --- /dev/null +++ b/test/mem3_util_test.erl @@ -0,0 +1,140 @@ +-module(mem3_util_test). + +-include("mem3.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +hash_test() -> + ?assertEqual(1624516141,mem3_util:hash(0)), + ?assertEqual(3816901808,mem3_util:hash("0")), + ?assertEqual(3523407757,mem3_util:hash(<<0>>)), + ?assertEqual(4108050209,mem3_util:hash(<<"0">>)), + ?assertEqual(3094724072,mem3_util:hash(zero)), + ok. + +name_shard_test() -> + Shard1 = #shard{}, + ?assertError(function_clause, mem3_util:name_shard(Shard1)), + + Shard2 = #shard{dbname = <<"testdb">>, range = [0,100]}, + #shard{name=Name2} = mem3_util:name_shard(Shard2), + ?assertEqual(<<"shards/00000000-00000064/testdb">>, Name2), + + ok. + +create_partition_map_test() -> + {DbName1, N1, Q1, Nodes1} = {<<"testdb1">>, 3, 4, [a,b,c,d]}, + Map1 = mem3_util:create_partition_map(DbName1, N1, Q1, Nodes1), + ?assertEqual(12, length(Map1)), + + {DbName2, N2, Q2, Nodes2} = {<<"testdb2">>, 1, 1, [a,b,c,d]}, + [#shard{name=Name2,node=Node2}] = Map2 = + mem3_util:create_partition_map(DbName2, N2, Q2, Nodes2), + ?assertEqual(1, length(Map2)), + ?assertEqual(<<"shards/00000000-ffffffff/testdb2">>, Name2), + ?assertEqual(a, Node2), + ok. 
+ +build_shards_test() -> + DocProps1 = + [{<<"changelog">>, + [[<<"add">>,<<"00000000-1fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"20000000-3fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"40000000-5fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"60000000-7fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"80000000-9fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"a0000000-bfffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"c0000000-dfffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"e0000000-ffffffff">>, + <<"dbcore@node.local">>]]}, + {<<"by_node">>, + {[{<<"dbcore@node.local">>, + [<<"00000000-1fffffff">>,<<"20000000-3fffffff">>, + <<"40000000-5fffffff">>,<<"60000000-7fffffff">>, + <<"80000000-9fffffff">>,<<"a0000000-bfffffff">>, + <<"c0000000-dfffffff">>,<<"e0000000-ffffffff">>]}]}}, + {<<"by_range">>, + {[{<<"00000000-1fffffff">>,[<<"dbcore@node.local">>]}, + {<<"20000000-3fffffff">>,[<<"dbcore@node.local">>]}, + {<<"40000000-5fffffff">>,[<<"dbcore@node.local">>]}, + {<<"60000000-7fffffff">>,[<<"dbcore@node.local">>]}, + {<<"80000000-9fffffff">>,[<<"dbcore@node.local">>]}, + {<<"a0000000-bfffffff">>,[<<"dbcore@node.local">>]}, + {<<"c0000000-dfffffff">>,[<<"dbcore@node.local">>]}, + {<<"e0000000-ffffffff">>,[<<"dbcore@node.local">>]}]}}], + Shards1 = mem3_util:build_shards(<<"testdb1">>, DocProps1), + ExpectedShards1 = + [{shard,<<"shards/00000000-1fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [0,536870911], + undefined}, + {shard,<<"shards/20000000-3fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [536870912,1073741823], + undefined}, + {shard,<<"shards/40000000-5fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [1073741824,1610612735], + undefined}, + {shard,<<"shards/60000000-7fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [1610612736,2147483647], + undefined}, + {shard,<<"shards/80000000-9fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + 
[2147483648,2684354559], + undefined}, + {shard,<<"shards/a0000000-bfffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [2684354560,3221225471], + undefined}, + {shard,<<"shards/c0000000-dfffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [3221225472,3758096383], + undefined}, + {shard,<<"shards/e0000000-ffffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [3758096384,4294967295], + undefined}], + ?assertEqual(ExpectedShards1, Shards1), + ok. + + +%% n_val tests + +nval_test() -> + ?assertEqual(2, mem3_util:n_val(2,4)), + ?assertEqual(1, mem3_util:n_val(-1,4)), + ?assertEqual(4, mem3_util:n_val(6,4)), + ok. + +config_01_setup() -> + Ini = filename:join([code:lib_dir(mem3, test), "01-config-default.ini"]), + {ok, Pid} = couch_config:start_link([Ini]), + Pid. + +config_teardown(_Pid) -> + couch_config:stop(). + +n_val_test_() -> + {"n_val tests", + [ + {setup, + fun config_01_setup/0, + fun config_teardown/1, + fun(Pid) -> + {with, Pid, [ + fun n_val_1/1 + ]} + end} + ] + }. + +n_val_1(_Pid) -> + ?assertEqual(3, mem3_util:n_val(undefined, 4)). diff --git a/test/partitions_test.erl b/test/partitions_test.erl deleted file mode 100644 index 834719b7..00000000 --- a/test/partitions_test.erl +++ /dev/null @@ -1,53 +0,0 @@ --module(partitions_test). --author('brad@cloudant.com'). - --include("../include/config.hrl"). --include("../include/common.hrl"). --include_lib("eunit/include/eunit.hrl"). - --define(FOUR_NODES, [a,b,c,d]). --define(Map1, [{d,0}, - {a,292300327466180583640736966543256603931186508596}, - {b,584600654932361167281473933086513207862373017192}, - {c,876900982398541750922210899629769811793559525788}, - {d,1169201309864722334562947866173026415724746034384}]). 
--define(Map2, [{c,0}, - {d,182687704666362864775460604089535377456991567873}, - {a,365375409332725729550921208179070754913983135746}, - {b,548063113999088594326381812268606132370974703619}, - {c,730750818665451459101842416358141509827966271492}, - {d,913438523331814323877303020447676887284957839365}, - {a,1096126227998177188652763624537212264741949407238}, - {b,1278813932664540053428224228626747642198940975111}]). --define(Map3, [{d,0}, - {c,0}, - {a,365375409332725729550921208179070754913983135745}, - {d,365375409332725729550921208179070754913983135745}, - {b,730750818665451459101842416358141509827966271490}, - {a,730750818665451459101842416358141509827966271490}, - {c,1096126227998177188652763624537212264741949407235}, - {b,1096126227998177188652763624537212264741949407235}]). - -%%==================================================================== -%% Tests -%%==================================================================== - -fullmap_n1_test() -> - Map1 = partitions:fullmap(<<"test">>, ?FOUR_NODES, opts(1,5)), - ?assertEqual(?Map1, Map1), - Map2 = partitions:fullmap(<<"boorad">>, ?FOUR_NODES, opts(1,8)), - ?assertEqual(?Map2, Map2), - ok. - -fullmap_Ngt1_test() -> - Map3 = partitions:fullmap(<<"boorad">>, ?FOUR_NODES, opts(2,4)), - ?assertEqual(?Map3, Map3), - ok. - - -%%==================================================================== -%% Internal functions -%%==================================================================== - -opts(N,Q) -> - [{n,integer_to_list(N)},{q,integer_to_list(Q)}]. diff --git a/test/test_suite.erl b/test/test_suite.erl deleted file mode 100644 index 22416c5d..00000000 --- a/test/test_suite.erl +++ /dev/null @@ -1,9 +0,0 @@ --module(test_suite). - --include_lib("eunit/include/eunit.hrl"). - -all_test_() -> - [ - {module, mem3_test}, - {module, partitions_test} - ]. -- cgit v1.2.3