summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Kocoloski <kocolosk@apache.org>2009-08-09 14:59:09 +0000
committerAdam Kocoloski <kocolosk@apache.org>2009-08-09 14:59:09 +0000
commitd245eb8c02bb5003a1e391748cb0e718dd582b4f (patch)
treea7d229174f368594fbc2e1b4ca095dd10999c7b1
parentf708c1a13b4eb267c17db5866af0405b03c4e6ec (diff)
next piece of new replicator -- missing_revs buffer
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@802548 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/couchdb/Makefile.am2
-rw-r--r--src/couchdb/couch_rep_missing_revs.erl180
-rw-r--r--test/etap/112-replication-missing-revs.t188
3 files changed, 370 insertions, 0 deletions
diff --git a/src/couchdb/Makefile.am b/src/couchdb/Makefile.am
index c3806310..f32497fa 100644
--- a/src/couchdb/Makefile.am
+++ b/src/couchdb/Makefile.am
@@ -76,6 +76,7 @@ source_files = \
couch_rep.erl \
couch_rep_changes_feed.erl \
couch_rep_httpc.erl \
+ couch_rep_missing_revs.erl \
couch_rep_sup.erl \
couch_server.erl \
couch_server_sup.erl \
@@ -123,6 +124,7 @@ compiled_files = \
couch_ref_counter.beam \
couch_rep.beam \
couch_rep_changes_feed.beam \
+ couch_rep_missing_revs.beam \
couch_rep_httpc.beam \
couch_rep_sup.beam \
couch_server.beam \
diff --git a/src/couchdb/couch_rep_missing_revs.erl b/src/couchdb/couch_rep_missing_revs.erl
new file mode 100644
index 00000000..bd7cda66
--- /dev/null
+++ b/src/couchdb/couch_rep_missing_revs.erl
@@ -0,0 +1,180 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_rep_missing_revs).
+-behaviour(gen_server).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-export([start_link/4, next/1, stop/1]).
+
+-define(BUFFER_SIZE, 1000).
+
+-include("couch_db.hrl").
+
+-record (state, {
+ changes_loop,
+ changes_from = nil,
+ target,
+ parent,
+ complete = false,
+ count = 0,
+ reply_to = nil,
+ rows = queue:new(),
+ high_source_seq = 0,
+ high_missing_seq = 0,
+ high_committed_seq = 0
+}).
+
+start_link(Parent, Target, ChangesFeed, PostProps) ->
+ gen_server:start_link(?MODULE, [Parent, Target, ChangesFeed, PostProps], []).
+
+next(Server) ->
+ gen_server:call(Server, next_missing_revs, infinity).
+
+stop(Server) ->
+ gen_server:call(Server, stop).
+
+init([Parent, Target, ChangesFeed, _PostProps]) ->
+ process_flag(trap_exit, true),
+ Self = self(),
+ Pid = spawn_link(fun() -> changes_loop(Self, ChangesFeed, Target) end),
+ {ok, #state{changes_loop=Pid, target=Target, parent=Parent}}.
+
+handle_call({add_missing_revs, {HighSeq, Revs}}, From, State) ->
+ handle_add_missing_revs(HighSeq, Revs, From, State);
+
+handle_call(next_missing_revs, From, State) ->
+ handle_next_missing_revs(From, State);
+
+handle_call({update_committed_seq, N}, _From, State) ->
+ if State#state.high_committed_seq < N ->
+ ?LOG_DEBUG("missing_revs updating committed seq to ~p", [N]);
+ true -> ok end,
+ {reply, ok, State#state{high_committed_seq=N}}.
+
+handle_cast(_Msg, State) ->
+ {noreply, State}.
+
+handle_info({'EXIT', Pid, Reason}, #state{changes_loop=Pid} = State) ->
+ handle_changes_loop_exit(Reason, State);
+
+handle_info(Msg, State) ->
+ ?LOG_INFO("unexpected message ~p", [Msg]),
+ {noreply, State}.
+
+terminate(_Reason, #state{changes_loop=Pid}) when is_pid(Pid) ->
+ exit(Pid, shutdown),
+ ok;
+terminate(_Reason, _State) ->
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%internal funs
+
+handle_add_missing_revs(HighSeq, [], _From, State) ->
+ maybe_checkpoint(State),
+ {reply, ok, State#state{high_source_seq=HighSeq}};
+handle_add_missing_revs(HighSeq, Revs, From, #state{reply_to=nil} = State) ->
+ #state{rows=Rows, count=Count} = State,
+ NewState = State#state{
+ rows = queue:join(Rows, queue:from_list(Revs)),
+ count = Count + length(Revs),
+ high_source_seq = HighSeq,
+ high_missing_seq = HighSeq
+ },
+ if NewState#state.count < ?BUFFER_SIZE ->
+ {reply, ok, NewState};
+ true ->
+ {noreply, NewState#state{changes_from=From}}
+ end;
+handle_add_missing_revs(HighSeq, Revs, _From, #state{count=0} = State) ->
+ gen_server:reply(State#state.reply_to, {HighSeq, Revs}),
+ NewState = State#state{
+ high_source_seq = HighSeq,
+ high_missing_seq = HighSeq,
+ reply_to = nil
+ },
+ {reply, ok, NewState}.
+
+handle_next_missing_revs(From, #state{count=0} = State) ->
+ if State#state.complete ->
+ {stop, normal, complete, State};
+ true ->
+ {noreply, State#state{reply_to=From}}
+ end;
+handle_next_missing_revs(_From, State) ->
+ #state{
+ changes_from = ChangesFrom,
+ high_missing_seq = HighSeq,
+ rows = Rows
+ } = State,
+ if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end,
+ NewState = State#state{count=0, changes_from=nil, rows=queue:new()},
+ {reply, {HighSeq, queue:to_list(Rows)}, NewState}.
+
+handle_changes_loop_exit(normal, State) ->
+ if State#state.reply_to =/= nil ->
+ gen_server:reply(State#state.reply_to, complete),
+ {stop, normal, State};
+ true ->
+ {noreply, State#state{complete=true, changes_loop=nil}}
+ end;
+handle_changes_loop_exit(Reason, State) ->
+ ?LOG_ERROR("changes_loop died with reason ~p", [Reason]),
+ {stop, changes_loop_died, State#state{changes_loop=nil}}.
+
+changes_loop(OurServer, SourceChangesServer, Target) ->
+ case couch_rep_changes_feed:next(SourceChangesServer) of
+ complete ->
+ exit(normal);
+ Changes ->
+ MissingRevs = get_missing_revs(Target, Changes),
+ gen_server:call(OurServer, {add_missing_revs, MissingRevs}, infinity)
+ end,
+ changes_loop(OurServer, SourceChangesServer, Target).
+
+get_missing_revs(#http_db{}=Target, Changes) ->
+ Transform = fun({[{<<"seq">>,_}, {<<"id">>,Id}, {<<"changes">>,C}]}) ->
+ {Id, [couch_doc:rev_to_str(R) || {[{<<"rev">>, R}]} <- C]} end,
+ IdRevsList = [Transform(Change) || Change <- Changes],
+ {[{<<"seq">>, HighSeq}, _, _]} = lists:last(Changes),
+ Request = Target#http_db{
+ resource = "_missing_revs",
+ method = post,
+ body = {IdRevsList}
+ },
+ {Resp} = couch_rep_httpc:request(Request),
+ {MissingRevs} = proplists:get_value(<<"missing_revs">>, Resp),
+ X = [{Id, couch_doc:parse_revs(RevStrs)} || {Id,RevStrs} <- MissingRevs],
+ {HighSeq, X};
+
+get_missing_revs(Target, Changes) ->
+ Transform = fun({[{<<"seq">>,_}, {<<"id">>,Id}, {<<"changes">>,C}]}) ->
+ {Id, [R || {[{<<"rev">>, R}]} <- C]} end,
+ IdRevsList = [Transform(Change) || Change <- Changes],
+ {[{<<"seq">>, HighSeq}, _, _]} = lists:last(Changes),
+ {ok, Results} = couch_db:get_missing_revs(Target, IdRevsList),
+ {HighSeq, Results}.
+
+%% save a checkpoint if no revs are missing on target so we don't
+%% rescan metadata unnecessarily
+maybe_checkpoint(#state{high_missing_seq=N, high_committed_seq=N} = State) ->
+ #state{
+ parent = Parent,
+ high_source_seq = SourceSeq
+ } = State,
+ Parent ! {missing_revs_checkpoint, SourceSeq};
+maybe_checkpoint(_State) ->
+ ok.
diff --git a/test/etap/112-replication-missing-revs.t b/test/etap/112-replication-missing-revs.t
new file mode 100644
index 00000000..aa4b152c
--- /dev/null
+++ b/test/etap/112-replication-missing-revs.t
@@ -0,0 +1,188 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+%% XXX: Figure out how to -include("couch_db.hrl")
+
+-record(doc, {id= <<"">>, revs={0, []}, body={[]},
+ attachments=[], deleted=false, meta=[]}).
+
+-record(http_db, {
+ url,
+ auth = [],
+ resource = "",
+ headers = [
+ {"User-Agent", "CouchDb/"++couch_server:get_version()},
+ {"Accept", "application/json"},
+ {"Accept-Encoding", "gzip"}
+ ],
+ qs = [],
+ method = get,
+ body = nil,
+ options = [
+ {response_format,binary},
+ {inactivity_timeout, 30000}
+ ],
+ retries = 10,
+ pause = 1,
+ conn = nil
+}).
+
+main(_) ->
+ code:add_pathz("src/couchdb"),
+ code:add_pathz("src/ibrowse"),
+ code:add_pathz("src/mochiweb"),
+ code:add_pathz("src/erlang-oauth"),
+
+ etap:plan(12),
+ case (catch test()) of
+ ok ->
+ etap:end_tests();
+ Other ->
+ etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
+ etap:bail(Other)
+ end,
+ ok.
+
+test() ->
+ couch_server:start(
+ ["etc/couchdb/default_dev.ini", "etc/couchdb/local_dev.ini"]
+ ),
+ ibrowse:start(),
+ crypto:start(),
+
+ couch_server:delete(<<"etap-test-source">>, []),
+ couch_server:delete(<<"etap-test-target">>, []),
+
+ Dbs1 = setup(),
+ test_all(local, local),
+ ok = teardown(Dbs1),
+
+ Dbs2 = setup(),
+ test_all(local, remote),
+ ok = teardown(Dbs2),
+
+ Dbs3 = setup(),
+ test_all(remote, local),
+ ok = teardown(Dbs3),
+
+ Dbs4 = setup(),
+ test_all(remote, remote),
+ ok = teardown(Dbs4),
+
+ ok.
+
+test_all(SrcType, TgtType) ->
+ test_unchanged_db(SrcType, TgtType),
+ test_multiple_changes(SrcType, TgtType),
+ test_changes_not_missing(SrcType, TgtType).
+
+test_unchanged_db(SrcType, TgtType) ->
+ {ok, Pid1} = start_changes_feed(SrcType, 0, false),
+ {ok, Pid2} = start_missing_revs(TgtType, Pid1),
+ etap:is(
+ couch_rep_missing_revs:next(Pid2),
+ complete,
+ io_lib:format(
+ "(~p, ~p) no missing revs if source is unchanged",
+ [SrcType, TgtType])
+ ).
+
+test_multiple_changes(SrcType, TgtType) ->
+ Expect = {2, [generate_change(), generate_change()]},
+ {ok, Pid1} = start_changes_feed(SrcType, 0, false),
+ {ok, Pid2} = start_missing_revs(TgtType, Pid1),
+ etap:is(
+ get_all_missing_revs(Pid2, {0, []}),
+ Expect,
+ io_lib:format("(~p, ~p) add src docs, get missing tgt revs + high seq",
+ [SrcType, TgtType])
+ ).
+
+test_changes_not_missing(SrcType, TgtType) ->
+ %% put identical changes on source and target
+ Id = couch_util:new_uuid(),
+ {Id, [Rev]} = Expect = generate_change(Id, {[]}, get_db(source)),
+ {Id, [Rev]} = generate_change(Id, {[]}, get_db(target)),
+
+ %% confirm that this change is not in missing revs feed
+ {ok, Pid1} = start_changes_feed(SrcType, 0, false),
+ {ok, Pid2} = start_missing_revs(TgtType, Pid1),
+ {HighSeq, AllRevs} = get_all_missing_revs(Pid2, {0, []}),
+
+ %% etap:none/3 has a bug, so just define it correctly here
+ etap:is(
+ lists:member(Expect, AllRevs),
+ false,
+ io_lib:format(
+ "(~p, ~p) skip revs that already exist on target",
+ [SrcType, TgtType])
+ ).
+
+generate_change() ->
+ generate_change(couch_util:new_uuid()).
+
+generate_change(Id) ->
+ generate_change(Id, {[]}).
+
+generate_change(Id, EJson) ->
+ generate_change(Id, EJson, get_db(source)).
+
+generate_change(Id, EJson, Db) ->
+ Doc = couch_doc:from_json_obj(EJson),
+ {ok, Rev} = couch_db:update_doc(Db, Doc#doc{id = Id}, [full_commit]),
+ couch_db:close(Db),
+ {Id, [Rev]}.
+
+get_all_missing_revs(Pid, {HighSeq, Revs}) ->
+ case couch_rep_missing_revs:next(Pid) of
+ complete ->
+ {HighSeq, lists:flatten(lists:reverse(Revs))};
+ {Seq, More} ->
+ get_all_missing_revs(Pid, {Seq, [More|Revs]})
+ end.
+
+get_db(source) ->
+ {ok, Db} = couch_db:open(<<"etap-test-source">>, []),
+ Db;
+get_db(target) ->
+ {ok, Db} = couch_db:open(<<"etap-test-target">>, []),
+ Db.
+
+setup() ->
+ {ok, DbA} = couch_db:create(<<"etap-test-source">>, []),
+ {ok, DbB} = couch_db:create(<<"etap-test-target">>, []),
+ [DbA, DbB].
+
+teardown([DbA, DbB]) ->
+ couch_db:close(DbA),
+ couch_db:close(DbB),
+ couch_server:delete(<<"etap-test-source">>, []),
+ couch_server:delete(<<"etap-test-target">>, []),
+ ok.
+
+start_changes_feed(local, Since, Continuous) ->
+ Props = [{<<"continuous">>, Continuous}],
+ couch_rep_changes_feed:start_link(self(), get_db(source), Since, Props);
+start_changes_feed(remote, Since, Continuous) ->
+ Props = [{<<"continuous">>, Continuous}],
+ Db = #http_db{url = "http://127.0.0.1:5984/etap-test-source/"},
+ couch_rep_changes_feed:start_link(self(), Db, Since, Props).
+
+start_missing_revs(local, Changes) ->
+ couch_rep_missing_revs:start_link(self(), get_db(target), Changes, []);
+start_missing_revs(remote, Changes) ->
+ Db = #http_db{url = "http://127.0.0.1:5984/etap-test-target/"},
+ couch_rep_missing_revs:start_link(self(), Db, Changes, []).
+ \ No newline at end of file