From d245eb8c02bb5003a1e391748cb0e718dd582b4f Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Sun, 9 Aug 2009 14:59:09 +0000 Subject: next piece of new replicator -- missing_revs buffer git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@802548 13f79535-47bb-0310-9956-ffa450edef68 --- src/couchdb/Makefile.am | 2 + src/couchdb/couch_rep_missing_revs.erl | 180 +++++++++++++++++++++++++++++ test/etap/112-replication-missing-revs.t | 188 +++++++++++++++++++++++++++++++ 3 files changed, 370 insertions(+) create mode 100644 src/couchdb/couch_rep_missing_revs.erl create mode 100644 test/etap/112-replication-missing-revs.t diff --git a/src/couchdb/Makefile.am b/src/couchdb/Makefile.am index c3806310..f32497fa 100644 --- a/src/couchdb/Makefile.am +++ b/src/couchdb/Makefile.am @@ -76,6 +76,7 @@ source_files = \ couch_rep.erl \ couch_rep_changes_feed.erl \ couch_rep_httpc.erl \ + couch_rep_missing_revs.erl \ couch_rep_sup.erl \ couch_server.erl \ couch_server_sup.erl \ @@ -123,6 +124,7 @@ compiled_files = \ couch_ref_counter.beam \ couch_rep.beam \ couch_rep_changes_feed.beam \ + couch_rep_missing_revs.beam \ couch_rep_httpc.beam \ couch_rep_sup.beam \ couch_server.beam \ diff --git a/src/couchdb/couch_rep_missing_revs.erl b/src/couchdb/couch_rep_missing_revs.erl new file mode 100644 index 00000000..bd7cda66 --- /dev/null +++ b/src/couchdb/couch_rep_missing_revs.erl @@ -0,0 +1,180 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_missing_revs). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1, stop/1]). + +-define(BUFFER_SIZE, 1000). + +-include("couch_db.hrl"). + +-record (state, { + changes_loop, + changes_from = nil, + target, + parent, + complete = false, + count = 0, + reply_to = nil, + rows = queue:new(), + high_source_seq = 0, + high_missing_seq = 0, + high_committed_seq = 0 +}). + +start_link(Parent, Target, ChangesFeed, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Target, ChangesFeed, PostProps], []). + +next(Server) -> + gen_server:call(Server, next_missing_revs, infinity). + +stop(Server) -> + gen_server:call(Server, stop). + +init([Parent, Target, ChangesFeed, _PostProps]) -> + process_flag(trap_exit, true), + Self = self(), + Pid = spawn_link(fun() -> changes_loop(Self, ChangesFeed, Target) end), + {ok, #state{changes_loop=Pid, target=Target, parent=Parent}}. + +handle_call({add_missing_revs, {HighSeq, Revs}}, From, State) -> + handle_add_missing_revs(HighSeq, Revs, From, State); + +handle_call(next_missing_revs, From, State) -> + handle_next_missing_revs(From, State); + +handle_call({update_committed_seq, N}, _From, State) -> + if State#state.high_committed_seq < N -> + ?LOG_DEBUG("missing_revs updating committed seq to ~p", [N]); + true -> ok end, + {reply, ok, State#state{high_committed_seq=N}}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'EXIT', Pid, Reason}, #state{changes_loop=Pid} = State) -> + handle_changes_loop_exit(Reason, State); + +handle_info(Msg, State) -> + ?LOG_INFO("unexpected message ~p", [Msg]), + {noreply, State}. + +terminate(_Reason, #state{changes_loop=Pid}) when is_pid(Pid) -> + exit(Pid, shutdown), + ok; +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_missing_revs(HighSeq, [], _From, State) -> + maybe_checkpoint(State), + {reply, ok, State#state{high_source_seq=HighSeq}}; +handle_add_missing_revs(HighSeq, Revs, From, #state{reply_to=nil} = State) -> + #state{rows=Rows, count=Count} = State, + NewState = State#state{ + rows = queue:join(Rows, queue:from_list(Revs)), + count = Count + length(Revs), + high_source_seq = HighSeq, + high_missing_seq = HighSeq + }, + if NewState#state.count < ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{changes_from=From}} + end; +handle_add_missing_revs(HighSeq, Revs, _From, #state{count=0} = State) -> + gen_server:reply(State#state.reply_to, {HighSeq, Revs}), + NewState = State#state{ + high_source_seq = HighSeq, + high_missing_seq = HighSeq, + reply_to = nil + }, + {reply, ok, NewState}. + +handle_next_missing_revs(From, #state{count=0} = State) -> + if State#state.complete -> + {stop, normal, complete, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_missing_revs(_From, State) -> + #state{ + changes_from = ChangesFrom, + high_missing_seq = HighSeq, + rows = Rows + } = State, + if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end, + NewState = State#state{count=0, changes_from=nil, rows=queue:new()}, + {reply, {HighSeq, queue:to_list(Rows)}, NewState}. + +handle_changes_loop_exit(normal, State) -> + if State#state.reply_to =/= nil -> + gen_server:reply(State#state.reply_to, complete), + {stop, normal, State}; + true -> + {noreply, State#state{complete=true, changes_loop=nil}} + end; +handle_changes_loop_exit(Reason, State) -> + ?LOG_ERROR("changes_loop died with reason ~p", [Reason]), + {stop, changes_loop_died, State#state{changes_loop=nil}}. + +changes_loop(OurServer, SourceChangesServer, Target) -> + case couch_rep_changes_feed:next(SourceChangesServer) of + complete -> + exit(normal); + Changes -> + MissingRevs = get_missing_revs(Target, Changes), + gen_server:call(OurServer, {add_missing_revs, MissingRevs}, infinity) + end, + changes_loop(OurServer, SourceChangesServer, Target). + +get_missing_revs(#http_db{}=Target, Changes) -> + Transform = fun({[{<<"seq">>,_}, {<<"id">>,Id}, {<<"changes">>,C}]}) -> + {Id, [couch_doc:rev_to_str(R) || {[{<<"rev">>, R}]} <- C]} end, + IdRevsList = [Transform(Change) || Change <- Changes], + {[{<<"seq">>, HighSeq}, _, _]} = lists:last(Changes), + Request = Target#http_db{ + resource = "_missing_revs", + method = post, + body = {IdRevsList} + }, + {Resp} = couch_rep_httpc:request(Request), + {MissingRevs} = proplists:get_value(<<"missing_revs">>, Resp), + X = [{Id, couch_doc:parse_revs(RevStrs)} || {Id,RevStrs} <- MissingRevs], + {HighSeq, X}; + +get_missing_revs(Target, Changes) -> + Transform = fun({[{<<"seq">>,_}, {<<"id">>,Id}, {<<"changes">>,C}]}) -> + {Id, [R || {[{<<"rev">>, R}]} <- C]} end, + IdRevsList = [Transform(Change) || Change <- Changes], + {[{<<"seq">>, HighSeq}, _, _]} = lists:last(Changes), + {ok, Results} = couch_db:get_missing_revs(Target, IdRevsList), + {HighSeq, Results}. + +%% save a checkpoint if no revs are missing on target so we don't +%% rescan metadata unnecessarily +maybe_checkpoint(#state{high_missing_seq=N, high_committed_seq=N} = State) -> + #state{ + parent = Parent, + high_source_seq = SourceSeq + } = State, + Parent ! {missing_revs_checkpoint, SourceSeq}; +maybe_checkpoint(_State) -> + ok. diff --git a/test/etap/112-replication-missing-revs.t b/test/etap/112-replication-missing-revs.t new file mode 100644 index 00000000..aa4b152c --- /dev/null +++ b/test/etap/112-replication-missing-revs.t @@ -0,0 +1,188 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% XXX: Figure out how to -include("couch_db.hrl") + +-record(doc, {id= <<"">>, revs={0, []}, body={[]}, + attachments=[], deleted=false, meta=[]}). + +-record(http_db, { + url, + auth = [], + resource = "", + headers = [ + {"User-Agent", "CouchDb/"++couch_server:get_version()}, + {"Accept", "application/json"}, + {"Accept-Encoding", "gzip"} + ], + qs = [], + method = get, + body = nil, + options = [ + {response_format,binary}, + {inactivity_timeout, 30000} + ], + retries = 10, + pause = 1, + conn = nil +}). + +main(_) -> + code:add_pathz("src/couchdb"), + code:add_pathz("src/ibrowse"), + code:add_pathz("src/mochiweb"), + code:add_pathz("src/erlang-oauth"), + + etap:plan(12), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + couch_server:start( + ["etc/couchdb/default_dev.ini", "etc/couchdb/local_dev.ini"] + ), + ibrowse:start(), + crypto:start(), + + couch_server:delete(<<"etap-test-source">>, []), + couch_server:delete(<<"etap-test-target">>, []), + + Dbs1 = setup(), + test_all(local, local), + ok = teardown(Dbs1), + + Dbs2 = setup(), + test_all(local, remote), + ok = teardown(Dbs2), + + Dbs3 = setup(), + test_all(remote, local), + ok = teardown(Dbs3), + + Dbs4 = setup(), + test_all(remote, remote), + ok = teardown(Dbs4), + + ok. + +test_all(SrcType, TgtType) -> + test_unchanged_db(SrcType, TgtType), + test_multiple_changes(SrcType, TgtType), + test_changes_not_missing(SrcType, TgtType). + +test_unchanged_db(SrcType, TgtType) -> + {ok, Pid1} = start_changes_feed(SrcType, 0, false), + {ok, Pid2} = start_missing_revs(TgtType, Pid1), + etap:is( + couch_rep_missing_revs:next(Pid2), + complete, + io_lib:format( + "(~p, ~p) no missing revs if source is unchanged", + [SrcType, TgtType]) + ). + +test_multiple_changes(SrcType, TgtType) -> + Expect = {2, [generate_change(), generate_change()]}, + {ok, Pid1} = start_changes_feed(SrcType, 0, false), + {ok, Pid2} = start_missing_revs(TgtType, Pid1), + etap:is( + get_all_missing_revs(Pid2, {0, []}), + Expect, + io_lib:format("(~p, ~p) add src docs, get missing tgt revs + high seq", + [SrcType, TgtType]) + ). + +test_changes_not_missing(SrcType, TgtType) -> + %% put identical changes on source and target + Id = couch_util:new_uuid(), + {Id, [Rev]} = Expect = generate_change(Id, {[]}, get_db(source)), + {Id, [Rev]} = generate_change(Id, {[]}, get_db(target)), + + %% confirm that this change is not in missing revs feed + {ok, Pid1} = start_changes_feed(SrcType, 0, false), + {ok, Pid2} = start_missing_revs(TgtType, Pid1), + {HighSeq, AllRevs} = get_all_missing_revs(Pid2, {0, []}), + + %% etap:none/3 has a bug, so just define it correctly here + etap:is( + lists:member(Expect, AllRevs), + false, + io_lib:format( + "(~p, ~p) skip revs that already exist on target", + [SrcType, TgtType]) + ). + +generate_change() -> + generate_change(couch_util:new_uuid()). + +generate_change(Id) -> + generate_change(Id, {[]}). + +generate_change(Id, EJson) -> + generate_change(Id, EJson, get_db(source)). + +generate_change(Id, EJson, Db) -> + Doc = couch_doc:from_json_obj(EJson), + {ok, Rev} = couch_db:update_doc(Db, Doc#doc{id = Id}, [full_commit]), + couch_db:close(Db), + {Id, [Rev]}. + +get_all_missing_revs(Pid, {HighSeq, Revs}) -> + case couch_rep_missing_revs:next(Pid) of + complete -> + {HighSeq, lists:flatten(lists:reverse(Revs))}; + {Seq, More} -> + get_all_missing_revs(Pid, {Seq, [More|Revs]}) + end. + +get_db(source) -> + {ok, Db} = couch_db:open(<<"etap-test-source">>, []), + Db; +get_db(target) -> + {ok, Db} = couch_db:open(<<"etap-test-target">>, []), + Db. + +setup() -> + {ok, DbA} = couch_db:create(<<"etap-test-source">>, []), + {ok, DbB} = couch_db:create(<<"etap-test-target">>, []), + [DbA, DbB]. + +teardown([DbA, DbB]) -> + couch_db:close(DbA), + couch_db:close(DbB), + couch_server:delete(<<"etap-test-source">>, []), + couch_server:delete(<<"etap-test-target">>, []), + ok. + +start_changes_feed(local, Since, Continuous) -> + Props = [{<<"continuous">>, Continuous}], + couch_rep_changes_feed:start_link(self(), get_db(source), Since, Props); +start_changes_feed(remote, Since, Continuous) -> + Props = [{<<"continuous">>, Continuous}], + Db = #http_db{url = "http://127.0.0.1:5984/etap-test-source/"}, + couch_rep_changes_feed:start_link(self(), Db, Since, Props). + +start_missing_revs(local, Changes) -> + couch_rep_missing_revs:start_link(self(), get_db(target), Changes, []); +start_missing_revs(remote, Changes) -> + Db = #http_db{url = "http://127.0.0.1:5984/etap-test-target/"}, + couch_rep_missing_revs:start_link(self(), Db, Changes, []). + \ No newline at end of file -- cgit v1.2.3