diff options
author | Robert Newson <rnewson@apache.org> | 2011-05-17 11:15:14 +0000 |
---|---|---|
committer | Robert Newson <rnewson@apache.org> | 2011-05-17 11:15:14 +0000 |
commit | e8e4b0d293021fe90326a85828f3cfb087bf18b7 (patch) | |
tree | 986f544eac623ec23b769b36828894f93a173aa3 /1.1.x/src/couchdb/couch_rep_changes_feed.erl | |
parent | da6a5322b0b8084f434752060caa8be214c6f4fa (diff) |
tagging 1.1.0
git-svn-id: https://svn.apache.org/repos/asf/couchdb/tags/1.1.0@1104149 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to '1.1.x/src/couchdb/couch_rep_changes_feed.erl')
-rw-r--r-- | 1.1.x/src/couchdb/couch_rep_changes_feed.erl | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/1.1.x/src/couchdb/couch_rep_changes_feed.erl b/1.1.x/src/couchdb/couch_rep_changes_feed.erl new file mode 100644 index 00000000..1c298937 --- /dev/null +++ b/1.1.x/src/couchdb/couch_rep_changes_feed.erl @@ -0,0 +1,503 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_changes_feed). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1, stop/1]). + +-define(BUFFER_SIZE, 1000). +-define(DOC_IDS_FILTER_NAME, "_doc_ids"). + +-include("couch_db.hrl"). +-include("../ibrowse/ibrowse.hrl"). + +-record (state, { + changes_from = nil, + changes_loop = nil, + init_args, + last_seq, + conn = nil, + reqid = nil, + complete = false, + count = 0, + partial_chunk = <<>>, + reply_to = nil, + rows = queue:new(), + doc_ids = nil +}). + +-import(couch_util, [ + get_value/2, + get_value/3 +]). + +start_link(Parent, Source, StartSeq, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Source, StartSeq, PostProps], []). + +next(Server) -> + gen_server:call(Server, next_changes, infinity). + +stop(Server) -> + catch gen_server:call(Server, stop), + ok. + +init([Parent, #http_db{headers = Headers0} = Source, Since, PostProps]) -> + process_flag(trap_exit, true), + Feed = case get_value(<<"continuous">>, PostProps, false) of + false -> + normal; + true -> + continuous + end, + BaseQS = [ + {"style", all_docs}, + {"heartbeat", 10000}, + {"since", Since}, + {"feed", Feed} + ], + {QS, Method, Body, Headers} = case get_value(<<"doc_ids">>, PostProps) of + undefined -> + {maybe_add_filter_qs_params(PostProps, BaseQS), get, nil, Headers0}; + DocIds when is_list(DocIds) -> + Headers1 = [{"Content-Type", "application/json"} | Headers0], + QS1 = [{"filter", ?l2b(?DOC_IDS_FILTER_NAME)} | BaseQS], + {QS1, post, {[{<<"doc_ids">>, DocIds}]}, Headers1} + end, + Pid = couch_rep_httpc:spawn_link_worker_process(Source), + Req = Source#http_db{ + method = Method, + body = Body, + resource = "_changes", + qs = QS, + conn = Pid, + options = [{stream_to, {self(), once}}] ++ + lists:keydelete(inactivity_timeout, 1, Source#http_db.options), + headers = Headers -- [{"Accept-Encoding", "gzip"}] + }, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req), + Args = [Parent, Req, Since, PostProps], + State = #state{ + conn = Pid, + last_seq = Since, + reqid = ReqId, + init_args = Args, + doc_ids = get_value(<<"doc_ids">>, PostProps, nil) + }, + + receive + {ibrowse_async_headers, ReqId, "200", _} -> + ibrowse:stream_next(ReqId), + {ok, State}; + {ibrowse_async_headers, ReqId, Code, Hdrs} + when Code =:= "301"; Code =:= "302"; Code =:= "303" -> + {ReqId2, Req2} = redirect_req(Req, Code, Hdrs), + receive + {ibrowse_async_headers, ReqId2, "200", _} -> + {ok, State#state{ + conn = Req2#http_db.conn, + reqid = ReqId2, + init_args = [Parent, Req2, Since, PostProps]}}; + {ibrowse_async_headers, ReqId2, "405", _} when Method =:= post -> + {ReqId3, Req3} = req_no_builtin_doc_ids(Req2, ReqId2), + receive + {ibrowse_async_headers, ReqId3, "200", _} -> + {ok, State#state{ + conn = Req3#http_db.conn, + reqid = ReqId3, + init_args = [Parent, Req3, Since, PostProps]}} + after 30000 -> + {stop, changes_timeout} + end + after 30000 -> + {stop, changes_timeout} + end; + {ibrowse_async_headers, ReqId, "404", _} -> + stop_link_worker(Pid), + ?LOG_INFO("source doesn't have _changes, trying _all_docs_by_seq", []), + Self = self(), + BySeqPid = spawn_link(fun() -> by_seq_loop(Self, Source, Since) end), + {ok, State#state{changes_loop = BySeqPid}}; + {ibrowse_async_headers, ReqId, "405", _} when Method =:= post -> + {ReqId2, Req2} = req_no_builtin_doc_ids(Req, ReqId), + receive + {ibrowse_async_headers, ReqId2, "200", _} -> + {ok, State#state{ + conn = Req2#http_db.conn, + reqid = ReqId2, + init_args = [Parent, Req2, Since, PostProps]}}; + {ibrowse_async_headers, ReqId, Code, Hdrs} + when Code =:= "301"; Code =:= "302"; Code =:= "303" -> + {ReqId3, Req3} = redirect_req(Req2, Code, Hdrs), + receive + {ibrowse_async_headers, ReqId3, "200", _} -> + {ok, State#state{ + conn = Req3#http_db.conn, + reqid = ReqId3, + init_args = [Parent, Req3, Since, PostProps]}} + after 30000 -> + {stop, changes_timeout} + end + after 30000 -> + {stop, changes_timeout} + end; + {ibrowse_async_headers, ReqId, Code, _} -> + {stop, {changes_error_code, list_to_integer(Code)}} + after 10000 -> + {stop, changes_timeout} + end; + +init([_Parent, Source, Since, PostProps] = InitArgs) -> + process_flag(trap_exit, true), + Server = self(), + Filter = case get_value(<<"doc_ids">>, PostProps) of + undefined -> + ?b2l(get_value(<<"filter">>, PostProps, <<>>)); + DocIds when is_list(DocIds) -> + ?DOC_IDS_FILTER_NAME + end, + ChangesArgs = #changes_args{ + style = all_docs, + since = Since, + filter = Filter, + feed = case get_value(<<"continuous">>, PostProps, false) of + true -> + "continuous"; + false -> + "normal" + end, + timeout = infinity + }, + ChangesPid = spawn_link(fun() -> + ChangesFeedFun = couch_changes:handle_changes( + ChangesArgs, + {json_req, filter_json_req(Filter, Source, PostProps)}, + Source + ), + ChangesFeedFun(fun({change, Change, _}, _) -> + gen_server:call(Server, {add_change, Change}, infinity); + (_, _) -> + ok + end) + end), + {ok, #state{changes_loop=ChangesPid, init_args=InitArgs}}. + +maybe_add_filter_qs_params(PostProps, BaseQS) -> + case get_value(<<"filter">>, PostProps) of + undefined -> + BaseQS; + FilterName -> + {Params} = get_value(<<"query_params">>, PostProps, {[]}), + lists:foldr( + fun({K, V}, QSAcc) -> + Ks = couch_util:to_list(K), + case proplists:is_defined(Ks, QSAcc) of + true -> + QSAcc; + false -> + [{Ks, V} | QSAcc] + end + end, + [{"filter", FilterName} | BaseQS], + Params + ) + end. + +filter_json_req([], _Db, _PostProps) -> + {[]}; +filter_json_req(?DOC_IDS_FILTER_NAME, _Db, PostProps) -> + {[{<<"doc_ids">>, get_value(<<"doc_ids">>, PostProps)}]}; +filter_json_req(FilterName, Db, PostProps) -> + {Query} = get_value(<<"query_params">>, PostProps, {[]}), + {ok, Info} = couch_db:get_db_info(Db), + % simulate a request to db_name/_changes + {[ + {<<"info">>, {Info}}, + {<<"id">>, null}, + {<<"method">>, 'GET'}, + {<<"path">>, [couch_db:name(Db), <<"_changes">>]}, + {<<"query">>, {[{<<"filter">>, FilterName} | Query]}}, + {<<"headers">>, []}, + {<<"body">>, []}, + {<<"peer">>, <<"replicator">>}, + {<<"form">>, []}, + {<<"cookie">>, []}, + {<<"userCtx">>, couch_util:json_user_ctx(Db)} + ]}. + +handle_call({add_change, Row}, From, State) -> + handle_add_change(Row, From, State); + +handle_call(next_changes, From, State) -> + handle_next_changes(From, State); + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({ibrowse_async_headers, Id, Code, Hdrs}, #state{reqid=Id}=State) -> + handle_headers(list_to_integer(Code), Hdrs, State); + +handle_info({ibrowse_async_response, Id, {error, sel_conn_closed}}, + #state{reqid=Id}=State) -> + handle_retry(State); + +handle_info({ibrowse_async_response, Id, {error, connection_closed}}, + #state{reqid=Id}=State) -> + handle_retry(State); + +handle_info({ibrowse_async_response, Id, {error,E}}, #state{reqid=Id}=State) -> + {stop, {error, E}, State}; + +handle_info({ibrowse_async_response, Id, Chunk}, #state{reqid=Id}=State) -> + Messages = [M || M <- re:split(Chunk, ",?\n", [trim]), M =/= <<>>], + handle_messages(Messages, State); + +handle_info({ibrowse_async_response_end, Id}, #state{reqid=Id} = State) -> + handle_feed_completion(State); + +handle_info({'EXIT', From, normal}, #state{changes_loop=From} = State) -> + handle_feed_completion(State); + +handle_info({'EXIT', From, normal}, #state{conn=From, complete=true} = State) -> + {noreply, State}; + +handle_info({'EXIT', From, Reason}, #state{changes_loop=From} = State) -> + ?LOG_ERROR("changes_loop died with reason ~p", [Reason]), + {stop, changes_loop_died, State}; + +handle_info({'EXIT', From, Reason}, State) -> + ?LOG_ERROR("changes loop, process ~p died with reason ~p", [From, Reason]), + {stop, {From, Reason}, State}; + +handle_info(Msg, #state{init_args = InitArgs} = State) -> + case Msg of + changes_timeout -> + [_, #http_db{url = Url} | _] = InitArgs, + ?LOG_ERROR("changes loop timeout, no data received from ~s", + [couch_util:url_strip_password(Url)]); + _ -> + ?LOG_ERROR("changes loop received unexpected message ~p", [Msg]) + end, + {stop, Msg, State}. + +terminate(_Reason, State) -> + #state{ + changes_loop = ChangesPid, + conn = Conn + } = State, + if is_pid(ChangesPid) -> exit(ChangesPid, stop); true -> ok end, + stop_link_worker(Conn). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_change(Row, From, #state{reply_to=nil} = State) -> + {Rows2, Count2} = queue_changes_row(Row, State), + NewState = State#state{count = Count2, rows = Rows2}, + if Count2 =< ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{changes_from=From}} + end; +handle_add_change(Row, _From, #state{count=0} = State) -> + gen_server:reply(State#state.reply_to, [Row]), + {reply, ok, State#state{reply_to=nil}}. + +handle_next_changes(From, #state{count=0}=State) -> + if State#state.complete -> + {stop, normal, complete, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_changes(_From, State) -> + #state{ + changes_from = ChangesFrom, + rows = Rows + } = State, + NewState = State#state{count=0, changes_from=nil, rows=queue:new()}, + maybe_stream_next(NewState), + if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end, + {reply, queue:to_list(Rows), NewState}. + +handle_headers(200, _, State) -> + maybe_stream_next(State), + {noreply, State}; +handle_headers(Code, Hdrs, #state{init_args = InitArgs} = State) + when Code =:= 301 ; Code =:= 302 ; Code =:= 303 -> + stop_link_worker(State#state.conn), + [Parent, Source, Since, PostProps] = InitArgs, + Source2 = couch_rep_httpc:redirected_request(Code, Hdrs, Source), + Pid2 = couch_rep_httpc:spawn_link_worker_process(Source2), + Source3 = Source2#http_db{conn = Pid2}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Source3), + InitArgs2 = [Parent, Source3, Since, PostProps], + {noreply, State#state{conn=Pid2, reqid=ReqId, init_args=InitArgs2}}; +handle_headers(Code, Hdrs, State) -> + ?LOG_ERROR("replicator changes feed failed with code ~s and Headers ~n~p", + [Code,Hdrs]), + {stop, {error, Code}, State}. + +handle_messages([], State) -> + maybe_stream_next(State), + {noreply, State}; +handle_messages([<<"{\"results\":[">>|Rest], State) -> + handle_messages(Rest, State); +handle_messages([<<"]">>, <<"\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([<<"{\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([Chunk|Rest], #state{partial_chunk = Partial} = State) -> + NewState = try + Row = {Props} = decode_row(<<Partial/binary, Chunk/binary>>), + case State of + #state{reply_to=nil} -> + {Rows2, Count2} = queue_changes_row(Row, State), + State#state{ + last_seq = couch_util:get_value(<<"seq">>, Props), + partial_chunk = <<>>, + rows = Rows2, + count = Count2 + }; + #state{count=0, reply_to=From}-> + gen_server:reply(From, [Row]), + State#state{reply_to = nil, partial_chunk = <<>>} + end + catch + throw:{invalid_json, Bad} -> + State#state{partial_chunk = Bad} + end, + handle_messages(Rest, NewState). + +handle_feed_completion(#state{reply_to=nil} = State)-> + {noreply, State#state{complete=true}}; +handle_feed_completion(#state{count=0} = State) -> + gen_server:reply(State#state.reply_to, complete), + {stop, normal, State}. + +handle_retry(State) -> + ?LOG_DEBUG("retrying changes feed because our connection closed", []), + #state{ + count = Count, + init_args = [_, Source, _, PostProps], + last_seq = Since, + reply_to = ReplyTo, + rows = Rows + } = State, + case init([nil, Source, Since, PostProps]) of + {ok, State1} -> + MergedState = State1#state{ + count = Count, + reply_to = ReplyTo, + rows = Rows + }, + {noreply, MergedState}; + _ -> + {stop, {error, connection_closed}, State} + end. + +by_seq_loop(Server, Source, StartSeq) -> + Req = Source#http_db{ + resource = "_all_docs_by_seq", + qs = [{limit, 1000}, {startkey, StartSeq}] + }, + {Results} = couch_rep_httpc:request(Req), + Rows = couch_util:get_value(<<"rows">>, Results), + if Rows =:= [] -> exit(normal); true -> ok end, + EndSeq = lists:foldl(fun({RowInfoList}, _) -> + Id = couch_util:get_value(<<"id">>, RowInfoList), + Seq = couch_util:get_value(<<"key">>, RowInfoList), + {RowProps} = couch_util:get_value(<<"value">>, RowInfoList), + RawRevs = [ + couch_util:get_value(<<"rev">>, RowProps), + couch_util:get_value(<<"conflicts">>, RowProps, []), + couch_util:get_value(<<"deleted_conflicts">>, RowProps, []) + ], + ParsedRevs = couch_doc:parse_revs(lists:flatten(RawRevs)), + Change = {[ + {<<"seq">>, Seq}, + {<<"id">>, Id}, + {<<"changes">>, [{[{<<"rev">>,R}]} || R <- ParsedRevs]} + ]}, + gen_server:call(Server, {add_change, Change}, infinity), + Seq + end, 0, Rows), + by_seq_loop(Server, Source, EndSeq). + +decode_row(<<",", Rest/binary>>) -> + decode_row(Rest); +decode_row(Row) -> + ?JSON_DECODE(Row). + +maybe_stream_next(#state{reqid=nil}) -> + ok; +maybe_stream_next(#state{complete=false, count=N} = S) when N < ?BUFFER_SIZE -> + timer:cancel(get(timeout)), + {ok, Timeout} = timer:send_after(31000, changes_timeout), + put(timeout, Timeout), + ibrowse:stream_next(S#state.reqid); +maybe_stream_next(_) -> + timer:cancel(get(timeout)). + +stop_link_worker(Conn) when is_pid(Conn) -> + unlink(Conn), + receive {'EXIT', Conn, _} -> ok after 0 -> ok end, + catch ibrowse:stop_worker_process(Conn); +stop_link_worker(_) -> + ok. + +redirect_req(#http_db{conn = WorkerPid} = Req, Code, Headers) -> + stop_link_worker(WorkerPid), + Req2 = couch_rep_httpc:redirected_request(Code, Headers, Req), + WorkerPid2 = couch_rep_httpc:spawn_link_worker_process(Req2), + Req3 = Req2#http_db{conn = WorkerPid2}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req3), + {ReqId, Req3}. + +req_no_builtin_doc_ids(#http_db{conn = WorkerPid, qs = QS} = Req, ReqId) -> + % CouchDB versions prior to 1.1.0 don't have the builtin filter _doc_ids + % and don't allow POSTing to /database/_changes + purge_req_messages(ReqId), + stop_link_worker(WorkerPid), + Req2 = Req#http_db{method = get, qs = lists:keydelete("filter", 1, QS)}, + WorkerPid2 = couch_rep_httpc:spawn_link_worker_process(Req2), + Req3 = Req2#http_db{conn = WorkerPid2}, + {ibrowse_req_id, ReqId2} = couch_rep_httpc:request(Req3), + {ReqId2, Req3}. + +purge_req_messages(ReqId) -> + ibrowse:stream_next(ReqId), + receive + {ibrowse_async_response, ReqId, {error, _}} -> + ok; + {ibrowse_async_response, ReqId, _Data} -> + purge_req_messages(ReqId); + {ibrowse_async_response_end, ReqId} -> + ok + end. + +queue_changes_row(Row, #state{doc_ids = nil, count = Count, rows = Rows}) -> + {queue:in(Row, Rows), Count + 1}; +queue_changes_row({RowProps} = Row, + #state{doc_ids = Ids, count = Count, rows = Rows}) -> + case lists:member(get_value(<<"id">>, RowProps), Ids) of + true -> + {queue:in(Row, Rows), Count + 1}; + false -> + {Rows, Count} + end. |