Conditionally apply JSON encoding to update_seq values
[cloudant_bigcouch.git] / apps / couch / src / couch_rep_changes_feed.erl
1 % Licensed under the Apache License, Version 2.0 (the "License"); you may not
2 % use this file except in compliance with the License. You may obtain a copy of
3 % the License at
4 %
5 %   http://www.apache.org/licenses/LICENSE-2.0
6 %
7 % Unless required by applicable law or agreed to in writing, software
8 % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9 % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10 % License for the specific language governing permissions and limitations under
11 % the License.
12
13 -module(couch_rep_changes_feed).
14 -behaviour(gen_server).
15 -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
16     code_change/3]).
17
18 -export([start_link/4, next/1, stop/1]).
19
20 -define(BUFFER_SIZE, 1000).
21 -define(DOC_IDS_FILTER_NAME, "_doc_ids").
22
23 -include("couch_db.hrl").
24 -include_lib("ibrowse/include/ibrowse.hrl").
25
% gen_server state of a changes feed reader.
-record (state, {
    % caller of {add_change, _} left waiting because the buffer is over
    % ?BUFFER_SIZE; answered by handle_next_changes/2
    changes_from = nil,
    % pid of the linked process feeding rows through {add_change, _}
    changes_loop = nil,
    % the argument list init/1 was called with (reused on redirects/retries)
    init_args,
    % initially Since; updated by handle_messages/2 and used as the restart
    % point by handle_retry/1
    last_seq,
    % ibrowse worker process used for the HTTP request
    conn = nil,
    % ibrowse request id of the in-flight request
    reqid = nil,
    % set to true by handle_feed_completion/1 when nobody is waiting
    complete = false,
    % number of rows currently held in `rows`
    count = 0,
    % undecodable remainder of the previous chunk, prepended to the next one
    partial_chunk = <<>>,
    % caller of next_changes waiting for rows
    reply_to = nil,
    % buffered rows
    rows = queue:new(),
    % value of "doc_ids" from PostProps, or nil
    doc_ids = nil
}).
40
41 -import(couch_util, [
42     get_value/2,
43     get_value/3
44 ]).
45
% Starts a changes feed reader linked to the calling process. The four
% arguments are handed to init/1 unchanged, as a single list.
start_link(Parent, Source, StartSeq, PostProps) ->
    InitArgs = [Parent, Source, StartSeq, PostProps],
    gen_server:start_link(?MODULE, InitArgs, []).
48
% Asks the feed reader for its next batch of rows. Blocks without a timeout
% until the server answers.
next(Server) ->
    Request = next_changes,
    gen_server:call(Server, Request, infinity).
51
% Asks the feed reader to stop. Best effort: any failure of the call (for
% instance the server being gone already) is swallowed and ok is returned.
stop(Server) ->
    _Ignored = (catch gen_server:call(Server, stop)),
    ok.
55
% gen_server init callback.
%
% First clause (remote source, an #http_db{}): issues a streaming request to
% the source's _changes resource and waits up to 30 seconds for the response
% headers.
%   * 200: start streaming.
%   * 301/302/303: re-issue the request against the redirect target.
%   * 404: fall back to polling _all_docs_by_seq in a linked process.
%   * 405 on a POST (doc_ids replication): retry as a GET without the builtin
%     _doc_ids filter.
%   * any other code: {stop, {changes_error_code, Code}}.
% Returns {stop, changes_timeout} when no headers arrive in time.
%
% Second clause (any other source): spawns a linked process that runs
% couch_changes:handle_changes/3 and pushes every change to this server with
% an {add_change, Change} call.
init([Parent, #http_db{headers = Headers0} = Source, Since, PostProps]) ->
    process_flag(trap_exit, true),
    Feed = case get_value(<<"continuous">>, PostProps, false) of
    false ->
        normal;
    true ->
        continuous
    end,
    BaseQS = [
        {"style", all_docs},
        {"heartbeat", 10000},
        % a binary Since is sent verbatim, anything else is JSON encoded first
        {"since", case Since of Bin when is_binary(Bin) -> Bin;
            Else -> iolist_to_binary(?JSON_ENCODE(Else)) end},
        {"feed", Feed}
    ],
    {QS, Method, Body, Headers} = case get_value(<<"doc_ids">>, PostProps) of
    undefined ->
        {maybe_add_filter_qs_params(PostProps, BaseQS), get, nil, Headers0};
    DocIds when is_list(DocIds) ->
        % the list of ids is POSTed as a JSON body to the builtin filter
        Headers1 = [{"Content-Type", "application/json"} | Headers0],
        QS1 = [{"filter", ?l2b(?DOC_IDS_FILTER_NAME)} | BaseQS],
        {QS1, post, {[{<<"doc_ids">>, DocIds}]}, Headers1}
    end,
    Pid = couch_rep_httpc:spawn_link_worker_process(Source),
    Req = Source#http_db{
        method = Method,
        body = Body,
        resource = "_changes",
        qs = QS,
        conn = Pid,
        options = [{stream_to, {self(), once}}] ++
                lists:keydelete(inactivity_timeout, 1, Source#http_db.options),
        headers = Headers -- [{"Accept-Encoding", "gzip"}]
    },
    {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req),
    Args = [Parent, Req, Since, PostProps],
    State = #state{
        conn = Pid,
        last_seq = Since,
        reqid = ReqId,
        init_args = Args,
        doc_ids = get_value(<<"doc_ids">>, PostProps, nil)
    },

    receive
    {ibrowse_async_headers, ReqId, "200", _} ->
        ibrowse:stream_next(ReqId),
        {ok, State};
    {ibrowse_async_headers, ReqId, Code, Hdrs}
            when Code =:= "301"; Code =:= "302"; Code =:= "303" ->
        {ReqId2, Req2} = redirect_req(Req, Code, Hdrs),
        receive
        {ibrowse_async_headers, ReqId2, "200", _} ->
            % NOTE(review): unlike the direct 200 case above, none of the
            % nested 200 branches call ibrowse:stream_next/1 - confirm that
            % streaming is resumed elsewhere for these requests.
            {ok, State#state{
                conn = Req2#http_db.conn,
                reqid = ReqId2,
                init_args = [Parent, Req2, Since, PostProps]}};
        {ibrowse_async_headers, ReqId2, "405", _} when Method =:= post ->
            {ReqId3, Req3} = req_no_builtin_doc_ids(Req2, ReqId2),
            receive
            {ibrowse_async_headers, ReqId3, "200", _} ->
                {ok, State#state{
                    conn = Req3#http_db.conn,
                    reqid = ReqId3,
                    init_args = [Parent, Req3, Since, PostProps]}}
            after 30000 ->
                {stop, changes_timeout}
            end
        after 30000 ->
            {stop, changes_timeout}
        end;
    {ibrowse_async_headers, ReqId, "404", _} ->
        stop_link_worker(Pid),
        ?LOG_INFO("source doesn't have _changes, trying _all_docs_by_seq", []),
        Self = self(),
        BySeqPid = spawn_link(fun() -> by_seq_loop(Self, Source, Since) end),
        {ok, State#state{changes_loop = BySeqPid}};
    {ibrowse_async_headers, ReqId, "405", _}  when Method =:= post ->
        {ReqId2, Req2} = req_no_builtin_doc_ids(Req, ReqId),
        receive
        {ibrowse_async_headers, ReqId2, "200", _} ->
            {ok, State#state{
                conn = Req2#http_db.conn,
                reqid = ReqId2,
                init_args = [Parent, Req2, Since, PostProps]}};
        % The fallback GET was issued under ReqId2, so a redirect answer is
        % tagged with ReqId2 as well (the original request id is finished).
        {ibrowse_async_headers, ReqId2, Code, Hdrs}
            when Code =:= "301"; Code =:= "302"; Code =:= "303" ->
            {ReqId3, Req3} = redirect_req(Req2, Code, Hdrs),
            receive
            {ibrowse_async_headers, ReqId3, "200", _} ->
                {ok, State#state{
                    conn = Req3#http_db.conn,
                    reqid = ReqId3,
                    init_args = [Parent, Req3, Since, PostProps]}}
            after 30000 ->
                {stop, changes_timeout}
            end
        after 30000 ->
            {stop, changes_timeout}
        end;
    {ibrowse_async_headers, ReqId, Code, _} ->
        {stop, {changes_error_code, list_to_integer(Code)}}
    after 30000 ->
        {stop, changes_timeout}
    end;

init([_Parent, Source, Since, PostProps] = InitArgs) ->
    process_flag(trap_exit, true),
    Server = self(),
    % a doc_ids list takes precedence over a named filter
    Filter = case get_value(<<"doc_ids">>, PostProps) of
    undefined ->
        ?b2l(get_value(<<"filter">>, PostProps, <<>>));
    DocIds when is_list(DocIds) ->
        ?DOC_IDS_FILTER_NAME
    end,
    ChangesArgs = #changes_args{
        style = all_docs,
        since = Since,
        filter = Filter,
        feed = case get_value(<<"continuous">>, PostProps, false) of
            true ->
                "continuous";
            false ->
                "normal"
        end,
        timeout = infinity
    },
    ChangesPid = spawn_link(fun() ->
        ChangesFeedFun = couch_changes:handle_changes(
            ChangesArgs,
            {json_req, filter_json_req(Filter, Source, PostProps)},
            Source
        ),
        % only {change, _, _} events are forwarded; everything else is dropped
        ChangesFeedFun(fun({change, Change, _}, _) ->
                gen_server:call(Server, {add_change, Change}, infinity);
            (_, _) ->
                ok
        end)
    end),
    {ok, #state{changes_loop=ChangesPid, init_args=InitArgs}}.
196
% Extends the base query string with the replication filter, if one is set.
% Without a "filter" entry in PostProps, BaseQS is returned untouched.
% Otherwise the filter name is added, followed by every "query_params" entry
% whose key (converted to a list) is not already present in the query string.
maybe_add_filter_qs_params(PostProps, BaseQS) ->
    case get_value(<<"filter">>, PostProps) of
        undefined ->
            BaseQS;
        FilterName ->
            {QueryParams} = get_value(<<"query_params">>, PostProps, {[]}),
            AddParam = fun({Key, Value}, Acc) ->
                KeyStr = couch_util:to_list(Key),
                AlreadySet = proplists:is_defined(KeyStr, Acc),
                if
                    AlreadySet -> Acc;
                    true -> [{KeyStr, Value} | Acc]
                end
            end,
            lists:foldr(AddParam, [{"filter", FilterName} | BaseQS], QueryParams)
    end.
217
% Builds the JSON request object handed to couch_changes:handle_changes/3.
%   * no filter ([]): an empty object;
%   * the builtin doc ids filter: an object carrying only "doc_ids";
%   * any other filter: an object that simulates a GET of db_name/_changes
%     with the filter name and the "query_params" as its query.
filter_json_req([], _Db, _PostProps) ->
    {[]};
filter_json_req(?DOC_IDS_FILTER_NAME, _Db, PostProps) ->
    DocIds = get_value(<<"doc_ids">>, PostProps),
    {[{<<"doc_ids">>, DocIds}]};
filter_json_req(FilterName, Db, PostProps) ->
    {QueryParams} = get_value(<<"query_params">>, PostProps, {[]}),
    {ok, DbInfo} = couch_db:get_db_info(Db),
    Path = [couch_db:name(Db), <<"_changes">>],
    QueryObj = {[{<<"filter">>, FilterName} | QueryParams]},
    UserCtx = couch_util:json_user_ctx(Db),
    {[
        {<<"info">>, {DbInfo}},
        {<<"id">>, null},
        {<<"method">>, 'GET'},
        {<<"path">>, Path},
        {<<"query">>, QueryObj},
        {<<"headers">>, []},
        {<<"body">>, []},
        {<<"peer">>, <<"replicator">>},
        {<<"form">>, []},
        {<<"cookie">>, []},
        {<<"userCtx">>, UserCtx}
    ]}.
239
% gen_server call callback. Three requests are understood:
%   {add_change, Row} - a feeder process delivers one row;
%   next_changes      - a consumer asks for the buffered rows;
%   stop              - terminate normally, replying ok.
handle_call({add_change, ChangeRow}, Caller, State) ->
    handle_add_change(ChangeRow, Caller, State);

handle_call(next_changes, Caller, State) ->
    handle_next_changes(Caller, State);

handle_call(stop, _Caller, State) ->
    {stop, normal, ok, State}.
248
% gen_server cast callback. Casts are not used; anything received is ignored.
handle_cast(_Ignored, CurrentState) ->
    {noreply, CurrentState}.
251
% gen_server info callback. Handles the ibrowse messages of the current
% request (matched on #state.reqid), exits of linked processes, the
% changes_timeout message and, as a last resort, any unexpected message.
handle_info({ibrowse_async_headers, ReqId, StatusStr, Headers},
        #state{reqid=ReqId}=State) ->
    Status = list_to_integer(StatusStr),
    handle_headers(Status, Headers, State);

% a closed connection is retried, every other transport error is fatal
handle_info({ibrowse_async_response, ReqId, {error, Reason}},
        #state{reqid=ReqId}=State)
        when Reason =:= sel_conn_closed; Reason =:= connection_closed ->
    handle_retry(State);

handle_info({ibrowse_async_response, ReqId, {error, Reason}},
        #state{reqid=ReqId}=State) ->
    {stop, {error, Reason}, State};

% a body chunk: split it into lines and drop the empty ones
handle_info({ibrowse_async_response, ReqId, Chunk}, #state{reqid=ReqId}=State) ->
    Pieces = re:split(Chunk, ",?\n", [trim]),
    NonEmpty = lists:filter(fun(Piece) -> Piece =/= <<>> end, Pieces),
    handle_messages(NonEmpty, State);

handle_info({ibrowse_async_response_end, ReqId}, #state{reqid=ReqId} = State) ->
    handle_feed_completion(State);

% the feeder process finished normally: the feed is complete
handle_info({'EXIT', Pid, normal}, #state{changes_loop=Pid} = State) ->
    handle_feed_completion(State);

% the HTTP worker going away after completion is expected
handle_info({'EXIT', Pid, normal}, #state{conn=Pid, complete=true} = State) ->
    {noreply, State};

handle_info({'EXIT', Pid, Reason}, #state{changes_loop=Pid} = State) ->
    ?LOG_ERROR("changes_loop died with reason ~p", [Reason]),
    {stop, changes_loop_died, State};

handle_info({'EXIT', Pid, Reason}, State) ->
    ?LOG_ERROR("changes loop, process ~p died with reason ~p", [Pid, Reason]),
    {stop, {Pid, Reason}, State};

handle_info(changes_timeout, #state{init_args = InitArgs} = State) ->
    [_, #http_db{url = Url} | _] = InitArgs,
    ?LOG_ERROR("changes loop timeout, no data received from ~s",
        [couch_util:url_strip_password(Url)]),
    {stop, changes_timeout, State};

handle_info(Unexpected, #state{} = State) ->
    ?LOG_ERROR("changes loop received unexpected message ~p", [Unexpected]),
    {stop, Unexpected, State}.
297
% gen_server terminate callback: sends a stop exit signal to the feeder
% process, if there is one, and shuts down the HTTP worker.
terminate(_Reason, State) ->
    #state{changes_loop = Feeder, conn = Worker} = State,
    case is_pid(Feeder) of
        true -> exit(Feeder, stop);
        false -> ok
    end,
    stop_link_worker(Worker).
305
% gen_server code change callback: the state is carried over unchanged.
code_change(_FromVersion, CurrentState, _Extra) ->
    {ok, CurrentState}.
308
309 %internal funs
310
% Handles a row delivered by a feeder process.
% With no consumer waiting, the row is buffered; once the buffer holds more
% than ?BUFFER_SIZE rows the feeder's call is left unanswered (its From is
% remembered in changes_from) so that it blocks.
% With a consumer waiting and an empty buffer, the row goes straight to it.
handle_add_change(Row, From, #state{reply_to=nil} = State) ->
    {Queue, Buffered} = queue_changes_row(Row, State),
    Updated = State#state{count = Buffered, rows = Queue},
    case Buffered > ?BUFFER_SIZE of
        true ->
            {noreply, Updated#state{changes_from=From}};
        false ->
            {reply, ok, Updated}
    end;
handle_add_change(Row, _From, #state{count=0, reply_to=Waiter} = State) ->
    gen_server:reply(Waiter, [Row]),
    {reply, ok, State#state{reply_to=nil}}.
322
% Handles a consumer asking for rows.
% Empty buffer: reply complete and stop if the feed has ended, otherwise
% remember the caller in reply_to and answer later.
% Non-empty buffer: hand over all buffered rows, reset the buffer, resume
% streaming and release a feeder that was blocked on a full buffer.
handle_next_changes(From, #state{count=0, complete=Complete}=State) ->
    case Complete of
        true ->
            {stop, normal, complete, State};
        _ ->
            {noreply, State#state{reply_to=From}}
    end;
handle_next_changes(_From, #state{changes_from = Blocked, rows = Buffered} = State) ->
    Drained = State#state{count=0, changes_from=nil, rows=queue:new()},
    maybe_stream_next(Drained),
    case Blocked of
        nil -> ok;
        _ -> gen_server:reply(Blocked, ok)
    end,
    {reply, queue:to_list(Buffered), Drained}.
338
% Reacts to the response headers of the changes request. Code is the HTTP
% status as an integer (handle_info/2 converts it with list_to_integer/1).
%   * 200: resume streaming.
%   * 301/302/303: stop the current worker and re-issue the request against
%     the redirect target with a fresh worker; init_args is updated so later
%     retries use the new location.
%   * anything else: log and stop with {error, Code}.
handle_headers(200, _, State) ->
    maybe_stream_next(State),
    {noreply, State};
handle_headers(Code, Hdrs, #state{init_args = InitArgs} = State)
        when Code =:= 301 ; Code =:= 302 ; Code =:= 303 ->
    stop_link_worker(State#state.conn),
    [Parent, Source, Since, PostProps] = InitArgs,
    Source2 = couch_rep_httpc:redirected_request(Code, Hdrs, Source),
    Pid2 = couch_rep_httpc:spawn_link_worker_process(Source2),
    Source3 = Source2#http_db{conn = Pid2},
    {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Source3),
    InitArgs2 = [Parent, Source3, Since, PostProps],
    {noreply, State#state{conn=Pid2, reqid=ReqId, init_args=InitArgs2}};
handle_headers(Code, Hdrs, State) ->
    % Code is an integer, which ~s cannot format; ~p prints any term
    ?LOG_ERROR("replicator changes feed failed with code ~p and Headers ~n~p",
        [Code,Hdrs]),
    {stop, {error, Code}, State}.
356
% Processes the lines of one response chunk, in order.
%   * no lines left: ask for more data (if allowed) and carry on;
%   * the opening `{"results":[` line is skipped;
%   * a closing `last_seq` line ends the feed;
%   * any other line is decoded as a JSON row, prefixed by whatever was left
%     undecodable from the previous chunk. A row is either buffered or, when
%     a consumer is already waiting on an empty buffer, sent to it directly.
%     A line that is not valid JSON yet is kept in partial_chunk.
handle_messages([], State) ->
    maybe_stream_next(State),
    {noreply, State};
handle_messages([<<"{\"results\":[">>|Rest], State) ->
    handle_messages(Rest, State);
handle_messages([<<"]">>, <<"\"last_seq\":", _/binary>>], State) ->
    handle_feed_completion(State);
handle_messages([<<"{\"last_seq\":", _/binary>>], State) ->
    handle_feed_completion(State);
handle_messages([Chunk|Rest], #state{partial_chunk = Partial} = State) ->
    NewState = try
        Row = {Props} = decode_row(<<Partial/binary, Chunk/binary>>),
        % last_seq is the restart point used by handle_retry/1, so it has to
        % advance for every decoded row, including rows handed directly to a
        % waiting consumer; otherwise a retry re-fetches delivered rows
        Seq = couch_util:get_value(<<"seq">>, Props),
        case State of
        #state{reply_to=nil} ->
            {Rows2, Count2} = queue_changes_row(Row, State),
            State#state{
                last_seq = Seq,
                partial_chunk = <<>>,
                rows = Rows2,
                count = Count2
            };
        #state{count=0, reply_to=From}->
            gen_server:reply(From, [Row]),
            State#state{
                last_seq = Seq,
                reply_to = nil,
                partial_chunk = <<>>
            }
        end
    catch
    throw:{invalid_json, Bad} ->
        State#state{partial_chunk = Bad}
    end,
    handle_messages(Rest, NewState).
387
% Called when the feed has ended. With no consumer waiting, the state is only
% flagged complete; with a consumer waiting on an empty buffer, it is told
% `complete` and the server stops normally.
handle_feed_completion(#state{reply_to=nil} = State)->
    Finished = State#state{complete=true},
    {noreply, Finished};
handle_feed_completion(#state{count=0, reply_to=Waiter} = State) ->
    gen_server:reply(Waiter, complete),
    {stop, normal, State}.
393
% Re-establishes the changes request after the connection was closed.
% init/1 is run again from last_seq; its fresh state is then given the
% buffered rows, their count and the waiting consumer of the old state.
% Any result of init/1 other than {ok, _} stops the server.
handle_retry(State) ->
    ?LOG_DEBUG("retrying changes feed because our connection closed", []),
    #state{
        init_args = [_, Source, _, PostProps],
        last_seq = RestartSeq,
        count = Buffered,
        rows = Queue,
        reply_to = Waiter
    } = State,
    RetryResult = init([nil, Source, RestartSeq, PostProps]),
    case RetryResult of
        {ok, Fresh} ->
            {noreply, Fresh#state{
                count = Buffered,
                reply_to = Waiter,
                rows = Queue
            }};
        _ ->
            {stop, {error, connection_closed}, State}
    end.
414
% Fallback feeder for sources without _changes: repeatedly fetches up to 1000
% rows of _all_docs_by_seq starting at StartSeq, turns each into a
% changes-style row and delivers it to Server with an {add_change, _} call.
% Continues from the key of the last row; exits normally on an empty page.
by_seq_loop(Server, Source, StartSeq) ->
    Req = Source#http_db{
        resource = "_all_docs_by_seq",
        qs = [{limit, 1000}, {startkey, StartSeq}]
    },
    {Results} = couch_rep_httpc:request(Req),
    case couch_util:get_value(<<"rows">>, Results) of
        [] ->
            exit(normal);
        Rows ->
            Deliver = fun({RowInfo}, _PrevSeq) ->
                DocId = couch_util:get_value(<<"id">>, RowInfo),
                Seq = couch_util:get_value(<<"key">>, RowInfo),
                {Value} = couch_util:get_value(<<"value">>, RowInfo),
                % winning rev first, then conflicts and deleted conflicts
                AllRevs = lists:flatten([
                    couch_util:get_value(<<"rev">>, Value),
                    couch_util:get_value(<<"conflicts">>, Value, []),
                    couch_util:get_value(<<"deleted_conflicts">>, Value, [])
                ]),
                RevEntries = [{[{<<"rev">>, Rev}]}
                    || Rev <- couch_doc:parse_revs(AllRevs)],
                Change = {[
                    {<<"seq">>, Seq},
                    {<<"id">>, DocId},
                    {<<"changes">>, RevEntries}
                ]},
                gen_server:call(Server, {add_change, Change}, infinity),
                Seq
            end,
            EndSeq = lists:foldl(Deliver, 0, Rows),
            by_seq_loop(Server, Source, EndSeq)
    end.
442
% Decodes one line of the feed as JSON, skipping any leading commas first.
decode_row(<<$,, AfterComma/binary>>) ->
    decode_row(AfterComma);
decode_row(Json) ->
    ?JSON_DECODE(Json).
447
% Asks ibrowse for the next chunk when that is appropriate.
%   * no HTTP request (reqid = nil): nothing to do;
%   * feed not complete and fewer than ?BUFFER_SIZE rows buffered: restart
%     the 31 second changes_timeout timer (its reference is kept in the
%     process dictionary under `timeout`) and request the next chunk;
%   * otherwise: only cancel the timer.
maybe_stream_next(#state{reqid=nil}) ->
    ok;
maybe_stream_next(#state{complete=false, count=Buffered, reqid=ReqId})
        when Buffered < ?BUFFER_SIZE ->
    OldTimer = get(timeout),
    timer:cancel(OldTimer),
    {ok, NewTimer} = timer:send_after(31000, changes_timeout),
    put(timeout, NewTimer),
    ibrowse:stream_next(ReqId);
maybe_stream_next(_) ->
    OldTimer = get(timeout),
    timer:cancel(OldTimer).
457
% Stops an ibrowse worker process. The worker is unlinked first and an 'EXIT'
% message it may already have sent is removed from the mailbox, so stopping
% it is not seen as a failure. Anything that is not a pid is ignored.
stop_link_worker(Worker) when is_pid(Worker) ->
    unlink(Worker),
    receive
        {'EXIT', Worker, _} ->
            ok
    after 0 ->
        ok
    end,
    catch ibrowse:stop_worker_process(Worker);
stop_link_worker(_) ->
    ok.
464
% Follows a redirect: stops the worker of the current request, builds the
% redirected request from the response headers and issues it on a new
% worker. Returns {NewReqId, NewRequest}.
redirect_req(#http_db{conn = OldWorker} = Req, Code, Headers) ->
    stop_link_worker(OldWorker),
    Redirected = couch_rep_httpc:redirected_request(Code, Headers, Req),
    NewWorker = couch_rep_httpc:spawn_link_worker_process(Redirected),
    NewReq = Redirected#http_db{conn = NewWorker},
    {ibrowse_req_id, NewReqId} = couch_rep_httpc:request(NewReq),
    {NewReqId, NewReq}.
472
% Re-issues the changes request as a plain GET without the "filter" query
% parameter, on a new worker, after draining the messages of the old request.
% Returns {NewReqId, NewRequest}.
req_no_builtin_doc_ids(#http_db{conn = OldWorker, qs = QS} = Req, ReqId) ->
    % CouchDB versions prior to 1.1.0 don't have the builtin filter _doc_ids
    % and don't allow POSTing to /database/_changes
    purge_req_messages(ReqId),
    stop_link_worker(OldWorker),
    UnfilteredQS = lists:keydelete("filter", 1, QS),
    GetReq = Req#http_db{method = get, qs = UnfilteredQS},
    NewWorker = couch_rep_httpc:spawn_link_worker_process(GetReq),
    NewReq = GetReq#http_db{conn = NewWorker},
    {ibrowse_req_id, NewReqId} = couch_rep_httpc:request(NewReq),
    {NewReqId, NewReq}.
483
% Drains the remaining response messages of a request: keeps asking ibrowse
% for the next message and discards data until the response ends or an
% error is reported.
% NOTE(review): the receive has no `after` clause, so this waits
% indefinitely if neither message ever arrives.
purge_req_messages(ReqId) ->
    ibrowse:stream_next(ReqId),
    receive
        {ibrowse_async_response_end, ReqId} ->
            ok;
        {ibrowse_async_response, ReqId, {error, _}} ->
            ok;
        {ibrowse_async_response, ReqId, _Discarded} ->
            purge_req_messages(ReqId)
    end.
494
% Buffers a row, applying the doc_ids restriction when one is set: a row
% whose "id" is not in the list is dropped and the buffer is left as it is.
% Returns {Rows, Count}.
queue_changes_row(Row, #state{doc_ids = nil} = State) ->
    maybe_queue_row(Row, State);
queue_changes_row({RowProps} = Row, #state{doc_ids = WantedIds} = State) ->
    DocId = get_value(<<"id">>, RowProps),
    case lists:member(DocId, WantedIds) of
        true ->
            maybe_queue_row(Row, State);
        false ->
            {State#state.rows, State#state.count}
    end.
505
% Appends a row to the buffer unless its document ID is the empty binary,
% in which case the row is logged and skipped. Returns {Rows, Count}.
maybe_queue_row({Props} = Row, #state{count = Count, rows = Rows} = State) ->
    case get_value(<<"id">>, Props) =:= <<>> of
        false ->
            {queue:in(Row, Rows), Count + 1};
        true ->
            [_, Db | _] = State#state.init_args,
            ?LOG_ERROR("Replicator: ignoring document with empty ID in source "
                "database `~s` (_changes sequence ~p)",
                [dbname(Db), couch_util:get_value(<<"seq">>, Props)]),
            {Rows, Count}
    end.
517
% Name of a source database for log messages: the URL with its password
% stripped for an #http_db{}, the name field for a #db{}.
dbname(#http_db{} = HttpDb) ->
    couch_util:url_strip_password(HttpDb#http_db.url);
dbname(#db{} = LocalDb) ->
    LocalDb#db.name.