summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFilipe David Borba Manana <fdmanana@apache.org>2011-08-18 05:50:42 +0000
committerFilipe David Borba Manana <fdmanana@apache.org>2011-08-18 05:50:42 +0000
commit82a10d77d0c206c89e640ca7097d8e12e95eb8be (patch)
tree834674bf643f2088c92331b44494f6721b3bc148
parent0dc0f8b41f627876f65183ea1d99c68b16abeae0 (diff)
Merge revision 1159045 from trunk
Fix dead lock case in the os process pool Part of this patch was done by Paul Davis. The patch also introduces a test case to validate that the os process pool (couch_query_servers) operates as it should. Closes COUCHDB-1246. git-svn-id: https://svn.apache.org/repos/asf/couchdb/branches/1.1.x@1159049 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/couchdb/couch_os_process.erl6
-rw-r--r--src/couchdb/couch_query_servers.erl22
-rw-r--r--test/etap/210-os-proc-pool.t161
-rw-r--r--test/etap/Makefile.am4
4 files changed, 180 insertions, 13 deletions
diff --git a/src/couchdb/couch_os_process.erl b/src/couchdb/couch_os_process.erl
index 5776776b..3ee52865 100644
--- a/src/couchdb/couch_os_process.erl
+++ b/src/couchdb/couch_os_process.erl
@@ -104,7 +104,6 @@ readjson(OsProc) when is_record(OsProc, os_proc) ->
% gen_server API
init([Command, Options, PortOptions]) ->
- process_flag(trap_exit, true),
PrivDir = couch_util:priv_dir(),
Spawnkiller = filename:join(PrivDir, "couchspawnkillable"),
BaseProc = #os_proc{
@@ -175,10 +174,7 @@ handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) ->
{stop, normal, OsProc};
handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) ->
?LOG_ERROR("OS Process died with status: ~p", [Status]),
- {stop, {exit_status, Status}, OsProc};
-handle_info(Msg, OsProc) ->
- ?LOG_DEBUG("OS Proc: Unknown info: ~p", [Msg]),
- {noreply, OsProc}.
+ {stop, {exit_status, Status}, OsProc}.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
diff --git a/src/couchdb/couch_query_servers.erl b/src/couchdb/couch_query_servers.erl
index b0e46937..f8bbcaed 100644
--- a/src/couchdb/couch_query_servers.erl
+++ b/src/couchdb/couch_query_servers.erl
@@ -22,7 +22,8 @@
-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]).
-% -export([test/0]).
+% For 210-os-proc-pool.t
+-export([get_os_process/1, ret_os_process/1]).
-include("couch_db.hrl").
@@ -343,8 +344,7 @@ handle_call({get_proc, Lang}, From, Server) ->
Error ->
{reply, Error, Server}
end;
-handle_call({unlink_proc, Pid}, _From, #qserver{pid_procs=PidProcs}=Server) ->
- rem_value(PidProcs, Pid),
+handle_call({unlink_proc, Pid}, _From, Server) ->
unlink(Pid),
{reply, ok, Server};
handle_call({ret_proc, Proc}, _From, #qserver{
@@ -352,15 +352,22 @@ handle_call({ret_proc, Proc}, _From, #qserver{
lang_procs=LangProcs}=Server) ->
% Along with max process limit, here we should check
% if we're over the limit and discard when we are.
- add_value(PidProcs, Proc#proc.pid, Proc),
- add_to_list(LangProcs, Proc#proc.lang, Proc),
- link(Proc#proc.pid),
+ case is_process_alive(Proc#proc.pid) of
+ true ->
+ add_value(PidProcs, Proc#proc.pid, Proc),
+ add_to_list(LangProcs, Proc#proc.lang, Proc),
+ link(Proc#proc.pid);
+ false ->
+ ok
+ end,
{reply, true, service_waitlist(Server)}.
handle_cast(_Whatever, Server) ->
{noreply, Server}.
-handle_info({'EXIT', Pid, Status}, #qserver{
+handle_info({'EXIT', _, _}, Server) ->
+ {noreply, Server};
+handle_info({'DOWN', _, process, Pid, Status}, #qserver{
pid_procs=PidProcs,
lang_procs=LangProcs,
lang_limits=LangLimits}=Server) ->
@@ -461,6 +468,7 @@ new_process(Langs, LangLimits, Lang) ->
case ets:lookup(Langs, Lang) of
[{Lang, Mod, Func, Arg}] ->
{ok, Pid} = apply(Mod, Func, Arg),
+ erlang:monitor(process, Pid),
true = ets:insert(LangLimits, {Lang, Lim, Current+1}),
{ok, #proc{lang=Lang,
pid=Pid,
diff --git a/test/etap/210-os-proc-pool.t b/test/etap/210-os-proc-pool.t
new file mode 100644
index 00000000..b68d66be
--- /dev/null
+++ b/test/etap/210-os-proc-pool.t
@@ -0,0 +1,161 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+main(_) ->
+ test_util:init_code_path(),
+
+ etap:plan(19),
+ case (catch test()) of
+ ok ->
+ etap:end_tests();
+ Other ->
+ etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
+ etap:bail(Other)
+ end,
+ ok.
+
+
+test() ->
+ couch_server_sup:start_link(test_util:config_files()),
+ couch_config:set("query_server_config", "os_process_limit", "3", false),
+
+ test_pool_full(),
+ test_client_unexpected_exit(),
+
+ couch_server_sup:stop(),
+ ok.
+
+
+test_pool_full() ->
+ Client1 = spawn_client(),
+ Client2 = spawn_client(),
+ Client3 = spawn_client(),
+
+ etap:diag("Check that we can spawn the max number of processes."),
+ etap:is(ping_client(Client1), ok, "Client 1 started ok."),
+ etap:is(ping_client(Client2), ok, "Client 2 started ok."),
+ etap:is(ping_client(Client3), ok, "Client 3 started ok."),
+
+ Proc1 = get_client_proc(Client1, "1"),
+ Proc2 = get_client_proc(Client2, "2"),
+ Proc3 = get_client_proc(Client3, "3"),
+ etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."),
+ etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."),
+
+ etap:diag("Check that client 4 blocks waiting for a process."),
+ Client4 = spawn_client(),
+ etap:is(ping_client(Client4), timeout, "Client 4 blocked while waiting."),
+
+ etap:diag("Check that stopping a client gives up its process."),
+ etap:is(stop_client(Client1), ok, "First client stopped."),
+
+ etap:diag("And check that our blocked process has been unblocked."),
+ etap:is(ping_client(Client4), ok, "Client was unblocked."),
+
+ Proc4 = get_client_proc(Client4, "4"),
+ etap:is(Proc4, Proc1, "Client 4 got proc that client 1 got before."),
+
+ lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]).
+
+
+test_client_unexpected_exit() ->
+ Client1 = spawn_client(),
+ Client2 = spawn_client(),
+ Client3 = spawn_client(),
+
+ etap:diag("Check that up to os_process_limit clients started."),
+ etap:is(ping_client(Client1), ok, "Client 1 started ok."),
+ etap:is(ping_client(Client2), ok, "Client 2 started ok."),
+ etap:is(ping_client(Client3), ok, "Client 3 started ok."),
+
+ Proc1 = get_client_proc(Client1, "1"),
+ Proc2 = get_client_proc(Client2, "2"),
+ Proc3 = get_client_proc(Client3, "3"),
+ etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."),
+ etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."),
+
+ etap:diag("Check that killing a client frees an os_process."),
+ etap:is(kill_client(Client1), ok, "Client 1 died all right."),
+
+ etap:diag("Check that a new client is not blocked on boot."),
+ Client4 = spawn_client(),
+ etap:is(ping_client(Client4), ok, "New client booted without blocking."),
+
+ Proc4 = get_client_proc(Client4, "4"),
+ etap:isnt(Proc4, Proc1,
+ "Client 4 got a proc different from the one client 1 got before."),
+ etap:isnt(Proc4, Proc2, "Client 4's proc different from client 2's proc."),
+ etap:isnt(Proc4, Proc3, "Client 4's proc different from client 3's proc."),
+
+ lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]).
+
+
+spawn_client() ->
+ Parent = self(),
+ Ref = make_ref(),
+ Pid = spawn(fun() ->
+ Proc = couch_query_servers:get_os_process(<<"javascript">>),
+ loop(Parent, Ref, Proc)
+ end),
+ {Pid, Ref}.
+
+
+ping_client({Pid, Ref}) ->
+ Pid ! ping,
+ receive
+ {pong, Ref} -> ok
+ after 3000 -> timeout
+ end.
+
+
+get_client_proc({Pid, Ref}, ClientName) ->
+ Pid ! get_proc,
+ receive
+ {proc, Ref, Proc} -> Proc
+ after 3000 ->
+ etap:bail("Timeout getting client " ++ ClientName ++ " proc.")
+ end.
+
+
+stop_client({Pid, Ref}) ->
+ Pid ! stop,
+ receive
+ {stop, Ref} -> ok
+ after 3000 -> timeout
+ end.
+
+
+kill_client({Pid, Ref}) ->
+ Pid ! die,
+ receive
+ {die, Ref} -> ok
+ after 3000 -> timeout
+ end.
+
+
+loop(Parent, Ref, Proc) ->
+ receive
+ ping ->
+ Parent ! {pong, Ref},
+ loop(Parent, Ref, Proc);
+ get_proc ->
+ Parent ! {proc, Ref, Proc},
+ loop(Parent, Ref, Proc);
+ stop ->
+ couch_query_servers:ret_os_process(Proc),
+ Parent ! {stop, Ref};
+ die ->
+ Parent ! {die, Ref},
+ exit(some_error)
+ end.
diff --git a/test/etap/Makefile.am b/test/etap/Makefile.am
index ecbc3a93..c3a4ddab 100644
--- a/test/etap/Makefile.am
+++ b/test/etap/Makefile.am
@@ -87,4 +87,6 @@ EXTRA_DIST = \
180-http-proxy.ini \
180-http-proxy.t \
190-oauth.t \
- 200-view-group-no-db-leaks.t
+ 200-view-group-no-db-leaks.t \
+ 210-os-proc-pool.t
+