From 82a10d77d0c206c89e640ca7097d8e12e95eb8be Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Thu, 18 Aug 2011 05:50:42 +0000 Subject: Merge revision 1159045 from trunk Fix dead lock case in the os process pool Part of this patch was done by Paul Davis. The patch also introduces a test case to validate that the os process pool (couch_query_servers) operates as it should. Closes COUCHDB-1246. git-svn-id: https://svn.apache.org/repos/asf/couchdb/branches/1.1.x@1159049 13f79535-47bb-0310-9956-ffa450edef68 --- src/couchdb/couch_os_process.erl | 6 +- src/couchdb/couch_query_servers.erl | 22 +++-- test/etap/210-os-proc-pool.t | 161 ++++++++++++++++++++++++++++++++++++ test/etap/Makefile.am | 4 +- 4 files changed, 180 insertions(+), 13 deletions(-) create mode 100644 test/etap/210-os-proc-pool.t diff --git a/src/couchdb/couch_os_process.erl b/src/couchdb/couch_os_process.erl index 5776776b..3ee52865 100644 --- a/src/couchdb/couch_os_process.erl +++ b/src/couchdb/couch_os_process.erl @@ -104,7 +104,6 @@ readjson(OsProc) when is_record(OsProc, os_proc) -> % gen_server API init([Command, Options, PortOptions]) -> - process_flag(trap_exit, true), PrivDir = couch_util:priv_dir(), Spawnkiller = filename:join(PrivDir, "couchspawnkillable"), BaseProc = #os_proc{ @@ -175,10 +174,7 @@ handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) -> {stop, normal, OsProc}; handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) -> ?LOG_ERROR("OS Process died with status: ~p", [Status]), - {stop, {exit_status, Status}, OsProc}; -handle_info(Msg, OsProc) -> - ?LOG_DEBUG("OS Proc: Unknown info: ~p", [Msg]), - {noreply, OsProc}. + {stop, {exit_status, Status}, OsProc}. code_change(_OldVsn, State, _Extra) -> {ok, State}. diff --git a/src/couchdb/couch_query_servers.erl b/src/couchdb/couch_query_servers.erl index b0e46937..f8bbcaed 100644 --- a/src/couchdb/couch_query_servers.erl +++ b/src/couchdb/couch_query_servers.erl @@ -22,7 +22,8 @@ -export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]). -% -export([test/0]). +% For 210-os-proc-pool.t +-export([get_os_process/1, ret_os_process/1]). -include("couch_db.hrl"). @@ -343,8 +344,7 @@ handle_call({get_proc, Lang}, From, Server) -> Error -> {reply, Error, Server} end; -handle_call({unlink_proc, Pid}, _From, #qserver{pid_procs=PidProcs}=Server) -> - rem_value(PidProcs, Pid), +handle_call({unlink_proc, Pid}, _From, Server) -> unlink(Pid), {reply, ok, Server}; handle_call({ret_proc, Proc}, _From, #qserver{ @@ -352,15 +352,22 @@ handle_call({ret_proc, Proc}, _From, #qserver{ lang_procs=LangProcs}=Server) -> % Along with max process limit, here we should check % if we're over the limit and discard when we are. - add_value(PidProcs, Proc#proc.pid, Proc), - add_to_list(LangProcs, Proc#proc.lang, Proc), - link(Proc#proc.pid), + case is_process_alive(Proc#proc.pid) of + true -> + add_value(PidProcs, Proc#proc.pid, Proc), + add_to_list(LangProcs, Proc#proc.lang, Proc), + link(Proc#proc.pid); + false -> + ok + end, {reply, true, service_waitlist(Server)}. handle_cast(_Whatever, Server) -> {noreply, Server}. -handle_info({'EXIT', Pid, Status}, #qserver{ +handle_info({'EXIT', _, _}, Server) -> + {noreply, Server}; +handle_info({'DOWN', _, process, Pid, Status}, #qserver{ pid_procs=PidProcs, lang_procs=LangProcs, lang_limits=LangLimits}=Server) -> @@ -461,6 +468,7 @@ new_process(Langs, LangLimits, Lang) -> case ets:lookup(Langs, Lang) of [{Lang, Mod, Func, Arg}] -> {ok, Pid} = apply(Mod, Func, Arg), + erlang:monitor(process, Pid), true = ets:insert(LangLimits, {Lang, Lim, Current+1}), {ok, #proc{lang=Lang, pid=Pid, diff --git a/test/etap/210-os-proc-pool.t b/test/etap/210-os-proc-pool.t new file mode 100644 index 00000000..b68d66be --- /dev/null +++ b/test/etap/210-os-proc-pool.t @@ -0,0 +1,161 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + + etap:plan(19), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + + +test() -> + couch_server_sup:start_link(test_util:config_files()), + couch_config:set("query_server_config", "os_process_limit", "3", false), + + test_pool_full(), + test_client_unexpected_exit(), + + couch_server_sup:stop(), + ok. + + +test_pool_full() -> + Client1 = spawn_client(), + Client2 = spawn_client(), + Client3 = spawn_client(), + + etap:diag("Check that we can spawn the max number of processes."), + etap:is(ping_client(Client1), ok, "Client 1 started ok."), + etap:is(ping_client(Client2), ok, "Client 2 started ok."), + etap:is(ping_client(Client3), ok, "Client 3 started ok."), + + Proc1 = get_client_proc(Client1, "1"), + Proc2 = get_client_proc(Client2, "2"), + Proc3 = get_client_proc(Client3, "3"), + etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."), + etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."), + + etap:diag("Check that client 4 blocks waiting for a process."), + Client4 = spawn_client(), + etap:is(ping_client(Client4), timeout, "Client 4 blocked while waiting."), + + etap:diag("Check that stopping a client gives up its process."), + etap:is(stop_client(Client1), ok, "First client stopped."), + + etap:diag("And check that our blocked process has been unblocked."), + etap:is(ping_client(Client4), ok, "Client was unblocked."), + + Proc4 = get_client_proc(Client4, "4"), + etap:is(Proc4, Proc1, "Client 4 got proc that client 1 got before."), + + lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]). + + +test_client_unexpected_exit() -> + Client1 = spawn_client(), + Client2 = spawn_client(), + Client3 = spawn_client(), + + etap:diag("Check that up to os_process_limit clients started."), + etap:is(ping_client(Client1), ok, "Client 1 started ok."), + etap:is(ping_client(Client2), ok, "Client 2 started ok."), + etap:is(ping_client(Client3), ok, "Client 3 started ok."), + + Proc1 = get_client_proc(Client1, "1"), + Proc2 = get_client_proc(Client2, "2"), + Proc3 = get_client_proc(Client3, "3"), + etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."), + etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."), + + etap:diag("Check that killing a client frees an os_process."), + etap:is(kill_client(Client1), ok, "Client 1 died all right."), + + etap:diag("Check that a new client is not blocked on boot."), + Client4 = spawn_client(), + etap:is(ping_client(Client4), ok, "New client booted without blocking."), + + Proc4 = get_client_proc(Client4, "4"), + etap:isnt(Proc4, Proc1, + "Client 4 got a proc different from the one client 1 got before."), + etap:isnt(Proc4, Proc2, "Client 4's proc different from client 2's proc."), + etap:isnt(Proc4, Proc3, "Client 4's proc different from client 3's proc."), + + lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]). + + +spawn_client() -> + Parent = self(), + Ref = make_ref(), + Pid = spawn(fun() -> + Proc = couch_query_servers:get_os_process(<<"javascript">>), + loop(Parent, Ref, Proc) + end), + {Pid, Ref}. + + +ping_client({Pid, Ref}) -> + Pid ! ping, + receive + {pong, Ref} -> ok + after 3000 -> timeout + end. + + +get_client_proc({Pid, Ref}, ClientName) -> + Pid ! get_proc, + receive + {proc, Ref, Proc} -> Proc + after 3000 -> + etap:bail("Timeout getting client " ++ ClientName ++ " proc.") + end. + + +stop_client({Pid, Ref}) -> + Pid ! stop, + receive + {stop, Ref} -> ok + after 3000 -> timeout + end. + + +kill_client({Pid, Ref}) -> + Pid ! die, + receive + {die, Ref} -> ok + after 3000 -> timeout + end. + + +loop(Parent, Ref, Proc) -> + receive + ping -> + Parent ! {pong, Ref}, + loop(Parent, Ref, Proc); + get_proc -> + Parent ! {proc, Ref, Proc}, + loop(Parent, Ref, Proc); + stop -> + couch_query_servers:ret_os_process(Proc), + Parent ! {stop, Ref}; + die -> + Parent ! {die, Ref}, + exit(some_error) + end. diff --git a/test/etap/Makefile.am b/test/etap/Makefile.am index ecbc3a93..c3a4ddab 100644 --- a/test/etap/Makefile.am +++ b/test/etap/Makefile.am @@ -87,4 +87,6 @@ EXTRA_DIST = \ 180-http-proxy.ini \ 180-http-proxy.t \ 190-oauth.t \ - 200-view-group-no-db-leaks.t + 200-view-group-no-db-leaks.t \ + 210-os-proc-pool.t + -- cgit v1.2.3