summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul J. Davis <paul.joseph.davis@gmail.com>2011-12-03 14:12:17 -0500
committerRobert Newson <robert.newson@cloudant.com>2012-11-14 17:26:43 +0000
commitb9f0271cf63efed0af5c3c4aec5a31c928da1079 (patch)
treef0fbbdc1802cf2df28f297c7241a655028156fa6
parent90c656fd3097272d1e574da3f5944d60b476e418 (diff)
Allow processes to idle before exiting
When system load exceeds the ability of os_process_soft_limit to keep up with demand we enter a fork-use-kill (FUK) cycle. The constant spawning and destruction os these processes thrashes system resources and causes general instability. This patch changes the behavior from killing each process as its returned to letting it idle for a configurable amount of time (default five minutes) which allows it to be reused by other clients. This way we can avoid adding unnecessary load when demand for couchjs processes exceeds os_process_soft_limit. As a happy benefit this should also allow os_process_soft_limit to be set much lower since the number of processes will now more closely follow actual demand (instead of provisioning for the worst case scenario). Conflicts: apps/couch/src/couch_os_process.erl apps/couch/src/couch_proc_manager.erl Conflicts: apps/couch/src/couch_os_process.erl
-rw-r--r--apps/couch/src/couch_os_process.erl37
-rw-r--r--apps/couch/src/couch_proc_manager.erl31
2 files changed, 42 insertions, 26 deletions
diff --git a/apps/couch/src/couch_os_process.erl b/apps/couch/src/couch_os_process.erl
index 2a6d92a7..0c6f284f 100644
--- a/apps/couch/src/couch_os_process.erl
+++ b/apps/couch/src/couch_os_process.erl
@@ -27,7 +27,8 @@
port,
writer,
reader,
- timeout=5000
+ timeout=5000,
+ idle
}).
start_link(Command) ->
@@ -104,12 +105,15 @@ readjson(#os_proc{} = OsProc) ->
% gen_server API
init([Command, Options, PortOptions]) ->
+ V = couch_config:get("query_server_config", "os_process_idle_limit", "300"),
+ IdleLimit = list_to_integer(V) * 1000,
Spawnkiller = filename:join([code:priv_dir(couch), "couchspawnkillable.sh"]),
BaseProc = #os_proc{
command=Command,
port=open_port({spawn, Spawnkiller ++ " " ++ Command}, PortOptions),
writer=fun writejson/2,
- reader=fun readjson/1
+ reader=fun readjson/1,
+ idle=IdleLimit
},
KillCmd = readline(BaseProc),
Pid = self(),
@@ -131,32 +135,32 @@ init([Command, Options, PortOptions]) ->
Proc#os_proc{timeout=TimeOut}
end
end, BaseProc, Options),
- {ok, OsProc}.
+ {ok, OsProc, IdleLimit}.
terminate(_Reason, #os_proc{port=Port}) ->
catch port_close(Port),
ok.
-handle_call({set_timeout, TimeOut}, _From, OsProc) ->
- {reply, ok, OsProc#os_proc{timeout=TimeOut}};
-handle_call({prompt, Data}, _From, OsProc) ->
+handle_call({set_timeout, TimeOut}, _From, #os_proc{idle=Idle}=OsProc) ->
+ {reply, ok, OsProc#os_proc{timeout=TimeOut}, Idle};
+handle_call({prompt, Data}, _From, #os_proc{idle=Idle}=OsProc) ->
#os_proc{writer=Writer, reader=Reader} = OsProc,
try
Writer(OsProc, Data),
- {reply, {ok, Reader(OsProc)}, OsProc}
+ {reply, {ok, Reader(OsProc)}, OsProc, Idle}
catch
throw:{error, OsError} ->
- {reply, OsError, OsProc};
+ {reply, OsError, OsProc, Idle};
throw:{fatal, OsError} ->
{stop, normal, OsError, OsProc};
throw:OtherError ->
{stop, normal, OtherError, OsProc}
end.
-handle_cast({send, Data}, #os_proc{writer=Writer}=OsProc) ->
+handle_cast({send, Data}, #os_proc{writer=Writer, idle=Idle}=OsProc) ->
try
Writer(OsProc, Data),
- {noreply, OsProc}
+ {noreply, OsProc, Idle}
catch
throw:OsError ->
?LOG_ERROR("Failed sending data: ~p -> ~p", [Data, OsError]),
@@ -164,16 +168,23 @@ handle_cast({send, Data}, #os_proc{writer=Writer}=OsProc) ->
end;
handle_cast(stop, OsProc) ->
{stop, normal, OsProc};
-handle_cast(Msg, OsProc) ->
+handle_cast(Msg, #os_proc{idle=Idle}=OsProc) ->
?LOG_DEBUG("OS Proc: Unknown cast: ~p", [Msg]),
- {noreply, OsProc}.
+ {noreply, OsProc, Idle}.
+handle_info(timeout, #os_proc{idle=Idle}=OsProc) ->
+ gen_server:cast(couch_proc_manager, {os_proc_idle, self()}),
+ erlang:garbage_collect(),
+ {noreply, OsProc, Idle};
handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) ->
?LOG_INFO("OS Process terminated normally", []),
{stop, normal, OsProc};
handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) ->
?LOG_ERROR("OS Process died with status: ~p", [Status]),
- {stop, {exit_status, Status}, OsProc}.
+ {stop, {exit_status, Status}, OsProc};
+handle_info(Msg, #os_proc{idle=Idle}=OsProc) ->
+ ?LOG_DEBUG("OS Proc: Unknown info: ~p", [Msg]),
+ {noreply, OsProc, Idle}.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
diff --git a/apps/couch/src/couch_proc_manager.erl b/apps/couch/src/couch_proc_manager.erl
index 0f35422b..568608bc 100644
--- a/apps/couch/src/couch_proc_manager.erl
+++ b/apps/couch/src/couch_proc_manager.erl
@@ -67,13 +67,29 @@ handle_call({ret_proc, #proc{client=Ref, pid=Pid} = Proc}, _From, State) ->
% #proc{} from our own table, so the alternative is to do a lookup in the
% table before the insert. Don't know which approach is cheaper.
case is_process_alive(Pid) of true ->
- maybe_reuse_proc(State#state.tab, Proc);
+ ets:insert(State#state.tab, Proc);
false -> ok end,
{reply, true, State};
handle_call(_Call, _From, State) ->
{reply, ignored, State}.
+handle_cast({os_proc_idle, Pid}, #state{tab=Tab}=State) ->
+ Limit = couch_config:get("query_server_config", "os_process_soft_limit", "100"),
+ case ets:info(Tab, size) > list_to_integer(Limit) of
+ true ->
+ ets:delete(Tab, Pid),
+ case is_process_alive(Pid) of
+ true ->
+ unlink(Pid),
+ gen_server:cast(Pid, stop);
+ _ ->
+ ok
+ end;
+ _ ->
+ ok
+ end,
+ {noreply, State};
handle_cast(_Msg, State) ->
{noreply, State}.
@@ -95,7 +111,7 @@ handle_info({'DOWN', Ref, _, _, _Reason}, State) ->
ok;
[#proc{pid = Pid} = Proc] ->
case is_process_alive(Pid) of true ->
- maybe_reuse_proc(State#state.tab, Proc);
+ ets:insert(State#state.tab, Proc);
false -> ok end
end,
{noreply, State};
@@ -110,17 +126,6 @@ terminate(_Reason, #state{tab=Tab}) ->
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
-maybe_reuse_proc(Tab, #proc{pid = Pid} = Proc) ->
- Limit = couch_config:get("query_server_config", "os_process_soft_limit", "100"),
- case ets:info(Tab, size) > list_to_integer(Limit) of
- true ->
- ets:delete(Tab, Pid),
- unlink(Pid),
- exit(Pid, kill);
- false ->
- garbage_collect(Pid),
- ets:insert(Tab, Proc#proc{client=nil})
- end.
get_procs(Tab, Lang) when is_binary(Lang) ->
get_procs(Tab, binary_to_list(Lang));