summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDamien F. Katz <damien@apache.org>2009-05-30 02:43:52 +0000
committerDamien F. Katz <damien@apache.org>2009-05-30 02:43:52 +0000
commit7860f047be25f99c4d1e83f9456c7eb245ad37dc (patch)
tree8a53ccfc11cb17bd666035892c8dbf14e36d5b0e /src
parentc1ca0dd42524fb6999e1f05de94ccff71bc497b6 (diff)
Test and fix for infinite loops in view_servers, fix for crashed OS processes causing leaked erlang processes and fix for view server crashing when view group process terminates.
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@780165 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r--src/couchdb/Makefile.am10
-rw-r--r--src/couchdb/couch.app.tpl.in2
-rw-r--r--src/couchdb/couch_os_process.erl63
-rw-r--r--src/couchdb/couch_query_servers.erl63
-rw-r--r--src/couchdb/couch_server_sup.erl4
-rw-r--r--src/couchdb/couch_view.erl2
-rw-r--r--src/couchdb/couch_view_group.erl7
-rw-r--r--src/couchdb/priv/couchspawnkillable20
8 files changed, 104 insertions, 67 deletions
diff --git a/src/couchdb/Makefile.am b/src/couchdb/Makefile.am
index 7105300b..e19a7539 100644
--- a/src/couchdb/Makefile.am
+++ b/src/couchdb/Makefile.am
@@ -13,9 +13,11 @@
ICU_LOCAL_FLAGS = $(ICU_LOCAL_CFLAGS) $(ICU_LOCAL_LDFLAGS)
# devdocdir = $(localdocdir)/developer/couchdb
-couchprivlibdir = $(localerlanglibdir)/couch-$(version)/priv/lib
-couchincludedir = $(localerlanglibdir)/couch-$(version)/include
-couchebindir = $(localerlanglibdir)/couch-$(version)/ebin
+couchlibdir = $(localerlanglibdir)/couch-$(version)
+couchprivdir = $(couchlibdir)/priv
+couchprivlibdir = $(couchlibdir)/priv/lib
+couchincludedir = $(couchlibdir)/include
+couchebindir = $(couchlibdir)/ebin
couchprivlib_LTLIBRARIES = couch_erl_driver.la
couch_erl_driver_la_SOURCES = couch_erl_driver.c
@@ -33,6 +35,8 @@ couchinclude_DATA = couch_db.hrl
couchebin_DATA = $(compiled_files)
+couchpriv_SCRIPTS = priv/couchspawnkillable
+
# dist_devdoc_DATA = $(doc_base) $(doc_modules)
CLEANFILES = $(compiled_files) $(doc_base)
diff --git a/src/couchdb/couch.app.tpl.in b/src/couchdb/couch.app.tpl.in
index e0100cb4..259bac6c 100644
--- a/src/couchdb/couch.app.tpl.in
+++ b/src/couchdb/couch.app.tpl.in
@@ -1,4 +1,4 @@
-{application,couch,
+{application,couchdb,
[{description,"@package_name@"},
{vsn,"@version@"},
{modules,[couch_btree,
diff --git a/src/couchdb/couch_os_process.erl b/src/couchdb/couch_os_process.erl
index 9a1fcb0c..75937eb8 100644
--- a/src/couchdb/couch_os_process.erl
+++ b/src/couchdb/couch_os_process.erl
@@ -14,7 +14,7 @@
-behaviour(gen_server).
-export([start_link/1, start_link/2, start_link/3, stop/1]).
--export([set_timeout/2, write/2, read/1, prompt/2, async/3]).
+-export([set_timeout/2, prompt/2]).
-export([writeline/2, readline/1, writejson/2, readjson/1]).
-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]).
@@ -27,7 +27,7 @@
port,
writer,
reader,
- timeout
+ timeout=5000
}).
start_link(Command) ->
@@ -44,24 +44,15 @@ stop(Pid) ->
set_timeout(Pid, TimeOut) when is_integer(TimeOut) ->
ok = gen_server:call(Pid, {set_timeout, TimeOut}).
-write(Pid, Data) ->
- gen_server:call(Pid, {write, Data}).
-
-read(Pid) ->
- gen_server:call(Pid, read).
-
prompt(Pid, Data) ->
case gen_server:call(Pid, {prompt, Data}, infinity) of
{ok, Result} ->
Result;
- {error, Error} ->
+ Error ->
?LOG_DEBUG("OS Process Error ~p",[Error]),
throw(Error)
end.
-async(Pid, Data, CallBack) ->
- gen_server:cast(Pid, {async, Data, CallBack}).
-
% Utility functions for reading and writing
% in custom functions
writeline(OsProc, Data) when is_record(OsProc, os_proc) ->
@@ -103,17 +94,31 @@ readjson(OsProc) when is_record(OsProc, os_proc) ->
Result
end.
+
% gen_server API
init([Command, Options, PortOptions]) ->
- BaseTimeOut = list_to_integer(couch_config:get(
- "couchdb", "os_process_timeout", "5000")),
+ case code:priv_dir(couch) of
+ {error, bad_name} ->
+ % small hack, in dev mode "app" is couchdb. Fixing requires renaming
+ % src/couch to src/couch. Not really worth the hassle.-Damien
+ PrivDir = code:priv_dir(couchdb);
+ PrivDir -> ok
+ end,
+ Spawnkiller = filename:join(PrivDir, "couchspawnkillable"),
BaseProc = #os_proc{
command=Command,
- port=open_port({spawn, Command}, PortOptions),
+ port=open_port({spawn, Spawnkiller ++ " " ++ Command}, PortOptions),
writer=fun writejson/2,
- reader=fun readjson/1,
- timeout=BaseTimeOut
+ reader=fun readjson/1
},
+ KillCmd = readline(BaseProc),
+ Pid = self(),
+ spawn(fun() ->
+ % this ensure the real os process is killed when this process dies.
+ erlang:monitor(process, Pid),
+ receive _ -> ok end,
+ os:cmd(?b2l(KillCmd))
+ end),
OsProc =
lists:foldl(fun(Opt, Proc) ->
case Opt of
@@ -127,36 +132,22 @@ init([Command, Options, PortOptions]) ->
end, BaseProc, Options),
{ok, OsProc}.
-terminate(_Reason, #os_proc{port=nil}) ->
- ok;
terminate(_Reason, #os_proc{port=Port}) ->
catch port_close(Port),
ok.
handle_call({set_timeout, TimeOut}, _From, OsProc) ->
{reply, ok, OsProc#os_proc{timeout=TimeOut}};
-handle_call({write, Data}, _From, OsProc) ->
- Writer = OsProc#os_proc.writer,
- {reply, Writer(OsProc, Data), OsProc};
-handle_call(read, _From, OsProc) ->
- Reader = OsProc#os_proc.reader,
- {reply, Reader(OsProc), OsProc};
handle_call({prompt, Data}, _From, OsProc) ->
#os_proc{writer=Writer, reader=Reader} = OsProc,
- Writer(OsProc, Data),
- Result = try Reader(OsProc) of
- Ok -> {ok, Ok}
+ try
+ Writer(OsProc, Data),
+ {reply, {ok, Reader(OsProc)}, OsProc}
catch
throw:OsError ->
- {error, OsError}
- end,
- {reply, Result, OsProc}.
+ {stop, normal, OsError, OsProc}
+ end.
-handle_cast({async, Data, CallBack}, OsProc) ->
- #os_proc{writer=Writer, reader=Reader} = OsProc,
- Writer(OsProc, Data),
- CallBack(Reader(OsProc)),
- {noreply, OsProc};
handle_cast(stop, OsProc) ->
{stop, normal, OsProc};
handle_cast(Msg, OsProc) ->
diff --git a/src/couchdb/couch_query_servers.erl b/src/couchdb/couch_query_servers.erl
index 7c82abe7..ef2bde3b 100644
--- a/src/couchdb/couch_query_servers.erl
+++ b/src/couchdb/couch_query_servers.erl
@@ -118,7 +118,7 @@ recombine_reduce_results([<<"_", _/binary>>|RedSrcs], OsResults, [BRes|BuiltinRe
recombine_reduce_results([_OsFun|RedSrcs], [OsR|OsResults], BuiltinResults, Acc) ->
recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [OsR|Acc]).
-os_reduce(Lang, [], KVs) ->
+os_reduce(_Lang, [], _KVs) ->
{ok, []};
os_reduce(Lang, OsRedSrcs, KVs) ->
Pid = get_os_process(Lang),
@@ -130,7 +130,7 @@ os_reduce(Lang, OsRedSrcs, KVs) ->
end,
{ok, OsResults}.
-builtin_reduce(_Re, [], KVs, Acc) ->
+builtin_reduce(_Re, [], _KVs, Acc) ->
{ok, lists:reverse(Acc)};
builtin_reduce(Re, [<<"_sum">>|BuiltinReds], KVs, Acc) ->
Sum = builtin_sum_rows(KVs),
@@ -242,6 +242,7 @@ init([]) ->
lists:foreach(fun({Lang, Command}) ->
true = ets:insert(Langs, {?l2b(Lang), Command})
end, couch_config:get("query_servers")),
+ process_flag(trap_exit, true),
{ok, {Langs, PidLangs, Pids, InUse}}.
terminate(_Reason, _Server) ->
@@ -255,13 +256,15 @@ handle_call({get_proc, Lang}, _From, {Langs, PidLangs, Pids, InUse}=Server) ->
add_value(PidLangs, Pid, Lang),
rem_from_list(Pids, Lang, Pid),
add_to_list(InUse, Lang, Pid),
- QueryConfig = get_query_server_config(),
- true = couch_os_process:prompt(Pid, [<<"reset">>, QueryConfig]),
- {reply, Pid, Server};
+ {reply, {recycled, Pid, get_query_server_config()}, Server};
_ ->
- {ok, Pid} = new_process(Langs, Lang),
- add_to_list(InUse, Lang, Pid),
- {reply, Pid, Server}
+ case (catch new_process(Langs, Lang)) of
+ {ok, Pid} ->
+ add_to_list(InUse, Lang, Pid),
+ {reply, {new, Pid}, Server};
+ Error ->
+ {reply, Error, Server}
+ end
end;
handle_call({ret_proc, Lang, Pid}, _From, {_, _, Pids, InUse}=Server) ->
% Along with max process limit, here we should check
@@ -273,22 +276,20 @@ handle_call({ret_proc, Lang, Pid}, _From, {_, _, Pids, InUse}=Server) ->
handle_cast(_Whatever, Server) ->
{noreply, Server}.
-handle_info({'EXIT', Pid, Status}, {Langs, PidLangs, Pids, InUse}) ->
+handle_info({'EXIT', Pid, Status}, {_, PidLangs, Pids, InUse}=Server) ->
case ets:lookup(PidLangs, Pid) of
[{Pid, Lang}] ->
case Status of
normal -> ok;
_ -> ?LOG_DEBUG("Linked process died abnormally: ~p (reason: ~p)", [Pid, Status])
end,
- {ok, {
- Langs,
- rem_value(PidLangs, Pid),
- rem_from_list(Pids, Lang, Pid),
- rem_from_list(InUse, Lang, Pid)
- }};
+ rem_value(PidLangs, Pid),
+ catch rem_from_list(Pids, Lang, Pid),
+ catch rem_from_list(InUse, Lang, Pid),
+ {noreply, Server};
[] ->
?LOG_DEBUG("Unknown linked process died: ~p (reason: ~p)", [Pid, Status]),
- {ok, {Langs, PidLangs, Pids, InUse}}
+ {stop, Status, Server}
end.
code_change(_OldVsn, State, _Extra) ->
@@ -302,27 +303,45 @@ get_query_server_config() ->
{[{<<"reduce_limit">>, ReduceLimit}]}.
new_process(Langs, Lang) ->
- Proc =
case ets:lookup(Langs, Lang) of
[{Lang, Command}] ->
couch_os_process:start_link(Command);
_ ->
- throw({unknown_query_language, Lang})
- end,
- Proc.
+ {unknown_query_language, Lang}
+ end.
get_os_process(Lang) ->
- gen_server:call(couch_query_servers, {get_proc, Lang}).
+ case gen_server:call(couch_query_servers, {get_proc, Lang}) of
+ {new, Pid} ->
+ couch_os_process:set_timeout(Pid, list_to_integer(couch_config:get(
+ "couchdb", "os_process_timeout", "5000"))),
+ link(Pid),
+ Pid;
+ {recycled, Pid, QueryConfig} ->
+ case (catch couch_os_process:prompt(Pid, [<<"reset">>, QueryConfig])) of
+ true ->
+ couch_os_process:set_timeout(Pid, list_to_integer(couch_config:get(
+ "couchdb", "os_process_timeout", "5000"))),
+ link(Pid),
+ Pid;
+ _ ->
+ catch couch_os_process:stop(Pid),
+ get_os_process(Lang)
+ end;
+ Error ->
+ throw(Error)
+ end.
ret_os_process(Lang, Pid) ->
true = gen_server:call(couch_query_servers, {ret_proc, Lang, Pid}),
+ catch unlink(Pid),
ok.
add_value(Tid, Key, Value) ->
true = ets:insert(Tid, {Key, Value}).
rem_value(Tid, Key) ->
- true = ets:insert(Tid, Key).
+ true = ets:delete(Tid, Key).
add_to_list(Tid, Key, Value) ->
case ets:lookup(Tid, Key) of
diff --git a/src/couchdb/couch_server_sup.erl b/src/couchdb/couch_server_sup.erl
index 344073a3..e59286f3 100644
--- a/src/couchdb/couch_server_sup.erl
+++ b/src/couchdb/couch_server_sup.erl
@@ -32,8 +32,10 @@ start_link(IniFiles) ->
end.
restart_core_server() ->
+ supervisor:terminate_child(couch_secondary_services, couch_server),
supervisor:terminate_child(couch_primary_services, couch_server),
- supervisor:restart_child(couch_primary_services, couch_server).
+ supervisor:restart_child(couch_primary_services, couch_server),
+ supervisor:restart_child(couch_secondary_services, couch_server).
couch_config_start_link_wrapper(IniFiles, FirstConfigPid) ->
case is_process_alive(FirstConfigPid) of
diff --git a/src/couchdb/couch_view.erl b/src/couchdb/couch_view.erl
index b74fb47d..301ea8b7 100644
--- a/src/couchdb/couch_view.erl
+++ b/src/couchdb/couch_view.erl
@@ -260,7 +260,7 @@ handle_call({start_group_server, DbName, GroupId}, _From, #server{root_dir=Root}
case ets:lookup(group_servers_by_name, {DbName, GroupId}) of
[] ->
?LOG_DEBUG("Spawning new group server for view group ~s in database ~s.", [GroupId, DbName]),
- case couch_view_group:start_link({view, Root, DbName, GroupId}) of
+ case (catch couch_view_group:start_link({view, Root, DbName, GroupId})) of
{ok, NewPid} ->
add_to_ets(NewPid, DbName, GroupId),
{reply, {ok, NewPid}, Server};
diff --git a/src/couchdb/couch_view_group.erl b/src/couchdb/couch_view_group.erl
index 28679927..0ab1077e 100644
--- a/src/couchdb/couch_view_group.erl
+++ b/src/couchdb/couch_view_group.erl
@@ -274,9 +274,10 @@ handle_info({'DOWN',_,_,_,_}, State) ->
{stop, normal, reply_all(State, shutdown)}.
-terminate(Reason, State) ->
- reply_all(State, Reason),
- couch_util:terminate_linked(Reason),
+terminate(Reason, #group_state{updater_pid=Update, compactor_pid=Compact}=S) ->
+ reply_all(S, Reason),
+ catch exit(Update, Reason),
+ catch exit(Compact, Reason),
ok.
code_change(_OldVsn, State, _Extra) ->
diff --git a/src/couchdb/priv/couchspawnkillable b/src/couchdb/priv/couchspawnkillable
new file mode 100644
index 00000000..3d85ea3c
--- /dev/null
+++ b/src/couchdb/priv/couchspawnkillable
@@ -0,0 +1,20 @@
+#! /bin/sh -e
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# The purpose of this script is to echo an OS specific command before launching
+# the actual process. This provides a way for Erlang to hard-kill its external
+# processes.
+
+echo "kill -9 $$"
+exec $* \ No newline at end of file