Diffstat (limited to 'src')
102 files changed, 12925 insertions, 4690 deletions
diff --git a/src/couchdb/Makefile.am b/src/couchdb/Makefile.am
index 9e909964..c325440d 100644
--- a/src/couchdb/Makefile.am
+++ b/src/couchdb/Makefile.am
@@ -17,7 +17,7 @@ couchlibdir = $(localerlanglibdir)/couch-$(version)
 couchincludedir = $(couchlibdir)/include
 couchebindir = $(couchlibdir)/ebin
 
-couchinclude_DATA = couch_db.hrl
+couchinclude_DATA = couch_db.hrl couch_js_functions.hrl
 couchebin_DATA = $(compiled_files)
 
 # dist_devdoc_DATA = $(doc_base) $(doc_modules)
@@ -29,7 +29,9 @@ CLEANFILES = $(compiled_files) $(doc_base)
 source_files = \
     couch.erl \
     couch_app.erl \
+    couch_auth_cache.erl \
     couch_btree.erl \
+    couch_changes.erl \
     couch_config.erl \
     couch_config_writer.erl \
     couch_db.erl \
@@ -48,10 +50,14 @@ source_files = \
     couch_httpd_show.erl \
     couch_httpd_view.erl \
     couch_httpd_misc_handlers.erl \
+    couch_httpd_proxy.erl \
+    couch_httpd_rewrite.erl \
     couch_httpd_stats_handlers.erl \
+    couch_httpd_vhost.erl \
     couch_key_tree.erl \
     couch_log.erl \
     couch_native_process.erl \
+    couch_os_daemons.erl \
     couch_os_process.erl \
     couch_query_servers.erl \
     couch_ref_counter.erl \
@@ -63,6 +69,7 @@ source_files = \
     couch_rep_reader.erl \
     couch_rep_sup.erl \
     couch_rep_writer.erl \
+    couch_rep_db_listener.erl \
     couch_server.erl \
     couch_server_sup.erl \
     couch_stats_aggregator.erl \
@@ -78,13 +85,15 @@ source_files = \
     couch_db_updater.erl \
     couch_work_queue.erl
 
-EXTRA_DIST = $(source_files) couch_db.hrl
+EXTRA_DIST = $(source_files) couch_db.hrl couch_js_functions.hrl
 
 compiled_files = \
     couch.app \
     couch.beam \
     couch_app.beam \
+    couch_auth_cache.beam \
     couch_btree.beam \
+    couch_changes.beam \
     couch_config.beam \
     couch_config_writer.beam \
     couch_db.beam \
@@ -99,14 +108,18 @@ compiled_files = \
     couch_httpd_db.beam \
     couch_httpd_auth.beam \
     couch_httpd_oauth.beam \
+    couch_httpd_proxy.beam \
     couch_httpd_external.beam \
     couch_httpd_show.beam \
     couch_httpd_view.beam \
     couch_httpd_misc_handlers.beam \
+    couch_httpd_rewrite.beam \
     couch_httpd_stats_handlers.beam \
+    couch_httpd_vhost.beam \
     couch_key_tree.beam \
     couch_log.beam \
     couch_native_process.beam \
+    couch_os_daemons.beam \
     couch_os_process.beam \
     couch_query_servers.beam \
     couch_ref_counter.beam \
@@ -118,6 +131,7 @@ compiled_files = \
     couch_rep_reader.beam \
     couch_rep_sup.beam \
     couch_rep_writer.beam \
+    couch_rep_db_listener.beam \
     couch_server.beam \
     couch_server_sup.beam \
     couch_stats_aggregator.beam \
@@ -175,7 +189,7 @@ couch.app: couch.app.tpl
 	$@ < $<
 else
 couch.app: couch.app.tpl
-	modules=`find . -name "*.erl" -exec basename {} .erl \; | tr '\n' ',' | sed "s/,$$//"`; \
+	modules=`{ find . -name "*.erl" -exec basename {} .erl \; | tr '\n' ','; echo ''; } | sed "s/,$$//"`; \
 	sed -e "s|%package_name%|@package_name@|g" \
 	    -e "s|%version%|@version@|g" \
 	    -e "s|@modules@|$$modules|g" \
@@ -190,6 +204,6 @@ endif
 # $(ERL) -noshell -run edoc_run files [\"$<\"]
 
-%.beam: %.erl couch_db.hrl
+%.beam: %.erl couch_db.hrl couch_js_functions.hrl
 	$(ERLC) $(ERLC_FLAGS) ${TEST} $<;
 
diff --git a/src/couchdb/couch.app.tpl.in b/src/couchdb/couch.app.tpl.in
index fa86d2ec..36b0b34c 100644
--- a/src/couchdb/couch.app.tpl.in
+++ b/src/couchdb/couch.app.tpl.in
@@ -26,4 +26,4 @@
     ]}},
     {applications, [kernel, stdlib]},
     {included_applications, [crypto, sasl, inets, oauth, ibrowse, mochiweb]}
-]}.
\ No newline at end of file
+]}.
diff --git a/src/couchdb/couch_app.erl b/src/couchdb/couch_app.erl
index 1b64434a..232953d9 100644
--- a/src/couchdb/couch_app.erl
+++ b/src/couchdb/couch_app.erl
@@ -20,7 +20,7 @@ start(_Type, DefaultIniFiles) ->
     IniFiles = get_ini_files(DefaultIniFiles),
 
-    case start_apps([crypto, sasl, inets, oauth, ssl, ibrowse, mochiweb]) of
+    case start_apps([crypto, public_key, sasl, inets, oauth, ssl, ibrowse, mochiweb]) of
     ok ->
         couch_server_sup:start_link(IniFiles);
     {error, Reason} ->
@@ -48,6 +48,9 @@ start_apps([App|Rest]) ->
         start_apps(Rest);
     {error, {already_started, App}} ->
         start_apps(Rest);
+    {error, _Reason} when App =:= public_key ->
+        % ignore on R12B5
+        start_apps(Rest);
     {error, _Reason} ->
         {error, {app_would_not_start, App}}
     end.
diff --git a/src/couchdb/couch_auth_cache.erl b/src/couchdb/couch_auth_cache.erl
new file mode 100644
index 00000000..e0715b88
--- /dev/null
+++ b/src/couchdb/couch_auth_cache.erl
@@ -0,0 +1,419 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_auth_cache).
+-behaviour(gen_server).
+
+% public API
+-export([get_user_creds/1]).
+
+% gen_server API
+-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]).
+-export([code_change/3, terminate/2]).
+
+-include("couch_db.hrl").
+-include("couch_js_functions.hrl").
+
+-define(STATE, auth_state_ets).
+-define(BY_USER, auth_by_user_ets).
+-define(BY_ATIME, auth_by_atime_ets).
+
+-record(state, {
+    max_cache_size = 0,
+    cache_size = 0,
+    db_notifier = nil
+}).
+
+
+-spec get_user_creds(UserName::string() | binary()) ->
+    Credentials::list() | nil.
+
+get_user_creds(UserName) when is_list(UserName) ->
+    get_user_creds(?l2b(UserName));
+
+get_user_creds(UserName) ->
+    UserCreds = case couch_config:get("admins", ?b2l(UserName)) of
+    "-hashed-" ++ HashedPwdAndSalt ->
+        % the name is an admin, now check to see if there is a user doc
+        % which has a matching name, salt, and password_sha
+        [HashedPwd, Salt] = string:tokens(HashedPwdAndSalt, ","),
+        case get_from_cache(UserName) of
+        nil ->
+            [{<<"roles">>, [<<"_admin">>]},
+                {<<"salt">>, ?l2b(Salt)},
+                {<<"password_sha">>, ?l2b(HashedPwd)}];
+        UserProps when is_list(UserProps) ->
+            DocRoles = couch_util:get_value(<<"roles">>, UserProps),
+            [{<<"roles">>, [<<"_admin">> | DocRoles]},
+                {<<"salt">>, ?l2b(Salt)},
+                {<<"password_sha">>, ?l2b(HashedPwd)}]
+        end;
+    _Else ->
+        get_from_cache(UserName)
+    end,
+    validate_user_creds(UserCreds).
+
+
+get_from_cache(UserName) ->
+    exec_if_auth_db(
+        fun(_AuthDb) ->
+            maybe_refresh_cache(),
+            case ets:lookup(?BY_USER, UserName) of
+            [] ->
+                gen_server:call(?MODULE, {fetch, UserName}, infinity);
+            [{UserName, {Credentials, _ATime}}] ->
+                couch_stats_collector:increment({couchdb, auth_cache_hits}),
+                gen_server:cast(?MODULE, {cache_hit, UserName}),
+                Credentials
+            end
+        end,
+        nil
+    ).
+
+
+validate_user_creds(nil) ->
+    nil;
+validate_user_creds(UserCreds) ->
+    case couch_util:get_value(<<"_conflicts">>, UserCreds) of
+    undefined ->
+        ok;
+    _ConflictList ->
+        throw({unauthorized,
+            <<"User document conflicts must be resolved before the document",
+              " is used for authentication purposes.">>
+        })
+    end,
+    UserCreds.
+
+
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+
+init(_) ->
+    ?STATE = ets:new(?STATE, [set, protected, named_table]),
+    ?BY_USER = ets:new(?BY_USER, [set, protected, named_table]),
+    ?BY_ATIME = ets:new(?BY_ATIME, [ordered_set, private, named_table]),
+    AuthDbName = couch_config:get("couch_httpd_auth", "authentication_db"),
+    true = ets:insert(?STATE, {auth_db_name, ?l2b(AuthDbName)}),
+    true = ets:insert(?STATE, {auth_db, open_auth_db()}),
+    process_flag(trap_exit, true),
+    ok = couch_config:register(
+        fun("couch_httpd_auth", "auth_cache_size", SizeList) ->
+            Size = list_to_integer(SizeList),
+            ok = gen_server:call(?MODULE, {new_max_cache_size, Size}, infinity)
+        end
+    ),
+    ok = couch_config:register(
+        fun("couch_httpd_auth", "authentication_db", DbName) ->
+            ok = gen_server:call(?MODULE, {new_auth_db, ?l2b(DbName)}, infinity)
+        end
+    ),
+    {ok, Notifier} = couch_db_update_notifier:start_link(fun handle_db_event/1),
+    State = #state{
+        db_notifier = Notifier,
+        max_cache_size = list_to_integer(
+            couch_config:get("couch_httpd_auth", "auth_cache_size", "50")
+        )
+    },
+    {ok, State}.
+
+
+handle_db_event({Event, DbName}) ->
+    [{auth_db_name, AuthDbName}] = ets:lookup(?STATE, auth_db_name),
+    case DbName =:= AuthDbName of
+    true ->
+        case Event of
+        deleted -> gen_server:call(?MODULE, auth_db_deleted, infinity);
+        created -> gen_server:call(?MODULE, auth_db_created, infinity);
+        compacted -> gen_server:call(?MODULE, auth_db_compacted, infinity);
+        _Else -> ok
+        end;
+    false ->
+        ok
+    end.
+
+
+handle_call({new_auth_db, AuthDbName}, _From, State) ->
+    NewState = clear_cache(State),
+    true = ets:insert(?STATE, {auth_db_name, AuthDbName}),
+    true = ets:insert(?STATE, {auth_db, open_auth_db()}),
+    {reply, ok, NewState};
+
+handle_call(auth_db_deleted, _From, State) ->
+    NewState = clear_cache(State),
+    true = ets:insert(?STATE, {auth_db, nil}),
+    {reply, ok, NewState};
+
+handle_call(auth_db_created, _From, State) ->
+    NewState = clear_cache(State),
+    true = ets:insert(?STATE, {auth_db, open_auth_db()}),
+    {reply, ok, NewState};
+
+handle_call(auth_db_compacted, _From, State) ->
+    exec_if_auth_db(
+        fun(AuthDb) ->
+            true = ets:insert(?STATE, {auth_db, reopen_auth_db(AuthDb)})
+        end
+    ),
+    {reply, ok, State};
+
+handle_call({new_max_cache_size, NewSize}, _From, State) ->
+    case NewSize >= State#state.cache_size of
+    true ->
+        ok;
+    false ->
+        lists:foreach(
+            fun(_) ->
+                LruTime = ets:last(?BY_ATIME),
+                [{LruTime, UserName}] = ets:lookup(?BY_ATIME, LruTime),
+                true = ets:delete(?BY_ATIME, LruTime),
+                true = ets:delete(?BY_USER, UserName)
+            end,
+            lists:seq(1, State#state.cache_size - NewSize)
+        )
+    end,
+    NewState = State#state{
+        max_cache_size = NewSize,
+        cache_size = lists:min([NewSize, State#state.cache_size])
+    },
+    {reply, ok, NewState};
+
+handle_call({fetch, UserName}, _From, State) ->
+    {Credentials, NewState} = case ets:lookup(?BY_USER, UserName) of
+    [{UserName, {Creds, ATime}}] ->
+        couch_stats_collector:increment({couchdb, auth_cache_hits}),
+        cache_hit(UserName, Creds, ATime),
+        {Creds, State};
+    [] ->
+        couch_stats_collector:increment({couchdb, auth_cache_misses}),
+        Creds = get_user_props_from_db(UserName),
+        State1 = add_cache_entry(UserName, Creds, erlang:now(), State),
+        {Creds, State1}
+    end,
+    {reply, Credentials, NewState};
+
+handle_call(refresh, _From, State) ->
+    exec_if_auth_db(fun refresh_entries/1),
+    {reply, ok, State}.
+
+
+handle_cast({cache_hit, UserName}, State) ->
+    case ets:lookup(?BY_USER, UserName) of
+    [{UserName, {Credentials, ATime}}] ->
+        cache_hit(UserName, Credentials, ATime);
+    _ ->
+        ok
+    end,
+    {noreply, State}.
+
+
+handle_info(_Msg, State) ->
+    {noreply, State}.
+
+
+terminate(_Reason, #state{db_notifier = Notifier}) ->
+    couch_db_update_notifier:stop(Notifier),
+    exec_if_auth_db(fun(AuthDb) -> catch couch_db:close(AuthDb) end),
+    true = ets:delete(?BY_USER),
+    true = ets:delete(?BY_ATIME),
+    true = ets:delete(?STATE).
+
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+
+clear_cache(State) ->
+    exec_if_auth_db(fun(AuthDb) -> catch couch_db:close(AuthDb) end),
+    true = ets:delete_all_objects(?BY_USER),
+    true = ets:delete_all_objects(?BY_ATIME),
+    State#state{cache_size = 0}.
+
+
+add_cache_entry(UserName, Credentials, ATime, State) ->
+    case State#state.cache_size >= State#state.max_cache_size of
+    true ->
+        free_mru_cache_entry();
+    false ->
+        ok
+    end,
+    true = ets:insert(?BY_ATIME, {ATime, UserName}),
+    true = ets:insert(?BY_USER, {UserName, {Credentials, ATime}}),
+    State#state{cache_size = couch_util:get_value(size, ets:info(?BY_USER))}.
+
+
+free_mru_cache_entry() ->
+    case ets:last(?BY_ATIME) of
+    '$end_of_table' ->
+        ok; % empty cache
+    LruTime ->
+        [{LruTime, UserName}] = ets:lookup(?BY_ATIME, LruTime),
+        true = ets:delete(?BY_ATIME, LruTime),
+        true = ets:delete(?BY_USER, UserName)
+    end.
+
+
+cache_hit(UserName, Credentials, ATime) ->
+    NewATime = erlang:now(),
+    true = ets:delete(?BY_ATIME, ATime),
+    true = ets:insert(?BY_ATIME, {NewATime, UserName}),
+    true = ets:insert(?BY_USER, {UserName, {Credentials, NewATime}}).
+
+
+refresh_entries(AuthDb) ->
+    case reopen_auth_db(AuthDb) of
+    nil ->
+        ok;
+    AuthDb2 ->
+        case AuthDb2#db.update_seq > AuthDb#db.update_seq of
+        true ->
+            {ok, _, _} = couch_db:enum_docs_since(
+                AuthDb2,
+                AuthDb#db.update_seq,
+                fun(DocInfo, _, _) -> refresh_entry(AuthDb2, DocInfo) end,
+                AuthDb#db.update_seq,
+                []
+            ),
+            true = ets:insert(?STATE, {auth_db, AuthDb2});
+        false ->
+            ok
+        end
+    end.
+
+
+refresh_entry(Db, #doc_info{high_seq = DocSeq} = DocInfo) ->
+    case is_user_doc(DocInfo) of
+    {true, UserName} ->
+        case ets:lookup(?BY_USER, UserName) of
+        [] ->
+            ok;
+        [{UserName, {_OldCreds, ATime}}] ->
+            {ok, Doc} = couch_db:open_doc(Db, DocInfo, [conflicts, deleted]),
+            NewCreds = user_creds(Doc),
+            true = ets:insert(?BY_USER, {UserName, {NewCreds, ATime}})
+        end;
+    false ->
+        ok
+    end,
+    {ok, DocSeq}.
+
+
+user_creds(#doc{deleted = true}) ->
+    nil;
+user_creds(#doc{} = Doc) ->
+    {Creds} = couch_query_servers:json_doc(Doc),
+    Creds.
+
+
+is_user_doc(#doc_info{id = <<"org.couchdb.user:", UserName/binary>>}) ->
+    {true, UserName};
+is_user_doc(_) ->
+    false.
+
+
+maybe_refresh_cache() ->
+    case cache_needs_refresh() of
+    true ->
+        ok = gen_server:call(?MODULE, refresh, infinity);
+    false ->
+        ok
+    end.
+
+
+cache_needs_refresh() ->
+    exec_if_auth_db(
+        fun(AuthDb) ->
+            case reopen_auth_db(AuthDb) of
+            nil ->
+                false;
+            AuthDb2 ->
+                AuthDb2#db.update_seq > AuthDb#db.update_seq
+            end
+        end,
+        false
+    ).
+
+
+reopen_auth_db(AuthDb) ->
+    case (catch couch_db:reopen(AuthDb)) of
+    {ok, AuthDb2} ->
+        AuthDb2;
+    _ ->
+        nil
+    end.
+
+
+exec_if_auth_db(Fun) ->
+    exec_if_auth_db(Fun, ok).
+
+exec_if_auth_db(Fun, DefRes) ->
+    case ets:lookup(?STATE, auth_db) of
+    [{auth_db, #db{} = AuthDb}] ->
+        Fun(AuthDb);
+    _ ->
+        DefRes
+    end.
+
+
+open_auth_db() ->
+    [{auth_db_name, DbName}] = ets:lookup(?STATE, auth_db_name),
+    {ok, AuthDb} = ensure_users_db_exists(DbName, [sys_db]),
+    AuthDb.
+
+
+get_user_props_from_db(UserName) ->
+    exec_if_auth_db(
+        fun(AuthDb) ->
+            Db = reopen_auth_db(AuthDb),
+            DocId = <<"org.couchdb.user:", UserName/binary>>,
+            try
+                {ok, Doc} = couch_db:open_doc(Db, DocId, [conflicts]),
+                {DocProps} = couch_query_servers:json_doc(Doc),
+                DocProps
+            catch
+            _:_Error ->
+                nil
+            end
+        end,
+        nil
+    ).
+
+ensure_users_db_exists(DbName, Options) ->
+    Options1 = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}} | Options],
+    case couch_db:open(DbName, Options1) of
+    {ok, Db} ->
+        ensure_auth_ddoc_exists(Db, <<"_design/_auth">>),
+        {ok, Db};
+    _Error ->
+        {ok, Db} = couch_db:create(DbName, Options1),
+        ok = ensure_auth_ddoc_exists(Db, <<"_design/_auth">>),
+        {ok, Db}
+    end.
+
+ensure_auth_ddoc_exists(Db, DDocId) ->
+    case couch_db:open_doc(Db, DDocId) of
+    {not_found, _Reason} ->
+        {ok, AuthDesign} = auth_design_doc(DDocId),
+        {ok, _Rev} = couch_db:update_doc(Db, AuthDesign, []);
+    _ ->
+        ok
+    end,
+    ok.
+
+auth_design_doc(DocId) ->
+    DocProps = [
+        {<<"_id">>, DocId},
+        {<<"language">>,<<"javascript">>},
+        {<<"validate_doc_update">>, ?AUTH_DB_DOC_VALIDATE_FUNCTION}
+    ],
+    {ok, couch_doc:from_json_obj({DocProps})}.
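The new module's only public entry point is get_user_creds/1. A minimal usage sketch (not part of this commit; the user name and the calling site are illustrative), e.g. from an authentication handler in a running server:

    case couch_auth_cache:get_user_creds(<<"jan">>) of
    nil ->
        % no admin entry and no user doc in the authentication db
        throw({unauthorized, <<"Name or password is incorrect.">>});
    UserProps ->
        % proplist carrying at least <<"roles">>, and for password
        % checks usually <<"salt">> and <<"password_sha">>
        Salt = couch_util:get_value(<<"salt">>, UserProps, <<>>),
        {Salt, couch_util:get_value(<<"password_sha">>, UserProps)}
    end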
diff --git a/src/couchdb/couch_btree.erl b/src/couchdb/couch_btree.erl
index 73d50805..c63cd8cf 100644
--- a/src/couchdb/couch_btree.erl
+++ b/src/couchdb/couch_btree.erl
@@ -13,7 +13,7 @@
 -module(couch_btree).
 
 -export([open/2, open/3, query_modify/4, add/2, add_remove/3]).
--export([fold/4, full_reduce/1, final_reduce/2,foldl/3,foldl/4]).
+-export([fold/4, full_reduce/1, final_reduce/2, foldl/3, foldl/4]).
 -export([fold_reduce/4, lookup/2, get_state/1, set_options/2]).
 
 -define(CHUNK_THRESHOLD, 16#4ff).
@@ -70,10 +70,10 @@ final_reduce(Reduce, {KVs, Reductions}) ->
     final_reduce(Reduce, {[], [Red | Reductions]}).
 
 fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) ->
-    Dir = proplists:get_value(dir, Options, fwd),
-    StartKey = proplists:get_value(start_key, Options),
-    EndKey = proplists:get_value(end_key, Options),
-    KeyGroupFun = proplists:get_value(key_group_fun, Options, fun(_,_) -> true end),
+    Dir = couch_util:get_value(dir, Options, fwd),
+    StartKey = couch_util:get_value(start_key, Options),
+    EndKey = couch_util:get_value(end_key, Options),
+    KeyGroupFun = couch_util:get_value(key_group_fun, Options, fun(_,_) -> true end),
     {StartKey2, EndKey2} =
     case Dir of
         rev -> {EndKey, StartKey};
@@ -107,9 +107,9 @@ convert_fun_arity(Fun) when is_function(Fun, 3) ->
     Fun.    % Already arity 3
 
 make_key_in_end_range_function(#btree{less=Less}, fwd, Options) ->
-    case proplists:get_value(end_key_gt, Options) of
+    case couch_util:get_value(end_key_gt, Options) of
     undefined ->
-        case proplists:get_value(end_key, Options) of
+        case couch_util:get_value(end_key, Options) of
         undefined ->
             fun(_Key) -> true end;
         LastKey ->
@@ -119,9 +119,9 @@ make_key_in_end_range_function(#btree{less=Less}, fwd, Options) ->
         fun(Key) -> Less(Key, EndKey) end
     end;
 make_key_in_end_range_function(#btree{less=Less}, rev, Options) ->
-    case proplists:get_value(end_key_gt, Options) of
+    case couch_util:get_value(end_key_gt, Options) of
     undefined ->
-        case proplists:get_value(end_key, Options) of
+        case couch_util:get_value(end_key, Options) of
         undefined ->
             fun(_Key) -> true end;
         LastKey ->
@@ -142,15 +142,15 @@ foldl(Bt, Fun, Acc, Options) ->
 fold(#btree{root=nil}, _Fun, Acc, _Options) ->
     {ok, {[], []}, Acc};
 fold(#btree{root=Root}=Bt, Fun, Acc, Options) ->
-    Dir = proplists:get_value(dir, Options, fwd),
+    Dir = couch_util:get_value(dir, Options, fwd),
     InRange = make_key_in_end_range_function(Bt, Dir, Options),
     Result =
-    case proplists:get_value(start_key, Options) of
+    case couch_util:get_value(start_key, Options) of
     undefined ->
-        stream_node(Bt, [], Bt#btree.root, InRange, Dir, 
+        stream_node(Bt, [], Bt#btree.root, InRange, Dir,
             convert_fun_arity(Fun), Acc);
     StartKey ->
-        stream_node(Bt, [], Bt#btree.root, StartKey, InRange, Dir, 
+        stream_node(Bt, [], Bt#btree.root, StartKey, InRange, Dir,
            convert_fun_arity(Fun), Acc)
     end,
     case Result of
@@ -191,7 +191,7 @@ query_modify(Bt, LookupKeys, InsertValues, RemoveKeys) ->
     {ok, NewRoot, Bt3} = complete_root(Bt2, KeyPointers),
     {ok, QueryResults, Bt3#btree{root=NewRoot}}.
 
-% for ordering different operatations with the same key.
+% for ordering different operations with the same key.
 % fetch < remove < insert
 op_order(fetch) -> 1;
 op_order(remove) -> 2;
@@ -203,8 +203,7 @@ lookup(#btree{root=Root, less=Less}=Bt, Keys) ->
     % We want to return the results in the same order as the keys were input
     % but we may have changed the order when we sorted. So we need to put the
     % order back into the results.
-    KeyDict = dict:from_list(SortedResults),
-    [dict:fetch(Key, KeyDict) || Key <- Keys].
+    couch_util:reorder_results(Keys, SortedResults).
 
 lookup(_Bt, nil, Keys) ->
     {ok, [{Key, not_found} || Key <- Keys]};
@@ -271,29 +270,26 @@ complete_root(Bt, KPs) ->
 % written. Plus with the "case byte_size(term_to_binary(InList)) of" code
Plus with the "case byte_size(term_to_binary(InList)) of" code % it's probably really inefficient. -% dialyzer says this pattern is never matched -% chunkify(_Bt, []) -> -% []; -chunkify(Bt, InList) -> +chunkify(InList) -> case byte_size(term_to_binary(InList)) of Size when Size > ?CHUNK_THRESHOLD -> NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1), ChunkThreshold = Size div NumberOfChunksLikely, - chunkify(Bt, InList, ChunkThreshold, [], 0, []); + chunkify(InList, ChunkThreshold, [], 0, []); _Else -> [InList] end. -chunkify(_Bt, [], _ChunkThreshold, [], 0, OutputChunks) -> +chunkify([], _ChunkThreshold, [], 0, OutputChunks) -> lists:reverse(OutputChunks); -chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> lists:reverse([lists:reverse(OutList) | OutputChunks]); -chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> case byte_size(term_to_binary(InElement)) of Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> - chunkify(Bt, RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); + chunkify(RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); Size -> - chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) + chunkify(RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) end. modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> @@ -336,7 +332,7 @@ get_node(#btree{fd = Fd}, NodePos) -> write_node(Bt, NodeType, NodeList) -> % split up nodes into smaller sizes - NodeListList = chunkify(Bt, NodeList), + NodeListList = chunkify(NodeList), % now write out each chunk and return the KeyPointer pairs for those nodes ResultList = [ begin diff --git a/src/couchdb/couch_changes.erl b/src/couchdb/couch_changes.erl new file mode 100644 index 00000000..1e53161b --- /dev/null +++ b/src/couchdb/couch_changes.erl @@ -0,0 +1,334 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_changes). +-include("couch_db.hrl"). + +-export([handle_changes/3]). + +%% @type Req -> #httpd{} | {json_req, JsonObj()} +handle_changes(#changes_args{style=Style}=Args1, Req, Db) -> + #changes_args{feed = Feed} = Args = Args1#changes_args{ + filter = make_filter_fun(Args1#changes_args.filter, Style, Req, Db) + }, + StartSeq = case Args#changes_args.dir of + rev -> + couch_db:get_update_seq(Db); + fwd -> + Args#changes_args.since + end, + if Feed == "continuous" orelse Feed == "longpoll" -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + Self = self(), + {ok, Notify} = couch_db_update_notifier:start_link( + fun({_, DbName}) when DbName == Db#db.name -> + Self ! 
diff --git a/src/couchdb/couch_changes.erl b/src/couchdb/couch_changes.erl
new file mode 100644
index 00000000..1e53161b
--- /dev/null
+++ b/src/couchdb/couch_changes.erl
@@ -0,0 +1,334 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_changes).
+-include("couch_db.hrl").
+
+-export([handle_changes/3]).
+
+%% @type Req -> #httpd{} | {json_req, JsonObj()}
+handle_changes(#changes_args{style=Style}=Args1, Req, Db) ->
+    #changes_args{feed = Feed} = Args = Args1#changes_args{
+        filter = make_filter_fun(Args1#changes_args.filter, Style, Req, Db)
+    },
+    StartSeq = case Args#changes_args.dir of
+    rev ->
+        couch_db:get_update_seq(Db);
+    fwd ->
+        Args#changes_args.since
+    end,
+    if Feed == "continuous" orelse Feed == "longpoll" ->
+        fun(CallbackAcc) ->
+            {Callback, UserAcc} = get_callback_acc(CallbackAcc),
+            Self = self(),
+            {ok, Notify} = couch_db_update_notifier:start_link(
+                fun({_, DbName}) when DbName == Db#db.name ->
+                    Self ! db_updated;
+                (_) ->
+                    ok
+                end
+            ),
+            UserAcc2 = start_sending_changes(Callback, UserAcc, Feed),
+            {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback),
+            try
+                keep_sending_changes(
+                    Args,
+                    Callback,
+                    UserAcc2,
+                    Db,
+                    StartSeq,
+                    <<"">>,
+                    Timeout,
+                    TimeoutFun
+                )
+            after
+                couch_db_update_notifier:stop(Notify),
+                get_rest_db_updated(ok) % clean out any remaining update messages
+            end
+        end;
+    true ->
+        fun(CallbackAcc) ->
+            {Callback, UserAcc} = get_callback_acc(CallbackAcc),
+            UserAcc2 = start_sending_changes(Callback, UserAcc, Feed),
+            {ok, {_, LastSeq, _Prepend, _, _, UserAcc3, _, _, _}} =
+                send_changes(
+                    Args#changes_args{feed="normal"},
+                    Callback,
+                    UserAcc2,
+                    Db,
+                    StartSeq,
+                    <<>>
+                ),
+            end_sending_changes(Callback, UserAcc3, LastSeq, Feed)
+        end
+    end.
+
+get_callback_acc({Callback, _UserAcc} = Pair) when is_function(Callback, 3) ->
+    Pair;
+get_callback_acc(Callback) when is_function(Callback, 2) ->
+    {fun(Ev, Data, _) -> Callback(Ev, Data) end, ok}.
+
+%% @type Req -> #httpd{} | {json_req, JsonObj()}
+make_filter_fun([$_ | _] = FilterName, Style, Req, Db) ->
+    builtin_filter_fun(FilterName, Style, Req, Db);
+make_filter_fun(FilterName, Style, Req, Db) ->
+    os_filter_fun(FilterName, Style, Req, Db).
+
+os_filter_fun(FilterName, Style, Req, Db) ->
+    case [list_to_binary(couch_httpd:unquote(Part))
+            || Part <- string:tokens(FilterName, "/")] of
+    [] ->
+        fun(#doc_info{revs=Revs}) ->
+            builtin_results(Style, Revs)
+        end;
+    [DName, FName] ->
+        DesignId = <<"_design/", DName/binary>>,
+        DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []),
+        % validate that the ddoc has the filter fun
+        #doc{body={Props}} = DDoc,
+        couch_util:get_nested_json_value({Props}, [<<"filters">>, FName]),
+        fun(DocInfo) ->
+            DocInfos =
+            case Style of
+            main_only ->
+                [DocInfo];
+            all_docs ->
+                [DocInfo#doc_info{revs=[Rev]}|| Rev <- DocInfo#doc_info.revs]
+            end,
+            Docs = [Doc || {ok, Doc} <- [
+                    couch_db:open_doc(Db, DocInfo2, [deleted, conflicts])
+                        || DocInfo2 <- DocInfos]],
+            {ok, Passes} = couch_query_servers:filter_docs(
+                Req, Db, DDoc, FName, Docs
+            ),
+            [{[{<<"rev">>, couch_doc:rev_to_str({RevPos,RevId})}]}
+                || {Pass, #doc{revs={RevPos,[RevId|_]}}}
+                <- lists:zip(Passes, Docs), Pass == true]
+        end;
+    _Else ->
+        throw({bad_request,
+            "filter parameter must be of the form `designname/filtername`"})
+    end.
+
+builtin_filter_fun("_doc_ids", Style, {json_req, {Props}}, _Db) ->
+    filter_docids(couch_util:get_value(<<"doc_ids">>, Props), Style);
+builtin_filter_fun("_doc_ids", Style, #httpd{method='POST'}=Req, _Db) ->
+    {Props} = couch_httpd:json_body_obj(Req),
+    DocIds = couch_util:get_value(<<"doc_ids">>, Props, nil),
+    filter_docids(DocIds, Style);
+builtin_filter_fun("_doc_ids", Style, #httpd{method='GET'}=Req, _Db) ->
+    DocIds = ?JSON_DECODE(couch_httpd:qs_value(Req, "doc_ids", "null")),
+    filter_docids(DocIds, Style);
+builtin_filter_fun("_design", Style, _Req, _Db) ->
+    filter_designdoc(Style);
+builtin_filter_fun(_FilterName, _Style, _Req, _Db) ->
+    throw({bad_request, "unknown builtin filter name"}).
+
+filter_docids(DocIds, Style) when is_list(DocIds)->
+    fun(#doc_info{id=DocId, revs=Revs}) ->
+        case lists:member(DocId, DocIds) of
+        true ->
+            builtin_results(Style, Revs);
+        _ -> []
+        end
+    end;
+filter_docids(_, _) ->
+    throw({bad_request, "`doc_ids` filter parameter is not a list."}).
+
+filter_designdoc(Style) ->
+    fun(#doc_info{id=DocId, revs=Revs}) ->
+        case DocId of
+        <<"_design", _/binary>> ->
+            builtin_results(Style, Revs);
+        _ -> []
+        end
+    end.
+
+builtin_results(Style, [#rev_info{rev=Rev}|_]=Revs) ->
+    case Style of
+    main_only ->
+        [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]}];
+    all_docs ->
+        [{[{<<"rev">>, couch_doc:rev_to_str(R)}]}
+            || #rev_info{rev=R} <- Revs]
+    end.
+
+get_changes_timeout(Args, Callback) ->
+    #changes_args{
+        heartbeat = Heartbeat,
+        timeout = Timeout,
+        feed = ResponseType
+    } = Args,
+    DefaultTimeout = list_to_integer(
+        couch_config:get("httpd", "changes_timeout", "60000")
+    ),
+    case Heartbeat of
+    undefined ->
+        case Timeout of
+        undefined ->
+            {DefaultTimeout, fun(UserAcc) -> {stop, UserAcc} end};
+        infinity ->
+            {infinity, fun(UserAcc) -> {stop, UserAcc} end};
+        _ ->
+            {lists:min([DefaultTimeout, Timeout]),
+                fun(UserAcc) -> {stop, UserAcc} end}
+        end;
+    true ->
+        {DefaultTimeout,
+            fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end};
+    _ ->
+        {lists:min([DefaultTimeout, Heartbeat]),
+            fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end}
+    end.
+
+start_sending_changes(_Callback, UserAcc, "continuous") ->
+    UserAcc;
+start_sending_changes(Callback, UserAcc, ResponseType) ->
+    Callback(start, ResponseType, UserAcc).
+
+send_changes(Args, Callback, UserAcc, Db, StartSeq, Prepend) ->
+    #changes_args{
+        style = Style,
+        include_docs = IncludeDocs,
+        limit = Limit,
+        feed = ResponseType,
+        dir = Dir,
+        filter = FilterFun
+    } = Args,
+    couch_db:changes_since(
+        Db,
+        Style,
+        StartSeq,
+        fun changes_enumerator/2,
+        [{dir, Dir}],
+        {Db, StartSeq, Prepend, FilterFun, Callback, UserAcc, ResponseType,
+            Limit, IncludeDocs}
+    ).
+
+keep_sending_changes(Args, Callback, UserAcc, Db, StartSeq, Prepend, Timeout,
+    TimeoutFun) ->
+    #changes_args{
+        feed = ResponseType,
+        limit = Limit,
+        db_open_options = DbOptions
+    } = Args,
+    % ?LOG_INFO("send_changes start ~p",[StartSeq]),
+    {ok, {_, EndSeq, Prepend2, _, _, UserAcc2, _, NewLimit, _}} = send_changes(
+        Args#changes_args{dir=fwd}, Callback, UserAcc, Db, StartSeq, Prepend
+    ),
+    % ?LOG_INFO("send_changes last ~p",[EndSeq]),
+    couch_db:close(Db),
+    if Limit > NewLimit, ResponseType == "longpoll" ->
+        end_sending_changes(Callback, UserAcc2, EndSeq, ResponseType);
+    true ->
+        case wait_db_updated(Timeout, TimeoutFun, UserAcc2) of
+        {updated, UserAcc3} ->
+            % ?LOG_INFO("wait_db_updated updated ~p",[{Db#db.name, EndSeq}]),
+            DbOptions1 = [{user_ctx, Db#db.user_ctx} | DbOptions],
+            case couch_db:open(Db#db.name, DbOptions1) of
+            {ok, Db2} ->
+                keep_sending_changes(
+                    Args#changes_args{limit=NewLimit},
+                    Callback,
+                    UserAcc3,
+                    Db2,
+                    EndSeq,
+                    Prepend2,
+                    Timeout,
+                    TimeoutFun
+                );
+            _Else ->
+                end_sending_changes(Callback, UserAcc2, EndSeq, ResponseType)
+            end;
+        {stop, UserAcc3} ->
+            % ?LOG_INFO("wait_db_updated stop ~p",[{Db#db.name, EndSeq}]),
+            end_sending_changes(Callback, UserAcc3, EndSeq, ResponseType)
+        end
+    end.
+
+end_sending_changes(Callback, UserAcc, EndSeq, ResponseType) ->
+    Callback({stop, EndSeq}, ResponseType, UserAcc).
+
+changes_enumerator(DocInfo, {Db, _, _, FilterFun, Callback, UserAcc,
+    "continuous", Limit, IncludeDocs}) ->
+
+    #doc_info{id=Id, high_seq=Seq,
+            revs=[#rev_info{deleted=Del,rev=Rev}|_]} = DocInfo,
+    Results0 = FilterFun(DocInfo),
+    Results = [Result || Result <- Results0, Result /= null],
+    Go = if Limit =< 1 -> stop; true -> ok end,
+    case Results of
+    [] ->
+        {Go, {Db, Seq, nil, FilterFun, Callback, UserAcc, "continuous", Limit,
+                IncludeDocs}
+        };
+    _ ->
+        ChangesRow = changes_row(Db, Seq, Id, Del, Results, Rev, IncludeDocs),
+        UserAcc2 = Callback({change, ChangesRow, <<>>}, "continuous", UserAcc),
+        {Go, {Db, Seq, nil, FilterFun, Callback, UserAcc2, "continuous",
+                Limit - 1, IncludeDocs}
+        }
+    end;
+changes_enumerator(DocInfo, {Db, _, Prepend, FilterFun, Callback, UserAcc,
+    ResponseType, Limit, IncludeDocs}) ->
+
+    #doc_info{id=Id, high_seq=Seq, revs=[#rev_info{deleted=Del,rev=Rev}|_]}
+        = DocInfo,
+    Results0 = FilterFun(DocInfo),
+    Results = [Result || Result <- Results0, Result /= null],
+    Go = if Limit =< 1 -> stop; true -> ok end,
+    case Results of
+    [] ->
+        {Go, {Db, Seq, Prepend, FilterFun, Callback, UserAcc, ResponseType,
+                Limit, IncludeDocs}
+        };
+    _ ->
+        ChangesRow = changes_row(Db, Seq, Id, Del, Results, Rev, IncludeDocs),
+        UserAcc2 = Callback({change, ChangesRow, Prepend}, ResponseType, UserAcc),
+        {Go, {Db, Seq, <<",\n">>, FilterFun, Callback, UserAcc2, ResponseType,
+                Limit - 1, IncludeDocs}
+        }
+    end.
+
+
+changes_row(Db, Seq, Id, Del, Results, Rev, true) ->
+    {[{<<"seq">>, Seq}, {<<"id">>, Id}, {<<"changes">>, Results}] ++
+        deleted_item(Del) ++ couch_httpd_view:doc_member(Db, {Id, Rev})};
+changes_row(_, Seq, Id, Del, Results, _, false) ->
+    {[{<<"seq">>, Seq}, {<<"id">>, Id}, {<<"changes">>, Results}] ++
+        deleted_item(Del)}.
+
+deleted_item(true) -> [{<<"deleted">>, true}];
+deleted_item(_) -> [].
+
+% waits for a db_updated msg, if there are multiple msgs, collects them.
+wait_db_updated(Timeout, TimeoutFun, UserAcc) ->
+    receive
+    db_updated ->
+        get_rest_db_updated(UserAcc)
+    after Timeout ->
+        {Go, UserAcc2} = TimeoutFun(UserAcc),
+        case Go of
+        ok ->
+            wait_db_updated(Timeout, TimeoutFun, UserAcc2);
+        stop ->
+            {stop, UserAcc2}
+        end
+    end.
+
+get_rest_db_updated(UserAcc) ->
+    receive
+    db_updated ->
+        get_rest_db_updated(UserAcc)
+    after 0 ->
+        {updated, UserAcc}
+    end.
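A sketch of driving the new module directly (not part of this commit; the callback and the open #db{} handle are illustrative). handle_changes/3 returns a fun; with a "normal" feed and an arity-2 callback, the callback is invoked as Callback(Event, ResponseType) for the start, change, and stop events:

    Args = #changes_args{feed = "normal", since = 0},
    ChangesFun = couch_changes:handle_changes(Args, {json_req, null}, Db),
    ChangesFun(fun({change, {RowProps}, _Prepend}, _ResponseType) ->
            % one row per changed document: seq, id, changes, ...
            io:format("change: ~p~n", [RowProps]);
        (_Event, _ResponseType) ->
            ok
        end).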
diff --git a/src/couchdb/couch_config.erl b/src/couchdb/couch_config.erl
index d8473e08..be53e3a3 100644
--- a/src/couchdb/couch_config.erl
+++ b/src/couchdb/couch_config.erl
@@ -44,7 +44,7 @@ stop() ->
 
 all() ->
-    lists:sort(gen_server:call(?MODULE, all)).
+    lists:sort(gen_server:call(?MODULE, all, infinity)).
 
 get(Section) when is_binary(Section) ->
@@ -111,7 +111,7 @@ terminate(_Reason, _State) ->
 handle_call(all, _From, Config) ->
     Resp = lists:sort((ets:tab2list(?MODULE))),
     {reply, Resp, Config};
-handle_call({set, Sec, Key, Val, Persist}, _From, Config) ->
+handle_call({set, Sec, Key, Val, Persist}, From, Config) ->
     true = ets:insert(?MODULE, {{Sec, Key}, Val}),
     case {Persist, Config#config.write_filename} of
     {true, undefined} ->
@@ -121,9 +121,12 @@ handle_call({set, Sec, Key, Val, Persist}, _From, Config) ->
     _ ->
         ok
     end,
-    [catch F(Sec, Key, Val, Persist) || {_Pid, F} <- Config#config.notify_funs],
-    {reply, ok, Config};
-handle_call({delete, Sec, Key, Persist}, _From, Config) ->
+    spawn_link(fun() ->
+        [catch F(Sec, Key, Val, Persist) || {_Pid, F} <- Config#config.notify_funs],
+        gen_server:reply(From, ok)
+    end),
+    {noreply, Config};
+handle_call({delete, Sec, Key, Persist}, From, Config) ->
     true = ets:delete(?MODULE, {Sec,Key}),
     case {Persist, Config#config.write_filename} of
     {true, undefined} ->
@@ -133,8 +136,11 @@ handle_call({delete, Sec, Key, Persist}, _From, Config) ->
     _ ->
         ok
     end,
-    [catch F(Sec, Key, deleted, Persist) || {_Pid, F} <- Config#config.notify_funs],
-    {reply, ok, Config};
+    spawn_link(fun() ->
+        [catch F(Sec, Key, deleted, Persist) || {_Pid, F} <- Config#config.notify_funs],
+        gen_server:reply(From, ok)
+    end),
+    {noreply, Config};
 handle_call({register, Fun, Pid}, _From, #config{notify_funs=PidFuns}=Config) ->
     erlang:monitor(process, Pid),
     % convert 1 and 2 arity to 3 arity
@@ -194,8 +200,28 @@ parse_ini_file(IniFile) ->
                 {AccSectionName, AccValues};
             Line2 ->
                 case re:split(Line2, "\s?=\s?", [{return, list}]) of
-                [_SingleElement] -> % no "=" found, ignore this line
-                    {AccSectionName, AccValues};
+                [Value] ->
+                    MultiLineValuePart = case re:run(Line, "^ \\S", []) of
+                    {match, _} ->
+                        true;
+                    _ ->
+                        false
+                    end,
+                    case {MultiLineValuePart, AccValues} of
+                    {true, [{{_, ValueName}, PrevValue} | AccValuesRest]} ->
+                        % remove comment
+                        case re:split(Value, " ;|\t;", [{return, list}]) of
+                        [[]] ->
+                            % empty line
+                            {AccSectionName, AccValues};
+                        [LineValue | _Rest] ->
+                            E = {{AccSectionName, ValueName},
+                                PrevValue ++ " " ++ LineValue},
+                            {AccSectionName, [E | AccValuesRest]}
+                        end;
+                    _ ->
+                        {AccSectionName, AccValues}
+                    end;
                 [""|_LineValues] -> % line begins with "=", ignore
                     {AccSectionName, AccValues};
                 [ValueName|LineValues] -> % yeehaw, got a line!
@@ -205,7 +231,7 @@ parse_ini_file(IniFile) ->
                     [[]] ->
                         % empty line means delete this key
                         ets:delete(?MODULE, {AccSectionName, ValueName}),
-                        {AccSectionName, AccValues}; 
+                        {AccSectionName, AccValues};
                     [LineValue | _Rest] ->
                         {AccSectionName,
                             [{{AccSectionName, ValueName}, LineValue} | AccValues]}
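The [Value] clause added to parse_ini_file/1 above introduces line continuations: an .ini line containing no "=" that starts with a single space followed by a non-space character is appended, with a joining space, to the value of the preceding key. An illustrative snippet (the section and key are examples, not defaults shipped by this commit):

    [httpd]
    ; both physical lines below parse into one handler list
    authentication_handlers = {couch_httpd_oauth, oauth_authentication_handler},
     {couch_httpd_auth, cookie_authentication_handler}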
diff --git a/src/couchdb/couch_db.erl b/src/couchdb/couch_db.erl
index 79e00ff8..4e945ad4 100644
--- a/src/couchdb/couch_db.erl
+++ b/src/couchdb/couch_db.erl
@@ -13,18 +13,21 @@
 -module(couch_db).
 -behaviour(gen_server).
 
--export([open/2,close/1,create/2,start_compact/1,get_db_info/1,get_design_docs/1]).
+-export([open/2,open_int/2,close/1,create/2,start_compact/1,get_db_info/1,get_design_docs/1]).
 -export([open_ref_counted/2,is_idle/1,monitor/1,count_changes_since/2]).
 -export([update_doc/3,update_doc/4,update_docs/4,update_docs/2,update_docs/3,delete_doc/3]).
 -export([get_doc_info/2,open_doc/2,open_doc/3,open_doc_revs/4]).
--export([set_revs_limit/2,get_revs_limit/1,register_update_notifier/3]).
+-export([set_revs_limit/2,get_revs_limit/1]).
 -export([get_missing_revs/2,name/1,doc_to_tree/1,get_update_seq/1,get_committed_update_seq/1]).
 -export([enum_docs/4,enum_docs_since/5]).
 -export([enum_docs_since_reduce_to_count/1,enum_docs_reduce_to_count/1]).
 -export([increment_update_seq/1,get_purge_seq/1,purge_docs/2,get_last_purged/1]).
--export([start_link/3,open_doc_int/3,set_admins/2,get_admins/1,ensure_full_commit/1]).
+-export([start_link/3,open_doc_int/3,ensure_full_commit/1]).
+-export([set_security/2,get_security/1]).
 -export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]).
 -export([changes_since/5,changes_since/6,read_doc/2,new_revid/1]).
+-export([check_is_admin/1, check_is_reader/1]).
+-export([reopen/1]).
 
 -include("couch_db.hrl").
 
@@ -63,9 +66,39 @@ open_db_file(Filepath, Options) ->
 create(DbName, Options) ->
     couch_server:create(DbName, Options).
 
-open(DbName, Options) ->
+% this is for opening a database for internal purposes like the replicator
+% or the view indexer. it never throws a reader error.
+open_int(DbName, Options) ->
     couch_server:open(DbName, Options).
 
+% this should be called anytime an http request opens the database.
+% it ensures that the http userCtx is a valid reader
+open(DbName, Options) ->
+    case couch_server:open(DbName, Options) of
+    {ok, Db} ->
+        try
+            check_is_reader(Db),
+            {ok, Db}
+        catch
+        throw:Error ->
+            close(Db),
+            throw(Error)
+        end;
+    Else -> Else
+    end.
+
+reopen(#db{main_pid = Pid, fd_ref_counter = OldRefCntr, user_ctx = UserCtx}) ->
+    {ok, #db{fd_ref_counter = NewRefCntr} = NewDb} =
+        gen_server:call(Pid, get_db, infinity),
+    case NewRefCntr =:= OldRefCntr of
+    true ->
+        ok;
+    false ->
+        couch_ref_counter:add(NewRefCntr),
+        couch_ref_counter:drop(OldRefCntr)
+    end,
+    {ok, NewDb#db{user_ctx = UserCtx}}.
+
 ensure_full_commit(#db{update_pid=UpdatePid,instance_start_time=StartTime}) ->
     ok = gen_server:call(UpdatePid, full_commit, infinity),
     {ok, StartTime}.
@@ -73,9 +106,8 @@ ensure_full_commit(#db{update_pid=UpdatePid,instance_start_time=StartTime}) ->
 close(#db{fd_ref_counter=RefCntr}) ->
     couch_ref_counter:drop(RefCntr).
 
-open_ref_counted(MainPid, UserCtx) ->
-    {ok, Db} = gen_server:call(MainPid, {open_ref_count, self()}),
-    {ok, Db#db{user_ctx=UserCtx}}.
+open_ref_counted(MainPid, OpenedPid) ->
+    gen_server:call(MainPid, {open_ref_count, OpenedPid}).
 
 is_idle(MainPid) ->
     gen_server:call(MainPid, is_idle).
@@ -83,11 +115,8 @@ is_idle(MainPid) ->
 monitor(#db{main_pid=MainPid}) ->
     erlang:monitor(process, MainPid).
 
-register_update_notifier(#db{main_pid=Pid}, Seq, Fun) ->
-    gen_server:call(Pid, {register_update_notifier, Seq, Fun}).
-
 start_compact(#db{update_pid=Pid}) ->
-    gen_server:cast(Pid, start_compact).
+    gen_server:call(Pid, start_compact).
 
 delete_doc(Db, Id, Revisions) ->
     DeletedDocs = [#doc{id=Id, revs=[Rev], deleted=true} || Rev <- Revisions],
@@ -98,23 +127,52 @@ open_doc(Db, IdOrDocInfo) ->
     open_doc(Db, IdOrDocInfo, []).
 
 open_doc(Db, Id, Options) ->
-    couch_stats_collector:increment({couchdb, database_reads}),
+    increment_stat(Db, {couchdb, database_reads}),
     case open_doc_int(Db, Id, Options) of
     {ok, #doc{deleted=true}=Doc} ->
         case lists:member(deleted, Options) of
        true ->
-            {ok, Doc};
+            apply_open_options({ok, Doc},Options);
        false ->
            {not_found, deleted}
        end;
     Else ->
-        Else
+        apply_open_options(Else,Options)
     end.
+
+apply_open_options({ok, Doc},Options) ->
+    apply_open_options2(Doc,Options);
+apply_open_options(Else,_Options) ->
+    Else.
+
+apply_open_options2(Doc,[]) ->
+    {ok, Doc};
+apply_open_options2(#doc{atts=Atts,revs=Revs}=Doc,
+        [{atts_since, PossibleAncestors}|Rest]) ->
+    RevPos = find_ancestor_rev_pos(Revs, PossibleAncestors),
+    apply_open_options2(Doc#doc{atts=[A#att{data=
+        if AttPos>RevPos -> Data; true -> stub end}
+        || #att{revpos=AttPos,data=Data}=A <- Atts]}, Rest);
+apply_open_options2(Doc,[_|Rest]) ->
+    apply_open_options2(Doc,Rest).
+
+
+find_ancestor_rev_pos({_, []}, _AttsSinceRevs) ->
+    0;
+find_ancestor_rev_pos(_DocRevs, []) ->
+    0;
+find_ancestor_rev_pos({RevPos, [RevId|Rest]}, AttsSinceRevs) ->
+    case lists:member({RevPos, RevId}, AttsSinceRevs) of
+    true ->
+        RevPos;
+    false ->
+        find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs)
+    end.
 
 open_doc_revs(Db, Id, Revs, Options) ->
-    couch_stats_collector:increment({couchdb, database_reads}),
-    [Result] = open_doc_revs_int(Db, [{Id, Revs}], Options),
-    Result.
+    increment_stat(Db, {couchdb, database_reads}),
+    [{ok, Results}] = open_doc_revs_int(Db, [{Id, Revs}], Options),
+    {ok, [apply_open_options(Result, Options) || Result <- Results]}.
 
 % Each returned result is a list of tuples:
 % {Id, MissingRevs, PossibleAncestors}
@@ -135,9 +193,9 @@ find_missing([{Id, Revs}|RestIdRevs], [{ok, FullInfo} | RestLookupInfo]) ->
             % Find the revs that are possible parents of this rev
             PossibleAncestors =
             lists:foldl(fun({LeafPos, LeafRevId}, Acc) ->
-                % this leaf is a "possible ancenstor" of the missing 
+                % this leaf is a "possible ancenstor" of the missing
                 % revs if this LeafPos lessthan any of the missing revs
-                case lists:any(fun({MissingPos, _}) -> 
+                case lists:any(fun({MissingPos, _}) ->
                     LeafPos < MissingPos end, MissingRevs) of
                 true ->
                     [{LeafPos, LeafRevId} | Acc];
@@ -194,7 +252,8 @@ get_db_info(Db) ->
         update_seq=SeqNum,
         name=Name,
         fulldocinfo_by_id_btree=FullDocBtree,
-        instance_start_time=StartTime} = Db,
+        instance_start_time=StartTime,
+        committed_update_seq=CommittedUpdateSeq} = Db,
     {ok, Size} = couch_file:bytes(Fd),
     {ok, {Count, DelCount}} = couch_btree:full_reduce(FullDocBtree),
     InfoList = [
@@ -206,7 +265,8 @@ get_db_info(Db) ->
         {compact_running, Compactor/=nil},
         {disk_size, Size},
         {instance_start_time, StartTime},
-        {disk_format_version, DiskVersion}
+        {disk_format_version, DiskVersion},
+        {committed_update_seq, CommittedUpdateSeq}
     ],
     {ok, InfoList}.
 
@@ -221,22 +281,88 @@ get_design_docs(#db{fulldocinfo_by_id_btree=Btree}=Db) ->
         [], [{start_key, <<"_design/">>}, {end_key_gt, <<"_design0">>}]),
     {ok, Docs}.
 
-check_is_admin(#db{admins=Admins, user_ctx=#user_ctx{name=Name,roles=Roles}}) ->
-    DbAdmins = [<<"_admin">> | Admins],
-    case DbAdmins -- [Name | Roles] of
-    DbAdmins -> % same list, not an admin
-        throw({unauthorized, <<"You are not a db or server admin.">>});
+check_is_admin(#db{user_ctx=#user_ctx{name=Name,roles=Roles}}=Db) ->
+    {Admins} = get_admins(Db),
+    AdminRoles = [<<"_admin">> | couch_util:get_value(<<"roles">>, Admins, [])],
+    AdminNames = couch_util:get_value(<<"names">>, Admins,[]),
+    case AdminRoles -- Roles of
+    AdminRoles -> % same list, not an admin role
+        case AdminNames -- [Name] of
+        AdminNames -> % same names, not an admin
+            throw({unauthorized, <<"You are not a db or server admin.">>});
+        _ ->
+            ok
+        end;
     _ ->
         ok
     end.
 
-get_admins(#db{admins=Admins}) ->
-    Admins.
+check_is_reader(#db{user_ctx=#user_ctx{name=Name,roles=Roles}=UserCtx}=Db) ->
+    case (catch check_is_admin(Db)) of
+    ok -> ok;
+    _ ->
+        {Readers} = get_readers(Db),
+        ReaderRoles = couch_util:get_value(<<"roles">>, Readers,[]),
+        WithAdminRoles = [<<"_admin">> | ReaderRoles],
+        ReaderNames = couch_util:get_value(<<"names">>, Readers,[]),
+        case ReaderRoles ++ ReaderNames of
+        [] -> ok; % no readers == public access
+        _Else ->
+            case WithAdminRoles -- Roles of
+            WithAdminRoles -> % same list, not an reader role
+                case ReaderNames -- [Name] of
+                ReaderNames -> % same names, not a reader
+                    ?LOG_DEBUG("Not a reader: UserCtx ~p vs Names ~p Roles ~p",[UserCtx, ReaderNames, WithAdminRoles]),
+                    throw({unauthorized, <<"You are not authorized to access this db.">>});
+                _ ->
+                    ok
+                end;
+            _ ->
+                ok
+            end
+        end
+    end.
+
+get_admins(#db{security=SecProps}) ->
+    couch_util:get_value(<<"admins">>, SecProps, {[]}).
+
+get_readers(#db{security=SecProps}) ->
+    couch_util:get_value(<<"readers">>, SecProps, {[]}).
 
-set_admins(#db{update_pid=Pid}=Db, Admins) when is_list(Admins) ->
+get_security(#db{security=SecProps}) ->
+    {SecProps}.
+
+set_security(#db{update_pid=Pid}=Db, {NewSecProps}) when is_list(NewSecProps) ->
     check_is_admin(Db),
-    gen_server:call(Pid, {set_admins, Admins}, infinity).
+    ok = validate_security_object(NewSecProps),
+    ok = gen_server:call(Pid, {set_security, NewSecProps}, infinity),
+    {ok, _} = ensure_full_commit(Db),
+    ok;
+set_security(_, _) ->
+    throw(bad_request).
+
+validate_security_object(SecProps) ->
+    Admins = couch_util:get_value(<<"admins">>, SecProps, {[]}),
+    Readers = couch_util:get_value(<<"readers">>, SecProps, {[]}),
+    ok = validate_names_and_roles(Admins),
+    ok = validate_names_and_roles(Readers),
+    ok.
+
+% validate user input
+validate_names_and_roles({Props}) when is_list(Props) ->
+    case couch_util:get_value(<<"names">>,Props,[]) of
+    Ns when is_list(Ns) ->
+        [throw("names must be a JSON list of strings") ||N <- Ns, not is_binary(N)],
+        Ns;
+    _ -> throw("names must be a JSON list of strings")
+    end,
+    case couch_util:get_value(<<"roles">>,Props,[]) of
+    Rs when is_list(Rs) ->
+        [throw("roles must be a JSON list of strings") ||R <- Rs, not is_binary(R)],
+        Rs;
+    _ -> throw("roles must be a JSON list of strings")
+    end,
+    ok.
 
 get_revs_limit(#db{revs_limit=Limit}) ->
     Limit.
@@ -257,8 +383,14 @@ update_doc(Db, Doc, Options, UpdateType) ->
     case update_docs(Db, [Doc], Options, UpdateType) of
     {ok, [{ok, NewRev}]} ->
         {ok, NewRev};
+    {ok, [{{_Id, _Rev}, Error}]} ->
+        throw(Error);
     {ok, [Error]} ->
-        throw(Error)
+        throw(Error);
+    {ok, []} ->
+        % replication success
+        {Pos, [RevId | _]} = Doc#doc.revs,
+        {ok, {Pos, RevId}}
     end.
 
 update_docs(Db, Docs) ->
@@ -285,18 +417,8 @@ group_alike_docs([Doc|Rest], [Bucket|RestBuckets]) ->
         group_alike_docs(Rest, [[Doc]|[Bucket|RestBuckets]])
     end.
-
-validate_doc_update(#db{user_ctx=UserCtx, admins=Admins},
-        #doc{id= <<"_design/",_/binary>>}, _GetDiskDocFun) ->
-    UserNames = [UserCtx#user_ctx.name | UserCtx#user_ctx.roles],
-    % if the user is a server admin or db admin, allow the save
-    case length(UserNames -- [<<"_admin">> | Admins]) =:= length(UserNames) of
-    true ->
-        % not an admin
-        {unauthorized, <<"You are not a server or database admin.">>};
-    false ->
-        ok
-    end;
+validate_doc_update(#db{}=Db, #doc{id= <<"_design/",_/binary>>}, _GetDiskDocFun) ->
+    catch check_is_admin(Db);
 validate_doc_update(#db{validate_doc_funs=[]}, _Doc, _GetDiskDocFun) ->
     ok;
 validate_doc_update(_Db, #doc{id= <<"_local/",_/binary>>}, _GetDiskDocFun) ->
@@ -304,7 +426,8 @@ validate_doc_update(_Db, #doc{id= <<"_local/",_/binary>>}, _GetDiskDocFun) ->
 validate_doc_update(Db, Doc, GetDiskDocFun) ->
     DiskDoc = GetDiskDocFun(),
     JsonCtx = couch_util:json_user_ctx(Db),
-    try [case Fun(Doc, DiskDoc, JsonCtx) of
+    SecObj = get_security(Db),
+    try [case Fun(Doc, DiskDoc, JsonCtx, SecObj) of
             ok -> ok;
             Error -> throw(Error)
         end || Fun <- Db#db.validate_doc_funs],
@@ -352,15 +475,15 @@ prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc,
 prep_and_validate_updates(_Db, [], [], _AllowConflict, AccPrepped,
         AccFatalErrors) ->
     {AccPrepped, AccFatalErrors};
-prep_and_validate_updates(Db, [DocBucket|RestBuckets], [not_found|RestLookups], 
+prep_and_validate_updates(Db, [DocBucket|RestBuckets], [not_found|RestLookups],
         AllowConflict, AccPrepped, AccErrors) ->
     [#doc{id=Id}|_]=DocBucket,
     % no existing revs are known,
     {PreppedBucket, AccErrors3} = lists:foldl(
-        fun(#doc{revs=Revs}=Doc, {AccBucket, AccErrors2}) -> 
+        fun(#doc{revs=Revs}=Doc, {AccBucket, AccErrors2}) ->
             case couch_doc:has_stubs(Doc) of
             true ->
-                couch_doc:merge_doc(Doc, #doc{}); % will throw exception
+                couch_doc:merge_stubs(Doc, #doc{}); % will throw exception
             false -> ok
             end,
             case Revs of
@@ -398,12 +521,12 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
             end
         end,
         {[], AccErrors}, DocBucket),
-    prep_and_validate_updates(Db, RestBuckets, RestLookups, AllowConflict, 
+    prep_and_validate_updates(Db, RestBuckets, RestLookups, AllowConflict,
         [PreppedBucket | AccPrepped], AccErrors3).
 
-update_docs(#db{update_pid=UpdatePid}=Db, Docs, Options) ->
-    update_docs(#db{update_pid=UpdatePid}=Db, Docs, Options, interactive_edit).
+update_docs(Db, Docs, Options) ->
+    update_docs(Db, Docs, Options, interactive_edit).
 
 
 prep_and_validate_replicated_updates(_Db, [], [], AccPrepped, AccErrors) ->
@@ -414,10 +537,10 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI
     case OldInfo of
     not_found ->
         {ValidatedBucket, AccErrors3} = lists:foldl(
-            fun(Doc, {AccPrepped2, AccErrors2}) -> 
+            fun(Doc, {AccPrepped2, AccErrors2}) ->
                 case couch_doc:has_stubs(Doc) of
                 true ->
-                    couch_doc:merge_doc(Doc, #doc{}); % will throw exception
+                    couch_doc:merge_stubs(Doc, #doc{}); % will throw exception
                 false -> ok
                 end,
                 case validate_doc_update(Db, Doc, fun() -> nil end) of
@@ -432,7 +555,7 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI
     {ok, #full_doc_info{rev_tree=OldTree}} ->
         NewRevTree = lists:foldl(
             fun(NewDoc, AccTree) ->
-                {NewTree, _} = couch_key_tree:merge(AccTree, [couch_db:doc_to_tree(NewDoc)]),
+                {NewTree, _} = couch_key_tree:merge(AccTree, couch_db:doc_to_tree(NewDoc)),
                 NewTree
             end,
             OldTree, Bucket),
@@ -487,7 +610,7 @@ new_revid(#doc{body=Body,revs={OldStart,OldRevs},
         ?l2b(integer_to_list(couch_util:rand32()));
     Atts2 ->
         OldRev = case OldRevs of [] -> 0; [OldRev0|_] -> OldRev0 end,
-        erlang:md5(term_to_binary([Deleted, OldStart, OldRev, Body, Atts2]))
+        couch_util:md5(term_to_binary([Deleted, OldStart, OldRev, Body, Atts2]))
     end.
 
 new_revs([], OutBuckets, IdRevsAcc) ->
@@ -515,7 +638,7 @@ check_dup_atts2(_) ->
 
 update_docs(Db, Docs, Options, replicated_changes) ->
-    couch_stats_collector:increment({couchdb, database_writes}),
+    increment_stat(Db, {couchdb, database_writes}),
     DocBuckets = group_alike_docs(Docs),
 
     case (Db#db.validate_doc_funs /= []) orelse
@@ -541,7 +664,7 @@ update_docs(Db, Docs, Options, replicated_changes) ->
     {ok, DocErrors};
 
 update_docs(Db, Docs, Options, interactive_edit) ->
-    couch_stats_collector:increment({couchdb, database_writes}),
+    increment_stat(Db, {couchdb, database_writes}),
     AllOrNothing = lists:member(all_or_nothing, Options),
     % go ahead and generate the new revision ids for the documents.
     % separate out the NonRep documents from the rest of the documents
@@ -645,7 +768,7 @@ collect_results(UpdatePid, MRef, ResultsAcc) ->
         exit(Reason)
     end.
 
-write_and_commit(#db{update_pid=UpdatePid, user_ctx=Ctx}=Db, DocBuckets,
+write_and_commit(#db{update_pid=UpdatePid}=Db, DocBuckets,
         NonRepDocs, Options0) ->
     Options = set_commit_option(Options0),
     MergeConflicts = lists:member(merge_conflicts, Options),
@@ -658,7 +781,7 @@ write_and_commit(#db{update_pid=UpdatePid, user_ctx=Ctx}=Db, DocBuckets,
     retry ->
         % This can happen if the db file we wrote to was swapped out by
         % compaction. Retry by reopening the db and writing to the current file
-        {ok, Db2} = open_ref_counted(Db#db.main_pid, Ctx),
+        {ok, Db2} = open_ref_counted(Db#db.main_pid, self()),
         DocBuckets2 = [[doc_flush_atts(Doc, Db2#db.fd) || Doc <- Bucket] || Bucket <- DocBuckets],
         % We only retry once
         close(Db2),
@@ -693,18 +816,19 @@ flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd ->
     % already written to our file, nothing to write
     Att;
 
-flush_att(Fd, #att{data={OtherFd,StreamPointer}, md5=InMd5}=Att) ->
-    {NewStreamData, Len, Md5} =
+flush_att(Fd, #att{data={OtherFd,StreamPointer}, md5=InMd5,
+        disk_len=InDiskLen} = Att) ->
+    {NewStreamData, Len, _IdentityLen, Md5, IdentityMd5} =
         couch_stream:copy_to_new_stream(OtherFd, StreamPointer, Fd),
-    check_md5(Md5, InMd5),
-    Att#att{data={Fd, NewStreamData}, md5=Md5, len=Len};
+    check_md5(IdentityMd5, InMd5),
+    Att#att{data={Fd, NewStreamData}, md5=Md5, att_len=Len, disk_len=InDiskLen};
 
 flush_att(Fd, #att{data=Data}=Att) when is_binary(Data) ->
     with_stream(Fd, Att, fun(OutputStream) ->
         couch_stream:write(OutputStream, Data)
     end);
 
-flush_att(Fd, #att{data=Fun,len=undefined}=Att) when is_function(Fun) ->
+flush_att(Fd, #att{data=Fun,att_len=undefined}=Att) when is_function(Fun) ->
     with_stream(Fd, Att, fun(OutputStream) ->
         % Fun(MaxChunkSize, WriterFun) must call WriterFun
         % once for each chunk of the attachment,
@@ -726,9 +850,9 @@ flush_att(Fd, #att{data=Fun,att_len=undefined}=Att) when is_function(Fun) ->
         end, ok)
     end);
 
-flush_att(Fd, #att{data=Fun,len=Len}=Att) when is_function(Fun) ->
+flush_att(Fd, #att{data=Fun,att_len=AttLen}=Att) when is_function(Fun) ->
     with_stream(Fd, Att, fun(OutputStream) ->
-        write_streamed_attachment(OutputStream, Fun, Len)
+        write_streamed_attachment(OutputStream, Fun, AttLen)
     end).
 
 % From RFC 2616 3.6.1 - Chunked Transfer Coding
@@ -741,8 +865,17 @@ flush_att(Fd, #att{data=Fun,len=Len}=Att) when is_function(Fun) ->
 % is present in the request, but there is no Content-MD5
 % trailer, we're free to ignore this inconsistency and
 % pretend that no Content-MD5 exists.
-with_stream(Fd, #att{md5=InMd5}=Att, Fun) ->
-    {ok, OutputStream} = couch_stream:open(Fd),
+with_stream(Fd, #att{md5=InMd5,type=Type,encoding=Enc}=Att, Fun) ->
+    {ok, OutputStream} = case (Enc =:= identity) andalso
+        couch_util:compressible_att_type(Type) of
+    true ->
+        CompLevel = list_to_integer(
+            couch_config:get("attachments", "compression_level", "0")
+        ),
+        couch_stream:open(Fd, gzip, [{compression_level, CompLevel}]);
+    _ ->
+        couch_stream:open(Fd)
+    end,
     ReqMd5 = case Fun(OutputStream) of
         {md5, FooterMd5} ->
             case InMd5 of
@@ -752,9 +885,36 @@ with_stream(Fd, #att{md5=InMd5}=Att, Fun) ->
         _ ->
             InMd5
     end,
-    {StreamInfo, Len, Md5} = couch_stream:close(OutputStream),
-    check_md5(Md5, ReqMd5),
-    Att#att{data={Fd,StreamInfo},len=Len,md5=Md5}.
+    {StreamInfo, Len, IdentityLen, Md5, IdentityMd5} =
+        couch_stream:close(OutputStream),
+    check_md5(IdentityMd5, ReqMd5),
+    {AttLen, DiskLen, NewEnc} = case Enc of
+    identity ->
+        case {Md5, IdentityMd5} of
+        {Same, Same} ->
+            {Len, IdentityLen, identity};
+        _ ->
+            {Len, IdentityLen, gzip}
+        end;
+    gzip ->
+        case {Att#att.att_len, Att#att.disk_len} of
+        {AL, DL} when AL =:= undefined orelse DL =:= undefined ->
+            % Compressed attachment uploaded through the standalone API.
+            {Len, Len, gzip};
+        {AL, DL} ->
+            % This case is used for efficient push-replication, where a
+            % compressed attachment is located in the body of multipart
+            % content-type request.
+            {AL, DL, gzip}
+        end
+    end,
+    Att#att{
+        data={Fd,StreamInfo},
+        att_len=AttLen,
+        disk_len=DiskLen,
+        md5=Md5,
+        encoding=NewEnc
+    }.
 
 
 write_streamed_attachment(_Stream, _F, 0) ->
@@ -779,17 +939,18 @@ changes_since(Db, Style, StartSeq, Fun, Acc) ->
 changes_since(Db, Style, StartSeq, Fun, Options, Acc) ->
     Wrapper = fun(DocInfo, _Offset, Acc2) ->
             #doc_info{revs=Revs} = DocInfo,
+            DocInfo2 =
             case Style of
             main_only ->
-                Infos = [DocInfo];
+                DocInfo;
             all_docs ->
-                % make each rev it's own doc info
-                Infos = [DocInfo#doc_info{revs=[RevInfo]} ||
-                        #rev_info{seq=RevSeq}=RevInfo <- Revs, StartSeq < RevSeq]
+                % remove revs before the seq
+                DocInfo#doc_info{revs=[RevInfo ||
+                        #rev_info{seq=RevSeq}=RevInfo <- Revs, StartSeq < RevSeq]}
             end,
-            Fun(Infos, Acc2)
+            Fun(DocInfo2, Acc2)
         end,
-    {ok, _LastReduction, AccOut} = couch_btree:fold(Db#db.docinfo_by_seq_btree, 
+    {ok, _LastReduction, AccOut} = couch_btree:fold(Db#db.docinfo_by_seq_btree,
         Wrapper, Acc, [{start_key, StartSeq + 1}] ++ Options),
     {ok, AccOut}.
 
@@ -816,11 +977,17 @@ init({DbName, Filepath, Fd, Options}) ->
     {ok, UpdaterPid} = gen_server:start_link(couch_db_updater, {self(), DbName, Filepath, Fd, Options}, []),
     {ok, #db{fd_ref_counter=RefCntr}=Db} = gen_server:call(UpdaterPid, get_db),
     couch_ref_counter:add(RefCntr),
-    couch_stats_collector:track_process_count({couchdb, open_databases}),
+    case lists:member(sys_db, Options) of
+    true ->
+        ok;
+    false ->
+        couch_stats_collector:track_process_count({couchdb, open_databases})
+    end,
+    process_flag(trap_exit, true),
     {ok, Db}.
 
-terminate(Reason, _Db) ->
-    couch_util:terminate_linked(Reason),
+terminate(_Reason, Db) ->
+    couch_util:shutdown_sync(Db#db.update_pid),
     ok.
 
 handle_call({open_ref_count, OpenerPid}, _, #db{fd_ref_counter=RefCntr}=Db) ->
@@ -839,7 +1006,9 @@ handle_call({db_updated, NewDb}, _From, #db{fd_ref_counter=OldRefCntr}) ->
         couch_ref_counter:add(NewRefCntr),
         couch_ref_counter:drop(OldRefCntr)
     end,
-    {reply, ok, NewDb}.
+    {reply, ok, NewDb};
+handle_call(get_db, _From, Db) ->
+    {reply, {ok, Db}, Db}.
 
 
 handle_cast(Msg, Db) ->
@@ -848,7 +1017,11 @@ handle_cast(Msg, Db) ->
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
-    
+
+handle_info({'EXIT', _Pid, normal}, Db) ->
+    {noreply, Db};
+handle_info({'EXIT', _Pid, Reason}, Server) ->
+    {stop, Reason, Server};
 handle_info(Msg, Db) ->
     ?LOG_ERROR("Bad message received for db ~s: ~p", [Db#db.name, Msg]),
     exit({error, Msg}).
@@ -983,17 +1156,39 @@ make_doc(#db{fd=Fd}=Db, Id, Deleted, Bp, RevisionPath) ->
             {ok, {BodyData0, Atts0}} = read_doc(Db, Bp),
             {BodyData0,
                 lists:map(
-                    fun({Name,Type,Sp,Len,RevPos,Md5}) ->
+                    fun({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) ->
                         #att{name=Name,
                             type=Type,
-                            len=Len,
+                            att_len=AttLen,
+                            disk_len=DiskLen,
+                            md5=Md5,
+                            revpos=RevPos,
+                            data={Fd,Sp},
+                            encoding=
+                                case Enc of
+                                true ->
+                                    % 0110 UPGRADE CODE
+                                    gzip;
+                                false ->
+                                    % 0110 UPGRADE CODE
+                                    identity;
+                                _ ->
+                                    Enc
+                                end
+                        };
+                    ({Name,Type,Sp,AttLen,RevPos,Md5}) ->
+                        #att{name=Name,
+                            type=Type,
+                            att_len=AttLen,
+                            disk_len=AttLen,
                             md5=Md5,
                             revpos=RevPos,
                            data={Fd,Sp}};
-                    ({Name,{Type,Sp,Len}}) ->
+                    ({Name,{Type,Sp,AttLen}}) ->
                        #att{name=Name,
                            type=Type,
-                            len=Len,
+                            att_len=AttLen,
+                            disk_len=AttLen,
                            md5= <<>>,
                            revpos=0,
                            data={Fd,Sp}}
@@ -1008,4 +1203,7 @@ make_doc(#db{fd=Fd}=Db, Id, Deleted, Bp, RevisionPath) ->
     }.
 
 
-
+increment_stat(#db{is_sys_db = true}, _Stat) ->
+    ok;
+increment_stat(#db{}, Stat) ->
+    couch_stats_collector:increment(Stat).
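The per-database admins list is replaced here by a security object with separate admins and readers members, each carrying names and roles. A minimal sketch of the new API (not part of this commit; the database and user names are illustrative):

    {ok, Db} = couch_db:open_int(<<"accounts">>,
        [{user_ctx, #user_ctx{roles = [<<"_admin">>]}}]),
    ok = couch_db:set_security(Db, {[
        {<<"admins">>, {[{<<"names">>, [<<"bob">>]}, {<<"roles">>, []}]}},
        {<<"readers">>, {[{<<"names">>, []}, {<<"roles">>, [<<"accounting">>]}]}}
    ]}),
    {SecProps} = couch_db:get_security(Db).

With a non-empty readers section, couch_db:open/2 (the HTTP-facing variant above) throws unauthorized for any user context that is neither a listed reader nor an admin.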
diff --git a/src/couchdb/couch_db.hrl b/src/couchdb/couch_db.hrl
index 99ef8997..74d2c630 100644
--- a/src/couchdb/couch_db.hrl
+++ b/src/couchdb/couch_db.hrl
@@ -20,7 +20,6 @@
 -define(JSON_ENCODE(V), couch_util:json_encode(V)).
 -define(JSON_DECODE(V), couch_util:json_decode(V)).
 
--define(b2a(V), list_to_atom(binary_to_list(V))).
 -define(b2l(V), binary_to_list(V)).
 -define(l2b(V), list_to_binary(V)).
 
@@ -28,18 +27,23 @@
 
 -define(LOG_DEBUG(Format, Args),
     case couch_log:debug_on() of
-        true -> error_logger:info_report(couch_debug, {Format, Args});
+        true ->
+            gen_event:sync_notify(error_logger,
+                {self(), couch_debug, {Format, Args}});
         false -> ok
     end).
 
 -define(LOG_INFO(Format, Args),
     case couch_log:info_on() of
-        true -> error_logger:info_report(couch_info, {Format, Args});
+        true ->
+            gen_event:sync_notify(error_logger,
+                {self(), couch_info, {Format, Args}});
         false -> ok
     end).
 
 -define(LOG_ERROR(Format, Args),
-    error_logger:error_report(couch_error, {Format, Args})).
+    gen_event:sync_notify(error_logger,
+        {self(), couch_error, {Format, Args}})).
 
 
 -record(rev_info,
@@ -68,12 +72,15 @@
     {mochi_req,
     peer,
     method,
+    requested_path_parts,
     path_parts,
     db_url_handlers,
     user_ctx,
     req_body = undefined,
     design_url_handlers,
-    auth
+    auth,
+    default_fun,
+    url_handlers
     }).
 
 
@@ -99,10 +106,17 @@
     {
     name,
     type,
-    len,
+    att_len,
+    disk_len, % length of the attachment in its identity form
+              % (that is, without a content encoding applied to it)
+              % differs from att_len when encoding /= identity
     md5= <<>>,
     revpos=0,
-    data
+    data,
+    encoding=identity % currently supported values are:
+                      %     identity, gzip
+                      % additional values to support in the future:
+                      %     deflate, compress
     }).
 
 
@@ -110,8 +124,7 @@
     {
     name=null,
     roles=[],
-    handler,
-    user_doc
+    handler
     }).
 
 % This should be updated anytime a header change happens that requires more
@@ -124,7 +137,7 @@
 % if the disk revision is incremented, then new upgrade logic will need to be
 % added to couch_db_updater:init_db.
 
--define(LATEST_DISK_VERSION, 4).
+-define(LATEST_DISK_VERSION, 5).
 
 -record(db_header,
     {disk_version = ?LATEST_DISK_VERSION,
@@ -135,7 +148,7 @@
     local_docs_btree_state = nil,
     purge_seq = 0,
     purged_docs = nil,
-    admins_ptr = nil,
+    security_ptr = nil,
     revs_limit = 1000
     }).
 
@@ -155,12 +168,13 @@
     name,
     filepath,
     validate_doc_funs = [],
-    admins = [],
-    admins_ptr = nil,
+    security = [],
+    security_ptr = nil,
     user_ctx = #user_ctx{},
     waiting_delayed_commit = nil,
     revs_limit = 1000,
-    fsync_options = []
+    fsync_options = [],
+    is_sys_db = false
     }).
 
 
@@ -214,6 +228,7 @@
     def_lang,
     design_options=[],
     views,
+    lib,
     id_btree=nil,
     current_seq=0,
     purge_seq=0,
@@ -223,6 +238,8 @@
 
 -record(view,
     {id_num,
+    update_seq=0,
+    purge_seq=0,
     map_names=[],
     def,
     btree=nil,
@@ -251,7 +268,13 @@
     body = nil,
     options = [
         {response_format,binary},
-        {inactivity_timeout, 30000}
+        {inactivity_timeout, 30000},
+        {max_sessions, list_to_integer(
+            couch_config:get("replicator", "max_http_sessions", "10")
+        )},
+        {max_pipeline_size, list_to_integer(
+            couch_config:get("replicator", "max_http_pipeline_size", "10")
+        )}
     ],
     retries = 10,
     pause = 500,
@@ -260,3 +283,17 @@
 
 % small value used in revision trees to indicate the revision isn't stored
 -define(REV_MISSING, []).
+
+-record(changes_args, {
+    feed = "normal",
+    dir = fwd,
+    since = 0,
+    limit = 1000000000000000,
+    style = main_only,
+    heartbeat,
+    timeout,
+    filter = "",
+    include_docs = false,
+    db_open_options = []
+}).
+
diff --git a/src/couchdb/couch_db_updater.erl b/src/couchdb/couch_db_updater.erl
index 7292221a..633ae230 100644
--- a/src/couchdb/couch_db_updater.erl
+++ b/src/couchdb/couch_db_updater.erl
@@ -20,25 +20,38 @@
 init({MainPid, DbName, Filepath, Fd, Options}) ->
+    process_flag(trap_exit, true),
     case lists:member(create, Options) of
     true ->
         % create a new header and write it to the file
         Header = #db_header{},
         ok = couch_file:write_header(Fd, Header),
         % delete any old compaction files that might be hanging around
-        file:delete(Filepath ++ ".compact");
+        RootDir = couch_config:get("couchdb", "database_dir", "."),
+        couch_file:delete(RootDir, Filepath ++ ".compact");
     false ->
         ok = couch_file:upgrade_old_header(Fd, <<$g, $m, $k, 0>>), % 09 UPGRADE CODE
-        {ok, Header} = couch_file:read_header(Fd)
+        case couch_file:read_header(Fd) of
+        {ok, Header} ->
+            ok;
+        no_valid_header ->
+            % create a new header and write it to the file
+            Header = #db_header{},
+            ok = couch_file:write_header(Fd, Header),
+            % delete any old compaction files that might be hanging around
+            file:delete(Filepath ++ ".compact")
+        end
     end,
     Db = init_db(DbName, Filepath, Fd, Header),
     Db2 = refresh_validate_doc_funs(Db),
-    {ok, Db2#db{main_pid=MainPid}}.
+    {ok, Db2#db{main_pid = MainPid, is_sys_db = lists:member(sys_db, Options)}}.
-terminate(Reason, _Srv) ->
-    couch_util:terminate_linked(Reason),
+terminate(_Reason, Db) ->
+    couch_file:close(Db#db.fd),
+    couch_util:shutdown_sync(Db#db.compactor_pid),
+    couch_util:shutdown_sync(Db#db.fd_ref_counter),
     ok.
 handle_call(get_db, _From, Db) ->
@@ -53,9 +66,9 @@ handle_call(increment_update_seq, _From, Db) ->
     couch_db_update_notifier:notify({updated, Db#db.name}),
     {reply, {ok, Db2#db.update_seq}, Db2};
-handle_call({set_admins, NewAdmins}, _From, Db) ->
-    {ok, Ptr} = couch_file:append_term(Db#db.fd, NewAdmins),
-    Db2 = commit_data(Db#db{admins=NewAdmins, admins_ptr=Ptr,
+handle_call({set_security, NewSec}, _From, Db) ->
+    {ok, Ptr} = couch_file:append_term(Db#db.fd, NewSec),
+    Db2 = commit_data(Db#db{security=NewSec, security_ptr=Ptr,
             update_seq=Db#db.update_seq+1}),
     ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}),
     {reply, ok, Db2};
@@ -130,21 +143,22 @@ handle_call({purge_docs, IdRevs}, _From, Db) ->
     ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}),
     couch_db_update_notifier:notify({updated, Db#db.name}),
-    {reply, {ok, Db2#db.update_seq, IdRevsPurged}, Db2}.
-
-
-handle_cast(start_compact, Db) ->
+    {reply, {ok, (Db2#db.header)#db_header.purge_seq, IdRevsPurged}, Db2};
+handle_call(start_compact, _From, Db) ->
     case Db#db.compactor_pid of
     nil ->
         ?LOG_INFO("Starting compaction for db \"~s\"", [Db#db.name]),
         Pid = spawn_link(fun() -> start_copy_compact(Db) end),
         Db2 = Db#db{compactor_pid=Pid},
         ok = gen_server:call(Db#db.main_pid, {db_updated, Db2}),
-        {noreply, Db2};
+        {reply, ok, Db2};
     _ ->
         % compact currently running, this is a no-op
-        {noreply, Db}
-    end;
+        {reply, ok, Db}
+    end.
+ + + handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) -> {ok, NewFd} = couch_file:open(CompactFilepath), {ok, NewHeader} = couch_file:read_header(NewFd), @@ -158,17 +172,25 @@ handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) -> fun(Value, _Offset, Acc) -> {ok, [Value | Acc]} end, []), {ok, NewLocalBtree} = couch_btree:add(NewDb#db.local_docs_btree, LocalDocs), - NewDb2 = commit_data( NewDb#db{local_docs_btree=NewLocalBtree, - main_pid = Db#db.main_pid,filepath = Filepath}), + NewDb2 = commit_data(NewDb#db{ + local_docs_btree = NewLocalBtree, + main_pid = Db#db.main_pid, + filepath = Filepath, + instance_start_time = Db#db.instance_start_time, + revs_limit = Db#db.revs_limit + }), ?LOG_DEBUG("CouchDB swapping files ~s and ~s.", [Filepath, CompactFilepath]), - file:delete(Filepath), + RootDir = couch_config:get("couchdb", "database_dir", "."), + couch_file:delete(RootDir, Filepath), ok = file:rename(CompactFilepath, Filepath), close_db(Db), - ok = gen_server:call(Db#db.main_pid, {db_updated, NewDb2}), + NewDb3 = refresh_validate_doc_funs(NewDb2), + ok = gen_server:call(Db#db.main_pid, {db_updated, NewDb3}, infinity), + couch_db_update_notifier:notify({compacted, NewDb3#db.name}), ?LOG_INFO("Compaction for db \"~s\" completed.", [Db#db.name]), - {noreply, NewDb2#db{compactor_pid=nil}}; + {noreply, NewDb3#db{compactor_pid=nil}}; false -> ?LOG_INFO("Compaction file still behind main file " "(update seq=~p. compact update seq=~p). Retrying.", @@ -180,11 +202,11 @@ handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) -> end. -handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, +handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, FullCommit}, Db) -> GroupedDocs2 = [[{Client, D} || D <- DocGroup] || DocGroup <- GroupedDocs], if NonRepDocs == [] -> - {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2, + {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2, [Client], MergeConflicts, FullCommit); true -> GroupedDocs3 = GroupedDocs2, @@ -192,7 +214,7 @@ handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, Clients = [Client] end, NonRepDocs2 = [{Client, NRDoc} || NRDoc <- NonRepDocs], - try update_docs_int(Db, GroupedDocs3, NonRepDocs2, MergeConflicts, + try update_docs_int(Db, GroupedDocs3, NonRepDocs2, MergeConflicts, FullCommit2) of {ok, Db2} -> ok = gen_server:call(Db#db.main_pid, {db_updated, Db2}), @@ -207,6 +229,9 @@ handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, [catch(ClientPid ! {retry, self()}) || ClientPid <- Clients], {noreply, Db} end; +handle_info(delayed_commit, #db{waiting_delayed_commit=nil}=Db) -> + %no outstanding delayed commits, ignore + {noreply, Db}; handle_info(delayed_commit, Db) -> case commit_data(Db) of Db -> @@ -214,7 +239,11 @@ handle_info(delayed_commit, Db) -> Db2 -> ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}), {noreply, Db2} - end. + end; +handle_info({'EXIT', _Pid, normal}, Db) -> + {noreply, Db}; +handle_info({'EXIT', _Pid, Reason}, Db) -> + {stop, Reason, Db}. code_change(_OldVsn, State, _Extra) -> {ok, State}. 
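Note the supervision idiom the updater (and the couch_db server earlier in this patch) now share: trap exits, ignore normal child terminations, and stop with the child's reason otherwise, leaving terminate/2 to close resources via couch_util:shutdown_sync/1. Reduced to its skeleton, the pattern is:

    init(Arg) ->
        process_flag(trap_exit, true), % deliver 'EXIT' signals as messages
        {ok, Arg}.

    handle_info({'EXIT', _Pid, normal}, State) ->
        {noreply, State};              % a clean child exit is not an error
    handle_info({'EXIT', _Pid, Reason}, State) ->
        {stop, Reason, State}.         % propagate real crashes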
@@ -243,7 +272,7 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} -> GroupedDocs2 = [[{Client, Doc} || Doc <- DocGroup] || DocGroup <- GroupedDocs], - GroupedDocsAcc2 = + GroupedDocsAcc2 = merge_updates(GroupedDocsAcc, GroupedDocs2, []), collect_updates(GroupedDocsAcc2, [Client | ClientsAcc], MergeConflicts, (FullCommit or FullCommit2)) @@ -326,7 +355,7 @@ btree_by_seq_reduce(rereduce, Reds) -> simple_upgrade_record(Old, New) when tuple_size(Old) =:= tuple_size(New) -> Old; -simple_upgrade_record(Old, New) -> +simple_upgrade_record(Old, New) when tuple_size(Old) < tuple_size(New) -> OldSz = tuple_size(Old), NewValuesTail = lists:sublist(tuple_to_list(New), OldSz + 1, tuple_size(New) - OldSz), @@ -337,9 +366,10 @@ init_db(DbName, Filepath, Fd, Header0) -> Header1 = simple_upgrade_record(Header0, #db_header{}), Header = case element(2, Header1) of - 1 -> Header1#db_header{unused = 0}; % 0.9 - 2 -> Header1#db_header{unused = 0}; % post 0.9 and pre 0.10 - 3 -> Header1; % post 0.9 and pre 0.10 + 1 -> Header1#db_header{unused = 0, security_ptr = nil}; % 0.9 + 2 -> Header1#db_header{unused = 0, security_ptr = nil}; % post 0.9 and pre 0.10 + 3 -> Header1#db_header{security_ptr = nil}; % post 0.9 and pre 0.10 + 4 -> Header1#db_header{security_ptr = nil}; % 0.10 and pre 0.11 ?LATEST_DISK_VERSION -> Header1; _ -> throw({database_disk_version_error, "Incorrect disk header version"}) end, @@ -362,12 +392,12 @@ init_db(DbName, Filepath, Fd, Header0) -> {join, fun(X,Y) -> btree_by_seq_join(X,Y) end}, {reduce, fun(X,Y) -> btree_by_seq_reduce(X,Y) end}]), {ok, LocalDocsBtree} = couch_btree:open(Header#db_header.local_docs_btree_state, Fd), - case Header#db_header.admins_ptr of + case Header#db_header.security_ptr of nil -> - Admins = [], - AdminsPtr = nil; - AdminsPtr -> - {ok, Admins} = couch_file:pread_term(Fd, AdminsPtr) + Security = [], + SecurityPtr = nil; + SecurityPtr -> + {ok, Security} = couch_file:pread_term(Fd, SecurityPtr) end, % convert start time tuple to microsecs and store as a binary string {MegaSecs, Secs, MicroSecs} = now(), @@ -386,8 +416,8 @@ init_db(DbName, Filepath, Fd, Header0) -> update_seq = Header#db_header.update_seq, name = DbName, filepath = Filepath, - admins = Admins, - admins_ptr = AdminsPtr, + security = Security, + security_ptr = SecurityPtr, instance_start_time = StartTime, revs_limit = Header#db_header.revs_limit, fsync_options = FsyncOptions @@ -429,8 +459,9 @@ flush_trees(#db{fd=Fd,header=Header}=Db, case Atts of [] -> []; [#att{data={BinFd, _Sp}} | _ ] when BinFd == Fd -> - [{N,T,P,L,R,M} - || #att{name=N,type=T,data={_,P},md5=M,revpos=R,len=L} + [{N,T,P,AL,DL,R,M,E} + || #att{name=N,type=T,data={_,P},md5=M,revpos=R, + att_len=AL,disk_len=DL,encoding=E} <- Atts]; _ -> % BinFd must not equal our Fd. This can happen when a database @@ -458,16 +489,16 @@ send_result(Client, Id, OriginalRevs, NewResult) -> % used to send a result to the client catch(Client ! {result, self(), {{Id, OriginalRevs}, NewResult}}). 
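The guard added to the second simple_upgrade_record/2 clause makes the contract explicit: same-size tuples pass through untouched, shorter (older) tuples get the defaults of the missing trailing fields appended, and a tuple somehow longer than the current record now fails with function_clause instead of being returned as-is. An illustration (tuple shapes simplified; the function is internal to this module):

    %% An old 3-element header meets the current 4-element default:
    Old = {db_header, 3, nil},
    New = {db_header, 5, nil, 1000},
    {db_header, 3, nil, 1000} = simple_upgrade_record(Old, New).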
-merge_rev_trees(_MergeConflicts, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) -> +merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) -> {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq}; -merge_rev_trees(MergeConflicts, [NewDocs|RestDocsList], +merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) -> #full_doc_info{id=Id,rev_tree=OldTree,deleted=OldDeleted,update_seq=OldSeq} = OldDocInfo, - NewRevTree = lists:foldl( + NewRevTree0 = lists:foldl( fun({Client, #doc{revs={Pos,[_Rev|PrevRevs]}}=NewDoc}, AccTree) -> if not MergeConflicts -> - case couch_key_tree:merge(AccTree, [couch_db:doc_to_tree(NewDoc)]) of + case couch_key_tree:merge(AccTree, couch_db:doc_to_tree(NewDoc)) of {_NewTree, conflicts} when (not OldDeleted) -> send_result(Client, Id, {Pos-1,PrevRevs}, conflict), AccTree; @@ -492,15 +523,15 @@ merge_rev_trees(MergeConflicts, [NewDocs|RestDocsList], % this means we are recreating a brand new document % into a state that already existed before. % put the rev into a subsequent edit of the deletion - #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} = + #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} = couch_doc:to_doc_info(OldDocInfo), NewRevId = couch_db:new_revid( NewDoc#doc{revs={OldPos, [OldRev]}}), NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}}, {NewTree2, _} = couch_key_tree:merge(AccTree, - [couch_db:doc_to_tree(NewDoc2)]), + couch_db:doc_to_tree(NewDoc2)), % we changed the rev id, this tells the caller we did - send_result(Client, Id, {Pos-1,PrevRevs}, + send_result(Client, Id, {Pos-1,PrevRevs}, {ok, {OldPos + 1, NewRevId}}), NewTree2; true -> @@ -512,15 +543,16 @@ merge_rev_trees(MergeConflicts, [NewDocs|RestDocsList], end; true -> {NewTree, _} = couch_key_tree:merge(AccTree, - [couch_db:doc_to_tree(NewDoc)]), + couch_db:doc_to_tree(NewDoc)), NewTree - end + end end, OldTree, NewDocs), + NewRevTree = couch_key_tree:stem(NewRevTree0, Limit), if NewRevTree == OldTree -> % nothing changed - merge_rev_trees(MergeConflicts, RestDocsList, RestOldInfo, AccNewInfos, - AccRemoveSeqs, AccSeq); + merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, + AccNewInfos, AccRemoveSeqs, AccSeq); true -> % we have updated the document, give it a new seq # NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree}, @@ -528,8 +560,8 @@ merge_rev_trees(MergeConflicts, [NewDocs|RestDocsList], 0 -> AccRemoveSeqs; _ -> [OldSeq | AccRemoveSeqs] end, - merge_rev_trees(MergeConflicts, RestDocsList, RestOldInfo, - [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1) + merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, + [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1) end. @@ -552,7 +584,8 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> #db{ fulldocinfo_by_id_btree = DocInfoByIdBTree, docinfo_by_seq_btree = DocInfoBySeqBTree, - update_seq = LastSeq + update_seq = LastSeq, + revs_limit = RevsLimit } = Db, Ids = [Id || [{_Client, #doc{id=Id}}|_] <- DocsList], % lookup up the old documents, if they exist. @@ -565,11 +598,9 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> end, Ids, OldDocLookups), % Merge the new docs into the revision trees. 
- {ok, NewDocInfos0, RemoveSeqs, NewSeq} = merge_rev_trees( + {ok, NewFullDocInfos, RemoveSeqs, NewSeq} = merge_rev_trees(RevsLimit, MergeConflicts, DocsList, OldDocInfos, [], [], LastSeq), - NewFullDocInfos = stem_full_doc_infos(Db, NewDocInfos0), - % All documents are now ready to write. {ok, Db2} = update_local_docs(Db, NonRepDocs), @@ -654,35 +685,35 @@ db_to_header(Db, Header) -> docinfo_by_seq_btree_state = couch_btree:get_state(Db#db.docinfo_by_seq_btree), fulldocinfo_by_id_btree_state = couch_btree:get_state(Db#db.fulldocinfo_by_id_btree), local_docs_btree_state = couch_btree:get_state(Db#db.local_docs_btree), - admins_ptr = Db#db.admins_ptr, + security_ptr = Db#db.security_ptr, revs_limit = Db#db.revs_limit}. -commit_data(#db{fd=Fd,header=OldHeader,fsync_options=FsyncOptions}=Db, Delay) -> - Header = db_to_header(Db, OldHeader), - if OldHeader == Header -> - Db; - Delay and (Db#db.waiting_delayed_commit == nil) -> - Db#db{waiting_delayed_commit= - erlang:send_after(1000, self(), delayed_commit)}; - Delay -> - Db; - true -> - if Db#db.waiting_delayed_commit /= nil -> - case erlang:cancel_timer(Db#db.waiting_delayed_commit) of - false -> receive delayed_commit -> ok after 0 -> ok end; - _ -> ok - end; - true -> ok - end, +commit_data(#db{waiting_delayed_commit=nil} = Db, true) -> + Db#db{waiting_delayed_commit=erlang:send_after(1000,self(),delayed_commit)}; +commit_data(Db, true) -> + Db; +commit_data(Db, _) -> + #db{ + fd = Fd, + filepath = Filepath, + header = OldHeader, + fsync_options = FsyncOptions, + waiting_delayed_commit = Timer + } = Db, + if is_reference(Timer) -> erlang:cancel_timer(Timer); true -> ok end, + case db_to_header(Db, OldHeader) of + OldHeader -> + Db#db{waiting_delayed_commit=nil}; + Header -> case lists:member(before_header, FsyncOptions) of - true -> ok = couch_file:sync(Fd); + true -> ok = couch_file:sync(Filepath); _ -> ok end, ok = couch_file:write_header(Fd, Header), case lists:member(after_header, FsyncOptions) of - true -> ok = couch_file:sync(Fd); + true -> ok = couch_file:sync(Filepath); _ -> ok end, @@ -696,21 +727,41 @@ copy_doc_attachments(#db{fd=SrcFd}=SrcDb, {Pos,_RevId}, SrcSp, DestFd) -> {ok, {BodyData, BinInfos}} = couch_db:read_doc(SrcDb, SrcSp), % copy the bin values NewBinInfos = lists:map( - fun({Name, {Type, BinSp, Len}}) when is_tuple(BinSp) orelse BinSp == null -> + fun({Name, {Type, BinSp, AttLen}}) when is_tuple(BinSp) orelse BinSp == null -> % 09 UPGRADE CODE - {NewBinSp, Len, Md5} = couch_stream:old_copy_to_new_stream(SrcFd, BinSp, Len, DestFd), - {Name, Type, NewBinSp, Len, Pos, Md5}; - ({Name, {Type, BinSp, Len}}) -> + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity}; + ({Name, {Type, BinSp, AttLen}}) -> % 09 UPGRADE CODE - {NewBinSp, Len, Md5} = couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), - {Name, Type, NewBinSp, Len, Pos, Md5}; - ({Name, Type, BinSp, Len, RevPos, <<>>}) when is_tuple(BinSp) orelse BinSp == null -> + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity}; + ({Name, Type, BinSp, AttLen, _RevPos, <<>>}) when + is_tuple(BinSp) orelse BinSp == null -> % 09 UPGRADE CODE - {NewBinSp, Len, Md5} = couch_stream:old_copy_to_new_stream(SrcFd, BinSp, Len, DestFd), - {Name, Type, NewBinSp, Len, Len, Md5}; - ({Name, Type, BinSp, Len, RevPos, Md5}) -> - {NewBinSp, Len, Md5} = 
couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), - {Name, Type, NewBinSp, Len, RevPos, Md5} + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, AttLen, Md5, identity}; + ({Name, Type, BinSp, AttLen, RevPos, Md5}) -> + % 010 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, RevPos, Md5, identity}; + ({Name, Type, BinSp, AttLen, DiskLen, RevPos, Md5, Enc1}) -> + {NewBinSp, AttLen, _, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + Enc = case Enc1 of + true -> + % 0110 UPGRADE CODE + gzip; + false -> + % 0110 UPGRADE CODE + identity; + _ -> + Enc1 + end, + {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc} end, BinInfos), {BodyData, NewBinInfos}. @@ -724,7 +775,10 @@ copy_rev_tree_attachments(SrcDb, DestFd, Tree) -> end, Tree). -copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) -> +copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq0, Retry) -> + % COUCHDB-968, make sure we prune duplicates during compaction + InfoBySeq = lists:usort(fun(#doc_info{id=A}, #doc_info{id=B}) -> A =< B end, + InfoBySeq0), Ids = [Id || #doc_info{id=Id} <- InfoBySeq], LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids), @@ -790,7 +844,7 @@ copy_compact(Db, NewDb0, Retry) -> couch_task_status:set_update_frequency(500), {ok, _, {NewDb2, Uncopied, TotalChanges}} = - couch_btree:foldl(Db#db.docinfo_by_seq_btree, EnumBySeqFun, + couch_btree:foldl(Db#db.docinfo_by_seq_btree, EnumBySeqFun, {NewDb, [], 0}, [{start_key, NewDb#db.update_seq + 1}]), @@ -799,9 +853,9 @@ copy_compact(Db, NewDb0, Retry) -> NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry), % copy misc header values - if NewDb3#db.admins /= Db#db.admins -> - {ok, Ptr} = couch_file:append_term(NewDb3#db.fd, Db#db.admins), - NewDb4 = NewDb3#db{admins=Db#db.admins, admins_ptr=Ptr}; + if NewDb3#db.security /= Db#db.security -> + {ok, Ptr} = couch_file:append_term(NewDb3#db.fd, Db#db.security), + NewDb4 = NewDb3#db{security=Db#db.security, security_ptr=Ptr}; true -> NewDb4 = NewDb3 end, @@ -815,7 +869,12 @@ start_copy_compact(#db{name=Name,filepath=Filepath}=Db) -> {ok, Fd} -> couch_task_status:add_task(<<"Database Compaction">>, <<Name/binary, " retry">>, <<"Starting">>), Retry = true, - {ok, Header} = couch_file:read_header(Fd); + case couch_file:read_header(Fd) of + {ok, Header} -> + ok; + no_valid_header -> + ok = couch_file:write_header(Fd, Header=#db_header{}) + end; {error, enoent} -> couch_task_status:add_task(<<"Database Compaction">>, Name, <<"Starting">>), {ok, Fd} = couch_file:open(CompactFile, [create]), @@ -823,8 +882,8 @@ start_copy_compact(#db{name=Name,filepath=Filepath}=Db) -> ok = couch_file:write_header(Fd, Header=#db_header{}) end, NewDb = init_db(Name, CompactFile, Fd, Header), + unlink(Fd), NewDb2 = copy_compact(Db, NewDb, Retry), - - gen_server:cast(Db#db.update_pid, {compact_done, CompactFile}), - close_db(NewDb2). + close_db(NewDb2), + gen_server:cast(Db#db.update_pid, {compact_done, CompactFile}). diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl index ba5c7450..7c64b8a9 100644 --- a/src/couchdb/couch_doc.erl +++ b/src/couchdb/couch_doc.erl @@ -13,7 +13,7 @@ -module(couch_doc). -export([to_doc_info/1,to_doc_info_path/1,parse_rev/1,parse_revs/1,rev_to_str/1,revs_to_strs/1]). --export([att_foldl/3,get_validate_doc_fun/1]). 
+-export([att_foldl/3,range_att_foldl/5,att_foldl_decode/3,get_validate_doc_fun/1]). -export([from_json_obj/1,to_json_obj/2,has_stubs/1, merge_stubs/2]). -export([validate_docid/1]). -export([doc_from_multi_part_stream/2]). @@ -27,8 +27,8 @@ to_json_rev(0, []) -> to_json_rev(Start, [FirstRevId|_]) -> [{<<"_rev">>, ?l2b([integer_to_list(Start),"-",revid_to_str(FirstRevId)])}]. -to_json_body(true, _Body) -> - [{<<"_deleted">>, true}]; +to_json_body(true, {Body}) -> + Body ++ [{<<"_deleted">>, true}]; to_json_body(false, {Body}) -> Body. @@ -73,35 +73,48 @@ to_json_meta(Meta) -> end, Meta). to_json_attachments(Attachments, Options) -> - case lists:member(attachments, Options) of - true -> % return all the binaries - to_json_attachments(Attachments, 0, lists:member(follows, Options)); - false -> - % note the default is [], because this sorts higher than all numbers. - % and will return all the binaries. - RevPos = proplists:get_value(atts_after_revpos, Options, []), - to_json_attachments(Attachments, RevPos, lists:member(follows, Options)) - end. - -to_json_attachments([], _RevPosIncludeAfter, _DataToFollow) -> + to_json_attachments( + Attachments, + lists:member(attachments, Options), + lists:member(follows, Options), + lists:member(att_encoding_info, Options) + ). + +to_json_attachments([], _OutputData, _DataToFollow, _ShowEncInfo) -> []; -to_json_attachments(Atts, RevPosIncludeAfter, DataToFollow) -> +to_json_attachments(Atts, OutputData, DataToFollow, ShowEncInfo) -> AttProps = lists:map( - fun(#att{len=Len}=Att) -> + fun(#att{disk_len=DiskLen, att_len=AttLen, encoding=Enc}=Att) -> {Att#att.name, {[ {<<"content_type">>, Att#att.type}, {<<"revpos">>, Att#att.revpos} ] ++ - if Att#att.revpos > RevPosIncludeAfter -> + if not OutputData orelse Att#att.data == stub -> + [{<<"length">>, DiskLen}, {<<"stub">>, true}]; + true -> if DataToFollow -> - [{<<"length">>, Len}, {<<"follows">>, true}]; + [{<<"length">>, DiskLen}, {<<"follows">>, true}]; true -> - [{<<"data">>, - couch_util:encodeBase64(att_to_iolist(Att))}] - end; - true -> - [{<<"length">>, Len}, {<<"stub">>, true}] - end + AttData = case Enc of + gzip -> + zlib:gunzip(att_to_bin(Att)); + identity -> + att_to_bin(Att) + end, + [{<<"data">>, base64:encode(AttData)}] + end + end ++ + case {ShowEncInfo, Enc} of + {false, _} -> + []; + {true, identity} -> + []; + {true, _} -> + [ + {<<"encoding">>, couch_util:to_binary(Enc)}, + {<<"encoded_length">>, AttLen} + ] + end }} end, Atts), [{<<"_attachments">>, {AttProps}}]. 
@@ -182,34 +195,38 @@ transfer_fields([{<<"_rev">>, _Rev} | Rest], Doc) -> transfer_fields([{<<"_attachments">>, {JsonBins}} | Rest], Doc) -> Atts = lists:map(fun({Name, {BinProps}}) -> - case proplists:get_value(<<"stub">>, BinProps) of + case couch_util:get_value(<<"stub">>, BinProps) of true -> - Type = proplists:get_value(<<"content_type">>, BinProps), - Length = proplists:get_value(<<"length">>, BinProps), - RevPos = proplists:get_value(<<"revpos">>, BinProps, 0), - #att{name=Name, data=stub, type=Type, len=Length, revpos=RevPos}; + Type = couch_util:get_value(<<"content_type">>, BinProps), + RevPos = couch_util:get_value(<<"revpos">>, BinProps, nil), + DiskLen = couch_util:get_value(<<"length">>, BinProps), + {Enc, EncLen} = att_encoding_info(BinProps), + #att{name=Name, data=stub, type=Type, att_len=EncLen, + disk_len=DiskLen, encoding=Enc, revpos=RevPos}; _ -> - Type = proplists:get_value(<<"content_type">>, BinProps, + Type = couch_util:get_value(<<"content_type">>, BinProps, ?DEFAULT_ATTACHMENT_CONTENT_TYPE), - RevPos = proplists:get_value(<<"revpos">>, BinProps, 0), - case proplists:get_value(<<"follows">>, BinProps) of + RevPos = couch_util:get_value(<<"revpos">>, BinProps, 0), + case couch_util:get_value(<<"follows">>, BinProps) of true -> - #att{name=Name, data=follows, type=Type, - len=proplists:get_value(<<"length">>, BinProps), - revpos=RevPos}; + DiskLen = couch_util:get_value(<<"length">>, BinProps), + {Enc, EncLen} = att_encoding_info(BinProps), + #att{name=Name, data=follows, type=Type, encoding=Enc, + att_len=EncLen, disk_len=DiskLen, revpos=RevPos}; _ -> - Value = proplists:get_value(<<"data">>, BinProps), - Bin = couch_util:decodeBase64(Value), - #att{name=Name, data=Bin, type=Type, len=size(Bin), - revpos=RevPos} + Value = couch_util:get_value(<<"data">>, BinProps), + Bin = base64:decode(Value), + LenBin = size(Bin), + #att{name=Name, data=Bin, type=Type, att_len=LenBin, + disk_len=LenBin, revpos=RevPos} end end end, JsonBins), transfer_fields(Rest, Doc#doc{atts=Atts}); transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc) -> - RevIds = proplists:get_value(<<"ids">>, Props), - Start = proplists:get_value(<<"start">>, Props), + RevIds = couch_util:get_value(<<"ids">>, Props), + Start = couch_util:get_value(<<"start">>, Props), if not is_integer(Start) -> throw({doc_validation, "_revisions.start isn't an integer."}); not is_list(RevIds) -> @@ -235,6 +252,17 @@ transfer_fields([{<<"_conflicts">>, _} | Rest], Doc) -> transfer_fields([{<<"_deleted_conflicts">>, _} | Rest], Doc) -> transfer_fields(Rest, Doc); +% special fields for replication documents +transfer_fields([{<<"_replication_state">>, _} = Field | Rest], + #doc{body=Fields} = Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}); +transfer_fields([{<<"_replication_state_time">>, _} = Field | Rest], + #doc{body=Fields} = Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}); +transfer_fields([{<<"_replication_id">>, _} = Field | Rest], + #doc{body=Fields} = Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}); + % unknown special field transfer_fields([{<<"_",Name/binary>>, _} | _], _) -> throw({doc_validation, @@ -243,6 +271,16 @@ transfer_fields([{<<"_",Name/binary>>, _} | _], _) -> transfer_fields([Field | Rest], #doc{body=Fields}=Doc) -> transfer_fields(Rest, Doc#doc{body=[Field|Fields]}). 
+att_encoding_info(BinProps) -> + DiskLen = couch_util:get_value(<<"length">>, BinProps), + case couch_util:get_value(<<"encoding">>, BinProps) of + undefined -> + {identity, DiskLen}; + Enc -> + EncodedLen = couch_util:get_value(<<"encoded_length">>, BinProps, DiskLen), + {list_to_existing_atom(?b2l(Enc)), EncodedLen} + end. + to_doc_info(FullDocInfo) -> {DocInfo, _Path} = to_doc_info_path(FullDocInfo), DocInfo. @@ -272,33 +310,51 @@ to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) -> att_foldl(#att{data=Bin}, Fun, Acc) when is_binary(Bin) -> Fun(Bin, Acc); -att_foldl(#att{data={Fd,Sp},len=Len}, Fun, Acc) when is_tuple(Sp) orelse Sp == null -> +att_foldl(#att{data={Fd,Sp},att_len=Len}, Fun, Acc) when is_tuple(Sp) orelse Sp == null -> % 09 UPGRADE CODE couch_stream:old_foldl(Fd, Sp, Len, Fun, Acc); att_foldl(#att{data={Fd,Sp},md5=Md5}, Fun, Acc) -> couch_stream:foldl(Fd, Sp, Md5, Fun, Acc); -att_foldl(#att{data=DataFun,len=Len}, Fun, Acc) when is_function(DataFun) -> +att_foldl(#att{data=DataFun,att_len=Len}, Fun, Acc) when is_function(DataFun) -> fold_streamed_data(DataFun, Len, Fun, Acc). +range_att_foldl(#att{data={Fd,Sp}}, From, To, Fun, Acc) -> + couch_stream:range_foldl(Fd, Sp, From, To, Fun, Acc). -att_to_iolist(#att{data=Bin}) when is_binary(Bin) -> +att_foldl_decode(#att{data={Fd,Sp},md5=Md5,encoding=Enc}, Fun, Acc) -> + couch_stream:foldl_decode(Fd, Sp, Md5, Enc, Fun, Acc); +att_foldl_decode(#att{data=Fun2,att_len=Len, encoding=identity}, Fun, Acc) -> + fold_streamed_data(Fun2, Len, Fun, Acc). + +att_to_bin(#att{data=Bin}) when is_binary(Bin) -> Bin; -att_to_iolist(#att{data=Iolist}) when is_list(Iolist) -> - Iolist; -att_to_iolist(#att{data={Fd,Sp}}=Att) -> - lists:reverse(att_foldl(Att, - fun(Bin,Acc) -> [Bin|Acc] end, [])); -att_to_iolist(#att{data=DataFun, len=Len}) when is_function(DataFun)-> - lists:reverse(fold_streamed_data(DataFun, Len, - fun(Data, Acc) -> [Data | Acc] end, [])). +att_to_bin(#att{data=Iolist}) when is_list(Iolist) -> + iolist_to_binary(Iolist); +att_to_bin(#att{data={_Fd,_Sp}}=Att) -> + iolist_to_binary( + lists:reverse(att_foldl( + Att, + fun(Bin,Acc) -> [Bin|Acc] end, + [] + )) + ); +att_to_bin(#att{data=DataFun, att_len=Len}) when is_function(DataFun)-> + iolist_to_binary( + lists:reverse(fold_streamed_data( + DataFun, + Len, + fun(Data, Acc) -> [Data | Acc] end, + [] + )) + ). get_validate_doc_fun(#doc{body={Props}}=DDoc) -> - case proplists:get_value(<<"validate_doc_update">>, Props) of + case couch_util:get_value(<<"validate_doc_update">>, Props) of undefined -> nil; _Else -> - fun(EditDoc, DiskDoc, Ctx) -> - couch_query_servers:validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx) + fun(EditDoc, DiskDoc, Ctx, SecObj) -> + couch_query_servers:validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) end end. @@ -315,9 +371,10 @@ has_stubs([_Att|Rest]) -> merge_stubs(#doc{id=Id,atts=MemBins}=StubsDoc, #doc{atts=DiskBins}) -> BinDict = dict:from_list([{Name, Att} || #att{name=Name}=Att <- DiskBins]), MergedBins = lists:map( - fun(#att{name=Name, data=stub, revpos=RevPos}) -> + fun(#att{name=Name, data=stub, revpos=StubRevPos}) -> case dict:find(Name, BinDict) of - {ok, #att{revpos=RevPos}=DiskAtt} -> + {ok, #att{revpos=DiskRevPos}=DiskAtt} + when DiskRevPos == StubRevPos orelse StubRevPos == nil -> DiskAtt; _ -> throw({missing_stub, @@ -335,70 +392,103 @@ fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> ResultAcc = Fun(Bin, Acc), fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). 
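att_encoding_info/1 above recovers the transfer form of an attachment stub from its decoded JSON properties. For a gzip-encoded stub the lookup goes roughly like this (literal values are examples only):

    %% JSON: {"stub":true, "length":1000,
    %%        "encoding":"gzip", "encoded_length":420}
    BinProps = [{<<"stub">>, true},
                {<<"length">>, 1000},
                {<<"encoding">>, <<"gzip">>},
                {<<"encoded_length">>, 420}],
    {gzip, 420} = att_encoding_info(BinProps),
    %% with no "encoding" member the identity form is assumed:
    {identity, 1000} = att_encoding_info([{<<"length">>, 1000}]).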
-len_doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos) -> - 2 + % "--" - size(Boundary) + - 36 + % "\r\ncontent-type: application/json\r\n\r\n" - iolist_size(JsonBytes) + - 4 + % "\r\n--" - size(Boundary) + - + lists:foldl(fun(#att{revpos=RevPos,len=Len}, AccAttsSize) -> - if RevPos > AttsSinceRevPos -> - AccAttsSize + +len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, SendEncodedAtts) -> + AttsSize = lists:foldl(fun(#att{data=Data} = Att, AccAttsSize) -> + case Data of + stub -> + AccAttsSize; + _ -> + AccAttsSize + 4 + % "\r\n\r\n" - Len + + case SendEncodedAtts of + true -> + Att#att.att_len; + _ -> + Att#att.disk_len + end + 4 + % "\r\n--" - size(Boundary); - true -> - AccAttsSize + size(Boundary) end - end, 0, Atts) + - 2. % "--" - -doc_to_multi_part_stream(Boundary,JsonBytes,Atts,AttsSinceRevPos,WriteFun) -> - WriteFun([<<"--", Boundary/binary, - "\r\ncontent-type: application/json\r\n\r\n">>, - JsonBytes, <<"\r\n--", Boundary/binary>>]), - atts_to_mp(Atts, Boundary, WriteFun, AttsSinceRevPos). - -atts_to_mp([], _Boundary, WriteFun, _AttsSinceRevPos) -> + end, 0, Atts), + if AttsSize == 0 -> + {<<"application/json">>, iolist_size(JsonBytes)}; + true -> + {<<"multipart/related; boundary=\"", Boundary/binary, "\"">>, + 2 + % "--" + size(Boundary) + + 36 + % "\r\ncontent-type: application/json\r\n\r\n" + iolist_size(JsonBytes) + + 4 + % "\r\n--" + size(Boundary) + + + AttsSize + + 2 % "--" + } + end. + +doc_to_multi_part_stream(Boundary, JsonBytes, Atts, WriteFun, + SendEncodedAtts) -> + case lists:any(fun(#att{data=Data})-> Data /= stub end, Atts) of + true -> + WriteFun([<<"--", Boundary/binary, + "\r\ncontent-type: application/json\r\n\r\n">>, + JsonBytes, <<"\r\n--", Boundary/binary>>]), + atts_to_mp(Atts, Boundary, WriteFun, SendEncodedAtts); + false -> + WriteFun(JsonBytes) + end. + +atts_to_mp([], _Boundary, WriteFun, _SendEncAtts) -> WriteFun(<<"--">>); -atts_to_mp([#att{revpos=RevPos} = Att | RestAtts], Boundary, WriteFun, - AttsSinceRevPos) when RevPos > AttsSinceRevPos -> +atts_to_mp([#att{data=stub} | RestAtts], Boundary, WriteFun, + SendEncodedAtts) -> + atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts); +atts_to_mp([Att | RestAtts], Boundary, WriteFun, + SendEncodedAtts) -> WriteFun(<<"\r\n\r\n">>), - att_foldl(Att, fun(Data, ok) -> WriteFun(Data) end, ok), + AttFun = case SendEncodedAtts of + false -> + fun att_foldl_decode/3; + true -> + fun att_foldl/3 + end, + AttFun(Att, fun(Data, _) -> WriteFun(Data) end, ok), WriteFun(<<"\r\n--", Boundary/binary>>), - atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos); -atts_to_mp([_ | RestAtts], Boundary, WriteFun, AttsSinceRevPos) -> - atts_to_mp(RestAtts, Boundary, WriteFun, AttsSinceRevPos). + atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts). doc_from_multi_part_stream(ContentType, DataFun) -> - Parser = spawn_link(fun() -> + Self = self(), + Parser = spawn_link(fun() -> couch_httpd:parse_multipart_request(ContentType, DataFun, - fun(Next)-> mp_parse_doc(Next, []) end) + fun(Next)-> mp_parse_doc(Next, []) end), + unlink(Self) end), Parser ! {get_doc_bytes, self()}, - receive {doc_bytes, DocBytes} -> ok end, - Doc = from_json_obj(?JSON_DECODE(DocBytes)), - % go through the attachments looking for 'follows' in the data, - % replace with function that reads the data from MIME stream. - ReadAttachmentDataFun = fun() -> - Parser ! 
{get_bytes, self()}, - receive {bytes, Bytes} -> Bytes end - end, - Atts2 = lists:map( - fun(#att{data=follows}=A) -> - A#att{data=ReadAttachmentDataFun}; - (A) -> - A - end, Doc#doc.atts), - Doc#doc{atts=Atts2}. + receive + {doc_bytes, DocBytes} -> + erlang:put(mochiweb_request_recv, true), + Doc = from_json_obj(?JSON_DECODE(DocBytes)), + % go through the attachments looking for 'follows' in the data, + % replace with function that reads the data from MIME stream. + ReadAttachmentDataFun = fun() -> + Parser ! {get_bytes, self()}, + receive {bytes, Bytes} -> Bytes end + end, + Atts2 = lists:map( + fun(#att{data=follows}=A) -> + A#att{data=ReadAttachmentDataFun}; + (A) -> + A + end, Doc#doc.atts), + {ok, Doc#doc{atts=Atts2}} + end. mp_parse_doc({headers, H}, []) -> - {"application/json", _} = proplists:get_value("content-type", H), - fun (Next) -> - mp_parse_doc(Next, []) + case couch_util:get_value("content-type", H) of + {"application/json", _} -> + fun (Next) -> + mp_parse_doc(Next, []) + end end; mp_parse_doc({body, Bytes}, AccBytes) -> fun (Next) -> diff --git a/src/couchdb/couch_external_server.erl b/src/couchdb/couch_external_server.erl index 8e495320..045fcee9 100644 --- a/src/couchdb/couch_external_server.erl +++ b/src/couchdb/couch_external_server.erl @@ -50,9 +50,11 @@ terminate(_Reason, {_Name, _Command, Pid}) -> handle_call({execute, JsonReq}, _From, {Name, Command, Pid}) -> {reply, couch_os_process:prompt(Pid, JsonReq), {Name, Command, Pid}}. +handle_info({'EXIT', _Pid, normal}, State) -> + {noreply, State}; handle_info({'EXIT', Pid, Reason}, {Name, Command, Pid}) -> ?LOG_INFO("EXTERNAL: Process for ~s exiting. (reason: ~w)", [Name, Reason]), - {stop, normal, {Name, Command, Pid}}. + {stop, Reason, {Name, Command, Pid}}. handle_cast(stop, {Name, Command, Pid}) -> ?LOG_INFO("EXTERNAL: Shutting down ~s", [Name]), diff --git a/src/couchdb/couch_file.erl b/src/couchdb/couch_file.erl index 5904260c..56ebc4f2 100644 --- a/src/couchdb/couch_file.erl +++ b/src/couchdb/couch_file.erl @@ -19,7 +19,8 @@ -record(file, { fd, - tail_append_begin=0 % 09 UPGRADE CODE + tail_append_begin = 0, % 09 UPGRADE CODE + eof = 0 }). -export([open/1, open/2, close/1, bytes/1, sync/1, append_binary/2,old_pread/3]). @@ -27,6 +28,7 @@ -export([pread_binary/2, read_header/1, truncate/2, upgrade_old_header/2]). -export([append_term_md5/2,append_binary_md5/2]). -export([init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([delete/2,delete/3,init_delete_dir/1]). %%---------------------------------------------------------------------- %% Args: Valid Options are [create] and [create,overwrite]. @@ -88,8 +90,8 @@ append_binary(Fd, Bin) -> append_binary_md5(Fd, Bin) -> Size = iolist_size(Bin), - gen_server:call(Fd, {append_bin, - [<<1:1/integer,Size:31/integer>>, erlang:md5(Bin), Bin]}, infinity). + gen_server:call(Fd, {append_bin, + [<<1:1/integer,Size:31/integer>>, couch_util:md5(Bin), Bin]}, infinity). 
 %%----------------------------------------------------------------------
@@ -118,33 +120,18 @@ pread_binary(Fd, Pos) ->
 pread_iolist(Fd, Pos) ->
-    {ok, LenIolist, NextPos} = read_raw_iolist(Fd, Pos, 4),
-    case iolist_to_binary(LenIolist) of
-    <<1:1/integer,Len:31/integer>> ->
-        {ok, Md5List, ValPos} = read_raw_iolist(Fd, NextPos, 16),
-        Md5 = iolist_to_binary(Md5List),
-        {ok, IoList, _} = read_raw_iolist(Fd,ValPos,Len),
-        case erlang:md5(IoList) of
-        Md5 -> ok;
-        _ -> throw(file_corruption)
-        end,
+    case gen_server:call(Fd, {pread_iolist, Pos}, infinity) of
+    {ok, IoList, <<>>} ->
         {ok, IoList};
-    <<0:1/integer,Len:31/integer>> ->
-        {ok, Iolist, _} = read_raw_iolist(Fd, NextPos, Len),
-        {ok, Iolist}
-    end.
-
-
-read_raw_iolist(Fd, Pos, Len) ->
-    BlockOffset = Pos rem ?SIZE_BLOCK,
-    TotalBytes = calculate_total_read_len(BlockOffset, Len),
-    {ok, <<RawBin:TotalBytes/binary>>, HasPrefixes} = gen_server:call(Fd, {pread, Pos, TotalBytes}, infinity),
-    if HasPrefixes ->
-        {ok, remove_block_prefixes(BlockOffset, RawBin), Pos + TotalBytes};
-    true ->
-        % 09 UPGRADE CODE
-        <<ReturnBin:Len/binary, _/binary>> = RawBin,
-        {ok, [ReturnBin], Pos + Len}
+    {ok, IoList, Md5} ->
+        case couch_util:md5(IoList) of
+        Md5 ->
+            {ok, IoList};
+        _ ->
+            exit({file_corruption, <<"file corruption">>})
+        end;
+    Error ->
+        Error
     end.
 %%----------------------------------------------------------------------
@@ -172,17 +159,49 @@ truncate(Fd, Pos) ->
 %% or {error, Reason}.
 %%----------------------------------------------------------------------
+sync(Filepath) when is_list(Filepath) ->
+    {ok, Fd} = file:open(Filepath, [append, raw]),
+    try file:sync(Fd) after file:close(Fd) end;
 sync(Fd) ->
     gen_server:call(Fd, sync, infinity).
 %%----------------------------------------------------------------------
-%% Purpose: Close the file. Is performed asynchronously.
+%% Purpose: Close the file.
 %% Returns: ok
 %%----------------------------------------------------------------------
 close(Fd) ->
-    Result = gen_server:cast(Fd, close),
-    catch unlink(Fd),
-    Result.
+    couch_util:shutdown_sync(Fd).
+
+
+delete(RootDir, Filepath) ->
+    delete(RootDir, Filepath, true).
+
+
+delete(RootDir, Filepath, Async) ->
+    DelFile = filename:join([RootDir,".delete", ?b2l(couch_uuids:random())]),
+    case file:rename(Filepath, DelFile) of
+    ok ->
+        if (Async) ->
+            spawn(file, delete, [DelFile]),
+            ok;
+        true ->
+            file:delete(DelFile)
+        end;
+    Error ->
+        Error
+    end.
+
+
+init_delete_dir(RootDir) ->
+    Dir = filename:join(RootDir,".delete"),
+    % note: ensure_dir requires an actual filename component, which is the
+    % reason for "foo".
+    filelib:ensure_dir(filename:join(Dir,"foo")),
+    filelib:fold_files(Dir, ".*", true,
+        fun(Filename, _) ->
+            ok = file:delete(Filename)
+        end, ok).
+
 % 09 UPGRADE CODE
 old_pread(Fd, Pos, Len) ->
@@ -204,7 +223,7 @@ read_header(Fd) ->
 write_header(Fd, Data) ->
     Bin = term_to_binary(Data),
-    Md5 = erlang:md5(Bin),
+    Md5 = couch_util:md5(Bin),
     % now we assemble the final header binary and write to disk
     FinalBin = <<Md5/binary, Bin/binary>>,
     gen_server:call(Fd, {write_header, FinalBin}, infinity).
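The new delete/2,3 helpers make file deletion a two-phase affair: rename into a ".delete" directory under the database root first, unlink second, so a crash in between leaves only garbage that init_delete_dir/1 sweeps on the next startup. Expected usage, with the path below purely an example:

    RootDir = couch_config:get("couchdb", "database_dir", "."),
    ok = couch_file:init_delete_dir(RootDir),   % at startup, purge leftovers
    %% later, e.g. when dropping a database file:
    ok = couch_file:delete(RootDir, RootDir ++ "/old_db.couch").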
@@ -219,10 +238,11 @@ init_status_error(ReturnPid, Ref, Error) -> % server functions init({Filepath, Options, ReturnPid, Ref}) -> + process_flag(trap_exit, true), case lists:member(create, Options) of true -> filelib:ensure_dir(Filepath), - case file:open(Filepath, [read, write, raw, binary]) of + case file:open(Filepath, [read, append, raw, binary]) of {ok, Fd} -> {ok, Length} = file:position(Fd, eof), case Length > 0 of @@ -235,16 +255,14 @@ init({Filepath, Options, ReturnPid, Ref}) -> {ok, 0} = file:position(Fd, 0), ok = file:truncate(Fd), ok = file:sync(Fd), - couch_stats_collector:track_process_count( - {couchdb, open_os_files}), + maybe_track_open_os_files(Options), {ok, #file{fd=Fd}}; false -> ok = file:close(Fd), init_status_error(ReturnPid, Ref, file_exists) end; false -> - couch_stats_collector:track_process_count( - {couchdb, open_os_files}), + maybe_track_open_os_files(Options), {ok, #file{fd=Fd}} end; Error -> @@ -254,41 +272,71 @@ init({Filepath, Options, ReturnPid, Ref}) -> % open in read mode first, so we don't create the file if it doesn't exist. case file:open(Filepath, [read, raw]) of {ok, Fd_Read} -> - {ok, Fd} = file:open(Filepath, [read, write, raw, binary]), + {ok, Fd} = file:open(Filepath, [read, append, raw, binary]), ok = file:close(Fd_Read), - couch_stats_collector:track_process_count({couchdb, open_os_files}), - {ok, #file{fd=Fd}}; + maybe_track_open_os_files(Options), + {ok, Length} = file:position(Fd, eof), + {ok, #file{fd=Fd, eof=Length}}; Error -> init_status_error(ReturnPid, Ref, Error) end end. +maybe_track_open_os_files(FileOptions) -> + case lists:member(sys_db, FileOptions) of + true -> + ok; + false -> + couch_stats_collector:track_process_count({couchdb, open_os_files}) + end. -terminate(_Reason, _Fd) -> - ok. +terminate(_Reason, #file{fd = Fd}) -> + ok = file:close(Fd). 
+handle_call({pread_iolist, Pos}, _From, File) -> + {RawData, NextPos} = try + % up to 8Kbs of read ahead + read_raw_iolist_int(File, Pos, 2 * ?SIZE_BLOCK - (Pos rem ?SIZE_BLOCK)) + catch + _:_ -> + read_raw_iolist_int(File, Pos, 4) + end, + <<Prefix:1/integer, Len:31/integer, RestRawData/binary>> = + iolist_to_binary(RawData), + case Prefix of + 1 -> + {Md5, IoList} = extract_md5( + maybe_read_more_iolist(RestRawData, 16 + Len, NextPos, File)), + {reply, {ok, IoList, Md5}, File}; + 0 -> + IoList = maybe_read_more_iolist(RestRawData, Len, NextPos, File), + {reply, {ok, IoList, <<>>}, File} + end; handle_call({pread, Pos, Bytes}, _From, #file{fd=Fd,tail_append_begin=TailAppendBegin}=File) -> {ok, Bin} = file:pread(Fd, Pos, Bytes), {reply, {ok, Bin, Pos >= TailAppendBegin}, File}; -handle_call(bytes, _From, #file{fd=Fd}=File) -> - {reply, file:position(Fd, eof), File}; +handle_call(bytes, _From, #file{eof=Length}=File) -> + {reply, {ok, Length}, File}; handle_call(sync, _From, #file{fd=Fd}=File) -> {reply, file:sync(Fd), File}; handle_call({truncate, Pos}, _From, #file{fd=Fd}=File) -> {ok, Pos} = file:position(Fd, Pos), - {reply, file:truncate(Fd), File}; -handle_call({append_bin, Bin}, _From, #file{fd=Fd}=File) -> - {ok, Pos} = file:position(Fd, eof), + case file:truncate(Fd) of + ok -> + {reply, ok, File#file{eof=Pos}}; + Error -> + {reply, Error, File} + end; +handle_call({append_bin, Bin}, _From, #file{fd=Fd, eof=Pos}=File) -> Blocks = make_blocks(Pos rem ?SIZE_BLOCK, Bin), - case file:pwrite(Fd, Pos, Blocks) of + case file:write(Fd, Blocks) of ok -> - {reply, {ok, Pos}, File}; + {reply, {ok, Pos}, File#file{eof=Pos+iolist_size(Blocks)}}; Error -> {reply, Error, File} end; -handle_call({write_header, Bin}, _From, #file{fd=Fd}=File) -> - {ok, Pos} = file:position(Fd, eof), +handle_call({write_header, Bin}, _From, #file{fd=Fd, eof=Pos}=File) -> BinSize = size(Bin), case Pos rem ?SIZE_BLOCK of 0 -> @@ -296,16 +344,21 @@ handle_call({write_header, Bin}, _From, #file{fd=Fd}=File) -> BlockOffset -> Padding = <<0:(8*(?SIZE_BLOCK-BlockOffset))>> end, - FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(1, [Bin])], - {reply, file:pwrite(Fd, Pos, FinalBin), File}; + FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(5, [Bin])], + case file:write(Fd, FinalBin) of + ok -> + {reply, ok, File#file{eof=Pos+iolist_size(FinalBin)}}; + Error -> + {reply, Error, File} + end; handle_call({upgrade_old_header, Prefix}, _From, #file{fd=Fd}=File) -> case (catch read_old_header(Fd, Prefix)) of {ok, Header} -> - {ok, TailAppendBegin} = file:position(Fd, eof), + TailAppendBegin = File#file.eof, Bin = term_to_binary(Header), - Md5 = erlang:md5(Bin), + Md5 = couch_util:md5(Bin), % now we assemble the final header binary and write to disk FinalBin = <<Md5/binary, Bin/binary>>, {reply, ok, _} = handle_call({write_header, FinalBin}, ok, File), @@ -321,8 +374,7 @@ handle_call({upgrade_old_header, Prefix}, _From, #file{fd=Fd}=File) -> end; -handle_call(find_header, _From, #file{fd=Fd}=File) -> - {ok, Pos} = file:position(Fd, eof), +handle_call(find_header, _From, #file{fd=Fd, eof=Pos}=File) -> {reply, find_header(Fd, Pos div ?SIZE_BLOCK), File}. 
% 09 UPGRADE CODE @@ -386,7 +438,7 @@ extract_header(Prefix, Bin) -> case HeaderPrefix of Prefix -> % check the integrity signature - case erlang:md5(TermBin) == Sig of + case couch_util:md5(TermBin) == Sig of true -> Header = binary_to_term(TermBin), {ok, Header}; @@ -416,7 +468,7 @@ write_old_header(Fd, Prefix, Data) -> ok = file:sync(Fd), % pad out the header with zeros, then take the md5 hash PadZeros = <<0:(8*(?HEADER_SIZE - FilledSize2))>>, - Sig = erlang:md5([TermBin2, PadZeros]), + Sig = couch_util:md5([TermBin2, PadZeros]), % now we assemble the final header binary and write to disk WriteBin = <<Prefix/binary, TermBin2/binary, PadZeros/binary, Sig/binary>>, ?HEADER_SIZE = size(WriteBin), % sanity check @@ -432,6 +484,8 @@ handle_cast(close, Fd) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +handle_info({'EXIT', _, normal}, Fd) -> + {noreply, Fd}; handle_info({'EXIT', _, Reason}, Fd) -> {stop, Reason, Fd}. @@ -447,16 +501,53 @@ find_header(Fd, Block) -> end. load_header(Fd, Block) -> - {ok, <<1>>} = file:pread(Fd, Block*?SIZE_BLOCK, 1), - {ok, <<HeaderLen:32/integer>>} = file:pread(Fd, (Block*?SIZE_BLOCK) + 1, 4), + {ok, <<1, HeaderLen:32/integer, RestBlock/binary>>} = + file:pread(Fd, Block * ?SIZE_BLOCK, ?SIZE_BLOCK), TotalBytes = calculate_total_read_len(1, HeaderLen), - {ok, <<RawBin:TotalBytes/binary>>} = - file:pread(Fd, (Block*?SIZE_BLOCK) + 5, TotalBytes), + case TotalBytes > byte_size(RestBlock) of + false -> + <<RawBin:TotalBytes/binary, _/binary>> = RestBlock; + true -> + {ok, Missing} = file:pread( + Fd, (Block * ?SIZE_BLOCK) + 5 + byte_size(RestBlock), + TotalBytes - byte_size(RestBlock)), + RawBin = <<RestBlock/binary, Missing/binary>> + end, <<Md5Sig:16/binary, HeaderBin/binary>> = iolist_to_binary(remove_block_prefixes(1, RawBin)), - Md5Sig = erlang:md5(HeaderBin), + Md5Sig = couch_util:md5(HeaderBin), {ok, HeaderBin}. +maybe_read_more_iolist(Buffer, DataSize, _, _) + when DataSize =< byte_size(Buffer) -> + <<Data:DataSize/binary, _/binary>> = Buffer, + [Data]; +maybe_read_more_iolist(Buffer, DataSize, NextPos, File) -> + {Missing, _} = + read_raw_iolist_int(File, NextPos, DataSize - byte_size(Buffer)), + [Buffer, Missing]. + +-spec read_raw_iolist_int(#file{}, Pos::non_neg_integer(), Len::non_neg_integer()) -> + {Data::iolist(), CurPos::non_neg_integer()}. +read_raw_iolist_int(Fd, {Pos, _Size}, Len) -> % 0110 UPGRADE CODE + read_raw_iolist_int(Fd, Pos, Len); +read_raw_iolist_int(#file{fd=Fd, tail_append_begin=TAB}, Pos, Len) -> + BlockOffset = Pos rem ?SIZE_BLOCK, + TotalBytes = calculate_total_read_len(BlockOffset, Len), + {ok, <<RawBin:TotalBytes/binary>>} = file:pread(Fd, Pos, TotalBytes), + if Pos >= TAB -> + {remove_block_prefixes(BlockOffset, RawBin), Pos + TotalBytes}; + true -> + % 09 UPGRADE CODE + <<ReturnBin:Len/binary, _/binary>> = RawBin, + {[ReturnBin], Pos + Len} + end. + +-spec extract_md5(iolist()) -> {binary(), iolist()}. +extract_md5(FullIoList) -> + {Md5List, IoList} = split_iolist(FullIoList, 16, []), + {iolist_to_binary(Md5List), IoList}. + calculate_total_read_len(0, FinalLen) -> calculate_total_read_len(1, FinalLen) + 1; calculate_total_read_len(BlockOffset, FinalLen) -> @@ -495,6 +586,11 @@ make_blocks(BlockOffset, IoList) -> IoList end. +%% @doc Returns a tuple where the first element contains the leading SplitAt +%% bytes of the original iolist, and the 2nd element is the tail. If SplitAt +%% is larger than byte_size(IoList), return the difference. 
+-spec split_iolist(IoList::iolist(), SplitAt::non_neg_integer(), Acc::list()) -> + {iolist(), iolist()} | non_neg_integer(). split_iolist(List, 0, BeginAcc) -> {lists:reverse(BeginAcc), List}; split_iolist([], SplitAt, _BeginAcc) -> diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl index 252ecdb7..6bd5c893 100644 --- a/src/couchdb/couch_httpd.erl +++ b/src/couchdb/couch_httpd.erl @@ -13,27 +13,51 @@ -module(couch_httpd). -include("couch_db.hrl"). --export([start_link/0, stop/0, handle_request/5]). +-export([start_link/0, start_link/1, stop/0, handle_request/5]). --export([header_value/2,header_value/3,qs_value/2,qs_value/3,qs/1,path/1,absolute_uri/2,body_length/1]). +-export([header_value/2,header_value/3,qs_value/2,qs_value/3,qs/1,qs_json_value/3]). +-export([path/1,absolute_uri/2,body_length/1]). -export([verify_is_server_admin/1,unquote/1,quote/1,recv/2,recv_chunked/4,error_info/1]). -export([make_fun_spec_strs/1]). +-export([make_arity_1_fun/1, make_arity_2_fun/1, make_arity_3_fun/1]). -export([parse_form/1,json_body/1,json_body_obj/1,body/1,doc_etag/1, make_etag/1, etag_respond/3]). -export([primary_header_value/2,partition/1,serve_file/3,serve_file/4, server_header/0]). -export([start_chunked_response/3,send_chunk/2,log_request/2]). --export([start_response_length/4, send/2]). +-export([start_response_length/4, start_response/3, send/2]). -export([start_json_response/2, start_json_response/3, end_json_response/1]). -export([send_response/4,send_method_not_allowed/2,send_error/4, send_redirect/2,send_chunked_error/2]). -export([send_json/2,send_json/3,send_json/4,last_chunk/1,parse_multipart_request/3]). +-export([accepted_encodings/1,handle_request_int/5,validate_referer/1,validate_ctype/2]). start_link() -> + start_link(http). +start_link(http) -> + Port = couch_config:get("httpd", "port", "5984"), + start_link(?MODULE, [{port, Port}]); +start_link(https) -> + Port = couch_config:get("ssl", "port", "5984"), + CertFile = couch_config:get("ssl", "cert_file", nil), + KeyFile = couch_config:get("ssl", "key_file", nil), + Options = case CertFile /= nil andalso KeyFile /= nil of + true -> + [{port, Port}, + {ssl, true}, + {ssl_opts, [ + {certfile, CertFile}, + {keyfile, KeyFile}]}]; + false -> + io:format("SSL enabled but PEM certificates are missing.", []), + throw({error, missing_certs}) + end, + start_link(https, Options). +start_link(Name, Options) -> % read config and register for configuration changes % just stop if one of the config settings change. couch_server_sup % will restart us and then we will pick up the new settings. 
BindAddress = couch_config:get("httpd", "bind_address", any), - Port = couch_config:get("httpd", "port", "5984"), + NoDelay = "true" == couch_config:get("httpd", "nodelay", "false"), DefaultSpec = "{couch_httpd_db, handle_request}", DefaultFun = make_arity_1_fun( @@ -66,11 +90,11 @@ start_link() -> % and off we go - {ok, Pid} = case mochiweb_http:start([ + {ok, Pid} = case mochiweb_http:start(Options ++ [ {loop, Loop}, - {name, ?MODULE}, + {name, Name}, {ip, BindAddress}, - {port, Port} + {nodelay,NoDelay} ]) of {ok, MochiPid} -> {ok, MochiPid}; {error, Reason} -> @@ -83,11 +107,17 @@ start_link() -> ?MODULE:stop(); ("httpd", "port") -> ?MODULE:stop(); + ("httpd", "max_connections") -> + ?MODULE:stop(); ("httpd", "default_handler") -> ?MODULE:stop(); ("httpd_global_handlers", _) -> ?MODULE:stop(); ("httpd_db_handlers", _) -> + ?MODULE:stop(); + ("vhosts", _) -> + ?MODULE:stop(); + ("ssl", _) -> ?MODULE:stop() end, Pid), @@ -121,14 +151,21 @@ make_arity_3_fun(SpecStr) -> % SpecStr is "{my_module, my_fun}, {my_module2, my_fun2}" make_fun_spec_strs(SpecStr) -> - [FunSpecStr || FunSpecStr <- re:split(SpecStr, "(?<=})\\s*,\\s*(?={)", [{return, list}])]. + re:split(SpecStr, "(?<=})\\s*,\\s*(?={)", [{return, list}]). stop() -> mochiweb_http:stop(?MODULE). -handle_request(MochiReq, DefaultFun, - UrlHandlers, DbUrlHandlers, DesignUrlHandlers) -> +handle_request(MochiReq, DefaultFun, UrlHandlers, DbUrlHandlers, + DesignUrlHandlers) -> + + MochiReq1 = couch_httpd_vhost:match_vhost(MochiReq), + handle_request_int(MochiReq1, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers). + +handle_request_int(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers) -> Begin = now(), AuthenticationSrcs = make_fun_spec_strs( couch_config:get("httpd", "authentication_handlers")), @@ -137,6 +174,14 @@ handle_request(MochiReq, DefaultFun, RawUri = MochiReq:get(raw_path), {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri), + Headers = MochiReq:get(headers), + + % get requested path + RequestedPath = case MochiReq:get_header_value("x-couchdb-vhost-path") of + undefined -> RawUri; + P -> P + end, + HandlerKey = case mochiweb_util:partition(Path, "/") of {"", "", ""} -> @@ -144,10 +189,11 @@ handle_request(MochiReq, DefaultFun, {FirstPart, _, _} -> list_to_binary(FirstPart) end, - ?LOG_DEBUG("~p ~s ~p~nHeaders: ~p", [ + ?LOG_DEBUG("~p ~s ~p from ~p~nHeaders: ~p", [ MochiReq:get(method), RawUri, MochiReq:get(version), + MochiReq:get(peer), mochiweb_headers:to_list(MochiReq:get(headers)) ]), @@ -161,9 +207,26 @@ handle_request(MochiReq, DefaultFun, Meth -> couch_util:to_existing_atom(Meth) end, increment_method_stats(Method1), + + % allow broken HTTP clients to fake a full method vocabulary with an X-HTTP-METHOD-OVERRIDE header + MethodOverride = MochiReq:get_primary_header_value("X-HTTP-Method-Override"), + Method2 = case lists:member(MethodOverride, ["GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT", "COPY"]) of + true -> + ?LOG_INFO("MethodOverride: ~s (real method was ~s)", [MethodOverride, Method1]), + case Method1 of + 'POST' -> couch_util:to_existing_atom(MethodOverride); + _ -> + % Ignore X-HTTP-Method-Override when the original verb isn't POST. + % I'd like to send a 406 error to the client, but that'd require a nasty refactor. 
+ % throw({not_acceptable, <<"X-HTTP-Method-Override may only be used with POST requests.">>}) + Method1 + end; + _ -> Method1 + end, + % alias HEAD to GET as mochiweb takes care of stripping the body - Method = case Method1 of - 'HEAD' -> 'GET'; + Method = case Method2 of + 'HEAD' -> 'GET'; Other -> Other end, @@ -171,10 +234,14 @@ handle_request(MochiReq, DefaultFun, mochi_req = MochiReq, peer = MochiReq:get(peer), method = Method, + requested_path_parts = [list_to_binary(couch_httpd:unquote(Part)) + || Part <- string:tokens(RequestedPath, "/")], path_parts = [list_to_binary(couch_httpd:unquote(Part)) || Part <- string:tokens(Path, "/")], db_url_handlers = DbUrlHandlers, - design_url_handlers = DesignUrlHandlers + design_url_handlers = DesignUrlHandlers, + default_fun = DefaultFun, + url_handlers = UrlHandlers }, HandlerFun = couch_util:dict_find(HandlerKey, UrlHandlers, DefaultFun), @@ -191,8 +258,15 @@ handle_request(MochiReq, DefaultFun, throw:{http_head_abort, Resp0} -> {ok, Resp0}; throw:{invalid_json, S} -> - ?LOG_ERROR("attempted upload of invalid JSON ~s", [S]), - send_error(HttpReq, {bad_request, "invalid UTF-8 JSON"}); + ?LOG_ERROR("attempted upload of invalid JSON (set log_level to debug to log it)", []), + ?LOG_DEBUG("Invalid JSON: ~p",[S]), + send_error(HttpReq, {bad_request, io_lib:format("invalid UTF-8 JSON: ~p",[S])}); + throw:unacceptable_encoding -> + ?LOG_ERROR("unsupported encoding method for the response", []), + send_error(HttpReq, {not_acceptable, "unsupported encoding"}); + throw:bad_accept_encoding_value -> + ?LOG_ERROR("received invalid Accept-Encoding header", []), + send_error(HttpReq, bad_request); exit:normal -> exit(normal); throw:Error -> @@ -243,6 +317,33 @@ authenticate_request(Response, _AuthSrcs) -> increment_method_stats(Method) -> couch_stats_collector:increment({httpd_request_methods, Method}). +validate_referer(Req) -> + Host = host_for_request(Req), + Referer = header_value(Req, "Referer", fail), + case Referer of + fail -> + throw({bad_request, <<"Referer header required.">>}); + Referer -> + {_,RefererHost,_,_,_} = mochiweb_util:urlsplit(Referer), + if + RefererHost =:= Host -> ok; + true -> throw({bad_request, <<"Referer header must match host.">>}) + end + end. + +validate_ctype(Req, Ctype) -> + case couch_httpd:header_value(Req, "Content-Type") of + undefined -> + throw({bad_ctype, "Content-Type must be "++Ctype}); + ReqCtype -> + % ?LOG_ERROR("Ctype ~p ReqCtype ~p",[Ctype,ReqCtype]), + case re:split(ReqCtype, ";", [{return, list}]) of + [Ctype] -> ok; + [Ctype, _Rest] -> ok; + _Else -> + throw({bad_ctype, "Content-Type must be "++Ctype}) + end + end. % Utilities @@ -261,7 +362,17 @@ header_value(#httpd{mochi_req=MochiReq}, Key, Default) -> primary_header_value(#httpd{mochi_req=MochiReq}, Key) -> MochiReq:get_primary_header_value(Key). -serve_file(#httpd{mochi_req=MochiReq}=Req, RelativePath, DocumentRoot) -> +accepted_encodings(#httpd{mochi_req=MochiReq}) -> + case MochiReq:accepted_encodings(["gzip", "identity"]) of + bad_accept_encoding_value -> + throw(bad_accept_encoding_value); + [] -> + throw(unacceptable_encoding); + EncList -> + EncList + end. + +serve_file(Req, RelativePath, DocumentRoot) -> serve_file(Req, RelativePath, DocumentRoot, []). serve_file(#httpd{mochi_req=MochiReq}=Req, RelativePath, DocumentRoot, ExtraHeaders) -> @@ -272,7 +383,15 @@ qs_value(Req, Key) -> qs_value(Req, Key, undefined). qs_value(Req, Key, Default) -> - proplists:get_value(Key, qs(Req), Default). + couch_util:get_value(Key, qs(Req), Default). 
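validate_referer/1 and validate_ctype/2 give handler modules one-line request guards that throw the matching bad_request/bad_ctype terms for the error-handling path later in this file. A hypothetical POST-only handler using them:

    handle_my_req(#httpd{method = 'POST'} = Req) ->
        couch_httpd:validate_ctype(Req, "application/json"),
        {Props} = couch_httpd:json_body_obj(Req),
        couch_httpd:send_json(Req, 201, {[{<<"ok">>, true}]});
    handle_my_req(Req) ->
        couch_httpd:send_method_not_allowed(Req, "POST").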
+ +qs_json_value(Req, Key, Default) -> + case qs_value(Req, Key, Default) of + Default -> + Default; + Result -> + ?JSON_DECODE(Result) + end. qs(#httpd{mochi_req=MochiReq}) -> MochiReq:parse_qs(). @@ -280,30 +399,36 @@ qs(#httpd{mochi_req=MochiReq}) -> path(#httpd{mochi_req=MochiReq}) -> MochiReq:get(path). -absolute_uri(#httpd{mochi_req=MochiReq}, Path) -> +host_for_request(#httpd{mochi_req=MochiReq}) -> XHost = couch_config:get("httpd", "x_forwarded_host", "X-Forwarded-Host"), - Host = case MochiReq:get_header_value(XHost) of + case MochiReq:get_header_value(XHost) of undefined -> case MochiReq:get_header_value("Host") of - undefined -> + undefined -> {ok, {Address, Port}} = inet:sockname(MochiReq:get(socket)), inet_parse:ntoa(Address) ++ ":" ++ integer_to_list(Port); Value1 -> Value1 end; Value -> Value - end, + end. + +absolute_uri(#httpd{mochi_req=MochiReq}=Req, Path) -> + Host = host_for_request(Req), XSsl = couch_config:get("httpd", "x_forwarded_ssl", "X-Forwarded-Ssl"), Scheme = case MochiReq:get_header_value(XSsl) of - "on" -> "https"; - _ -> - XProto = couch_config:get("httpd", "x_forwarded_proto", "X-Forwarded-Proto"), - case MochiReq:get_header_value(XProto) of - % Restrict to "https" and "http" schemes only - "https" -> "https"; - _ -> "http" - end - end, + "on" -> "https"; + _ -> + XProto = couch_config:get("httpd", "x_forwarded_proto", "X-Forwarded-Proto"), + case MochiReq:get_header_value(XProto) of + %% Restrict to "https" and "http" schemes only + "https" -> "https"; + _ -> case MochiReq:get(scheme) of + https -> "https"; + http -> "http" + end + end + end, Scheme ++ "://" ++ Host ++ Path. unquote(UrlEncodedString) -> @@ -362,7 +487,7 @@ doc_etag(#doc{revs={Start, [DiskRev|_]}}) -> "\"" ++ ?b2l(couch_doc:rev_to_str({Start, DiskRev})) ++ "\"". make_etag(Term) -> - <<SigInt:128/integer>> = erlang:md5(term_to_binary(Term)), + <<SigInt:128/integer>> = couch_util:md5(term_to_binary(Term)), list_to_binary("\"" ++ lists:flatten(io_lib:format("~.36B",[SigInt])) ++ "\""). etag_match(Req, CurrentEtag) when is_binary(CurrentEtag) -> @@ -391,10 +516,10 @@ verify_is_server_admin(#user_ctx{roles=Roles}) -> false -> throw({unauthorized, <<"You are not a server admin.">>}) end. -log_request(#httpd{mochi_req=MochiReq,peer=Peer,method=Method}, Code) -> +log_request(#httpd{mochi_req=MochiReq,peer=Peer}, Code) -> ?LOG_INFO("~s - - ~p ~s ~B", [ Peer, - Method, + couch_util:to_existing_atom(MochiReq:get(method)), MochiReq:get(raw_path), couch_util:to_integer(Code) ]). @@ -410,6 +535,18 @@ start_response_length(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Length) -> end, {ok, Resp}. +start_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_cdes, Code}), + CookieHeader = couch_httpd_auth:cookie_auth_header(Req, Headers), + Headers2 = Headers ++ server_header() ++ CookieHeader, + Resp = MochiReq:start_response({Code, Headers2}), + case MochiReq:get(method) of + 'HEAD' -> throw({http_head_abort, Resp}); + _ -> ok + end, + {ok, Resp}. + send(Resp, Data) -> Resp:send(Data), {ok, Resp}. 
@@ -513,8 +650,18 @@ start_jsonp(Req) -> [] -> []; CallBack -> try - validate_callback(CallBack), - CallBack ++ "(" + % make sure jsonp is configured on (default off) + case couch_config:get("httpd", "allow_jsonp", "false") of + "true" -> + validate_callback(CallBack), + CallBack ++ "("; + _Else -> + % this could throw an error message, but instead we just ignore the + % jsonp parameter + % throw({bad_request, <<"JSONP must be configured before using.">>}) + put(jsonp, no_jsonp), + [] + end catch Error -> put(jsonp, no_jsonp), @@ -578,10 +725,12 @@ error_info(file_exists) -> "created, the file already exists.">>}; error_info({bad_ctype, Reason}) -> {415, <<"bad_content_type">>, Reason}; +error_info(requested_range_not_satisfiable) -> + {416, <<"requested_range_not_satisfiable">>, <<"Requested range not satisfiable">>}; error_info({error, illegal_database_name}) -> {400, <<"illegal_database_name">>, <<"Only lowercase characters (a-z), " "digits (0-9), and any of the characters _, $, (, ), +, -, and / " - "are allowed">>}; + "are allowed. Must begin with a letter.">>}; error_info({missing_stub, Reason}) -> {412, <<"missing_stub">>, Reason}; error_info({Error, Reason}) -> @@ -589,28 +738,66 @@ error_info({Error, Reason}) -> error_info(Error) -> {500, <<"unknown_error">>, couch_util:to_binary(Error)}. -send_error(_Req, {already_sent, Resp, _Error}) -> - {ok, Resp}; - -send_error(#httpd{mochi_req=MochiReq}=Req, Error) -> - {Code, ErrorStr, ReasonStr} = error_info(Error), - Headers = if Code == 401 -> +error_headers(#httpd{mochi_req=MochiReq}=Req, Code, ErrorStr, ReasonStr) -> + if Code == 401 -> % this is where the basic auth popup is triggered case MochiReq:get_header_value("X-CouchDB-WWW-Authenticate") of undefined -> case couch_config:get("httpd", "WWW-Authenticate", nil) of nil -> - []; + % If the client is a browser and the basic auth popup isn't turned on + % redirect to the session page. + case ErrorStr of + <<"unauthorized">> -> + case couch_config:get("couch_httpd_auth", "authentication_redirect", nil) of + nil -> {Code, []}; + AuthRedirect -> + case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of + "true" -> + % send the browser popup header no matter what if we are require_valid_user + {Code, [{"WWW-Authenticate", "Basic realm=\"server\""}]}; + _False -> + case MochiReq:accepts_content_type("text/html") of + false -> + {Code, []}; + true -> + % Redirect to the path the user requested, not + % the one that is used internally. + UrlReturnRaw = case MochiReq:get_header_value("x-couchdb-vhost-path") of + undefined -> + MochiReq:get(path); + VHostPath -> + VHostPath + end, + RedirectLocation = lists:flatten([ + AuthRedirect, + "?return=", couch_util:url_encode(UrlReturnRaw), + "&reason=", couch_util:url_encode(ReasonStr) + ]), + {302, [{"Location", absolute_uri(Req, RedirectLocation)}]} + end + end + end; + _Else -> + {Code, []} + end; Type -> - [{"WWW-Authenticate", Type}] + {Code, [{"WWW-Authenticate", Type}]} end; Type -> - [{"WWW-Authenticate", Type}] + {Code, [{"WWW-Authenticate", Type}]} end; true -> - [] - end, - send_error(Req, Code, Headers, ErrorStr, ReasonStr). + {Code, []} + end. + +send_error(_Req, {already_sent, Resp, _Error}) -> + {ok, Resp}; + +send_error(Req, Error) -> + {Code, ErrorStr, ReasonStr} = error_info(Error), + {Code1, Headers} = error_headers(Req, Code, ErrorStr, ReasonStr), + send_error(Req, Code1, Headers, ErrorStr, ReasonStr). send_error(Req, Code, ErrorStr, ReasonStr) -> send_error(Req, Code, [], ErrorStr, ReasonStr). 
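JSONP is now opt-in: start_jsonp/1 only honors a callback query parameter when allow_jsonp is "true" under [httpd], and otherwise silently ignores it. A sketch of flipping the setting at runtime, assuming couch_config:set/4 as used elsewhere in the tree (the final false skips persisting to the ini file):

    enable_jsonp() ->
        % same effect as "allow_jsonp = true" under [httpd] in local.ini
        ok = couch_config:set("httpd", "allow_jsonp", "true", false).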
@@ -667,22 +854,24 @@ parse_multipart_request(ContentType, DataFun, Callback) -> buffer= <<>>, data_fun=DataFun, callback=Callback}, - {Mp2, _NilCallback} = read_until(Mp, <<"--", Boundary0/binary>>, + {Mp2, _NilCallback} = read_until(Mp, <<"--", Boundary0/binary>>, fun(Next)-> nil_callback(Next) end), - #mp{buffer=Buffer, data_fun=DataFun2, callback=Callback2} = + #mp{buffer=Buffer, data_fun=DataFun2, callback=Callback2} = parse_part_header(Mp2), {Buffer, DataFun2, Callback2}. nil_callback(_Data)-> fun(Next) -> nil_callback(Next) end. -get_boundary(ContentType) -> - {"multipart/" ++ _, Opts} = mochiweb_util:parse_header(ContentType), - case proplists:get_value("boundary", Opts) of +get_boundary({"multipart/" ++ _, Opts}) -> + case couch_util:get_value("boundary", Opts) of S when is_list(S) -> S - end. - + end; +get_boundary(ContentType) -> + {"multipart/" ++ _ , Opts} = mochiweb_util:parse_header(ContentType), + get_boundary({"multipart/", Opts}). + split_header(<<>>) -> @@ -700,6 +889,11 @@ read_until(#mp{data_fun=DataFun, buffer=Buffer}=Mp, Pattern, Callback) -> {Buffer2, DataFun2} = DataFun(), Buffer3 = iolist_to_binary(Buffer2), read_until(Mp#mp{data_fun=DataFun2,buffer=Buffer3}, Pattern, Callback2); + {partial, 0} -> + {NewData, DataFun2} = DataFun(), + read_until(Mp#mp{data_fun=DataFun2, + buffer= iolist_to_binary([Buffer,NewData])}, + Pattern, Callback); {partial, Skip} -> <<DataChunk:Skip/binary, Rest/binary>> = Buffer, Callback2 = Callback(DataChunk), @@ -707,6 +901,10 @@ read_until(#mp{data_fun=DataFun, buffer=Buffer}=Mp, Pattern, Callback) -> read_until(Mp#mp{data_fun=DataFun2, buffer= iolist_to_binary([Rest | NewData])}, Pattern, Callback2); + {exact, 0} -> + PatternLen = size(Pattern), + <<_:PatternLen/binary, Rest/binary>> = Buffer, + {Mp#mp{buffer= Rest}, Callback}; {exact, Skip} -> PatternLen = size(Pattern), <<DataChunk:Skip/binary, _:PatternLen/binary, Rest/binary>> = Buffer, diff --git a/src/couchdb/couch_httpd_auth.erl b/src/couchdb/couch_httpd_auth.erl index 554886ca..a370ebdf 100644 --- a/src/couchdb/couch_httpd_auth.erl +++ b/src/couchdb/couch_httpd_auth.erl @@ -16,9 +16,9 @@ -export([default_authentication_handler/1,special_test_authentication_handler/1]). -export([cookie_authentication_handler/1]). -export([null_authentication_handler/1]). +-export([proxy_authentification_handler/1]). -export([cookie_auth_header/2]). -export([handle_session_req/1]). --export([ensure_users_db_exists/1, get_user/1]). -import(couch_httpd, [header_value/2, send_json/2,send_json/4, send_method_not_allowed/2]). @@ -43,11 +43,11 @@ special_test_authentication_handler(Req) -> Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}} end. -basic_username_pw(Req) -> +basic_name_pw(Req) -> AuthorizationHeader = header_value(Req, "Authorization"), case AuthorizationHeader of "Basic " ++ Base64Value -> - case string:tokens(?b2l(couch_util:decodeBase64(Base64Value)),":") of + case string:tokens(?b2l(base64:decode(Base64Value)),":") of ["_", "_"] -> % special name and pass to be logged out nil; @@ -63,20 +63,20 @@ basic_username_pw(Req) -> end. 
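basic_name_pw/1 above now decodes the Authorization header with the stdlib base64 module instead of couch_util:decodeBase64. A sketch of building a header it will accept (encode_basic_auth/2 is hypothetical):

    encode_basic_auth(Name, Pass) ->
        % produces e.g. "Basic YWRtaW46aHVudGVyMg==" for {"admin", "hunter2"}
        "Basic " ++ base64:encode_to_string(Name ++ ":" ++ Pass).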
 default_authentication_handler(Req) ->
-    case basic_username_pw(Req) of
+    case basic_name_pw(Req) of
     {User, Pass} ->
-        case get_user(?l2b(User)) of
+        case couch_auth_cache:get_user_creds(User) of
        nil ->
            throw({unauthorized, <<"Name or password is incorrect.">>});
        UserProps ->
-            UserSalt = proplists:get_value(<<"salt">>, UserProps, <<>>),
+            UserSalt = couch_util:get_value(<<"salt">>, UserProps, <<>>),
            PasswordHash = hash_password(?l2b(Pass), UserSalt),
-            case proplists:get_value(<<"password_sha">>, UserProps, nil) of
-            ExpectedHash when ExpectedHash == PasswordHash ->
+            ExpectedHash = couch_util:get_value(<<"password_sha">>, UserProps, nil),
+            case couch_util:verify(ExpectedHash, PasswordHash) of
+            true ->
                Req#httpd{user_ctx=#user_ctx{
                    name=?l2b(User),
-                    roles=proplists:get_value(<<"roles">>, UserProps, []),
-                    user_doc={UserProps}
+                    roles=couch_util:get_value(<<"roles">>, UserProps, [])
                }};
            _Else ->
                throw({unauthorized, <<"Name or password is incorrect.">>})
@@ -99,176 +99,106 @@ default_authentication_handler(Req) ->
 null_authentication_handler(Req) ->
     Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}}.
 
-% maybe we can use hovercraft to simplify running this view query
-% rename to get_user_from_users_db
-get_user(UserName) ->
-    case couch_config:get("admins", ?b2l(UserName)) of
-    "-hashed-" ++ HashedPwdAndSalt ->
-        % the username is an admin, now check to see if there is a user doc
-        % which has a matching username, salt, and password_sha
-        [HashedPwd, Salt] = string:tokens(HashedPwdAndSalt, ","),
-        case get_user_props_from_db(UserName) of
-        nil ->
-            [{<<"roles">>, [<<"_admin">>]},
-             {<<"salt">>, ?l2b(Salt)},
-             {<<"password_sha">>, ?l2b(HashedPwd)}];
-        UserProps when is_list(UserProps) ->
-            DocRoles = proplists:get_value(<<"roles">>, UserProps),
-            [{<<"roles">>, [<<"_admin">> | DocRoles]},
-             {<<"salt">>, ?l2b(Salt)},
-             {<<"password_sha">>, ?l2b(HashedPwd)},
-             {<<"user_doc">>, {UserProps}}]
-        end;
-    Else ->
-        get_user_props_from_db(UserName)
-    end.
-
-get_user_props_from_db(UserName) ->
-    DbName = couch_config:get("couch_httpd_auth", "authentication_db"),
-    {ok, Db} = ensure_users_db_exists(?l2b(DbName)),
-    DocId = <<"org.couchdb.user:", UserName/binary>>,
-    try couch_httpd_db:couch_doc_open(Db, DocId, nil, []) of
-        #doc{}=Doc ->
-            {DocProps} = couch_query_servers:json_doc(Doc),
-            DocProps
-    catch
-        throw:Throw ->
-            nil
-    end.
-
-% this should handle creating the ddoc
-ensure_users_db_exists(DbName) ->
-    case couch_db:open(DbName, [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}]) of
-    {ok, Db} ->
-        ensure_auth_ddoc_exists(Db, <<"_design/_auth">>),
-        {ok, Db};
-    _Error ->
-        {ok, Db} = couch_db:create(DbName, [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}]),
-        ensure_auth_ddoc_exists(Db, <<"_design/_auth">>),
-        {ok, Db}
+%% @doc proxy auth handler.
+%
+% This handler allows creation of a userCtx object from a user authenticated remotely.
+% The client just passes specific headers to CouchDB and the handler creates the userCtx.
+% Header names can be defined in local.ini. By default they are:
+%
+% * X-Auth-CouchDB-UserName : contains the username (x_auth_username in the
+% couch_httpd_auth section)
+% * X-Auth-CouchDB-Roles : contains the user roles, a list of roles separated by a
+% comma (x_auth_roles in the couch_httpd_auth section)
+% * X-Auth-CouchDB-Token : token to authenticate the authorization (x_auth_token
+% in the couch_httpd_auth section). This token is an HMAC-SHA1 created from the
+% secret key and the username. The secret key should be the same in the client and
+% the CouchDB node; it is the secret key in the couch_httpd_auth section of the
+% ini. This token is optional if the value of proxy_use_secret in couch_httpd_auth isn't true.
+%
+proxy_authentification_handler(Req) ->
+    case proxy_auth_user(Req) of
+        nil -> Req;
+        Req2 -> Req2
     end.
 
-ensure_auth_ddoc_exists(Db, DDocId) ->
-    try couch_httpd_db:couch_doc_open(Db, DDocId, nil, []) of
-        _Foo -> ok
-    catch
-        _:Error ->
-            % create the design document
-            {ok, AuthDesign} = auth_design_doc(DDocId),
-            {ok, _Rev} = couch_db:update_doc(Db, AuthDesign, []),
-            ok
+proxy_auth_user(Req) ->
+    XHeaderUserName = couch_config:get("couch_httpd_auth", "x_auth_username",
+        "X-Auth-CouchDB-UserName"),
+    XHeaderRoles = couch_config:get("couch_httpd_auth", "x_auth_roles",
+        "X-Auth-CouchDB-Roles"),
+    XHeaderToken = couch_config:get("couch_httpd_auth", "x_auth_token",
+        "X-Auth-CouchDB-Token"),
+    case header_value(Req, XHeaderUserName) of
+        undefined -> nil;
+        UserName ->
+            Roles = case header_value(Req, XHeaderRoles) of
+                undefined -> [];
+                Else ->
+                    [?l2b(R) || R <- string:tokens(Else, ",")]
+            end,
+            case couch_config:get("couch_httpd_auth", "proxy_use_secret", "false") of
+                "true" ->
+                    case couch_config:get("couch_httpd_auth", "secret", nil) of
+                        nil ->
+                            Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), roles=Roles}};
+                        Secret ->
+                            ExpectedToken = couch_util:to_hex(crypto:sha_mac(Secret, UserName)),
+                            case header_value(Req, XHeaderToken) of
+                                Token when Token == ExpectedToken ->
+                                    Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName),
+                                        roles=Roles}};
+                                _ -> nil
+                            end
+                    end;
+                _ ->
+                    Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), roles=Roles}}
+            end
     end.
 
-% add the validation function here
-auth_design_doc(DocId) ->
-    DocProps = [
-        {<<"_id">>, DocId},
-        {<<"language">>,<<"javascript">>},
-        {<<"views">>,
-            {[{<<"users">>,
-                {[{<<"map">>,
-                <<"function (doc) {\n if (doc.type == \"user\") {\n emit(doc.username, doc);\n}\n}">>
-                }]}
-            }]}
-        },
-        {
-        <<"validate_doc_update">>,
-        <<"function(newDoc, oldDoc, userCtx) {
-            if (newDoc.type != 'user') {
-                return;
-            } // we only validate user docs for now
-            if (!newDoc.username) {
-                throw({forbidden : 'doc.username is required'});
-            }
-            if (!(newDoc.roles && (typeof newDoc.roles.length != 'undefined') )) {
-                throw({forbidden : 'doc.roles must be an array'});
-            }
-            if (newDoc._id != 'org.couchdb.user:'+newDoc.username) {
-                throw({forbidden : 'Docid must be of the form org.couchdb.user:username'});
-            }
-            if (oldDoc) { // validate all updates
-                if (oldDoc.username != newDoc.username) {
-                  throw({forbidden : 'Usernames may not be changed.'});
-                }
-            }
-            if (newDoc.password_sha && !newDoc.salt) {
-                throw({forbidden : 'Users with password_sha must have a salt. 
See /_utils/script/couch.js for example code.'}); - } - if (userCtx.roles.indexOf('_admin') == -1) { // not an admin - if (oldDoc) { // validate non-admin updates - if (userCtx.name != newDoc.username) { - throw({forbidden : 'You may only update your own user document.'}); - } - // validate role updates - var oldRoles = oldDoc.roles.sort(); - var newRoles = newDoc.roles.sort(); - if (oldRoles.length != newRoles.length) { - throw({forbidden : 'Only _admin may edit roles'}); - } - for (var i=0; i < oldRoles.length; i++) { - if (oldRoles[i] != newRoles[i]) { - throw({forbidden : 'Only _admin may edit roles'}); - } - }; - } else if (newDoc.roles.length > 0) { - throw({forbidden : 'Only _admin may set roles'}); - } - } - // no system roles in users db - for (var i=0; i < newDoc.roles.length; i++) { - if (newDoc.roles[i][0] == '_') { - throw({forbidden : 'No system roles (starting with underscore) in users db.'}); - } - }; - // no system names as usernames - if (newDoc.username[0] == '_') { - throw({forbidden : 'Username may not start with underscore.'}); - } - }">> - }], - {ok, couch_doc:from_json_obj({DocProps})}. cookie_authentication_handler(#httpd{mochi_req=MochiReq}=Req) -> case MochiReq:get_cookie_value("AuthSession") of undefined -> Req; [] -> Req; - Cookie -> + Cookie -> [User, TimeStr | HashParts] = try AuthSession = couch_util:decodeBase64Url(Cookie), - [A, B | Cs] = string:tokens(?b2l(AuthSession), ":") + [_A, _B | _Cs] = string:tokens(?b2l(AuthSession), ":") catch - _:Error -> + _:_Error -> Reason = <<"Malformed AuthSession cookie. Please clear your cookies.">>, throw({bad_request, Reason}) end, % Verify expiry and hash - {NowMS, NowS, _} = erlang:now(), - CurrentTime = NowMS * 1000000 + NowS, + CurrentTime = make_cookie_time(), case couch_config:get("couch_httpd_auth", "secret", nil) of - nil -> - ?LOG_ERROR("cookie auth secret is not set",[]), + nil -> + ?LOG_DEBUG("cookie auth secret is not set",[]), Req; SecretStr -> Secret = ?l2b(SecretStr), - case get_user(?l2b(User)) of + case couch_auth_cache:get_user_creds(User) of nil -> Req; UserProps -> - UserSalt = proplists:get_value(<<"salt">>, UserProps, <<"">>), + UserSalt = couch_util:get_value(<<"salt">>, UserProps, <<"">>), FullSecret = <<Secret/binary, UserSalt/binary>>, ExpectedHash = crypto:sha_mac(FullSecret, User ++ ":" ++ TimeStr), Hash = ?l2b(string:join(HashParts, ":")), Timeout = to_int(couch_config:get("couch_httpd_auth", "timeout", 600)), ?LOG_DEBUG("timeout ~p", [Timeout]), case (catch erlang:list_to_integer(TimeStr, 16)) of - TimeStamp when CurrentTime < TimeStamp + Timeout - andalso ExpectedHash == Hash -> - TimeLeft = TimeStamp + Timeout - CurrentTime, - ?LOG_DEBUG("Successful cookie auth as: ~p", [User]), - Req#httpd{user_ctx=#user_ctx{ - name=?l2b(User), - roles=proplists:get_value(<<"roles">>, UserProps, []), - user_doc=proplists:get_value(<<"user_doc">>, UserProps, null) - }, auth={FullSecret, TimeLeft < Timeout*0.9}}; + TimeStamp when CurrentTime < TimeStamp + Timeout -> + case couch_util:verify(ExpectedHash, Hash) of + true -> + TimeLeft = TimeStamp + Timeout - CurrentTime, + ?LOG_DEBUG("Successful cookie auth as: ~p", [User]), + Req#httpd{user_ctx=#user_ctx{ + name=?l2b(User), + roles=couch_util:get_value(<<"roles">>, UserProps, []) + }, auth={FullSecret, TimeLeft < Timeout*0.9}}; + _Else -> + Req + end; _Else -> Req end @@ -285,12 +215,11 @@ cookie_auth_header(#httpd{user_ctx=#user_ctx{name=User}, auth={Secret, true}}, H % or logout handler. 
% The login and logout handlers need to set the AuthSession cookie % themselves. - CookieHeader = proplists:get_value("Set-Cookie", Headers, ""), + CookieHeader = couch_util:get_value("Set-Cookie", Headers, ""), Cookies = mochiweb_cookies:parse_cookie(CookieHeader), - AuthSession = proplists:get_value("AuthSession", Cookies), + AuthSession = couch_util:get_value("AuthSession", Cookies), if AuthSession == undefined -> - {NowMS, NowS, _} = erlang:now(), - TimeStamp = NowMS * 1000000 + NowS, + TimeStamp = make_cookie_time(), [cookie_auth_cookie(?b2l(User), Secret, TimeStamp)]; true -> [] @@ -318,30 +247,35 @@ ensure_cookie_auth_secret() -> % session handlers % Login handler with user db -% TODO this should also allow a JSON POST handle_session_req(#httpd{method='POST', mochi_req=MochiReq}=Req) -> ReqBody = MochiReq:recv_body(), Form = case MochiReq:get_primary_header_value("content-type") of + % content type should be json "application/x-www-form-urlencoded" ++ _ -> mochiweb_util:parse_qs(ReqBody); + "application/json" ++ _ -> + {Pairs} = ?JSON_DECODE(ReqBody), + lists:map(fun({Key, Value}) -> + {?b2l(Key), ?b2l(Value)} + end, Pairs); _ -> [] end, - UserName = ?l2b(proplists:get_value("username", Form, "")), - Password = ?l2b(proplists:get_value("password", Form, "")), + UserName = ?l2b(couch_util:get_value("name", Form, "")), + Password = ?l2b(couch_util:get_value("password", Form, "")), ?LOG_DEBUG("Attempt Login: ~s",[UserName]), - User = case get_user(UserName) of + User = case couch_auth_cache:get_user_creds(UserName) of nil -> []; Result -> Result end, - UserSalt = proplists:get_value(<<"salt">>, User, <<>>), + UserSalt = couch_util:get_value(<<"salt">>, User, <<>>), PasswordHash = hash_password(Password, UserSalt), - case proplists:get_value(<<"password_sha">>, User, nil) of - ExpectedHash when ExpectedHash == PasswordHash -> + ExpectedHash = couch_util:get_value(<<"password_sha">>, User, nil), + case couch_util:verify(ExpectedHash, PasswordHash) of + true -> % setup the session cookie Secret = ?l2b(ensure_cookie_auth_secret()), - {NowMS, NowS, _} = erlang:now(), - CurrentTime = NowMS * 1000000 + NowS, + CurrentTime = make_cookie_time(), Cookie = cookie_auth_cookie(?b2l(UserName), <<Secret/binary, UserSalt/binary>>, CurrentTime), % TODO document the "next" feature in Futon {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of @@ -353,9 +287,8 @@ handle_session_req(#httpd{method='POST', mochi_req=MochiReq}=Req) -> send_json(Req#httpd{req_body=ReqBody}, Code, Headers, {[ {ok, true}, - {name, proplists:get_value(<<"username">>, User, null)}, - {roles, proplists:get_value(<<"roles">>, User, [])}, - {user_doc, proplists:get_value(<<"user_doc">>, User, null)} + {name, couch_util:get_value(<<"name">>, User, null)}, + {roles, couch_util:get_value(<<"roles">>, User, [])} ]}); _Else -> % clear the session @@ -363,6 +296,7 @@ handle_session_req(#httpd{method='POST', mochi_req=MochiReq}=Req) -> send_json(Req, 401, [Cookie], {[{error, <<"unauthorized">>},{reason, <<"Name or password is incorrect.">>}]}) end; % get user info +% GET /_session handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req) -> Name = UserCtx#user_ctx.name, ForceLogin = couch_httpd:qs_value(Req, "basic", "false"), @@ -371,15 +305,20 @@ handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req) -> throw({unauthorized, <<"Please login.">>}); {Name, _} -> send_json(Req, {[ + % remove this ok {ok, true}, - {name, Name}, - {roles, UserCtx#user_ctx.roles}, + {<<"userCtx">>, {[ + {name, Name}, + {roles, 
UserCtx#user_ctx.roles} + ]}}, {info, {[ - {user_db, ?l2b(couch_config:get("couch_httpd_auth", "authentication_db"))}, - {handlers, [?l2b(H) || H <- couch_httpd:make_fun_spec_strs( + {authentication_db, ?l2b(couch_config:get("couch_httpd_auth", "authentication_db"))}, + {authentication_handlers, [auth_name(H) || H <- couch_httpd:make_fun_spec_strs( couch_config:get("httpd", "authentication_handlers"))]} - ] ++ maybe_value(authenticated, UserCtx#user_ctx.handler)}} - ] ++ maybe_value(user_doc, UserCtx#user_ctx.user_doc)}) + ] ++ maybe_value(authenticated, UserCtx#user_ctx.handler, fun(Handler) -> + auth_name(?b2l(Handler)) + end)}} + ]}) end; % logout by deleting the session handle_session_req(#httpd{method='DELETE'}=Req) -> @@ -394,12 +333,21 @@ handle_session_req(#httpd{method='DELETE'}=Req) -> handle_session_req(Req) -> send_method_not_allowed(Req, "GET,HEAD,POST,DELETE"). -maybe_value(Key, undefined) -> []; -maybe_value(Key, Else) -> [{Key, Else}]. +maybe_value(_Key, undefined, _Fun) -> []; +maybe_value(Key, Else, Fun) -> + [{Key, Fun(Else)}]. + +auth_name(String) when is_list(String) -> + [_,_,_,_,_,Name|_] = re:split(String, "[\\W_]", [{return, list}]), + ?l2b(Name). to_int(Value) when is_binary(Value) -> - to_int(?b2l(Value)); + to_int(?b2l(Value)); to_int(Value) when is_list(Value) -> list_to_integer(Value); to_int(Value) when is_integer(Value) -> Value. + +make_cookie_time() -> + {NowMS, NowS, _} = erlang:now(), + NowMS * 1000000 + NowS. diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl index 9233e953..c11d2aef 100644 --- a/src/couchdb/couch_httpd_db.erl +++ b/src/couchdb/couch_httpd_db.erl @@ -20,8 +20,8 @@ -import(couch_httpd, [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, - start_json_response/2,start_json_response/3, - send_chunk/2,end_json_response/1, + send_response/4,start_json_response/2,start_json_response/3, + send_chunk/2,last_chunk/1,end_json_response/1, start_chunked_response/3, absolute_uri/2, send/2, start_response_length/4]). @@ -55,205 +55,80 @@ handle_request(#httpd{path_parts=[DbName|RestParts],method=Method, do_db_req(Req, Handler) end. -get_changes_timeout(Req, Resp) -> - DefaultTimeout = list_to_integer( - couch_config:get("httpd", "changes_timeout", "60000")), - case couch_httpd:qs_value(Req, "heartbeat") of - undefined -> - case couch_httpd:qs_value(Req, "timeout") of - undefined -> - {DefaultTimeout, fun() -> stop end}; - TimeoutList -> - {lists:min([DefaultTimeout, list_to_integer(TimeoutList)]), - fun() -> stop end} - end; - "true" -> - {DefaultTimeout, fun() -> send_chunk(Resp, "\n"), ok end}; - TimeoutList -> - {lists:min([DefaultTimeout, list_to_integer(TimeoutList)]), - fun() -> send_chunk(Resp, "\n"), ok end} - end. - - -start_sending_changes(_Resp, "continuous") -> - ok; -start_sending_changes(Resp, _Else) -> - send_chunk(Resp, "{\"results\":[\n"). 
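Stepping back to the couch_httpd_auth changes just above: GET /_session now nests the identity under a userCtx object instead of top-level name/roles fields. Roughly, as EJSON (the concrete values are illustrative, not prescribed by the patch):

    {[
        {ok, true},   % still present; flagged above for removal
        {<<"userCtx">>, {[{name, <<"jan">>}, {roles, [<<"_admin">>]}]}},
        {info, {[
            {authentication_db, <<"_users">>},
            {authentication_handlers, [<<"oauth">>, <<"cookie">>, <<"default">>]},
            {authenticated, <<"cookie">>}   % present only when a handler ran
        ]}}
    ]}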
- -handle_changes_req(#httpd{method='GET',path_parts=[DbName|_]}=Req, Db) -> - FilterFun = make_filter_fun(Req, Db), - {ok, Info} = couch_db:get_db_info(Db), - Seq = proplists:get_value(update_seq, Info), - {Dir, StartSeq} = case couch_httpd:qs_value(Req, "descending", "false") of - "false" -> - {fwd, list_to_integer(couch_httpd:qs_value(Req, "since", "0"))}; - "true" -> - {rev, Seq}; - _Bad -> throw({bad_request, "descending must be true or false"}) - end, - Limit = list_to_integer(couch_httpd:qs_value(Req, "limit", "1000000000000000")), - ResponseType = couch_httpd:qs_value(Req, "feed", "normal"), - if ResponseType == "continuous" orelse ResponseType == "longpoll" -> - {ok, Resp} = start_json_response(Req, 200), - start_sending_changes(Resp, ResponseType), - - Self = self(), - {ok, Notify} = couch_db_update_notifier:start_link( - fun({_, DbName0}) when DbName0 == DbName -> - Self ! db_updated; - (_) -> - ok - end), - {Timeout, TimeoutFun} = get_changes_timeout(Req, Resp), - couch_stats_collector:track_process_count(Self, - {httpd, clients_requesting_changes}), - try - keep_sending_changes(Req, Resp, Db, StartSeq, <<"">>, Timeout, - TimeoutFun, ResponseType, Limit, FilterFun) - after - couch_db_update_notifier:stop(Notify), - get_rest_db_updated() % clean out any remaining update messages - end; - true -> - CurrentEtag = couch_httpd:make_etag(Info), - couch_httpd:etag_respond(Req, CurrentEtag, fun() -> - % send the etag - {ok, Resp} = start_json_response(Req, 200, [{"Etag", CurrentEtag}]), - start_sending_changes(Resp, ResponseType), - {ok, {_, LastSeq, _Prepend, _, _, _, _, _}} = - send_changes(Req, Resp, Db, Dir, StartSeq, <<"">>, "normal", - Limit, FilterFun), - end_sending_changes(Resp, LastSeq, ResponseType) - end) - end; - +handle_changes_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + handle_changes_req1(Req, Db); +handle_changes_req(#httpd{method='GET'}=Req, Db) -> + handle_changes_req1(Req, Db); handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> - send_method_not_allowed(Req, "GET,HEAD"). - -% waits for a db_updated msg, if there are multiple msgs, collects them. -wait_db_updated(Timeout, TimeoutFun) -> - receive db_updated -> get_rest_db_updated() - after Timeout -> - case TimeoutFun() of - ok -> wait_db_updated(Timeout, TimeoutFun); - stop -> stop + send_method_not_allowed(Req, "GET,HEAD,POST"). + +handle_changes_req1(Req, Db) -> + MakeCallback = fun(Resp) -> + fun({change, Change, _}, "continuous") -> + send_chunk(Resp, [?JSON_ENCODE(Change) | "\n"]); + ({change, Change, Prepend}, _) -> + send_chunk(Resp, [Prepend, ?JSON_ENCODE(Change)]); + (start, "continuous") -> + ok; + (start, _) -> + send_chunk(Resp, "{\"results\":[\n"); + ({stop, EndSeq}, "continuous") -> + send_chunk( + Resp, + [?JSON_ENCODE({[{<<"last_seq">>, EndSeq}]}) | "\n"] + ), + end_json_response(Resp); + ({stop, EndSeq}, _) -> + send_chunk( + Resp, + io_lib:format("\n],\n\"last_seq\":~w}\n", [EndSeq]) + ), + end_json_response(Resp); + (timeout, _) -> + send_chunk(Resp, "\n") end - end. - -get_rest_db_updated() -> - receive db_updated -> get_rest_db_updated() - after 0 -> updated - end. - -end_sending_changes(Resp, EndSeq, "continuous") -> - send_chunk(Resp, [?JSON_ENCODE({[{<<"last_seq">>, EndSeq}]}) | "\n"]), - end_json_response(Resp); -end_sending_changes(Resp, EndSeq, _Else) -> - send_chunk(Resp, io_lib:format("\n],\n\"last_seq\":~w}\n", [EndSeq])), - end_json_response(Resp). 
- -keep_sending_changes(#httpd{user_ctx=UserCtx,path_parts=[DbName|_]}=Req, Resp, - Db, StartSeq, Prepend, Timeout, TimeoutFun, ResponseType, Limit, Filter) -> - {ok, {_, EndSeq, Prepend2, _, _, _, NewLimit, _}} = send_changes(Req, Resp, Db, fwd, StartSeq, - Prepend, ResponseType, Limit, Filter), - couch_db:close(Db), - if - Limit > NewLimit, ResponseType == "longpoll" -> - end_sending_changes(Resp, EndSeq, ResponseType); - true -> - case wait_db_updated(Timeout, TimeoutFun) of - updated -> - case couch_db:open(DbName, [{user_ctx, UserCtx}]) of - {ok, Db2} -> - keep_sending_changes(Req, Resp, Db2, EndSeq, Prepend2, Timeout, - TimeoutFun, ResponseType, NewLimit, Filter); - _Else -> - end_sending_changes(Resp, EndSeq, ResponseType) - end; - stop -> - end_sending_changes(Resp, EndSeq, ResponseType) - end - end. - -changes_enumerator(DocInfos, {Db, _, _, FilterFun, Resp, "continuous", Limit, IncludeDocs}) -> - [#doc_info{id=Id, high_seq=Seq, revs=[#rev_info{deleted=Del,rev=Rev}|_]}|_] = DocInfos, - Results0 = FilterFun(DocInfos), - Results = [Result || Result <- Results0, Result /= null], - Go = if Limit =< 1 -> stop; true -> ok end, - case Results of - [] -> - {Go, {Db, Seq, nil, FilterFun, Resp, "continuous", Limit, IncludeDocs}}; - _ -> - send_chunk(Resp, [?JSON_ENCODE(changes_row(Db, Seq, Id, Del, Results, Rev, IncludeDocs)) - |"\n"]), - {Go, {Db, Seq, nil, FilterFun, Resp, "continuous", Limit-1, IncludeDocs}} - end; -changes_enumerator(DocInfos, {Db, _, Prepend, FilterFun, Resp, _, Limit, IncludeDocs}) -> - [#doc_info{id=Id, high_seq=Seq, revs=[#rev_info{deleted=Del,rev=Rev}|_]}|_] = DocInfos, - Results0 = FilterFun(DocInfos), - Results = [Result || Result <- Results0, Result /= null], - Go = if Limit =< 1 -> stop; true -> ok end, - case Results of - [] -> - {Go, {Db, Seq, Prepend, FilterFun, Resp, nil, Limit, IncludeDocs}}; + end, + ChangesArgs = parse_changes_query(Req), + ChangesFun = couch_changes:handle_changes(ChangesArgs, Req, Db), + WrapperFun = case ChangesArgs#changes_args.feed of + "normal" -> + {ok, Info} = couch_db:get_db_info(Db), + CurrentEtag = couch_httpd:make_etag(Info), + fun(FeedChangesFun) -> + couch_httpd:etag_respond( + Req, + CurrentEtag, + fun() -> + {ok, Resp} = couch_httpd:start_json_response( + Req, 200, [{"Etag", CurrentEtag}] + ), + FeedChangesFun(MakeCallback(Resp)) + end + ) + end; _ -> - send_chunk(Resp, [Prepend, ?JSON_ENCODE( - changes_row(Db, Seq, Id, Del, Results, Rev, IncludeDocs))]), - {Go, {Db, Seq, <<",\n">>, FilterFun, Resp, nil, Limit-1, IncludeDocs}} - end. + % "longpoll" or "continuous" + {ok, Resp} = couch_httpd:start_json_response(Req, 200), + fun(FeedChangesFun) -> + FeedChangesFun(MakeCallback(Resp)) + end + end, + couch_stats_collector:track_process_count( + {httpd, clients_requesting_changes} + ), + WrapperFun(ChangesFun). -changes_row(Db, Seq, Id, Del, Results, Rev, true) -> - {[{seq,Seq},{id,Id},{changes,Results}] ++ deleted_item(Del) ++ - couch_httpd_view:doc_member(Db, {Id, Rev})}; -changes_row(_, Seq, Id, Del, Results, _, false) -> - {[{seq,Seq},{id,Id},{changes,Results}] ++ deleted_item(Del)}. - -deleted_item(true) -> [{deleted,true}]; -deleted_item(_) -> []. 
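The refactor above moves the feed machinery into the new couch_changes module: handle_changes/3 returns a fun that drives a caller-supplied callback with start, {change, Change, Prepend}, {stop, EndSeq} and timeout events. A minimal sketch of a consumer; collect_changes/2 is hypothetical, and #changes_args{} plus ?LOG_DEBUG come from couch_db.hrl:

    collect_changes(Req, Db) ->
        Args = #changes_args{feed = "normal", since = 0},
        ChangesFun = couch_changes:handle_changes(Args, Req, Db),
        ChangesFun(fun({change, {ChangeProps}, _Prepend}, _FeedType) ->
                           % each change row arrives as EJSON
                           ?LOG_DEBUG("change: ~p", [ChangeProps]);
                      (_Event, _FeedType) ->
                           ok   % start / {stop, EndSeq} / timeout
                   end).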
- -send_changes(Req, Resp, Db, Dir, StartSeq, Prepend, ResponseType, Limit, FilterFun) -> - Style = list_to_existing_atom( - couch_httpd:qs_value(Req, "style", "main_only")), - IncludeDocs = list_to_existing_atom( - couch_httpd:qs_value(Req, "include_docs", "false")), - couch_db:changes_since(Db, Style, StartSeq, fun changes_enumerator/2, - [{dir, Dir}], {Db, StartSeq, Prepend, FilterFun, Resp, ResponseType, Limit, IncludeDocs}). - -make_filter_fun(Req, Db) -> - Filter = couch_httpd:qs_value(Req, "filter", ""), - case [list_to_binary(couch_httpd:unquote(Part)) - || Part <- string:tokens(Filter, "/")] of - [] -> - fun(DocInfos) -> - % doing this as a batch is more efficient for external filters - [{[{rev, couch_doc:rev_to_str(Rev)}]} || - #doc_info{revs=[#rev_info{rev=Rev}|_]} <- DocInfos] - end; - [DName, FName] -> - DesignId = <<"_design/", DName/binary>>, - DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []), - % validate that the ddoc has the filter fun - #doc{body={Props}} = DDoc, - couch_util:get_nested_json_value({Props}, [<<"filters">>, FName]), - fun(DocInfos) -> - Docs = [Doc || {ok, Doc} <- [ - {ok, Doc} = couch_db:open_doc(Db, DInfo, [deleted]) - || DInfo <- DocInfos]], - {ok, Passes} = couch_query_servers:filter_docs(Req, Db, DDoc, FName, Docs), - [{[{rev, couch_doc:rev_to_str(Rev)}]} - || #doc_info{revs=[#rev_info{rev=Rev}|_]} <- DocInfos, - Pass <- Passes, Pass == true] - end; - _Else -> - throw({bad_request, - "filter parameter must be of the form `designname/filtername`"}) - end. -handle_compact_req(#httpd{method='POST',path_parts=[DbName,_,Id|_]}=Req, _Db) -> +handle_compact_req(#httpd{method='POST',path_parts=[DbName,_,Id|_]}=Req, Db) -> + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), ok = couch_view_compactor:start_compact(DbName, Id), send_json(Req, 202, {[{ok, true}]}); handle_compact_req(#httpd{method='POST'}=Req, Db) -> + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), ok = couch_db:start_compact(Db), send_json(Req, 202, {[{ok, true}]}); @@ -262,6 +137,8 @@ handle_compact_req(Req, _Db) -> handle_view_cleanup_req(#httpd{method='POST'}=Req, Db) -> % delete unreferenced index files + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), ok = couch_view:cleanup_index_files(Db), send_json(Req, 202, {[{ok, true}]}); @@ -276,7 +153,9 @@ handle_design_req(#httpd{ % load ddoc DesignId = <<"_design/", DesignName/binary>>, DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []), - Handler = couch_util:dict_find(Action, DesignUrlHandlers, fun db_req/2), + Handler = couch_util:dict_find(Action, DesignUrlHandlers, fun(_, _, _) -> + throw({not_found, <<"missing handler: ", Action/binary>>}) + end), Handler(Req, Db, DDoc); handle_design_req(Req, Db) -> @@ -298,23 +177,13 @@ handle_design_info_req(Req, _Db, _DDoc) -> create_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> ok = couch_httpd:verify_is_server_admin(Req), - LDbName = ?b2l(DbName), - case couch_config:get("couch_httpd_auth", "authentication_db") of - LDbName -> - % make sure user's db always has the auth ddoc - {ok, Db} = couch_httpd_auth:ensure_users_db_exists(DbName), - couch_db:close(Db), - DbUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), - send_json(Req, 201, [{"Location", DbUrl}], {[{ok, true}]}); - _Else -> - case couch_server:create(DbName, [{user_ctx, UserCtx}]) of - {ok, Db} -> - couch_db:close(Db), - DbUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), - 
send_json(Req, 201, [{"Location", DbUrl}], {[{ok, true}]}); - Error -> - throw(Error) - end + case couch_server:create(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + couch_db:close(Db), + DbUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), + send_json(Req, 201, [{"Location", DbUrl}], {[{ok, true}]}); + Error -> + throw(Error) end. delete_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> @@ -327,15 +196,6 @@ delete_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> end. do_db_req(#httpd{user_ctx=UserCtx,path_parts=[DbName|_]}=Req, Fun) -> - LDbName = ?b2l(DbName), - % I hope this lookup is cheap. - case couch_config:get("couch_httpd_auth", "authentication_db") of - LDbName -> - % make sure user's db always has the auth ddoc - {ok, ADb} = couch_httpd_auth:ensure_users_db_exists(DbName), - couch_db:close(ADb); - _Else -> ok - end, case couch_db:open(DbName, [{user_ctx, UserCtx}]) of {ok, Db} -> try @@ -352,6 +212,7 @@ db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> send_json(Req, {DbInfo}); db_req(#httpd{method='POST',path_parts=[DbName]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), Doc = couch_doc:from_json_obj(couch_httpd:json_body(Req)), Doc2 = case Doc#doc.id of <<"">> -> @@ -392,6 +253,7 @@ db_req(#httpd{path_parts=[_DbName]}=Req, _Db) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST"); db_req(#httpd{method='POST',path_parts=[_,<<"_ensure_full_commit">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), UpdateSeq = couch_db:get_update_seq(Db), CommittedSeq = couch_db:get_committed_update_seq(Db), {ok, StartTime} = @@ -404,7 +266,6 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_ensure_full_commit">>]}=Req, Db) - throw({bad_request, "can't do a full commit ahead of current update_seq"}); RequiredSeq > CommittedSeq -> - % user asked for an explicit sequence, don't commit any batches couch_db:ensure_full_commit(Db); true -> {ok, Db#db.instance_start_time} @@ -420,8 +281,9 @@ db_req(#httpd{path_parts=[_,<<"_ensure_full_commit">>]}=Req, _Db) -> db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> couch_stats_collector:increment({httpd, bulk_requests}), + couch_httpd:validate_ctype(Req, "application/json"), {JsonProps} = couch_httpd:json_body_obj(Req), - DocsArray = proplists:get_value(<<"docs">>, JsonProps), + DocsArray = couch_util:get_value(<<"docs">>, JsonProps), case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> Options = [full_commit]; @@ -430,7 +292,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> _ -> Options = [] end, - case proplists:get_value(<<"new_edits">>, JsonProps, true) of + case couch_util:get_value(<<"new_edits">>, JsonProps, true) of true -> Docs = lists:map( fun({ObjProps} = JsonObj) -> @@ -440,7 +302,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> <<>> -> couch_uuids:new(); Id0 -> Id0 end, - case proplists:get_value(<<"_rev">>, ObjProps) of + case couch_util:get_value(<<"_rev">>, ObjProps) of undefined -> Revs = {0, []}; Rev -> @@ -451,7 +313,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> end, DocsArray), Options2 = - case proplists:get_value(<<"all_or_nothing">>, JsonProps) of + case couch_util:get_value(<<"all_or_nothing">>, JsonProps) of true -> [all_or_nothing|Options]; _ -> Options end, @@ -467,7 +329,11 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> send_json(Req, 417, ErrorsJson) end; false -> - Docs = [couch_doc:from_json_obj(JsonObj) || 
JsonObj <- DocsArray], + Docs = lists:map(fun(JsonObj) -> + Doc = couch_doc:from_json_obj(JsonObj), + validate_attachment_names(Doc), + Doc + end, DocsArray), {ok, Errors} = couch_db:update_docs(Db, Docs, Options, replicated_changes), ErrorsJson = lists:map(fun update_doc_result_to_json/1, Errors), @@ -477,6 +343,7 @@ db_req(#httpd{path_parts=[_,<<"_bulk_docs">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), {IdsRevs} = couch_httpd:json_body_obj(Req), IdsRevs2 = [{Id, couch_doc:parse_revs(Revs)} || {Id, Revs} <- IdsRevs], @@ -492,11 +359,13 @@ db_req(#httpd{path_parts=[_,<<"_purge">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); db_req(#httpd{method='GET',path_parts=[_,<<"_all_docs">>]}=Req, Db) -> - all_docs_view(Req, Db, nil); + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + all_docs_view(Req, Db, Keys); db_req(#httpd{method='POST',path_parts=[_,<<"_all_docs">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), {Fields} = couch_httpd:json_body_obj(Req), - case proplists:get_value(<<"keys">>, Fields, nil) of + case couch_util:get_value(<<"keys">>, Fields, nil) of nil -> ?LOG_DEBUG("POST to _all_docs with no keys member.", []), all_docs_view(Req, Db, nil); @@ -521,39 +390,36 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_missing_revs">>]}=Req, Db) -> db_req(#httpd{path_parts=[_,<<"_missing_revs">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); - db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> {JsonDocIdRevs} = couch_httpd:json_body_obj(Req), - JsonDocIdRevs2 = + JsonDocIdRevs2 = [{Id, couch_doc:parse_revs(RevStrs)} || {Id, RevStrs} <- JsonDocIdRevs], {ok, Results} = couch_db:get_missing_revs(Db, JsonDocIdRevs2), - Results2 = + Results2 = lists:map(fun({Id, MissingRevs, PossibleAncestors}) -> {Id, - {[{missing, couch_doc:revs_to_strs(MissingRevs)}] ++ + {[{missing, couch_doc:revs_to_strs(MissingRevs)}] ++ if PossibleAncestors == [] -> []; - true -> - [{possible_ancestors, + true -> + [{possible_ancestors, couch_doc:revs_to_strs(PossibleAncestors)}] end}} end, Results), - send_json(Req, {[{missing, {Results2}}]}); + send_json(Req, {Results2}); db_req(#httpd{path_parts=[_,<<"_revs_diff">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); - -db_req(#httpd{method='PUT',path_parts=[_,<<"_admins">>]}=Req, - Db) -> - Admins = couch_httpd:json_body(Req), - ok = couch_db:set_admins(Db, Admins), +db_req(#httpd{method='PUT',path_parts=[_,<<"_security">>]}=Req, Db) -> + SecObj = couch_httpd:json_body(Req), + ok = couch_db:set_security(Db, SecObj), send_json(Req, {[{<<"ok">>, true}]}); -db_req(#httpd{method='GET',path_parts=[_,<<"_admins">>]}=Req, Db) -> - send_json(Req, couch_db:get_admins(Db)); +db_req(#httpd{method='GET',path_parts=[_,<<"_security">>]}=Req, Db) -> + send_json(Req, couch_db:get_security(Db)); -db_req(#httpd{path_parts=[_,<<"_admins">>]}=Req, _Db) -> +db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>]}=Req, @@ -620,7 +486,7 @@ all_docs_view(Req, Db, Keys) -> CurrentEtag = couch_httpd:make_etag(Info), couch_httpd:etag_respond(Req, CurrentEtag, fun() -> - TotalRowCount = proplists:get_value(doc_count, Info), + TotalRowCount = couch_util:get_value(doc_count, Info), StartId = if is_binary(StartKey) -> StartKey; true -> StartDocId end, @@ -628,10 +494,16 @@ all_docs_view(Req, Db, Keys) -> 
true -> EndDocId end, FoldAccInit = {Limit, SkipCount, undefined, []}, - + UpdateSeq = couch_db:get_update_seq(Db), + JsonParams = case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + [{update_seq, UpdateSeq}]; + _Else -> + [] + end, case Keys of nil -> - FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, UpdateSeq, TotalRowCount, #view_fold_helper_funs{ reduce_count = fun couch_db:enum_docs_reduce_to_count/1 }), @@ -646,9 +518,9 @@ all_docs_view(Req, Db, Keys) -> {ok, LastOffset, FoldResult} = couch_db:enum_docs(Db, AdapterFun, FoldAccInit, [{start_key, StartId}, {dir, Dir}, {if Inclusive -> end_key; true -> end_key_gt end, EndId}]), - couch_httpd_view:finish_view_fold(Req, TotalRowCount, LastOffset, FoldResult); + couch_httpd_view:finish_view_fold(Req, TotalRowCount, LastOffset, FoldResult, JsonParams); _ -> - FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, UpdateSeq, TotalRowCount, #view_fold_helper_funs{ reduce_count = fun(Offset) -> Offset end }), @@ -675,7 +547,7 @@ all_docs_view(Req, Db, Keys) -> {_, FoldAcc2} = FoldlFun(Doc, 0, FoldAcc), FoldAcc2 end, FoldAccInit, Keys), - couch_httpd_view:finish_view_fold(Req, TotalRowCount, 0, FoldResult) + couch_httpd_view:finish_view_fold(Req, TotalRowCount, 0, FoldResult, JsonParams) end end). @@ -684,72 +556,73 @@ db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> couch_doc_open(Db, DocId, nil, []), case couch_httpd:qs_value(Req, "rev") of undefined -> - update_doc(Req, Db, DocId, + update_doc(Req, Db, DocId, couch_doc_from_req(Req, DocId, {[{<<"_deleted">>,true}]})); Rev -> - update_doc(Req, Db, DocId, - couch_doc_from_req(Req, DocId, + update_doc(Req, Db, DocId, + couch_doc_from_req(Req, DocId, {[{<<"_rev">>, ?l2b(Rev)},{<<"_deleted">>,true}]})) end; -db_doc_req(#httpd{method='GET'}=Req, Db, DocId) -> +db_doc_req(#httpd{method = 'GET', mochi_req = MochiReq} = Req, Db, DocId) -> #doc_query_args{ rev = Rev, open_revs = Revs, - options = Options, + options = Options1, atts_since = AttsSince } = parse_doc_query(Req), + Options = case AttsSince of + nil -> + Options1; + RevList when is_list(RevList) -> + [{atts_since, RevList}, attachments | Options1] + end, case Revs of [] -> Doc = couch_doc_open(Db, DocId, Rev, Options), - Options2 = - if AttsSince /= nil -> - RevPos = find_ancestor_rev_pos(Doc#doc.revs, AttsSince), - [{atts_after_revpos, RevPos} | Options]; - true -> Options - end, - send_doc(Req, Doc, Options2); + send_doc(Req, Doc, Options); _ -> {ok, Results} = couch_db:open_doc_revs(Db, DocId, Revs, Options), - {ok, Resp} = start_json_response(Req, 200), - send_chunk(Resp, "["), - % We loop through the docs. The first time through the separator - % is whitespace, then a comma on subsequent iterations. - lists:foldl( - fun(Result, AccSeparator) -> - case Result of - {ok, Doc} -> - JsonDoc = couch_doc:to_json_obj(Doc, Options), - Json = ?JSON_ENCODE({[{ok, JsonDoc}]}), - send_chunk(Resp, AccSeparator ++ Json); - {{not_found, missing}, RevId} -> - RevStr = couch_doc:rev_to_str(RevId), - Json = ?JSON_ENCODE({[{"missing", RevStr}]}), - send_chunk(Resp, AccSeparator ++ Json) + case MochiReq:accepts_content_type("multipart/mixed") of + false -> + {ok, Resp} = start_json_response(Req, 200), + send_chunk(Resp, "["), + % We loop through the docs. 
The first time through the separator + % is whitespace, then a comma on subsequent iterations. + lists:foldl( + fun(Result, AccSeparator) -> + case Result of + {ok, Doc} -> + JsonDoc = couch_doc:to_json_obj(Doc, Options), + Json = ?JSON_ENCODE({[{ok, JsonDoc}]}), + send_chunk(Resp, AccSeparator ++ Json); + {{not_found, missing}, RevId} -> + RevStr = couch_doc:rev_to_str(RevId), + Json = ?JSON_ENCODE({[{"missing", RevStr}]}), + send_chunk(Resp, AccSeparator ++ Json) + end, + "," % AccSeparator now has a comma end, - "," % AccSeparator now has a comma - end, - "", Results), - send_chunk(Resp, "]"), - end_json_response(Resp) + "", Results), + send_chunk(Resp, "]"), + end_json_response(Resp); + true -> + send_docs_multipart(Req, Results, Options) + end end; + db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> + couch_httpd:validate_referer(Req), couch_doc:validate_docid(DocId), - case couch_httpd:header_value(Req, "Content-Type") of - "multipart/form-data" ++ _Rest -> - ok; - _Else -> - throw({bad_ctype, <<"Invalid Content-Type header for form upload">>}) - end, + couch_httpd:validate_ctype(Req, "multipart/form-data"), Form = couch_httpd:parse_form(Req), - case proplists:is_defined("_doc", Form) of - true -> - Json = ?JSON_DECODE(proplists:get_value("_doc", Form)), - Doc = couch_doc_from_req(Req, DocId, Json); - false -> - Rev = couch_doc:parse_rev(list_to_binary(proplists:get_value("_rev", Form))), - {ok, [{ok, Doc}]} = couch_db:open_doc_revs(Db, DocId, [Rev], []) + case couch_util:get_value("_doc", Form) of + undefined -> + Rev = couch_doc:parse_rev(couch_util:get_value("_rev", Form)), + {ok, [{ok, Doc}]} = couch_db:open_doc_revs(Db, DocId, [Rev], []); + Json -> + Doc = couch_doc_from_req(Req, DocId, ?JSON_DECODE(Json)) end, UpdatedAtts = [ #att{name=validate_attachment_name(Name), @@ -785,13 +658,13 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)), RespHeaders = [{"Location", Loc}], - case couch_httpd:header_value(Req, "Content-Type") of - ("multipart/related" ++ _Rest) = ContentType-> - Doc0 = couch_doc:doc_from_multi_part_stream(ContentType, + case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of + ("multipart/related;" ++ _) = ContentType -> + {ok, Doc0} = couch_doc:doc_from_multi_part_stream(ContentType, fun() -> receive_request_data(Req) end), Doc = couch_doc_from_req(Req, DocId, Doc0), update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType); - _ -> + _Else -> case couch_httpd:qs_value(Req, "batch") of "ok" -> % batch @@ -810,7 +683,8 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> ]}); _Normal -> % normal - Doc = couch_doc_from_req(Req, DocId, couch_httpd:json_body(Req)), + Body = couch_httpd:json_body(Req), + Doc = couch_doc_from_req(Req, DocId, Body), update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType) end end; @@ -835,24 +709,13 @@ db_doc_req(#httpd{method='COPY'}=Req, Db, SourceDocId) -> db_doc_req(Req, _Db, _DocId) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST,PUT,COPY"). -find_ancestor_rev_pos({_, []}, _AttsSinceRevs) -> - 0; -find_ancestor_rev_pos(_DocRevs, []) -> - 0; -find_ancestor_rev_pos({RevPos, [RevId|Rest]}, AttsSinceRevs) -> - case lists:member({RevPos, RevId}, AttsSinceRevs) of - true -> - RevPos; - false -> - find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs) - end. 
send_doc(Req, Doc, Options) -> case Doc#doc.meta of [] -> DiskEtag = couch_httpd:doc_etag(Doc), % output etag only when we have no meta - couch_httpd:etag_respond(Req, DiskEtag, fun() -> + couch_httpd:etag_respond(Req, DiskEtag, fun() -> send_doc_efficiently(Req, Doc, [{"Etag", DiskEtag}], Options) end); _ -> @@ -862,39 +725,84 @@ send_doc(Req, Doc, Options) -> send_doc_efficiently(Req, #doc{atts=[]}=Doc, Headers, Options) -> send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)); -send_doc_efficiently(Req, #doc{atts=Atts}=Doc, Headers, Options) -> - case lists:member(attachments, Options) orelse - proplists:is_defined(atts_after_revpos, Options) of +send_doc_efficiently(#httpd{mochi_req = MochiReq} = Req, + #doc{atts = Atts} = Doc, Headers, Options) -> + case lists:member(attachments, Options) of true -> - AcceptedTypes = case couch_httpd:header_value(Req, "Accept") of - undefined -> []; - AcceptHeader -> string:tokens(AcceptHeader, ", ") - end, - case lists:member("multipart/related", AcceptedTypes) of + case MochiReq:accepts_content_type("multipart/related") of false -> - send_json(Req, 200, [], couch_doc:to_json_obj(Doc, Options)); + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)); true -> Boundary = couch_uuids:random(), - JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, [follows|Options])), - AttsSinceRevPos = proplists:get_value(atts_after_revpos, Options, 0), - Len = couch_doc:len_doc_to_multi_part_stream(Boundary,JsonBytes,Atts, - AttsSinceRevPos), - CType = {<<"Content-Type">>, - <<"multipart/related; boundary=\"", Boundary/binary, "\"">>}, + JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, + [attachments, follows|Options])), + {ContentType, Len} = couch_doc:len_doc_to_multi_part_stream( + Boundary,JsonBytes, Atts,false), + CType = {<<"Content-Type">>, ContentType}, {ok, Resp} = start_response_length(Req, 200, [CType|Headers], Len), couch_doc:doc_to_multi_part_stream(Boundary,JsonBytes,Atts, - AttsSinceRevPos, - fun(Data) -> couch_httpd:send(Resp, Data) end) + fun(Data) -> couch_httpd:send(Resp, Data) end, false) end; false -> - send_json(Req, 200, [], couch_doc:to_json_obj(Doc, Options)) + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)) end. - +send_docs_multipart(Req, Results, Options) -> + OuterBoundary = couch_uuids:random(), + InnerBoundary = couch_uuids:random(), + CType = {"Content-Type", + "multipart/mixed; boundary=\"" ++ ?b2l(OuterBoundary) ++ "\""}, + {ok, Resp} = start_chunked_response(Req, 200, [CType]), + couch_httpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), + lists:foreach( + fun({ok, #doc{atts=Atts}=Doc}) -> + JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, + [attachments,follows|Options])), + {ContentType, _Len} = couch_doc:len_doc_to_multi_part_stream( + InnerBoundary, JsonBytes, Atts, false), + couch_httpd:send_chunk(Resp, <<"\r\nContent-Type: ", + ContentType/binary, "\r\n\r\n">>), + couch_doc:doc_to_multi_part_stream(InnerBoundary, JsonBytes, Atts, + fun(Data) -> couch_httpd:send_chunk(Resp, Data) + end, false), + couch_httpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>); + ({{not_found, missing}, RevId}) -> + RevStr = couch_doc:rev_to_str(RevId), + Json = ?JSON_ENCODE({[{"missing", RevStr}]}), + couch_httpd:send_chunk(Resp, + [<<"\r\nContent-Type: application/json; error=\"true\"\r\n\r\n">>, + Json, + <<"\r\n--", OuterBoundary/binary>>]) + end, Results), + couch_httpd:send_chunk(Resp, <<"--">>), + couch_httpd:last_chunk(Resp). 
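With send_docs_multipart/3 in place, a client that asks for several revisions and advertises multipart/mixed gets all of them, attachments included, in one response instead of a JSON array. A sketch using the bundled ibrowse client; the URL and revision ids are illustrative:

    fetch_revs() ->
        Url = "http://127.0.0.1:5984/db/doc?open_revs=[\"1-abc\",\"2-def\"]",
        {ok, "200", _Headers, Body} =
            ibrowse:send_req(Url, [{"Accept", "multipart/mixed"}], get),
        Body.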
+ +send_ranges_multipart(Req, ContentType, Len, Att, Ranges) -> + Boundary = couch_uuids:random(), + CType = {"Content-Type", + "multipart/byteranges; boundary=\"" ++ ?b2l(Boundary) ++ "\""}, + {ok, Resp} = start_chunked_response(Req, 206, [CType]), + couch_httpd:send_chunk(Resp, <<"--", Boundary/binary>>), + lists:foreach(fun({From, To}) -> + ContentRange = make_content_range(From, To, Len), + couch_httpd:send_chunk(Resp, + <<"\r\nContent-Type: ", ContentType/binary, "\r\n", + "Content-Range: ", ContentRange/binary, "\r\n", + "\r\n">>), + couch_doc:range_att_foldl(Att, From, To + 1, + fun(Seg, _) -> send_chunk(Resp, Seg) end, {ok, Resp}), + couch_httpd:send_chunk(Resp, <<"\r\n--", Boundary/binary>>) + end, Ranges), + couch_httpd:send_chunk(Resp, <<"--">>), + couch_httpd:last_chunk(Resp), + {ok, Resp}. receive_request_data(Req) -> {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}. +make_content_range(From, To, Len) -> + ?l2b(io_lib:format("bytes ~B-~B/~B", [From, To, Len])). + update_doc_result_to_json({{Id, Rev}, Error}) -> {_Code, Err, Msg} = couch_httpd:error_info(Error), {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)}, @@ -943,6 +851,8 @@ couch_doc_from_req(Req, DocId, #doc{revs=Revs}=Doc) -> case extract_header_rev(Req, ExplicitDocRev) of missing_rev -> Revs2 = {0, []}; + ExplicitDocRev -> + Revs2 = Revs; {Pos, Rev} -> Revs2 = {Pos, [Rev]} end, @@ -977,7 +887,7 @@ couch_doc_open(Db, DocId, Rev, Options) -> % Attachment request handlers -db_attachment_req(#httpd{method='GET'}=Req, Db, DocId, FileNameParts) -> +db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNameParts) -> FileName = list_to_binary(mochiweb_util:join(lists:map(fun binary_to_list/1, FileNameParts),"/")), #doc_query_args{ rev=Rev, @@ -989,24 +899,83 @@ db_attachment_req(#httpd{method='GET'}=Req, Db, DocId, FileNameParts) -> case [A || A <- Atts, A#att.name == FileName] of [] -> throw({not_found, "Document is missing attachment"}); - [#att{type=Type, len=Len}=Att] -> + [#att{type=Type, encoding=Enc, disk_len=DiskLen, att_len=AttLen}=Att] -> Etag = couch_httpd:doc_etag(Doc), - couch_httpd:etag_respond(Req, Etag, fun() -> - {ok, Resp} = start_response_length(Req, 200, [ - {"ETag", Etag}, - {"Cache-Control", "must-revalidate"}, - {"Content-Type", binary_to_list(Type)} - ], integer_to_list(Len)), - couch_doc:att_foldl( - Att, - fun(BinSegment, _) -> send(Resp, BinSegment) end, - {ok, Resp} % Seed in case of 0 byte attachment. - ) - end) + ReqAcceptsAttEnc = lists:member( + atom_to_list(Enc), + couch_httpd:accepted_encodings(Req) + ), + Len = case {Enc, ReqAcceptsAttEnc} of + {identity, _} -> + % stored and served in identity form + DiskLen; + {_, false} when DiskLen =/= AttLen -> + % Stored encoded, but client doesn't accept the encoding we used, + % so we need to decode on the fly. DiskLen is the identity length + % of the attachment. + DiskLen; + {_, true} -> + % Stored and served encoded. AttLen is the encoded length. + AttLen; + _ -> + % We received an encoded attachment and stored it as such, so we + % don't know the identity length. The client doesn't accept the + % encoding, and since we cannot serve a correct Content-Length + % header we'll fall back to a chunked response. 
+ undefined + end, + Headers = [ + {"ETag", Etag}, + {"Cache-Control", "must-revalidate"}, + {"Content-Type", binary_to_list(Type)} + ] ++ case ReqAcceptsAttEnc of + true -> + [{"Content-Encoding", atom_to_list(Enc)}]; + _ -> + [] + end ++ case Enc of + identity -> + [{"Accept-Ranges", "bytes"}]; + _ -> + [{"Accept-Ranges", "none"}] + end, + AttFun = case ReqAcceptsAttEnc of + false -> + fun couch_doc:att_foldl_decode/3; + true -> + fun couch_doc:att_foldl/3 + end, + couch_httpd:etag_respond( + Req, + Etag, + fun() -> + case Len of + undefined -> + {ok, Resp} = start_chunked_response(Req, 200, Headers), + AttFun(Att, fun(Seg, _) -> send_chunk(Resp, Seg) end, {ok, Resp}), + last_chunk(Resp); + _ -> + Ranges = parse_ranges(MochiReq:get(range), Len), + case {Enc, Ranges} of + {identity, [{From, To}]} -> + Headers1 = [{<<"Content-Range">>, make_content_range(From, To, Len)}] + ++ Headers, + {ok, Resp} = start_response_length(Req, 206, Headers1, To - From + 1), + couch_doc:range_att_foldl(Att, From, To + 1, + fun(Seg, _) -> send(Resp, Seg) end, {ok, Resp}); + {identity, Ranges} when is_list(Ranges) -> + send_ranges_multipart(Req, Type, Len, Att, Ranges); + _ -> + {ok, Resp} = start_response_length(Req, 200, Headers, Len), + AttFun(Att, fun(Seg, _) -> send(Resp, Seg) end, {ok, Resp}) + end + end + end + ) end; -db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) +db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileNameParts) when (Method == 'PUT') or (Method == 'DELETE') -> FileName = validate_attachment_name( mochiweb_util:join( @@ -1040,17 +1009,42 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) 0 -> <<"">>; Length when is_integer(Length) -> - fun() -> couch_httpd:recv(Req, 0) end; - Length -> - exit({length_not_integer, Length}) + Expect = case couch_httpd:header_value(Req, "expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + MochiReq:start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end, + + + fun() -> couch_httpd:recv(Req, 0) end end, - len = case couch_httpd:header_value(Req,"Content-Length") of + att_len = case couch_httpd:header_value(Req,"Content-Length") of undefined -> undefined; Length -> list_to_integer(Length) end, - md5 = get_md5_header(Req) + md5 = get_md5_header(Req), + encoding = case string:to_lower(string:strip( + couch_httpd:header_value(Req,"Content-Encoding","identity") + )) of + "identity" -> + identity; + "gzip" -> + gzip; + _ -> + throw({ + bad_ctype, + "Only gzip and identity content-encodings are supported" + }) + end }] end, @@ -1076,7 +1070,8 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) 'DELETE' -> {200, []}; _ -> - {201, [{"Location", absolute_uri(Req, "/" ++ + {201, [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(UpdatedRev)) ++ "\""}, + {"Location", absolute_uri(Req, "/" ++ binary_to_list(DbName) ++ "/" ++ binary_to_list(DocId) ++ "/" ++ binary_to_list(FileName) @@ -1091,6 +1086,25 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) db_attachment_req(Req, _Db, _DocId, _FileNameParts) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT"). +parse_ranges(undefined, _Len) -> + undefined; +parse_ranges(fail, _Len) -> + undefined; +parse_ranges(Ranges, Len) -> + parse_ranges(Ranges, Len, []). 
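The parse_ranges/3 clauses that follow normalize each mochiweb range tuple against the attachment length. Worked examples for Len = 1000 (illustrative inputs):

    % {0, none}   -> {0, 999}     open-ended range runs to the last byte
    % {none, 500} -> {500, 999}   suffix range: the last 500 bytes
    % {0, 1500}   -> {0, 999}     end is clamped to Len - 1
    % {500, 100}  -> throws requested_range_not_satisfiable (To < From)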
+ +parse_ranges([], _Len, Acc) -> + lists:reverse(Acc); +parse_ranges([{From, To}|_], _Len, _Acc) when is_integer(From) andalso is_integer(To) andalso To < From -> + throw(requested_range_not_satisfiable); +parse_ranges([{From, To}|Rest], Len, Acc) when is_integer(To) andalso To >= Len -> + parse_ranges([{From, Len-1}] ++ Rest, Len, Acc); +parse_ranges([{none, To}|Rest], Len, Acc) -> + parse_ranges([{Len - To, Len - 1}] ++ Rest, Len, Acc); +parse_ranges([{From, none}|Rest], Len, Acc) -> + parse_ranges([{From, Len - 1}] ++ Rest, Len, Acc); +parse_ranges([{From,To}|Rest], Len, Acc) -> + parse_ranges(Rest, Len, [{From, To}] ++ Acc). get_md5_header(Req) -> ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"), @@ -1112,17 +1126,6 @@ get_md5_header(Req) -> <<>> end. -parse_doc_format(FormatStr) when is_binary(FormatStr) -> - parse_doc_format(?b2l(FormatStr)); -parse_doc_format(FormatStr) when is_list(FormatStr) -> - SplitFormat = lists:splitwith(fun($/) -> false; (_) -> true end, FormatStr), - case SplitFormat of - {DesignName, [$/ | ShowName]} -> {?l2b(DesignName), ?l2b(ShowName)}; - _Else -> throw({bad_request, <<"Invalid doc format">>}) - end; -parse_doc_format(_BadFormatStr) -> - throw({bad_request, <<"Invalid doc format">>}). - parse_doc_query(Req) -> lists:foldl(fun({Key,Value}, Args) -> case {Key, Value} of @@ -1161,11 +1164,41 @@ parse_doc_query(Req) -> Args#doc_query_args{update_type=replicated_changes}; {"new_edits", "true"} -> Args#doc_query_args{update_type=interactive_edit}; + {"att_encoding_info", "true"} -> + Options = [att_encoding_info | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; _Else -> % unknown key value pair, ignore. Args end end, #doc_query_args{}, couch_httpd:qs(Req)). +parse_changes_query(Req) -> + lists:foldl(fun({Key, Value}, Args) -> + case {Key, Value} of + {"feed", _} -> + Args#changes_args{feed=Value}; + {"descending", "true"} -> + Args#changes_args{dir=rev}; + {"since", _} -> + Args#changes_args{since=list_to_integer(Value)}; + {"limit", _} -> + Args#changes_args{limit=list_to_integer(Value)}; + {"style", _} -> + Args#changes_args{style=list_to_existing_atom(Value)}; + {"heartbeat", "true"} -> + Args#changes_args{heartbeat=true}; + {"heartbeat", _} -> + Args#changes_args{heartbeat=list_to_integer(Value)}; + {"timeout", _} -> + Args#changes_args{timeout=list_to_integer(Value)}; + {"include_docs", "true"} -> + Args#changes_args{include_docs=true}; + {"filter", _} -> + Args#changes_args{filter=Value}; + _Else -> % unknown key value pair, ignore. + Args + end + end, #changes_args{}, couch_httpd:qs(Req)). 
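[Editor's note] Worked examples for make_content_range/3 and the parse_ranges functions added above, assuming a 10000-byte attachment (results traced from the code; values illustrative):

    %% make_content_range(9500, 9999, 10000) -> <<"bytes 9500-9999/10000">>
    %% parse_ranges([{0, 499}], 10000)       -> [{0, 499}]
    %% parse_ranges([{none, 500}], 10000)    -> [{9500, 9999}]  % suffix: last 500 bytes
    %% parse_ranges([{9500, none}], 10000)   -> [{9500, 9999}]  % open-ended range
    %% parse_ranges([{0, 99999}], 10000)     -> [{0, 9999}]     % clamped to length
    %% parse_ranges([{500, 100}], 10000)     -> throws requested_range_not_satisfiable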
extract_header_rev(Req, ExplicitRev) when is_binary(ExplicitRev) or is_list(ExplicitRev)->
    extract_header_rev(Req, couch_doc:parse_rev(ExplicitRev));
@@ -1185,15 +1218,19 @@ extract_header_rev(Req, ExplicitRev) ->

 parse_copy_destination_header(Req) ->
-    Destination = couch_httpd:header_value(Req, "Destination"),
-    case re:run(Destination, "\\?", [{capture, none}]) of
-    nomatch ->
-        {list_to_binary(Destination), {0, []}};
-    match ->
-        [DocId, RevQs] = re:split(Destination, "\\?", [{return, list}]),
-        [_RevQueryKey, Rev] = re:split(RevQs, "=", [{return, list}]),
-        {Pos, RevId} = couch_doc:parse_rev(Rev),
-        {list_to_binary(DocId), {Pos, [RevId]}}
+    case couch_httpd:header_value(Req, "Destination") of
+    undefined ->
+        throw({bad_request, "Destination header is mandatory for COPY."});
+    Destination ->
+        case re:run(Destination, "\\?", [{capture, none}]) of
+        nomatch ->
+            {list_to_binary(Destination), {0, []}};
+        match ->
+            [DocId, RevQs] = re:split(Destination, "\\?", [{return, list}]),
+            [_RevQueryKey, Rev] = re:split(RevQs, "=", [{return, list}]),
+            {Pos, RevId} = couch_doc:parse_rev(Rev),
+            {list_to_binary(DocId), {Pos, [RevId]}}
+        end
     end.

 validate_attachment_names(Doc) ->
diff --git a/src/couchdb/couch_httpd_external.erl b/src/couchdb/couch_httpd_external.erl
index efbe87b8..f8d6f171 100644
--- a/src/couchdb/couch_httpd_external.erl
+++ b/src/couchdb/couch_httpd_external.erl
@@ -56,6 +56,7 @@ process_external_req(HttpReq, Db, Name) ->
 json_req_obj(Req, Db) -> json_req_obj(Req, Db, null).
 json_req_obj(#httpd{mochi_req=Req,
                method=Method,
+               requested_path_parts=RequestedPath,
                path_parts=Path,
                req_body=ReqBody
            }, Db, DocId) ->
@@ -72,18 +73,22 @@ json_req_obj(#httpd{mochi_req=Req,
     Headers = Req:get(headers),
     Hlist = mochiweb_headers:to_list(Headers),
     {ok, Info} = couch_db:get_db_info(Db),
-    % add headers...
+
+% add headers...
     {[{<<"info">>, {Info}},
       {<<"id">>, DocId},
+      {<<"uuid">>, couch_uuids:new()},
       {<<"method">>, Method},
+      {<<"requested_path">>, RequestedPath},
       {<<"path">>, Path},
-      {<<"query">>, to_json_terms(Req:parse_qs())},
+      {<<"query">>, json_query_keys(to_json_terms(Req:parse_qs()))},
       {<<"headers">>, to_json_terms(Hlist)},
       {<<"body">>, Body},
       {<<"peer">>, ?l2b(Req:get(peer))},
       {<<"form">>, to_json_terms(ParsedForm)},
       {<<"cookie">>, to_json_terms(Req:parse_cookie())},
-      {<<"userCtx">>, couch_util:json_user_ctx(Db)}]}.
+      {<<"userCtx">>, couch_util:json_user_ctx(Db)},
+      {<<"secObj">>, couch_db:get_security(Db)}]}.

 to_json_terms(Data) ->
     to_json_terms(Data, []).
@@ -95,6 +100,19 @@ to_json_terms([{Key, Value} | Rest], Acc) when is_atom(Key) ->
 to_json_terms([{Key, Value} | Rest], Acc) ->
     to_json_terms(Rest, [{list_to_binary(Key), list_to_binary(Value)} | Acc]).

+json_query_keys({Json}) ->
+    json_query_keys(Json, []).
+json_query_keys([], Acc) ->
+    {lists:reverse(Acc)};
+json_query_keys([{<<"startkey">>, Value} | Rest], Acc) ->
+    json_query_keys(Rest, [{<<"startkey">>, couch_util:json_decode(Value)}|Acc]);
+json_query_keys([{<<"endkey">>, Value} | Rest], Acc) ->
+    json_query_keys(Rest, [{<<"endkey">>, couch_util:json_decode(Value)}|Acc]);
+json_query_keys([{<<"key">>, Value} | Rest], Acc) ->
+    json_query_keys(Rest, [{<<"key">>, couch_util:json_decode(Value)}|Acc]);
+json_query_keys([Term | Rest], Acc) ->
+    json_query_keys(Rest, [Term|Acc]).
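[Editor's note] json_query_keys/1 above decodes only the view-key query parameters handed to external processes, leaving every other value as a string; for example (illustrative values):

    %% json_query_keys({[{<<"startkey">>, <<"[\"a\",1]">>}, {<<"limit">>, <<"10">>}]})
    %%   -> {[{<<"startkey">>, [<<"a">>, 1]}, {<<"limit">>, <<"10">>}]}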
+ send_external_response(#httpd{mochi_req=MochiReq}=Req, Response) -> #extern_resp_args{ code = Code, @@ -124,7 +142,7 @@ parse_external_response({Response}) -> Args#extern_resp_args{data=Value, ctype="text/html; charset=utf-8"}; {<<"base64">>, Value} -> Args#extern_resp_args{ - data=couch_util:decodeBase64(Value), + data=base64:decode(Value), ctype="application/binary" }; {<<"headers">>, {Headers}} -> diff --git a/src/couchdb/couch_httpd_misc_handlers.erl b/src/couchdb/couch_httpd_misc_handlers.erl index 2d67b321..6cc48f51 100644 --- a/src/couchdb/couch_httpd_misc_handlers.erl +++ b/src/couchdb/couch_httpd_misc_handlers.erl @@ -15,7 +15,7 @@ -export([handle_welcome_req/2,handle_favicon_req/2,handle_utils_dir_req/2, handle_all_dbs_req/1,handle_replicate_req/1,handle_restart_req/1, handle_uuids_req/1,handle_config_req/1,handle_log_req/1, - handle_task_status_req/1,handle_sleep_req/1]). + handle_task_status_req/1]). -export([increment_update_seq_req/2]). @@ -46,6 +46,7 @@ handle_favicon_req(#httpd{method='GET'}=Req, DocumentRoot) -> {"Expires", httpd_util:rfc1123_date(OneYearFromNow)} ], couch_httpd:serve_file(Req, "favicon.ico", DocumentRoot, CachingHeaders); + handle_favicon_req(Req, _) -> send_method_not_allowed(Req, "GET,HEAD"). @@ -63,13 +64,6 @@ handle_utils_dir_req(#httpd{method='GET'}=Req, DocumentRoot) -> handle_utils_dir_req(Req, _) -> send_method_not_allowed(Req, "GET,HEAD"). -handle_sleep_req(#httpd{method='GET'}=Req) -> - Time = list_to_integer(couch_httpd:qs_value(Req, "time")), - receive snicklefart -> ok after Time -> ok end, - send_json(Req, {[{ok, true}]}); -handle_sleep_req(Req) -> - send_method_not_allowed(Req, "GET,HEAD"). - handle_all_dbs_req(#httpd{method='GET'}=Req) -> {ok, DbNames} = couch_server:all_databases(), send_json(Req, DbNames); @@ -85,25 +79,38 @@ handle_task_status_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). handle_replicate_req(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), PostBody = couch_httpd:json_body_obj(Req), try couch_rep:replicate(PostBody, Req#httpd.user_ctx) of {ok, {continuous, RepId}} -> send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); + {ok, {cancelled, RepId}} -> + send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); {ok, {JsonResults}} -> send_json(Req, {[{ok, true} | JsonResults]}); {error, {Type, Details}} -> send_json(Req, 500, {[{error, Type}, {reason, Details}]}); + {error, not_found} -> + send_json(Req, 404, {[{error, not_found}]}); {error, Reason} -> - send_json(Req, 500, {[{error, Reason}]}) + try + send_json(Req, 500, {[{error, Reason}]}) + catch + exit:{json_encode, _} -> + send_json(Req, 500, {[{error, couch_util:to_binary(Reason)}]}) + end catch throw:{db_not_found, Msg} -> - send_json(Req, 404, {[{error, db_not_found}, {reason, Msg}]}) + send_json(Req, 404, {[{error, db_not_found}, {reason, Msg}]}); + throw:{unauthorized, Msg} -> + send_json(Req, 404, {[{error, unauthorized}, {reason, Msg}]}) end; handle_replicate_req(Req) -> send_method_not_allowed(Req, "POST"). 
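[Editor's note] With the new {ok, {cancelled, RepId}} clause, cancelling looks like the request that started the replication. A sketch of the EJSON bodies as couch_rep:replicate/2 receives them (the "cancel" field name is an assumption inferred from the cancelled clause, shown only for illustration):

    %% start:  {[{<<"source">>, <<"db_a">>}, {<<"target">>, <<"db_b">>},
    %%           {<<"continuous">>, true}]}
    %%   -> {ok, {continuous, RepId}}   % handler replies 202 with "_local_id"
    %% cancel: the same body plus {<<"cancel">>, true}
    %%   -> {ok, {cancelled, RepId}}    % handler replies 200 with "_local_id"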
handle_restart_req(#httpd{method='POST'}=Req) ->
+    couch_httpd:validate_ctype(Req, "application/json"),
     ok = couch_httpd:verify_is_server_admin(Req),
     couch_server_sup:restart_core_server(),
     send_json(Req, 200, {[{ok, true}]});
@@ -155,15 +162,6 @@ handle_config_req(#httpd{method='GET', path_parts=[_,Section]}=Req) ->
     KVs = [{list_to_binary(Key), list_to_binary(Value)}
             || {Key, Value} <- couch_config:get(Section)],
     send_json(Req, 200, {KVs});
-% PUT /_config/Section/Key
-% "value"
-handle_config_req(#httpd{method='PUT', path_parts=[_, Section, Key]}=Req) ->
-    ok = couch_httpd:verify_is_server_admin(Req),
-    Value = couch_httpd:json_body(Req),
-    Persist = couch_httpd:header_value(Req, "X-Couch-Persist") /= "false",
-    OldValue = couch_config:get(Section, Key, ""),
-    ok = couch_config:set(Section, Key, ?b2l(Value), Persist),
-    send_json(Req, 200, list_to_binary(OldValue));
 % GET /_config/Section/Key
 handle_config_req(#httpd{method='GET', path_parts=[_, Section, Key]}=Req) ->
     ok = couch_httpd:verify_is_server_admin(Req),
@@ -173,24 +171,88 @@ handle_config_req(#httpd{method='GET', path_parts=[_, Section, Key]}=Req) ->
     Value ->
         send_json(Req, 200, list_to_binary(Value))
     end;
-% DELETE /_config/Section/Key
-handle_config_req(#httpd{method='DELETE',path_parts=[_,Section,Key]}=Req) ->
+% PUT or DELETE /_config/Section/Key
+handle_config_req(#httpd{method=Method, path_parts=[_, Section, Key]}=Req)
+      when (Method == 'PUT') or (Method == 'DELETE') ->
     ok = couch_httpd:verify_is_server_admin(Req),
     Persist = couch_httpd:header_value(Req, "X-Couch-Persist") /= "false",
+    case couch_config:get(<<"httpd">>, <<"config_whitelist">>, null) of
+        null ->
+            % No whitelist; allow all changes.
+            handle_approved_config_req(Req, Persist);
+        WhitelistValue ->
+            % Provide a failsafe to protect against inadvertently locking
+            % oneself out of the config by supplying a syntactically-incorrect
+            % Erlang term. To intentionally lock down the whitelist, supply a
+            % well-formed list which does not include the whitelist config
+            % variable itself.
+            FallbackWhitelist = [{<<"httpd">>, <<"config_whitelist">>}],
+
+            Whitelist = case couch_util:parse_term(WhitelistValue) of
+                {ok, Value} when is_list(Value) ->
+                    Value;
+                {ok, _NonListValue} ->
+                    FallbackWhitelist;
+                {error, _} ->
+                    [{WhitelistSection, WhitelistKey}] = FallbackWhitelist,
+                    ?LOG_ERROR("Only whitelisting ~s/~s due to error parsing: ~p",
+                        [WhitelistSection, WhitelistKey, WhitelistValue]),
+                    FallbackWhitelist
+            end,
+
+            IsRequestedKeyVal = fun(Element) ->
+                case Element of
+                    {A, B} ->
+                        % For readability, tuples may be used instead of binaries
+                        % in the whitelist.
+                        case {couch_util:to_binary(A), couch_util:to_binary(B)} of
+                            {Section, Key} ->
+                                true;
+                            {Section, <<"*">>} ->
+                                true;
+                            _Else ->
+                                false
+                        end;
+                    _Else ->
+                        false
+                end
+            end,
+
+            case lists:any(IsRequestedKeyVal, Whitelist) of
+                true ->
+                    % Allow modifying this whitelisted variable.
+                    handle_approved_config_req(Req, Persist);
+                _NotWhitelisted ->
+                    % Disallow modifying this non-whitelisted variable.
+                    send_error(Req, 400, <<"modification_not_allowed">>,
+                        ?l2b("This config variable is read-only"))
+            end
+    end;
+handle_config_req(Req) ->
+    send_method_not_allowed(Req, "GET,PUT,DELETE").
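[Editor's note] The whitelist is stored as an Erlang term string in the ini; a sketch of a matching entry (section and key names come from the code above; the listed tuples are illustrative). Per IsRequestedKeyVal, a "*" key whitelists a whole section:

    [httpd]
    config_whitelist = [{httpd,config_whitelist}, {log,level}, {couchdb,"*"}]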
+ +% PUT /_config/Section/Key +% "value" +handle_approved_config_req(#httpd{method='PUT', path_parts=[_, Section, Key]}=Req, Persist) -> + Value = couch_httpd:json_body(Req), + OldValue = couch_config:get(Section, Key, ""), + ok = couch_config:set(Section, Key, ?b2l(Value), Persist), + send_json(Req, 200, list_to_binary(OldValue)); +% DELETE /_config/Section/Key +handle_approved_config_req(#httpd{method='DELETE',path_parts=[_,Section,Key]}=Req, Persist) -> case couch_config:get(Section, Key, null) of null -> throw({not_found, unknown_config_value}); OldValue -> couch_config:delete(Section, Key, Persist), send_json(Req, 200, list_to_binary(OldValue)) - end; -handle_config_req(Req) -> - send_method_not_allowed(Req, "GET,PUT,DELETE"). + end. % httpd db handlers increment_update_seq_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), {ok, NewSeq} = couch_db:increment_update_seq(Db), send_json(Req, {[{ok, true}, {update_seq, NewSeq} @@ -201,6 +263,7 @@ increment_update_seq_req(Req, _Db) -> % httpd log handlers handle_log_req(#httpd{method='GET'}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), Bytes = list_to_integer(couch_httpd:qs_value(Req, "bytes", "1000")), Offset = list_to_integer(couch_httpd:qs_value(Req, "offset", "0")), Chunk = couch_log:read(Bytes, Offset), diff --git a/src/couchdb/couch_httpd_oauth.erl b/src/couchdb/couch_httpd_oauth.erl index ddf84008..05ee10e2 100644 --- a/src/couchdb/couch_httpd_oauth.erl +++ b/src/couchdb/couch_httpd_oauth.erl @@ -18,9 +18,9 @@ % OAuth auth handler using per-node user db oauth_authentication_handler(#httpd{mochi_req=MochiReq}=Req) -> serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> - AccessToken = proplists:get_value("oauth_token", Params), + AccessToken = couch_util:get_value("oauth_token", Params), case couch_config:get("oauth_token_secrets", AccessToken) of - undefined -> + undefined -> couch_httpd:send_error(Req, 400, <<"invalid_token">>, <<"Invalid OAuth token.">>); TokenSecret -> @@ -41,17 +41,17 @@ set_user_ctx(Req, AccessToken) -> undefined -> throw({bad_request, unknown_oauth_token}); Value -> ?l2b(Value) end, - case couch_httpd_auth:get_user(Name) of + case couch_auth_cache:get_user_creds(Name) of nil -> Req; User -> - Roles = proplists:get_value(<<"roles">>, User, []), + Roles = couch_util:get_value(<<"roles">>, User, []), Req#httpd{user_ctx=#user_ctx{name=Name, roles=Roles}} end. 
% OAuth request_token handle_oauth_req(#httpd{path_parts=[_OAuth, <<"request_token">>], method=Method}=Req) -> serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> - AccessToken = proplists:get_value("oauth_token", Params), + AccessToken = couch_util:get_value("oauth_token", Params), TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), case oauth:verify(Signature, atom_to_list(Method), URL, Params, Consumer, TokenSecret) of true -> @@ -88,7 +88,7 @@ serve_oauth_authorize(#httpd{method=Method}=Req) -> 'GET' -> % Confirm with the User that they want to authenticate the Consumer serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> - AccessToken = proplists:get_value("oauth_token", Params), + AccessToken = couch_util:get_value("oauth_token", Params), TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), case oauth:verify(Signature, "GET", URL, Params, Consumer, TokenSecret) of true -> @@ -100,7 +100,7 @@ serve_oauth_authorize(#httpd{method=Method}=Req) -> 'POST' -> % If the User has confirmed, we direct the User back to the Consumer with a verification code serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> - AccessToken = proplists:get_value("oauth_token", Params), + AccessToken = couch_util:get_value("oauth_token", Params), TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), case oauth:verify(Signature, "POST", URL, Params, Consumer, TokenSecret) of true -> @@ -129,24 +129,24 @@ serve_oauth(#httpd{mochi_req=MochiReq}=Req, Fun, FailSilently) -> end end, HeaderParams = oauth_uri:params_from_header_string(AuthHeader), - %Realm = proplists:get_value("realm", HeaderParams), + %Realm = couch_util:get_value("realm", HeaderParams), Params = proplists:delete("realm", HeaderParams) ++ MochiReq:parse_qs(), ?LOG_DEBUG("OAuth Params: ~p", [Params]), - case proplists:get_value("oauth_version", Params, "1.0") of + case couch_util:get_value("oauth_version", Params, "1.0") of "1.0" -> - case proplists:get_value("oauth_consumer_key", Params, undefined) of + case couch_util:get_value("oauth_consumer_key", Params, undefined) of undefined -> case FailSilently of true -> Req; false -> couch_httpd:send_error(Req, 400, <<"invalid_consumer">>, <<"Invalid consumer.">>) end; ConsumerKey -> - SigMethod = proplists:get_value("oauth_signature_method", Params), + SigMethod = couch_util:get_value("oauth_signature_method", Params), case consumer_lookup(ConsumerKey, SigMethod) of none -> couch_httpd:send_error(Req, 400, <<"invalid_consumer">>, <<"Invalid consumer (key or signature method).">>); Consumer -> - Signature = proplists:get_value("oauth_signature", Params), + Signature = couch_util:get_value("oauth_signature", Params), URL = couch_httpd:absolute_uri(Req, MochiReq:get(raw_path)), Fun(URL, proplists:delete("oauth_signature", Params), Consumer, Signature) diff --git a/src/couchdb/couch_httpd_proxy.erl b/src/couchdb/couch_httpd_proxy.erl new file mode 100644 index 00000000..e4b94f22 --- /dev/null +++ b/src/couchdb/couch_httpd_proxy.erl @@ -0,0 +1,425 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_httpd_proxy). + +-export([handle_proxy_req/2]). + +-include("couch_db.hrl"). +-include("../ibrowse/ibrowse.hrl"). + +-define(TIMEOUT, infinity). +-define(PKT_SIZE, 4096). + + +handle_proxy_req(Req, ProxyDest) -> + + %% Bug in Mochiweb? + %% Reported here: http://github.com/mochi/mochiweb/issues/issue/16 + erase(mochiweb_request_body_length), + + Method = get_method(Req), + Url = get_url(Req, ProxyDest), + Version = get_version(Req), + Headers = get_headers(Req), + Body = get_body(Req), + Options = [ + {http_vsn, Version}, + {headers_as_is, true}, + {response_format, binary}, + {stream_to, {self(), once}} + ], + case ibrowse:send_req(Url, Headers, Method, Body, Options, ?TIMEOUT) of + {ibrowse_req_id, ReqId} -> + stream_response(Req, ProxyDest, ReqId); + {error, Reason} -> + throw({error, Reason}) + end. + + +get_method(#httpd{mochi_req=MochiReq}) -> + case MochiReq:get(method) of + Method when is_atom(Method) -> + list_to_atom(string:to_lower(atom_to_list(Method))); + Method when is_list(Method) -> + list_to_atom(string:to_lower(Method)); + Method when is_binary(Method) -> + list_to_atom(string:to_lower(?b2l(Method))) + end. + + +get_url(Req, ProxyDest) when is_binary(ProxyDest) -> + get_url(Req, ?b2l(ProxyDest)); +get_url(#httpd{mochi_req=MochiReq}=Req, ProxyDest) -> + BaseUrl = case mochiweb_util:partition(ProxyDest, "/") of + {[], "/", _} -> couch_httpd:absolute_uri(Req, ProxyDest); + _ -> ProxyDest + end, + ProxyPrefix = "/" ++ ?b2l(hd(Req#httpd.path_parts)), + RequestedPath = MochiReq:get(raw_path), + case mochiweb_util:partition(RequestedPath, ProxyPrefix) of + {[], ProxyPrefix, []} -> + BaseUrl; + {[], ProxyPrefix, [$/ | DestPath]} -> + remove_trailing_slash(BaseUrl) ++ "/" ++ DestPath; + {[], ProxyPrefix, DestPath} -> + remove_trailing_slash(BaseUrl) ++ "/" ++ DestPath; + _Else -> + throw({invalid_url_path, {ProxyPrefix, RequestedPath}}) + end. + +get_version(#httpd{mochi_req=MochiReq}) -> + MochiReq:get(version). + + +get_headers(#httpd{mochi_req=MochiReq}) -> + to_ibrowse_headers(mochiweb_headers:to_list(MochiReq:get(headers)), []). + +to_ibrowse_headers([], Acc) -> + lists:reverse(Acc); +to_ibrowse_headers([{K, V} | Rest], Acc) when is_atom(K) -> + to_ibrowse_headers([{atom_to_list(K), V} | Rest], Acc); +to_ibrowse_headers([{K, V} | Rest], Acc) when is_list(K) -> + case string:to_lower(K) of + "content-length" -> + to_ibrowse_headers(Rest, [{content_length, V} | Acc]); + % This appears to make ibrowse too smart. + %"transfer-encoding" -> + % to_ibrowse_headers(Rest, [{transfer_encoding, V} | Acc]); + _ -> + to_ibrowse_headers(Rest, [{K, V} | Acc]) + end. + +get_body(#httpd{method='GET'}) -> + fun() -> eof end; +get_body(#httpd{method='HEAD'}) -> + fun() -> eof end; +get_body(#httpd{method='DELETE'}) -> + fun() -> eof end; +get_body(#httpd{mochi_req=MochiReq}) -> + case MochiReq:get(body_length) of + undefined -> + <<>>; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + {fun stream_chunked_body/1, {init, MochiReq, 0}}; + 0 -> + <<>>; + Length when is_integer(Length) andalso Length > 0 -> + {fun stream_length_body/1, {init, MochiReq, Length}}; + Length -> + exit({invalid_body_length, Length}) + end. + + +remove_trailing_slash(Url) -> + rem_slash(lists:reverse(Url)). 
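[Editor's note] get_url/2 above strips the proxy prefix from the requested path and appends the remainder to the destination. A sketch of the wiring plus the resulting mapping (the handler tuple follows the usual global-handler convention; the _fti name, section name, and port are illustrative assumptions):

    [httpd_global_handlers]
    _fti = {couch_httpd_proxy, handle_proxy_req, <<"http://127.0.0.1:5985">>}

    %% GET /_fti/search?q=pie  ->  http://127.0.0.1:5985/search?q=pie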
+
+rem_slash([]) ->
+    [];
+rem_slash([$\s | RevUrl]) ->
+    rem_slash(RevUrl);
+rem_slash([$\t | RevUrl]) ->
+    rem_slash(RevUrl);
+rem_slash([$\r | RevUrl]) ->
+    rem_slash(RevUrl);
+rem_slash([$\n | RevUrl]) ->
+    rem_slash(RevUrl);
+rem_slash([$/ | RevUrl]) ->
+    rem_slash(RevUrl);
+rem_slash(RevUrl) ->
+    lists:reverse(RevUrl).
+
+
+stream_chunked_body({init, MReq, 0}) ->
+    % First chunk, do expect-continue dance.
+    init_body_stream(MReq),
+    stream_chunked_body({stream, MReq, 0, [], ?PKT_SIZE});
+stream_chunked_body({stream, MReq, 0, Buf, BRem}) ->
+    % Finished a chunk, get next length. If next length
+    % is 0, it's time to try and read trailers.
+    {CRem, Data} = read_chunk_length(MReq),
+    case CRem of
+        0 ->
+            BodyData = iolist_to_binary(lists:reverse(Buf, Data)),
+            {ok, BodyData, {trailers, MReq, [], ?PKT_SIZE}};
+        _ ->
+            stream_chunked_body(
+                {stream, MReq, CRem, [Data | Buf], BRem-size(Data)}
+            )
+    end;
+stream_chunked_body({stream, MReq, CRem, Buf, BRem}) when BRem =< 0 ->
+    % Time to empty our buffers to the upstream socket.
+    BodyData = iolist_to_binary(lists:reverse(Buf)),
+    {ok, BodyData, {stream, MReq, CRem, [], ?PKT_SIZE}};
+stream_chunked_body({stream, MReq, CRem, Buf, BRem}) ->
+    % Buffer some more data from the client.
+    Length = lists:min([CRem, BRem]),
+    Socket = MReq:get(socket),
+    NewState = case mochiweb_socket:recv(Socket, Length, ?TIMEOUT) of
+        {ok, Data} when size(Data) == CRem ->
+            case mochiweb_socket:recv(Socket, 2, ?TIMEOUT) of
+                {ok, <<"\r\n">>} ->
+                    {stream, MReq, 0, [<<"\r\n">>, Data | Buf], BRem-Length-2};
+                _ ->
+                    exit(normal)
+            end;
+        {ok, Data} ->
+            {stream, MReq, CRem-Length, [Data | Buf], BRem-Length};
+        _ ->
+            exit(normal)
+    end,
+    stream_chunked_body(NewState);
+stream_chunked_body({trailers, MReq, Buf, BRem}) when BRem =< 0 ->
+    % Empty our buffers and send data upstream.
+    BodyData = iolist_to_binary(lists:reverse(Buf)),
+    {ok, BodyData, {trailers, MReq, [], ?PKT_SIZE}};
+stream_chunked_body({trailers, MReq, Buf, BRem}) ->
+    % Read another trailer into the buffer or stop on an
+    % empty line.
+    Socket = MReq:get(socket),
+    mochiweb_socket:setopts(Socket, [{packet, line}]),
+    case mochiweb_socket:recv(Socket, 0, ?TIMEOUT) of
+        {ok, <<"\r\n">>} ->
+            mochiweb_socket:setopts(Socket, [{packet, raw}]),
+            BodyData = iolist_to_binary(lists:reverse(Buf, <<"\r\n">>)),
+            {ok, BodyData, eof};
+        {ok, Footer} ->
+            mochiweb_socket:setopts(Socket, [{packet, raw}]),
+            NewState = {trailers, MReq, [Footer | Buf], BRem-size(Footer)},
+            stream_chunked_body(NewState);
+        _ ->
+            exit(normal)
+    end;
+stream_chunked_body(eof) ->
+    % Tell ibrowse we're done sending data.
+    eof.
+
+
+stream_length_body({init, MochiReq, Length}) ->
+    % Do the expect-continue dance
+    init_body_stream(MochiReq),
+    stream_length_body({stream, MochiReq, Length});
+stream_length_body({stream, _MochiReq, 0}) ->
+    % Finished streaming.
+    eof;
+stream_length_body({stream, MochiReq, Length}) ->
+    BufLen = lists:min([Length, ?PKT_SIZE]),
+    case MochiReq:recv(BufLen) of
+        <<>> -> eof;
+        Bin -> {ok, Bin, {stream, MochiReq, Length-BufLen}}
+    end.
+
+
+init_body_stream(MochiReq) ->
+    Expect = case MochiReq:get_header_value("expect") of
+        undefined ->
+            undefined;
+        Value when is_list(Value) ->
+            string:to_lower(Value)
+    end,
+    case Expect of
+        "100-continue" ->
+            MochiReq:start_raw_response({100, gb_trees:empty()});
+        _Else ->
+            ok
+    end.
+ + +read_chunk_length(MochiReq) -> + Socket = MochiReq:get(socket), + mochiweb_socket:setopts(Socket, [{packet, line}]), + case mochiweb_socket:recv(Socket, 0, ?TIMEOUT) of + {ok, Header} -> + mochiweb_socket:setopts(Socket, [{packet, raw}]), + Splitter = fun(C) -> + C =/= $\r andalso C =/= $\n andalso C =/= $\s + end, + {Hex, _Rest} = lists:splitwith(Splitter, ?b2l(Header)), + {mochihex:to_int(Hex), Header}; + _ -> + exit(normal) + end. + + +stream_response(Req, ProxyDest, ReqId) -> + receive + {ibrowse_async_headers, ReqId, "100", _} -> + % ibrowse doesn't handle 100 Continue responses which + % means we have to discard them so the proxy client + % doesn't get confused. + ibrowse:stream_next(ReqId), + stream_response(Req, ProxyDest, ReqId); + {ibrowse_async_headers, ReqId, Status, Headers} -> + {Source, Dest} = get_urls(Req, ProxyDest), + FixedHeaders = fix_headers(Source, Dest, Headers, []), + case body_length(FixedHeaders) of + chunked -> + {ok, Resp} = couch_httpd:start_chunked_response( + Req, list_to_integer(Status), FixedHeaders + ), + ibrowse:stream_next(ReqId), + stream_chunked_response(Req, ReqId, Resp), + {ok, Resp}; + Length when is_integer(Length) -> + {ok, Resp} = couch_httpd:start_response_length( + Req, list_to_integer(Status), FixedHeaders, Length + ), + ibrowse:stream_next(ReqId), + stream_length_response(Req, ReqId, Resp), + {ok, Resp}; + _ -> + {ok, Resp} = couch_httpd:start_response( + Req, list_to_integer(Status), FixedHeaders + ), + ibrowse:stream_next(ReqId), + stream_length_response(Req, ReqId, Resp), + % XXX: MochiWeb apparently doesn't look at the + % response to see if it must force close the + % connection. So we help it out here. + erlang:put(mochiweb_request_force_close, true), + {ok, Resp} + end + end. + + +stream_chunked_response(Req, ReqId, Resp) -> + receive + {ibrowse_async_response, ReqId, {error, Reason}} -> + throw({error, Reason}); + {ibrowse_async_response, ReqId, Chunk} -> + couch_httpd:send_chunk(Resp, Chunk), + ibrowse:stream_next(ReqId), + stream_chunked_response(Req, ReqId, Resp); + {ibrowse_async_response_end, ReqId} -> + couch_httpd:last_chunk(Resp) + end. + + +stream_length_response(Req, ReqId, Resp) -> + receive + {ibrowse_async_response, ReqId, {error, Reason}} -> + throw({error, Reason}); + {ibrowse_async_response, ReqId, Chunk} -> + couch_httpd:send(Resp, Chunk), + ibrowse:stream_next(ReqId), + stream_length_response(Req, ReqId, Resp); + {ibrowse_async_response_end, ReqId} -> + ok + end. + + +get_urls(Req, ProxyDest) -> + SourceUrl = couch_httpd:absolute_uri(Req, "/" ++ hd(Req#httpd.path_parts)), + Source = parse_url(?b2l(iolist_to_binary(SourceUrl))), + case (catch parse_url(ProxyDest)) of + Dest when is_record(Dest, url) -> + {Source, Dest}; + _ -> + DestUrl = couch_httpd:absolute_uri(Req, ProxyDest), + {Source, parse_url(DestUrl)} + end. + + +fix_headers(_, _, [], Acc) -> + lists:reverse(Acc); +fix_headers(Source, Dest, [{K, V} | Rest], Acc) -> + Fixed = case string:to_lower(K) of + "location" -> rewrite_location(Source, Dest, V); + "content-location" -> rewrite_location(Source, Dest, V); + "uri" -> rewrite_location(Source, Dest, V); + "destination" -> rewrite_location(Source, Dest, V); + "set-cookie" -> rewrite_cookie(Source, Dest, V); + _ -> V + end, + fix_headers(Source, Dest, Rest, [{K, Fixed} | Acc]). 
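[Editor's note] read_chunk_length/1 above parses the hex chunk-size line of the client's chunked body and returns the raw header bytes alongside, so the body can be re-sent upstream byte-for-byte; e.g. (traced from the code):

    %% read_chunk_length on a line <<"1a3\r\n">> -> {419, <<"1a3\r\n">>}   % 16#1a3 = 419
    %% read_chunk_length on a line <<"0\r\n">>   -> {0, <<"0\r\n">>}       % last chunk -> trailers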
+
+
+rewrite_location(Source, #url{host=Host, port=Port, protocol=Proto}, Url) ->
+    case (catch parse_url(Url)) of
+        #url{host=Host, port=Port, protocol=Proto} = Location ->
+            DestLoc = #url{
+                protocol=Source#url.protocol,
+                host=Source#url.host,
+                port=Source#url.port,
+                path=join_url_path(Source#url.path, Location#url.path)
+            },
+            url_to_url(DestLoc);
+        #url{} ->
+            Url;
+        _ ->
+            url_to_url(Source#url{path=join_url_path(Source#url.path, Url)})
+    end.
+
+
+rewrite_cookie(_Source, _Dest, Cookie) ->
+    Cookie.
+
+
+parse_url(Url) when is_binary(Url) ->
+    ibrowse_lib:parse_url(?b2l(Url));
+parse_url(Url) when is_list(Url) ->
+    ibrowse_lib:parse_url(?b2l(iolist_to_binary(Url))).
+
+
+join_url_path(Src, Dst) ->
+    Src2 = case lists:reverse(Src) of
+        "/" ++ RestSrc -> lists:reverse(RestSrc);
+        _ -> Src
+    end,
+    Dst2 = case Dst of
+        "/" ++ RestDst -> RestDst;
+        _ -> Dst
+    end,
+    Src2 ++ "/" ++ Dst2.
+
+
+url_to_url(#url{host=Host, port=Port, path=Path, protocol=Proto}) ->
+    LPort = case {Proto, Port} of
+        {http, 80} -> "";
+        {https, 443} -> "";
+        _ -> ":" ++ integer_to_list(Port)
+    end,
+    LPath = case Path of
+        "/" ++ _RestPath -> Path;
+        _ -> "/" ++ Path
+    end,
+    atom_to_list(Proto) ++ "://" ++ Host ++ LPort ++ LPath.
+
+
+body_length(Headers) ->
+    case is_chunked(Headers) of
+        true -> chunked;
+        _ -> content_length(Headers)
+    end.
+
+
+is_chunked([]) ->
+    false;
+is_chunked([{K, V} | Rest]) ->
+    case string:to_lower(K) of
+        "transfer-encoding" ->
+            string:to_lower(V) == "chunked";
+        _ ->
+            is_chunked(Rest)
+    end.
+
+content_length([]) ->
+    undefined;
+content_length([{K, V} | Rest]) ->
+    case string:to_lower(K) of
+        "content-length" ->
+            list_to_integer(V);
+        _ ->
+            content_length(Rest)
+    end.
+
diff --git a/src/couchdb/couch_httpd_rewrite.erl b/src/couchdb/couch_httpd_rewrite.erl
new file mode 100644
index 00000000..893f99ed
--- /dev/null
+++ b/src/couchdb/couch_httpd_rewrite.erl
@@ -0,0 +1,461 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+%
+% bind_path is based on bind method from Webmachine
+
+
+%% @doc Module for URL rewriting by pattern matching.
+
+-module(couch_httpd_rewrite).
+-export([handle_rewrite_req/3]).
+-include("couch_db.hrl").
+
+-define(SEPARATOR, $\/).
+-define(MATCH_ALL, {bind, <<"*">>}).
+
+
+%% doc The HTTP rewrite handler. All rewriting is done from
+%% /dbname/_design/ddocname/_rewrite by default.
+%%
+%% Each rule should be in the "rewrites" member of the design doc.
+%% Example of a complete rule:
+%%
+%%  {
+%%      ....
+%%      "rewrites": [
+%%      {
+%%          "from": "",
+%%          "to": "index.html",
+%%          "method": "GET",
+%%          "query": {}
+%%      }
+%%      ]
+%%  }
+%%
+%%  from: the path rule used to bind the current URI to the rule. It
+%%  uses pattern matching for that.
+%%
+%%  to: the rule used to rewrite a URL. It can contain variables depending on
+%%  binding variables discovered during pattern matching and query args
+%%  (URL args and those from the query member.)
+%%
+%%  method: the method used to bind the request method to the rule;
"*" by default, which matches any method.
+%%  query: the query args you want to define; they can contain dynamic
+%%  variables, bound by key to the bindings.
+%%
+%%
+%% to and from are paths with patterns. A pattern can be a string starting
+%% with ":" or "*", e.g.:
+%% /somepath/:var/*
+%%
+%% This path is converted into an Erlang list by splitting on "/". Each var is
+%% converted to an atom. "*" is converted to the '*' atom. The pattern matching
+%% is done by splitting "/" in the request URL into a list of tokens. A string
+%% pattern will match the equal token. The star atom ('*' in single quotes)
+%% will match any number of tokens, but may only be present as the last
+%% pathterm in a pathspec. If all tokens are matched and all pathterms are
+%% used, then the pathspec matches. It works like webmachine. Each identified
+%% token will be reused in the to rule and in the query.
+%%
+%% The pattern matching is done by first matching the request method to a
+%% rule (all methods match a rule whose method is "*", which is the default).
+%% Then it will try to match the path to one rule. If no rule matches, a 404
+%% error is returned.
+%%
+%% Once a rule is found, we rewrite the request URL using the "to" and
+%% "query" members. The identified tokens are matched to the rule and
+%% replace the vars. If '*' is found in the rule, it will contain the
+%% remaining part of the URL, if any.
+%%
+%% Examples:
+%%
+%%  Dispatch rule            URL             TO                  Tokens
+%%
+%%  {"from": "/a/b",         /a/b?k=v        /some/b?k=v         var =:= b
+%%  "to": "/some/"}                                              k = v
+%%
+%%  {"from": "/a/b",         /a/b            /some/b?var=b       var =:= b
+%%  "to": "/some/:var"}
+%%
+%%  {"from": "/a",           /a              /some
+%%  "to": "/some/*"}
+%%
+%%  {"from": "/a/*",         /a/b/c          /some/b/c
+%%  "to": "/some/*"}
+%%
+%%  {"from": "/a",           /a              /some
+%%  "to": "/some/*"}
+%%
+%%  {"from": "/a/:foo/*",    /a/b/c          /some/b/c?foo=b     foo =:= b
+%%  "to": "/some/:foo/*"}
+%%
+%%  {"from": "/a/:foo",      /a/b            /some/?k=b&foo=b    foo =:= b
+%%  "to": "/some",
+%%  "query": {
+%%      "k": ":foo"
+%%  }}
+%%
+%%  {"from": "/a",           /a?foo=b        /some/b             foo =:= b
+%%  "to": "/some/:foo",
+%%  }}
+%%
+%%  {"from": "/a/<foo>"      /a/b            /some/b             foo =:= b
+%%  "to": "/a/b",
+%%  }}
+%%
+%%  {"from": "/a/<foo>.blah" /a/b            /some/b             foo =:= b
+%%  "to": "/a/b",
+%%  }}
+
+
+
+handle_rewrite_req(#httpd{
+        path_parts=[DbName, <<"_design">>, DesignName, _Rewrite|PathParts],
+        method=Method,
+        mochi_req=MochiReq}=Req, _Db, DDoc) ->
+
+    % we are in a design handler
+    DesignId = <<"_design/", DesignName/binary>>,
+    Prefix = <<"/", DbName/binary, "/", DesignId/binary>>,
+    QueryList = couch_httpd:qs(Req),
+    QueryList1 = [{to_binding(K), V} || {K, V} <- QueryList],
+
+    #doc{body={Props}} = DDoc,
+
+    % get rules from ddoc
+    case couch_util:get_value(<<"rewrites">>, Props) of
+        undefined ->
+            couch_httpd:send_error(Req, 404, <<"rewrite_error">>,
+                <<"Invalid path.">>);
+        Rules ->
+            % create dispatch list from rules
+            DispatchList = [make_rule(Rule) || {Rule} <- Rules],
+
+            %% get raw path by matching url to a rule.
+            RawPath = case try_bind_path(DispatchList, couch_util:to_binary(Method), PathParts,
+                    QueryList1) of
+                no_dispatch_path ->
+                    throw(not_found);
+                {NewPathParts, Bindings} ->
+                    Parts = [quote_plus(X) || X <- NewPathParts],
+
+                    % build the new path, re-encoding query args and
+                    % converting them to JSON where needed
+                    Path = lists:append(
+                        string:join(Parts, [?SEPARATOR]),
+                        case Bindings of
+                            [] -> [];
+                            _ -> [$?, encode_query(Bindings)]
+                        end),
+
+                    % if the path is relative, detect it and rewrite it
+                    case mochiweb_util:safe_relative_path(Path) of
+                        undefined ->
+                            ?b2l(Prefix) ++ "/" ++ Path;
+                        P1 ->
+                            ?b2l(Prefix) ++ "/" ++ P1
+                    end
+
+                end,
+
+            % normalize final path (fix levels "." and "..")
+            RawPath1 = ?b2l(iolist_to_binary(normalize_path(RawPath))),
+
+            ?LOG_DEBUG("rewrite to ~p ~n", [RawPath1]),
+
+            % build a new mochiweb request
+            MochiReq1 = mochiweb_request:new(MochiReq:get(socket),
+                                             MochiReq:get(method),
+                                             RawPath1,
+                                             MochiReq:get(version),
+                                             MochiReq:get(headers)),
+
+            % cleanup; this forces mochiweb to reparse the raw URI
+            MochiReq1:cleanup(),
+
+            #httpd{
+                db_url_handlers = DbUrlHandlers,
+                design_url_handlers = DesignUrlHandlers,
+                default_fun = DefaultFun,
+                url_handlers = UrlHandlers
+            } = Req,
+            couch_httpd:handle_request_int(MochiReq1, DefaultFun,
+                    UrlHandlers, DbUrlHandlers, DesignUrlHandlers)
+    end.
+
+quote_plus({bind, X}) ->
+    mochiweb_util:quote_plus(X);
+quote_plus(X) ->
+    mochiweb_util:quote_plus(X).
+
+%% @doc Try to find a rule matching the current URL. If none is found,
+%% a 404 not_found error is raised.
+try_bind_path([], _Method, _PathParts, _QueryList) ->
+    no_dispatch_path;
+try_bind_path([Dispatch|Rest], Method, PathParts, QueryList) ->
+    [{PathParts1, Method1}, RedirectPath, QueryArgs] = Dispatch,
+    case bind_method(Method1, Method) of
+        true ->
+            case bind_path(PathParts1, PathParts, []) of
+                {ok, Remaining, Bindings} ->
+                    Bindings1 = Bindings ++ QueryList,
+                    % we parse query args from the rule and fill
+                    % them with binding vars where needed
+                    QueryArgs1 = make_query_list(QueryArgs, Bindings1, []),
+                    % remove params in Bindings1 that are already in
+                    % QueryArgs1
+                    Bindings2 = lists:foldl(fun({K, V}, Acc) ->
+                        K1 = to_binding(K),
+                        KV = case couch_util:get_value(K1, QueryArgs1) of
+                            undefined -> [{K1, V}];
+                            _V1 -> []
+                        end,
+                        Acc ++ KV
+                    end, [], Bindings1),
+
+                    FinalBindings = Bindings2 ++ QueryArgs1,
+                    NewPathParts = make_new_path(RedirectPath, FinalBindings,
+                                    Remaining, []),
+                    {NewPathParts, FinalBindings};
+                fail ->
+                    try_bind_path(Rest, Method, PathParts, QueryList)
+            end;
+        false ->
+            try_bind_path(Rest, Method, PathParts, QueryList)
+    end.
+
+%% Dynamically rewrite the query list given as the query member in
+%% rewrites. Each value is replaced by a binding or an argument
+%% passed in the URL.
+make_query_list([], _Bindings, Acc) ->
+    Acc;
+make_query_list([{Key, {Value}}|Rest], Bindings, Acc) ->
+    Value1 = to_json({Value}),
+    make_query_list(Rest, Bindings, [{to_binding(Key), Value1}|Acc]);
+make_query_list([{Key, Value}|Rest], Bindings, Acc) when is_binary(Value) ->
+    Value1 = replace_var(Key, Value, Bindings),
+    make_query_list(Rest, Bindings, [{to_binding(Key), Value1}|Acc]);
+make_query_list([{Key, Value}|Rest], Bindings, Acc) when is_list(Value) ->
+    Value1 = replace_var(Key, Value, Bindings),
+    make_query_list(Rest, Bindings, [{to_binding(Key), Value1}|Acc]);
+make_query_list([{Key, Value}|Rest], Bindings, Acc) ->
+    make_query_list(Rest, Bindings, [{to_binding(Key), Value}|Acc]).
+
+replace_var(Key, Value, Bindings) ->
+    case Value of
+        <<":", Var/binary>> ->
+            get_var(Var, Bindings, Value);
+        <<"*">> ->
+            get_var(Value, Bindings, Value);
+        _ when is_list(Value) ->
+            Value1 = lists:foldr(fun(V, Acc) ->
+                V1 = case V of
+                    <<":", VName/binary>> ->
+                        case get_var(VName, Bindings, V) of
+                            V2 when is_list(V2) ->
+                                iolist_to_binary(V2);
+                            V2 -> V2
+                        end;
+                    <<"*">> ->
+                        get_var(V, Bindings, V);
+                    _ ->
+                        V
+                end,
+                [V1|Acc]
+            end, [], Value),
+            to_json(Value1);
+        _ when is_binary(Value) ->
+            Value;
+        _ ->
+            case Key of
+                <<"key">> -> to_json(Value);
+                <<"startkey">> -> to_json(Value);
+                <<"start_key">> -> to_json(Value);
+                <<"endkey">> -> to_json(Value);
+                <<"end_key">> -> to_json(Value);
+                _ ->
+                    lists:flatten(?JSON_ENCODE(Value))
+            end
+    end.
+
+
+get_var(VarName, Props, Default) ->
+    VarName1 = to_binding(VarName),
+    couch_util:get_value(VarName1, Props, Default).
+
+%% doc: build the new path from bindings. Bindings are query args
+%% (+ the dynamic query rewritten if needed) and bindings found in
+%% the bind_path step.
+make_new_path([], _Bindings, _Remaining, Acc) ->
+    lists:reverse(Acc);
+make_new_path([?MATCH_ALL], _Bindings, Remaining, Acc) ->
+    Acc1 = lists:reverse(Acc) ++ Remaining,
+    Acc1;
+make_new_path([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) ->
+    Acc1 = lists:reverse(Acc) ++ Remaining,
+    Acc1;
+make_new_path([{bind, P}|Rest], Bindings, Remaining, Acc) ->
+    P2 = case couch_util:get_value({bind, P}, Bindings) of
+        undefined -> << "undefined">>;
+        P1 -> P1
+    end,
+    make_new_path(Rest, Bindings, Remaining, [P2|Acc]);
+make_new_path([P|Rest], Bindings, Remaining, Acc) ->
+    make_new_path(Rest, Bindings, Remaining, [P|Acc]).
+
+
+%% @doc Check if the request method fits the rule method. If the
+%% method rule is '*', which is the default, any
+%% request method will bind. It allows us to make rules
+%% depending on the HTTP method.
+bind_method(?MATCH_ALL, _Method ) ->
+    true;
+bind_method({bind, Method}, Method) ->
+    true;
+bind_method(_, _) ->
+    false.
+
+
+%% @doc Bind a path. Using the rule's from part, we try to bind variables
+%% to the current URL by pattern matching.
+bind_path([], [], Bindings) ->
+    {ok, [], Bindings};
+bind_path([?MATCH_ALL], [Match|_RestMatch]=Rest, Bindings) ->
+    {ok, Rest, [{?MATCH_ALL, Match}|Bindings]};
+bind_path(_, [], _) ->
+    fail;
+bind_path([{bind, {Token, MatchRe}}|RestToken],
+                [Match|RestMatch],Bindings) ->
+    case re:run(Match, MatchRe, [{capture, all, binary}]) of
+        {match, [_, Match1]} ->
+            bind_path(RestToken, RestMatch, [{{bind, Token}, Match1}|Bindings]);
+        _ ->
+            fail
+    end;
+bind_path([{bind, Token}|RestToken],[Match|RestMatch],Bindings) ->
+    bind_path(RestToken, RestMatch, [{{bind, Token}, Match}|Bindings]);
+bind_path([Token|RestToken], [Token|RestMatch], Bindings) ->
+    bind_path(RestToken, RestMatch, Bindings);
+bind_path(_, _, _) ->
+    fail.
+
+
+%% normalize path.
+normalize_path(Path) ->
+    "/" ++ string:join(normalize_path1(string:tokens(Path,
+                "/"), []), [?SEPARATOR]).
+
+
+normalize_path1([], Acc) ->
+    lists:reverse(Acc);
+normalize_path1([".."|Rest], Acc) ->
+    Acc1 = case Acc of
+        [] -> [".."|Acc];
+        [T|_] when T =:= ".." -> [".."|Acc];
+        [_|R] -> R
+    end,
+    normalize_path1(Rest, Acc1);
+normalize_path1(["."|Rest], Acc) ->
+    normalize_path1(Rest, Acc);
+normalize_path1([Path|Rest], Acc) ->
+    normalize_path1(Rest, [Path|Acc]).
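[Editor's note] Worked examples tying path parsing and binding together (parse_path/1 is defined just below; results traced from the code):

    %% parse_path(<<"/a/:foo/*">>)
    %%   -> [<<"a">>, {bind, <<"foo">>}, {bind, <<"*">>}]
    %% bind_path([<<"a">>, {bind, <<"foo">>}, {bind, <<"*">>}],
    %%           [<<"a">>, <<"b">>, <<"c">>, <<"d">>], [])
    %%   -> {ok, [<<"c">>, <<"d">>],
    %%       [{{bind, <<"*">>}, <<"c">>}, {{bind, <<"foo">>}, <<"b">>}]}
    %% normalize_path("/a/b/../c/./d") -> "/a/c/d"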
+
+
+%% @doc transform a JSON rule into Erlang terms for pattern matching
+make_rule(Rule) ->
+    Method = case couch_util:get_value(<<"method">>, Rule) of
+        undefined -> ?MATCH_ALL;
+        M -> to_binding(M)
+    end,
+    QueryArgs = case couch_util:get_value(<<"query">>, Rule) of
+        undefined -> [];
+        {Args} -> Args
+        end,
+    FromParts  = case couch_util:get_value(<<"from">>, Rule) of
+        undefined -> [?MATCH_ALL];
+        From ->
+            parse_path(From)
+        end,
+    ToParts  = case couch_util:get_value(<<"to">>, Rule) of
+        undefined ->
+            throw({error, invalid_rewrite_target});
+        To ->
+            parse_path(To)
+        end,
+    [{FromParts, Method}, ToParts, QueryArgs].
+
+parse_path(Path) ->
+    {ok, SlashRE} = re:compile(<<"\\/">>),
+    path_to_list(re:split(Path, SlashRE), [], 0).
+
+%% @doc convert a path rule (from or to) to an Erlang list.
+%% "*" and path variables starting with ":" are converted
+%% into binding terms.
+path_to_list([], Acc, _DotDotCount) ->
+    lists:reverse(Acc);
+path_to_list([<<>>|R], Acc, DotDotCount) ->
+    path_to_list(R, Acc, DotDotCount);
+path_to_list([<<"*">>|R], Acc, DotDotCount) ->
+    path_to_list(R, [?MATCH_ALL|Acc], DotDotCount);
+path_to_list([<<"..">>|R], Acc, DotDotCount) when DotDotCount == 2 ->
+    case couch_config:get("httpd", "secure_rewrites", "true") of
+    "false" ->
+        path_to_list(R, [<<"..">>|Acc], DotDotCount+1);
+    _Else ->
+        ?LOG_INFO("insecure_rewrite_rule ~p blocked", [lists:reverse(Acc) ++ [<<"..">>] ++ R]),
+        throw({insecure_rewrite_rule, "too many ../.. segments"})
+    end;
+path_to_list([<<"..">>|R], Acc, DotDotCount) ->
+    path_to_list(R, [<<"..">>|Acc], DotDotCount+1);
+path_to_list([P|R], Acc, DotDotCount) ->
+    P1 = case P of
+        <<"<", _Rest/binary>> ->
+            {ok, VarRe} = re:compile(<<"<([^>].*)>(.*)">>),
+            case re:run(P, VarRe, [{capture, all, binary}]) of
+                {match, [_, Var, Match]} ->
+                    {ok, MatchRe} = re:compile(<<"(.*)", Match/binary>>),
+                    {bind, {Var, MatchRe}};
+                _ -> P
+            end;
+        <<":", Var/binary>> ->
+            to_binding(Var);
+
+
+        _ -> P
+    end,
+    path_to_list(R, [P1|Acc], DotDotCount).
+
+encode_query(Props) ->
+    Props1 = lists:foldl(fun ({{bind, K}, V}, Acc) ->
+        case K of
+            <<"*">> -> Acc;
+            _ ->
+                V1 = case is_list(V) orelse is_binary(V) of
+                    true -> V;
+                    false ->
+                        % probably it's a number
+                        quote_plus(V)
+                end,
+                [{K, V1} | Acc]
+        end
+    end, [], Props),
+    lists:flatten(mochiweb_util:urlencode(Props1)).
+
+to_binding({bind, V}) ->
+    {bind, V};
+to_binding(V) when is_list(V) ->
+    to_binding(?l2b(V));
+to_binding(V) ->
+    {bind, V}.
+
+to_json(V) ->
+    iolist_to_binary(?JSON_ENCODE(V)).
diff --git a/src/couchdb/couch_httpd_show.erl b/src/couchdb/couch_httpd_show.erl
index 467c0a42..70dd82e1 100644
--- a/src/couchdb/couch_httpd_show.erl
+++ b/src/couchdb/couch_httpd_show.erl
@@ -13,7 +13,7 @@
 -module(couch_httpd_show).

 -export([handle_doc_show_req/3, handle_doc_update_req/3, handle_view_list_req/3,
-        handle_doc_show/5, handle_view_list/6, get_fun_key/3]).
+        handle_view_list/6, get_fun_key/3]).

 -include("couch_db.hrl").

@@ -22,18 +22,42 @@
     start_json_response/2,send_chunk/2,last_chunk/1,send_chunked_error/2,
     start_chunked_response/3, send_error/4]).

-% /db/_design/foo/show/bar/docid
-% show converts a json doc to a response of any content-type.
+
+% /db/_design/foo/_show/bar/docid
+% show converts a json doc to a response of any content-type.
% it looks up the doc and then passes it to the query server.
% then it sends the response from the query server to the http client.
+ +maybe_open_doc(Db, DocId) -> + case catch couch_httpd_db:couch_doc_open(Db, DocId, nil, [conflicts]) of + {not_found, missing} -> nil; + {not_found,deleted} -> nil; + Doc -> Doc + end. handle_doc_show_req(#httpd{ path_parts=[_, _, _, _, ShowName, DocId] }=Req, Db, DDoc) -> + % open the doc - Doc = couch_httpd_db:couch_doc_open(Db, DocId, nil, [conflicts]), + Doc = maybe_open_doc(Db, DocId), + % we don't handle revs here b/c they are an internal api % returns 404 if there is no doc with DocId - handle_doc_show(Req, Db, DDoc, ShowName, Doc); + handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId); + +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName, DocId|Rest] + }=Req, Db, DDoc) -> + + DocParts = [DocId|Rest], + DocId1 = ?l2b(string:join([?b2l(P)|| P <- DocParts], "/")), + + % open the doc + Doc = maybe_open_doc(Db, DocId1), + + % we don't handle revs here b/c they are an internal api + % pass 404 docs to the show function + handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId1); handle_doc_show_req(#httpd{ path_parts=[_, _, _, _, ShowName] @@ -45,12 +69,15 @@ handle_doc_show_req(Req, _Db, _DDoc) -> send_error(Req, 404, <<"show_error">>, <<"Invalid path.">>). handle_doc_show(Req, Db, DDoc, ShowName, Doc) -> + handle_doc_show(Req, Db, DDoc, ShowName, Doc, null). + +handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId) -> % get responder for ddoc/showname CurrentEtag = show_etag(Req, Doc, DDoc, []), couch_httpd:etag_respond(Req, CurrentEtag, fun() -> - JsonReq = couch_httpd_external:json_req_obj(Req, Db), + JsonReq = couch_httpd_external:json_req_obj(Req, Db, DocId), JsonDoc = couch_query_servers:json_doc(Doc), - [<<"resp">>, ExternalResp] = + [<<"resp">>, ExternalResp] = couch_query_servers:ddoc_prompt(DDoc, [<<"shows">>, ShowName], [JsonDoc, JsonReq]), JsonResp = apply_etag(ExternalResp, CurrentEtag), couch_httpd_external:send_external_response(Req, JsonResp) @@ -68,7 +95,7 @@ show_etag(#httpd{user_ctx=UserCtx}=Req, Doc, DDoc, More) -> get_fun_key(DDoc, Type, Name) -> #doc{body={Props}} = DDoc, - Lang = proplists:get_value(<<"language">>, Props, <<"javascript">>), + Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>), Src = couch_util:get_nested_json_value({Props}, [Type, Name]), {Lang, Src}. 
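[Editor's note] The new multi-part clause lets _show address documents whose ids contain slashes, such as design docs; for example (illustrative path, traced through the clause above):

    %% GET /db/_design/examples/_show/posts/_design/recipes
    %%   ShowName = <<"posts">>, DocId = <<"_design">>, Rest = [<<"recipes">>]
    %%   DocId1   = <<"_design/recipes">>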
@@ -98,22 +125,27 @@ handle_doc_update_req(Req, _Db, _DDoc) -> send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> JsonReq = couch_httpd_external:json_req_obj(Req, Db, DocId), JsonDoc = couch_query_servers:json_doc(Doc), - case couch_query_servers:ddoc_prompt(DDoc, [<<"updates">>, UpdateName], [JsonDoc, JsonReq]) of - [<<"up">>, {NewJsonDoc}, JsonResp] -> - Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit", "false") of + {Code, JsonResp1} = case couch_query_servers:ddoc_prompt(DDoc, + [<<"updates">>, UpdateName], [JsonDoc, JsonReq]) of + [<<"up">>, {NewJsonDoc}, {JsonResp}] -> + Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit", + "false") of "true" -> [full_commit]; _ -> [] end, NewDoc = couch_doc:from_json_obj({NewJsonDoc}), - Code = 201, - {ok, _NewRev} = couch_db:update_doc(Db, NewDoc, Options); + {ok, NewRev} = couch_db:update_doc(Db, NewDoc, Options), + NewRevStr = couch_doc:rev_to_str(NewRev), + JsonRespWithRev = {[{<<"headers">>, + {[{<<"X-Couch-Update-NewRev">>, NewRevStr}]}} | JsonResp]}, + {201, JsonRespWithRev}; [<<"up">>, _Other, JsonResp] -> - Code = 200, - ok + {200, JsonResp} end, - JsonResp2 = json_apply_field({<<"code">>, Code}, JsonResp), + + JsonResp2 = couch_util:json_apply_field({<<"code">>, Code}, JsonResp1), % todo set location field couch_httpd_external:send_external_response(Req, JsonResp2). @@ -121,12 +153,14 @@ send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> % view-list request with view and list from same design doc. handle_view_list_req(#httpd{method='GET', path_parts=[_, _, DesignName, _, ListName, ViewName]}=Req, Db, DDoc) -> - handle_view_list(Req, Db, DDoc, ListName, {DesignName, ViewName}, nil); + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + handle_view_list(Req, Db, DDoc, ListName, {DesignName, ViewName}, Keys); % view-list request with view and list from different design docs. 
handle_view_list_req(#httpd{method='GET', path_parts=[_, _, _, _, ListName, ViewDesignName, ViewName]}=Req, Db, DDoc) -> - handle_view_list(Req, Db, DDoc, ListName, {ViewDesignName, ViewName}, nil); + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + handle_view_list(Req, Db, DDoc, ListName, {ViewDesignName, ViewName}, Keys); handle_view_list_req(#httpd{method='GET'}=Req, _Db, _DDoc) -> send_error(Req, 404, <<"list_error">>, <<"Invalid path.">>); @@ -136,7 +170,7 @@ handle_view_list_req(#httpd{method='POST', % {Props2} = couch_httpd:json_body(Req), ReqBody = couch_httpd:body(Req), {Props2} = ?JSON_DECODE(ReqBody), - Keys = proplists:get_value(<<"keys">>, Props2, nil), + Keys = couch_util:get_value(<<"keys">>, Props2, nil), handle_view_list(Req#httpd{req_body=ReqBody}, Db, DDoc, ListName, {DesignName, ViewName}, Keys); handle_view_list_req(#httpd{method='POST', @@ -144,7 +178,7 @@ handle_view_list_req(#httpd{method='POST', % {Props2} = couch_httpd:json_body(Req), ReqBody = couch_httpd:body(Req), {Props2} = ?JSON_DECODE(ReqBody), - Keys = proplists:get_value(<<"keys">>, Props2, nil), + Keys = couch_util:get_value(<<"keys">>, Props2, nil), handle_view_list(Req#httpd{req_body=ReqBody}, Db, DDoc, ListName, {ViewDesignName, ViewName}, Keys); handle_view_list_req(#httpd{method='POST'}=Req, _Db, _DDoc) -> @@ -156,23 +190,23 @@ handle_view_list_req(Req, _Db, _DDoc) -> handle_view_list(Req, Db, DDoc, LName, {ViewDesignName, ViewName}, Keys) -> ViewDesignId = <<"_design/", ViewDesignName/binary>>, {ViewType, View, Group, QueryArgs} = couch_httpd_view:load_view(Req, Db, {ViewDesignId, ViewName}, Keys), - Etag = list_etag(Req, Db, Group, {couch_httpd:doc_etag(DDoc), Keys}), + Etag = list_etag(Req, Db, Group, View, {couch_httpd:doc_etag(DDoc), Keys}), couch_httpd:etag_respond(Req, Etag, fun() -> - output_list(ViewType, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) - end). + output_list(ViewType, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) + end). -list_etag(#httpd{user_ctx=UserCtx}=Req, Db, Group, More) -> +list_etag(#httpd{user_ctx=UserCtx}=Req, Db, Group, View, More) -> Accept = couch_httpd:header_value(Req, "Accept"), - couch_httpd_view:view_group_etag(Group, Db, {More, Accept, UserCtx#user_ctx.roles}). + couch_httpd_view:view_etag(Db, Group, View, {More, Accept, UserCtx#user_ctx.roles}). -output_list(map, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> - output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys); -output_list(reduce, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> - output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys). +output_list(map, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group); +output_list(reduce, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group). 
% next step: % use with_ddoc_proc/2 to make this simpler -output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> +output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> #view_query_args{ limit = Limit, skip = SkipCount @@ -188,12 +222,13 @@ output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> reduce_count = fun couch_view:reduce_to_count/1, start_response = StartListRespFun = make_map_start_resp_fun(QServer, Db, LName), send_row = make_map_send_row_fun(QServer) - }, + }, + CurrentSeq = Group#group.current_seq, {ok, _, FoldResult} = case Keys of nil -> - FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, Etag, Db, RowCount, ListFoldHelpers), - couch_view:fold(View, FoldlFun, FoldAccInit, + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, Etag, Db, CurrentSeq, RowCount, ListFoldHelpers), + couch_view:fold(View, FoldlFun, FoldAccInit, couch_httpd_view:make_key_options(QueryArgs)); Keys -> lists:foldl( @@ -202,27 +237,29 @@ output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> start_key = Key, end_key = Key }, - FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs2, Etag, Db, RowCount, ListFoldHelpers), + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs2, Etag, Db, CurrentSeq, RowCount, ListFoldHelpers), couch_view:fold(View, FoldlFun, FoldAcc, couch_httpd_view:make_key_options(QueryArgs2)) end, {ok, nil, FoldAccInit}, Keys) end, - finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, RowCount) + finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, CurrentSeq, RowCount) end). -output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> +output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> #view_query_args{ limit = Limit, skip = SkipCount, group_level = GroupLevel } = QueryArgs, + CurrentSeq = Group#group.current_seq, + couch_query_servers:with_ddoc_proc(DDoc, fun(QServer) -> StartListRespFun = make_reduce_start_resp_fun(QServer, Db, LName), SendListRowFun = make_reduce_send_row_fun(QServer, Db), {ok, GroupRowsFun, RespFun} = couch_httpd_view:make_reduce_fold_funs(Req, - GroupLevel, QueryArgs, Etag, + GroupLevel, QueryArgs, Etag, CurrentSeq, #reduce_fold_helper_funs{ start_response = StartListRespFun, send_row = SendListRowFun @@ -230,36 +267,36 @@ output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys) -> FoldAccInit = {Limit, SkipCount, undefined, []}, {ok, FoldResult} = case Keys of nil -> - couch_view:fold_reduce(View, RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + couch_view:fold_reduce(View, RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | couch_httpd_view:make_key_options(QueryArgs)]); Keys -> lists:foldl( fun(Key, {ok, FoldAcc}) -> couch_view:fold_reduce(View, RespFun, FoldAcc, - [{key_group_fun, GroupRowsFun} | + [{key_group_fun, GroupRowsFun} | couch_httpd_view:make_key_options( QueryArgs#view_query_args{start_key=Key, end_key=Key})] - ) + ) end, {ok, FoldAccInit}, Keys) end, - finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, null) + finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, CurrentSeq, null) end). 
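[Editor's note] With CurrentSeq threaded through, the head object handed to the list function's first call now carries the view group's update sequence (built by make_map_start_resp_fun just below); e.g. (values illustrative):

    %% Head = {[{<<"total_rows">>, 100}, {<<"offset">>, 0}, {<<"update_seq">>, 42}]}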
make_map_start_resp_fun(QueryServer, Db, LName) -> - fun(Req, Etag, TotalRows, Offset, _Acc) -> - Head = {[{<<"total_rows">>, TotalRows}, {<<"offset">>, Offset}]}, + fun(Req, Etag, TotalRows, Offset, _Acc, UpdateSeq) -> + Head = {[{<<"total_rows">>, TotalRows}, {<<"offset">>, Offset}, {<<"update_seq">>, UpdateSeq}]}, start_list_resp(QueryServer, LName, Req, Db, Head, Etag) end. make_reduce_start_resp_fun(QueryServer, Db, LName) -> - fun(Req2, Etag, _Acc) -> - start_list_resp(QueryServer, LName, Req2, Db, {[]}, Etag) + fun(Req2, Etag, _Acc, UpdateSeq) -> + start_list_resp(QueryServer, LName, Req2, Db, {[{<<"update_seq">>, UpdateSeq}]}, Etag) end. start_list_resp(QServer, LName, Req, Db, Head, Etag) -> JsonReq = couch_httpd_external:json_req_obj(Req, Db), - [<<"start">>,Chunks,JsonResp] = couch_query_servers:ddoc_proc_prompt(QServer, + [<<"start">>,Chunks,JsonResp] = couch_query_servers:ddoc_proc_prompt(QServer, [<<"lists">>, LName], [Head, JsonReq]), JsonResp2 = apply_etag(JsonResp, Etag), #extern_resp_args{ @@ -313,7 +350,7 @@ send_non_empty_chunk(Resp, Chunk) -> _ -> send_chunk(Resp, Chunk) end. -finish_list(Req, {Proc, _DDocId}, Etag, FoldResult, StartFun, TotalRows) -> +finish_list(Req, {Proc, _DDocId}, Etag, FoldResult, StartFun, CurrentSeq, TotalRows) -> FoldResult2 = case FoldResult of {Limit, SkipCount, Response, RowAcc} -> {Limit, SkipCount, Response, RowAcc, nil}; @@ -323,8 +360,8 @@ finish_list(Req, {Proc, _DDocId}, Etag, FoldResult, StartFun, TotalRows) -> case FoldResult2 of {_, _, undefined, _, _} -> {ok, Resp, BeginBody} = - render_head_for_empty_list(StartFun, Req, Etag, TotalRows), - [<<"end">>, Chunks] = couch_query_servers:proc_prompt(Proc, [<<"list_end">>]), + render_head_for_empty_list(StartFun, Req, Etag, CurrentSeq, TotalRows), + [<<"end">>, Chunks] = couch_query_servers:proc_prompt(Proc, [<<"list_end">>]), Chunk = BeginBody ++ ?b2l(?l2b(Chunks)), send_non_empty_chunk(Resp, Chunk); {_, _, Resp, stop, _} -> @@ -336,32 +373,17 @@ finish_list(Req, {Proc, _DDocId}, Etag, FoldResult, StartFun, TotalRows) -> last_chunk(Resp). -render_head_for_empty_list(StartListRespFun, Req, Etag, null) -> - StartListRespFun(Req, Etag, []); % for reduce -render_head_for_empty_list(StartListRespFun, Req, Etag, TotalRows) -> - StartListRespFun(Req, Etag, TotalRows, null, []). - - -% Maybe this is in the proplists API -% todo move to couch_util -json_apply_field(H, {L}) -> - json_apply_field(H, L, []). -json_apply_field({Key, NewValue}, [{Key, _OldVal} | Headers], Acc) -> - % drop matching keys - json_apply_field({Key, NewValue}, Headers, Acc); -json_apply_field({Key, NewValue}, [{OtherKey, OtherVal} | Headers], Acc) -> - % something else is next, leave it alone. - json_apply_field({Key, NewValue}, Headers, [{OtherKey, OtherVal} | Acc]); -json_apply_field({Key, NewValue}, [], Acc) -> - % end of list, add ours - {[{Key, NewValue}|Acc]}. +render_head_for_empty_list(StartListRespFun, Req, Etag, CurrentSeq, null) -> + StartListRespFun(Req, Etag, [], CurrentSeq); % for reduce +render_head_for_empty_list(StartListRespFun, Req, Etag, CurrentSeq, TotalRows) -> + StartListRespFun(Req, Etag, TotalRows, null, [], CurrentSeq). apply_etag({ExternalResponse}, CurrentEtag) -> % Here we embark on the delicate task of replacing or creating the % headers on the JsonResponse object. We need to control the Etag and % Vary headers. If the external function controls the Etag, we'd have to % run it to check for a match, which sort of defeats the purpose. 
-    case proplists:get_value(<<"headers">>, ExternalResponse, nil) of
+    case couch_util:get_value(<<"headers">>, ExternalResponse, nil) of
    nil ->
        % no JSON headers
        % add our Etag and Vary headers to the response
@@ -369,8 +391,8 @@ apply_etag({ExternalResponse}, CurrentEtag) ->
    JsonHeaders ->
        {[case Field of
        {<<"headers">>, JsonHeaders} -> % add our headers
-            JsonHeadersEtagged = json_apply_field({<<"Etag">>, CurrentEtag}, JsonHeaders),
-            JsonHeadersVaried = json_apply_field({<<"Vary">>, <<"Accept">>}, JsonHeadersEtagged),
+            JsonHeadersEtagged = couch_util:json_apply_field({<<"Etag">>, CurrentEtag}, JsonHeaders),
+            JsonHeadersVaried = couch_util:json_apply_field({<<"Vary">>, <<"Accept">>}, JsonHeadersEtagged),
            {<<"headers">>, JsonHeadersVaried};
        _ -> % skip non-header fields
            Field
diff --git a/src/couchdb/couch_httpd_stats_handlers.erl b/src/couchdb/couch_httpd_stats_handlers.erl
index 40444bf8..41aeaed0 100644
--- a/src/couchdb/couch_httpd_stats_handlers.erl
+++ b/src/couchdb/couch_httpd_stats_handlers.erl
@@ -29,7 +29,8 @@ handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod]}) ->
 handle_stats_req(#httpd{method='GET', path_parts=[_, Mod, Key]}=Req) ->
     flush(Req),
-    Stats = couch_stats_aggregator:get_json({?b2a(Mod), ?b2a(Key)}, range(Req)),
+    Stats = couch_stats_aggregator:get_json({list_to_atom(binary_to_list(Mod)),
+        list_to_atom(binary_to_list(Key))}, range(Req)),
     send_json(Req, {[{Mod, {[{Key, Stats}]}}]});

 handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod, _Key | _Extra]}) ->
@@ -39,7 +40,7 @@ handle_stats_req(Req) ->
     send_method_not_allowed(Req, "GET").

 range(Req) ->
-    case proplists:get_value("range", couch_httpd:qs(Req)) of
+    case couch_util:get_value("range", couch_httpd:qs(Req)) of
     undefined ->
         0;
     Value ->
@@ -47,7 +48,7 @@ range(Req) ->
     end.

 flush(Req) ->
-    case proplists:get_value("flush", couch_httpd:qs(Req)) of
+    case couch_util:get_value("flush", couch_httpd:qs(Req)) of
     "true" ->
         couch_stats_aggregator:collect_sample();
     _Else ->
diff --git a/src/couchdb/couch_httpd_vhost.erl b/src/couchdb/couch_httpd_vhost.erl
new file mode 100644
index 00000000..3dba2919
--- /dev/null
+++ b/src/couchdb/couch_httpd_vhost.erl
@@ -0,0 +1,402 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+
+
+-module(couch_httpd_vhost).
+-behaviour(gen_server).
+
+-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]).
+-export([code_change/3, terminate/2]).
+-export([match_vhost/1, urlsplit_netloc/2]).
+-export([redirect_to_vhost/2]).
+
+-include("couch_db.hrl").
+
+-define(SEPARATOR, $\/).
+-define(MATCH_ALL, {bind, '*'}).
+
+-record(vhosts, {
+    vhost_globals,
+    vhosts = [],
+    vhost_fun
+}).
+
+
+%% @doc The vhost manager.
+%% This gen_server keeps the state of vhosts added to the ini and tries to
+%% match the Host header (or forwarded host) against the rules built from
+%% the vhost list.
+%%
+%% Declaration of vhosts takes place in the configuration file:
+%%
+%% [vhosts]
+%% example.com = /example
+%% *.example.com = /example
+%%
+%% The first line will rewrite the request to display the content of the
+%% example database. This rule works only if the Host header is
+%% 'example.com' and won't work for CNAMEs. The second rule, on the other
+%% hand, matches all CNAMEs to the example db, so www.example.com or
+%% db.example.com will work.
+%%
+%% The wildcard ('*') should always be the last in the cnames:
+%%
+%% "*.db.example.com = /" will match all CNAMEs on top of db.example.com
+%% to the root of the machine.
+%%
+%%
+%% Rewriting Hosts to path
+%% -----------------------
+%%
+%% Like in the _rewrite handler you can match some variables and use
+%% them to create the target path. Some examples:
+%%
+%% [vhosts]
+%% *.example.com = /*
+%% :dbname.example.com = /:dbname
+%% :ddocname.:dbname.example.com = /:dbname/_design/:ddocname/_rewrite
+%%
+%% The first rule passes the wildcard as dbname, the second does the same
+%% but uses a named variable, and the third one allows you to use any app
+%% with :ddocname in any db with :dbname.
+%%
+%% You can also change the default function used to handle requests by
+%% changing the setting `redirect_vhost_handler` in the `httpd` section of
+%% the ini:
+%%
+%% [httpd]
+%% redirect_vhost_handler = {Module, Fun}
+%%
+%% The function takes 2 args: the mochiweb request object and the target
+%% path.
+
+start_link() ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+%% @doc Try to find a rule matching the current Host header. If a rule is
+%% found, rewrite the Mochiweb request; otherwise return the current request.
+match_vhost(MochiReq) ->
+    {ok, MochiReq1} = gen_server:call(couch_httpd_vhost, {match_vhost,
+            MochiReq}),
+
+    MochiReq1.
+
+
+%% --------------------
+%% gen_server functions
+%% --------------------
+
+init(_) ->
+    process_flag(trap_exit, true),
+
+    % init state
+    VHosts = make_vhosts(),
+    VHostGlobals = re:split(
+        couch_config:get("httpd", "vhost_global_handlers", ""),
+        ", ?",
+        [{return, list}]
+    ),
+
+    % Set vhost fun
+    DefaultVHostFun = "{couch_httpd_vhost, redirect_to_vhost}",
+    VHostFun = couch_httpd:make_arity_2_fun(
+        couch_config:get("httpd", "redirect_vhost_handler", DefaultVHostFun)
+    ),
+
+
+    Self = self(),
+    % register for changes in vhosts section
+    ok = couch_config:register(
+        fun("vhosts") ->
+            ok = gen_server:call(Self, vhosts_changed, infinity)
+        end
+    ),
+
+    % register for changes in vhost_global_handlers key
+    ok = couch_config:register(
+        fun("httpd", "vhost_global_handlers") ->
+            ok = gen_server:call(Self, vhosts_global_changed, infinity)
+        end
+    ),
+
+    ok = couch_config:register(
+        fun("httpd", "redirect_vhost_handler") ->
+            ok = gen_server:call(Self, fun_changed, infinity)
+        end
+    ),
+
+    {ok, #vhosts{
+        vhost_globals = VHostGlobals,
+        vhosts = VHosts,
+        vhost_fun = VHostFun}
+    }.
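For a concrete picture of the rule format, here is what one of the example rules above parses to; this is a hand-expanded sketch based on make_vhosts/0, parse_vhost/1 and split_path/1 further down (the port defaults to 80):

    %% ":dbname.example.com = /:dbname" becomes:
    {{[{bind,"dbname"}, "example", "com"], 80, []}, [{bind,"dbname"}]}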
+
+
+handle_call({match_vhost, MochiReq}, _From, State) ->
+    #vhosts{
+        vhost_globals = VHostGlobals,
+        vhosts = VHosts,
+        vhost_fun = Fun
+    } = State,
+
+    {"/" ++ VPath, Query, Fragment} = mochiweb_util:urlsplit_path(MochiReq:get(raw_path)),
+    VPathParts = string:tokens(VPath, "/"),
+
+    XHost = couch_config:get("httpd", "x_forwarded_host", "X-Forwarded-Host"),
+    VHost = case MochiReq:get_header_value(XHost) of
+        undefined ->
+            case MochiReq:get_header_value("Host") of
+                undefined -> [];
+                Value1 -> Value1
+            end;
+        Value -> Value
+    end,
+    {VHostParts, VhostPort} = split_host_port(VHost),
+    FinalMochiReq = case try_bind_vhost(VHosts, lists:reverse(VHostParts),
+            VhostPort, VPathParts) of
+        no_vhost_matched -> MochiReq;
+        {VhostTarget, NewPath} ->
+            case vhost_global(VHostGlobals, MochiReq) of
+                true ->
+                    MochiReq;
+                _Else ->
+                    NewPath1 = mochiweb_util:urlunsplit_path({NewPath, Query,
+                        Fragment}),
+                    MochiReq1 = mochiweb_request:new(MochiReq:get(socket),
+                        MochiReq:get(method),
+                        NewPath1,
+                        MochiReq:get(version),
+                        MochiReq:get(headers)),
+                    Fun(MochiReq1, VhostTarget)
+            end
+    end,
+    {reply, {ok, FinalMochiReq}, State};
+
+% update vhosts
+handle_call(vhosts_changed, _From, State) ->
+    {reply, ok, State#vhosts{vhosts= make_vhosts()}};
+
+
+% update vhosts_globals
+handle_call(vhosts_global_changed, _From, State) ->
+    VHostGlobals = re:split(
+        couch_config:get("httpd", "vhost_global_handlers", ""),
+        ", ?",
+        [{return, list}]
+    ),
+    {reply, ok, State#vhosts{vhost_globals=VHostGlobals}};
+% change fun
+handle_call(fun_changed, _From, State) ->
+    DefaultVHostFun = "{couch_httpd_vhost, redirect_to_vhost}",
+    VHostFun = couch_httpd:make_arity_2_fun(
+        couch_config:get("httpd", "redirect_vhost_handler", DefaultVHostFun)
+    ),
+    {reply, ok, State#vhosts{vhost_fun=VHostFun}}.
+
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+handle_info(_Msg, State) ->
+    {noreply, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+
+
+% default redirect vhost handler
+
+redirect_to_vhost(MochiReq, VhostTarget) ->
+    Path = MochiReq:get(raw_path),
+    Target = VhostTarget ++ Path,
+
+    ?LOG_DEBUG("Vhost Target: '~p'~n", [Target]),
+
+    Headers = mochiweb_headers:enter("x-couchdb-vhost-path", Path,
+        MochiReq:get(headers)),
+
+    % build a new mochiweb request
+    MochiReq1 = mochiweb_request:new(MochiReq:get(socket),
+        MochiReq:get(method),
+        Target,
+        MochiReq:get(version),
+        Headers),
+    % cleanup; this forces mochiweb to reparse the raw URI.
+    MochiReq1:cleanup(),
+
+    MochiReq1.
+
+%% Check whether the first path segment is a vhost global handler;
+%% if so, then it will not be rewritten, but will run as a normal couchdb request.
+%% Normally you'd use this for _uuids, _utils and a few of the others you want to
+%% keep available on vhosts. You can also use it to make databases 'global'.
+vhost_global(VhostGlobals, MochiReq) ->
+    RawUri = MochiReq:get(raw_path),
+    {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri),
+
+    Front = case couch_httpd:partition(Path) of
+        {"", "", ""} ->
+            "/"; % Special case the root url handler
+        {FirstPart, _, _} ->
+            FirstPart
+    end,
+    [true] == [true||V <- VhostGlobals, V == Front].
+
+%% bind host:
+%% first it tries to bind the port, then the hostname.
+try_bind_vhost([], _HostParts, _Port, _PathParts) ->
+    no_vhost_matched;
+try_bind_vhost([VhostSpec|Rest], HostParts, Port, PathParts) ->
+    {{VHostParts, VPort, VPath}, Path} = VhostSpec,
+    case bind_port(VPort, Port) of
+        ok ->
+            case bind_vhost(lists:reverse(VHostParts), HostParts, []) of
+                {ok, Bindings, Remainings} ->
+                    case bind_path(VPath, PathParts) of
+                        {ok, PathParts1} ->
+                            Path1 = make_target(Path, Bindings, Remainings, []),
+                            {make_path(Path1), make_path(PathParts1)};
+                        fail ->
+                            try_bind_vhost(Rest, HostParts, Port,
+                                PathParts)
+                    end;
+                fail -> try_bind_vhost(Rest, HostParts, Port, PathParts)
+            end;
+        fail -> try_bind_vhost(Rest, HostParts, Port, PathParts)
+    end.
+
+%% doc: build the new path from bindings. Bindings are query args
+%% (+ dynamic query rewritten if needed) and bindings found in
+%% the bind_path step.
+%% TODO: merge code with rewrite. But we need to make sure we are
+%% dealing with strings here.
+make_target([], _Bindings, _Remaining, Acc) ->
+    lists:reverse(Acc);
+make_target([?MATCH_ALL], _Bindings, Remaining, Acc) ->
+    Acc1 = lists:reverse(Acc) ++ Remaining,
+    Acc1;
+make_target([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) ->
+    Acc1 = lists:reverse(Acc) ++ Remaining,
+    Acc1;
+make_target([{bind, P}|Rest], Bindings, Remaining, Acc) ->
+    P2 = case couch_util:get_value({bind, P}, Bindings) of
+        undefined -> "undefined";
+        P1 -> P1
+    end,
+    make_target(Rest, Bindings, Remaining, [P2|Acc]);
+make_target([P|Rest], Bindings, Remaining, Acc) ->
+    make_target(Rest, Bindings, Remaining, [P|Acc]).
+
+%% bind port
+bind_port(Port, Port) -> ok;
+bind_port(_,_) -> fail.
+
+%% bind vhost
+bind_vhost([],[], Bindings) -> {ok, Bindings, []};
+bind_vhost([?MATCH_ALL], [], _Bindings) -> fail;
+bind_vhost([?MATCH_ALL], Rest, Bindings) -> {ok, Bindings, Rest};
+bind_vhost([], _HostParts, _Bindings) -> fail;
+bind_vhost([{bind, Token}|Rest], [Match|RestHost], Bindings) ->
+    bind_vhost(Rest, RestHost, [{{bind, Token}, Match}|Bindings]);
+bind_vhost([Cname|Rest], [Cname|RestHost], Bindings) ->
+    bind_vhost(Rest, RestHost, Bindings);
+bind_vhost(_, _, _) -> fail.
+
+%% bind path
+bind_path([], PathParts) ->
+    {ok, PathParts};
+bind_path(_VPathParts, []) ->
+    fail;
+bind_path([Path|VRest],[Path|Rest]) ->
+    bind_path(VRest, Rest);
+bind_path(_, _) ->
+    fail.
+
+% utilities
+
+
+%% create vhost list from ini
+make_vhosts() ->
+    lists:foldl(fun({Vhost, Path}, Acc) ->
+        [{parse_vhost(Vhost), split_path(Path)}|Acc]
+    end, [], couch_config:get("vhosts")).
+
+
+parse_vhost(Vhost) ->
+    case urlsplit_netloc(Vhost, []) of
+        {[], Path} ->
+            {make_spec("*", []), 80, Path};
+        {HostPort, []} ->
+            {H, P} = split_host_port(HostPort),
+            H1 = make_spec(H, []),
+            {H1, P, []};
+        {HostPort, Path} ->
+            {H, P} = split_host_port(HostPort),
+            H1 = make_spec(H, []),
+            {H1, P, string:tokens(Path, "/")}
+    end.
+
+
+split_host_port(HostAsString) ->
+    case string:rchr(HostAsString, $:) of
+        0 ->
+            {split_host(HostAsString), 80};
+        N ->
+            HostPart = string:substr(HostAsString, 1, N-1),
+            case (catch erlang:list_to_integer(string:substr(HostAsString,
+                            N+1, length(HostAsString)))) of
+                {'EXIT', _} ->
+                    {split_host(HostAsString), 80};
+                Port ->
+                    {split_host(HostPart), Port}
+            end
+    end.
+
+split_host(HostAsString) ->
+    string:tokens(HostAsString, "\.").
+
+split_path(Path) ->
+    make_spec(string:tokens(Path, "/"), []).
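A few hand-evaluated sanity checks of the helpers above (assuming the string:substr/3 form of split_host_port/1 as written here):

    %% split_host_port("db.example.com:5984") -> {["db","example","com"], 5984}
    %% split_host_port("db.example.com")      -> {["db","example","com"], 80}
    %% parse_vhost(":dbname.example.com")     -> {[{bind,"dbname"},"example","com"], 80, []}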
+ + +make_spec([], Acc) -> + lists:reverse(Acc); +make_spec([""|R], Acc) -> + make_spec(R, Acc); +make_spec(["*"|R], Acc) -> + make_spec(R, [?MATCH_ALL|Acc]); +make_spec([P|R], Acc) -> + P1 = parse_var(P), + make_spec(R, [P1|Acc]). + + +parse_var(P) -> + case P of + ":" ++ Var -> + {bind, Var}; + _ -> P + end. + + +% mochiweb doesn't export it. +urlsplit_netloc("", Acc) -> + {lists:reverse(Acc), ""}; +urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# -> + {lists:reverse(Acc), Rest}; +urlsplit_netloc([C | Rest], Acc) -> + urlsplit_netloc(Rest, [C | Acc]). + +make_path(Parts) -> + "/" ++ string:join(Parts,[?SEPARATOR]). diff --git a/src/couchdb/couch_httpd_view.erl b/src/couchdb/couch_httpd_view.erl index 6419ca55..8656c43e 100644 --- a/src/couchdb/couch_httpd_view.erl +++ b/src/couchdb/couch_httpd_view.erl @@ -15,9 +15,9 @@ -export([handle_view_req/3,handle_temp_view_req/2]). --export([get_stale_type/1, get_reduce_type/1, parse_view_params/3]). --export([make_view_fold_fun/6, finish_view_fold/4, view_row_obj/3]). --export([view_group_etag/2, view_group_etag/3, make_reduce_fold_funs/5]). +-export([parse_view_params/3]). +-export([make_view_fold_fun/7, finish_view_fold/4, finish_view_fold/5, view_row_obj/3]). +-export([view_etag/3, view_etag/4, make_reduce_fold_funs/6]). -export([design_doc_view/5, parse_bool_param/1, doc_member/2]). -export([make_key_options/1, load_view/4]). @@ -26,6 +26,8 @@ start_json_response/2, start_json_response/3, end_json_response/1, send_chunked_error/2]). +-import(couch_db,[get_update_seq/1]). + design_doc_view(Req, Db, DName, ViewName, Keys) -> DesignId = <<"_design/", DName/binary>>, Stale = get_stale_type(Req), @@ -55,12 +57,14 @@ design_doc_view(Req, Db, DName, ViewName, Keys) -> handle_view_req(#httpd{method='GET', path_parts=[_, _, DName, _, ViewName]}=Req, Db, _DDoc) -> - design_doc_view(Req, Db, DName, ViewName, nil); + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + design_doc_view(Req, Db, DName, ViewName, Keys); handle_view_req(#httpd{method='POST', path_parts=[_, _, DName, _, ViewName]}=Req, Db, _DDoc) -> + couch_httpd:validate_ctype(Req, "application/json"), {Fields} = couch_httpd:json_body_obj(Req), - case proplists:get_value(<<"keys">>, Fields, nil) of + case couch_util:get_value(<<"keys">>, Fields, nil) of nil -> Fmt = "POST to view ~p/~p in database ~p with no keys member.", ?LOG_DEBUG(Fmt, [DName, ViewName, Db]), @@ -75,14 +79,16 @@ handle_view_req(Req, _Db, _DDoc) -> send_method_not_allowed(Req, "GET,POST,HEAD"). 
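With qs_json_value/3 in the GET clause above, multi-key fetches no longer require a POST; the keys can ride in the query string as a JSON array. An illustrative request (database and view names invented, URL-encoding elided):

    GET /db/_design/ddoc/_view/by_tag?keys=["erlang","couchdb"]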
handle_temp_view_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_db:check_is_admin(Db), couch_stats_collector:increment({httpd, temporary_view_reads}), {Props} = couch_httpd:json_body_obj(Req), - Language = proplists:get_value(<<"language">>, Props, <<"javascript">>), - {DesignOptions} = proplists:get_value(<<"options">>, Props, {[]}), - MapSrc = proplists:get_value(<<"map">>, Props), - Keys = proplists:get_value(<<"keys">>, Props, nil), + Language = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + {DesignOptions} = couch_util:get_value(<<"options">>, Props, {[]}), + MapSrc = couch_util:get_value(<<"map">>, Props), + Keys = couch_util:get_value(<<"keys">>, Props, nil), Reduce = get_reduce_type(Req), - case proplists:get_value(<<"reduce">>, Props, null) of + case couch_util:get_value(<<"reduce">>, Props, null) of null -> QueryArgs = parse_view_params(Req, Keys, map), {ok, View, Group} = couch_view:get_temp_map_view(Db, Language, @@ -108,14 +114,14 @@ output_map_view(Req, View, Group, Db, QueryArgs, nil) -> limit = Limit, skip = SkipCount } = QueryArgs, - CurrentEtag = view_group_etag(Group, Db), + CurrentEtag = view_etag(Db, Group, View), couch_httpd:etag_respond(Req, CurrentEtag, fun() -> {ok, RowCount} = couch_view:get_row_count(View), - FoldlFun = make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, RowCount, #view_fold_helper_funs{reduce_count=fun couch_view:reduce_to_count/1}), + FoldlFun = make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, Group#group.current_seq, RowCount, #view_fold_helper_funs{reduce_count=fun couch_view:reduce_to_count/1}), FoldAccInit = {Limit, SkipCount, undefined, []}, - {ok, LastReduce, FoldResult} = couch_view:fold(View, + {ok, LastReduce, FoldResult} = couch_view:fold(View, FoldlFun, FoldAccInit, make_key_options(QueryArgs)), - finish_view_fold(Req, RowCount, + finish_view_fold(Req, RowCount, couch_view:reduce_to_count(LastReduce), FoldResult) end); @@ -124,23 +130,23 @@ output_map_view(Req, View, Group, Db, QueryArgs, Keys) -> limit = Limit, skip = SkipCount } = QueryArgs, - CurrentEtag = view_group_etag(Group, Db, Keys), + CurrentEtag = view_etag(Db, Group, View, Keys), couch_httpd:etag_respond(Req, CurrentEtag, fun() -> {ok, RowCount} = couch_view:get_row_count(View), FoldAccInit = {Limit, SkipCount, undefined, []}, - {LastReduce, FoldResult} = lists:foldl( - fun(Key, {_, FoldAcc}) -> - FoldlFun = make_view_fold_fun(Req, - QueryArgs#view_query_args{ - }, CurrentEtag, Db, RowCount, + {LastReduce, FoldResult} = lists:foldl(fun(Key, {_, FoldAcc}) -> + FoldlFun = make_view_fold_fun(Req, QueryArgs#view_query_args{}, + CurrentEtag, Db, Group#group.current_seq, RowCount, #view_fold_helper_funs{ reduce_count = fun couch_view:reduce_to_count/1 }), - {ok, LastReduce, FoldResult} = couch_view:fold(View, FoldlFun, FoldAcc, - make_key_options(QueryArgs#view_query_args{start_key=Key, end_key=Key})), - {LastReduce, FoldResult} - end, {{[],[]}, FoldAccInit}, Keys), - finish_view_fold(Req, RowCount, couch_view:reduce_to_count(LastReduce), FoldResult) + {ok, LastReduce, FoldResult} = couch_view:fold(View, FoldlFun, + FoldAcc, make_key_options( + QueryArgs#view_query_args{start_key=Key, end_key=Key})), + {LastReduce, FoldResult} + end, {{[],[]}, FoldAccInit}, Keys), + finish_view_fold(Req, RowCount, couch_view:reduce_to_count(LastReduce), + FoldResult, [{update_seq,Group#group.current_seq}]) end). 
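Following the handle_temp_view_req changes above, temporary views are now restricted to server admins and must declare a JSON content type. An illustrative request (the map source is invented):

    POST /db/_temp_view
    Content-Type: application/json

    {"language": "javascript", "map": "function(doc) { emit(doc._id, null); }"}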
output_reduce_view(Req, Db, View, Group, QueryArgs, nil) -> @@ -149,9 +155,11 @@ output_reduce_view(Req, Db, View, Group, QueryArgs, nil) -> skip = Skip, group_level = GroupLevel } = QueryArgs, - CurrentEtag = view_group_etag(Group, Db), + CurrentEtag = view_etag(Db, Group, View), couch_httpd:etag_respond(Req, CurrentEtag, fun() -> - {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, QueryArgs, CurrentEtag, #reduce_fold_helper_funs{}), + {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, + QueryArgs, CurrentEtag, Group#group.current_seq, + #reduce_fold_helper_funs{}), FoldAccInit = {Limit, Skip, undefined, []}, {ok, {_, _, Resp, _}} = couch_view:fold_reduce(View, RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | @@ -165,12 +173,15 @@ output_reduce_view(Req, Db, View, Group, QueryArgs, Keys) -> skip = Skip, group_level = GroupLevel } = QueryArgs, - CurrentEtag = view_group_etag(Group, Db, Keys), + CurrentEtag = view_etag(Db, Group, View, Keys), couch_httpd:etag_respond(Req, CurrentEtag, fun() -> - {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, QueryArgs, CurrentEtag, #reduce_fold_helper_funs{}), + {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, + QueryArgs, CurrentEtag, Group#group.current_seq, + #reduce_fold_helper_funs{}), {Resp, _RedAcc3} = lists:foldl( fun(Key, {Resp, RedAcc}) -> - % run the reduce once for each key in keys, with limit etc reapplied for each key + % run the reduce once for each key in keys, with limit etc + % reapplied for each key FoldAccInit = {Limit, Skip, Resp, RedAcc}, {_, {_, _, Resp2, RedAcc2}} = couch_view:fold_reduce(View, RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | @@ -180,7 +191,7 @@ output_reduce_view(Req, Db, View, Group, QueryArgs, Keys) -> {Resp2, RedAcc2} end, {undefined, []}, Keys), % Start with no comma - finish_reduce_fold(Req, Resp) + finish_reduce_fold(Req, Resp, [{update_seq,Group#group.current_seq}]) end). reverse_key_default(?MIN_STR) -> ?MAX_STR; @@ -188,28 +199,28 @@ reverse_key_default(?MAX_STR) -> ?MIN_STR; reverse_key_default(Key) -> Key. get_stale_type(Req) -> - list_to_atom(couch_httpd:qs_value(Req, "stale", "nil")). + list_to_existing_atom(couch_httpd:qs_value(Req, "stale", "nil")). get_reduce_type(Req) -> - list_to_atom(couch_httpd:qs_value(Req, "reduce", "true")). + list_to_existing_atom(couch_httpd:qs_value(Req, "reduce", "true")). 
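The switch to list_to_existing_atom/1 in both query-string readers above closes a small resource leak: atoms are never garbage collected, so minting a fresh atom per request from a user-controlled query string could grow the atom table without bound. A shell sketch of the new failure mode (the bogus value is invented):

    1> list_to_existing_atom("ok").
    ok
    2> list_to_existing_atom("some_bogus_stale_value").
    ** exception error: bad argument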
load_view(Req, Db, {ViewDesignId, ViewName}, Keys) -> - Stale = couch_httpd_view:get_stale_type(Req), - Reduce = couch_httpd_view:get_reduce_type(Req), + Stale = get_stale_type(Req), + Reduce = get_reduce_type(Req), case couch_view:get_map_view(Db, ViewDesignId, ViewName, Stale) of {ok, View, Group} -> - QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, map), + QueryArgs = parse_view_params(Req, Keys, map), {map, View, Group, QueryArgs}; {not_found, _Reason} -> case couch_view:get_reduce_view(Db, ViewDesignId, ViewName, Stale) of {ok, ReduceView, Group} -> case Reduce of false -> - QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, map_red), + QueryArgs = parse_view_params(Req, Keys, map_red), MapView = couch_view:extract_map_view(ReduceView), {map, MapView, Group, QueryArgs}; _ -> - QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, reduce), + QueryArgs = parse_view_params(Req, Keys, reduce), {reduce, ReduceView, Group, QueryArgs} end; {not_found, Reason} -> @@ -226,8 +237,8 @@ parse_view_params(Req, Keys, ViewType) -> QueryList = couch_httpd:qs(Req), QueryParams = lists:foldl(fun({K, V}, Acc) -> - parse_view_param(K, V) ++ Acc - end, [], QueryList), + parse_view_param(K, V) ++ Acc + end, [], QueryList), IsMultiGet = (Keys =/= nil), Args = #view_query_args{ view_type=ViewType, @@ -236,20 +247,20 @@ parse_view_params(Req, Keys, ViewType) -> QueryArgs = lists:foldl(fun({K, V}, Args2) -> validate_view_query(K, V, Args2) end, Args, lists:reverse(QueryParams)), % Reverse to match QS order. - + warn_on_empty_key_range(QueryArgs), GroupLevel = QueryArgs#view_query_args.group_level, case {ViewType, GroupLevel, IsMultiGet} of - {reduce, exact, true} -> - QueryArgs; - {reduce, _, false} -> - QueryArgs; - {reduce, _, _} -> - % we can simplify code if we just drop this error message. - Msg = <<"Multi-key fetchs for reduce " - "view must include `group=true`">>, - throw({query_parse_error, Msg}); - _ -> - QueryArgs + {reduce, exact, true} -> + QueryArgs; + {reduce, _, false} -> + QueryArgs; + {reduce, _, _} -> + % we can simplify code if we just drop this error message. + Msg = <<"Multi-key fetchs for reduce " + "view must include `group=true`">>, + throw({query_parse_error, Msg}); + _ -> + QueryArgs end, QueryArgs. 
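The reduce/multi-get check at the end of parse_view_params/3 boils down to this (illustrative requests):

    %% POST {"keys": [...]} against a reduce view:
    %%   with ?group=true   -> accepted (group_level is exact)
    %%   without group=true -> {query_parse_error, ...}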
@@ -258,22 +269,37 @@ parse_view_param("", _) -> parse_view_param("key", Value) -> JsonKey = ?JSON_DECODE(Value), [{start_key, JsonKey}, {end_key, JsonKey}]; +% TODO: maybe deprecate startkey_docid parse_view_param("startkey_docid", Value) -> [{start_docid, ?l2b(Value)}]; +parse_view_param("start_key_doc_id", Value) -> + [{start_docid, ?l2b(Value)}]; +% TODO: maybe deprecate endkey_docid parse_view_param("endkey_docid", Value) -> [{end_docid, ?l2b(Value)}]; +parse_view_param("end_key_doc_id", Value) -> + [{end_docid, ?l2b(Value)}]; +% TODO: maybe deprecate startkey parse_view_param("startkey", Value) -> [{start_key, ?JSON_DECODE(Value)}]; +parse_view_param("start_key", Value) -> + [{start_key, ?JSON_DECODE(Value)}]; +% TODO: maybe deprecate endkey parse_view_param("endkey", Value) -> [{end_key, ?JSON_DECODE(Value)}]; +parse_view_param("end_key", Value) -> + [{end_key, ?JSON_DECODE(Value)}]; parse_view_param("limit", Value) -> [{limit, parse_positive_int_param(Value)}]; parse_view_param("count", _Value) -> throw({query_parse_error, <<"Query parameter 'count' is now 'limit'.">>}); parse_view_param("stale", "ok") -> [{stale, ok}]; +parse_view_param("stale", "update_after") -> + [{stale, update_after}]; parse_view_param("stale", _Value) -> - throw({query_parse_error, <<"stale only available as stale=ok">>}); + throw({query_parse_error, + <<"stale only available as stale=ok or as stale=update_after">>}); parse_view_param("update", _Value) -> throw({query_parse_error, <<"update=false is now stale=ok">>}); parse_view_param("descending", Value) -> @@ -300,25 +326,45 @@ parse_view_param("callback", _) -> parse_view_param(Key, Value) -> [{extra, {Key, Value}}]. +warn_on_empty_key_range(#view_query_args{start_key=undefined}) -> + ok; +warn_on_empty_key_range(#view_query_args{end_key=undefined}) -> + ok; +warn_on_empty_key_range(#view_query_args{start_key=A, end_key=A}) -> + ok; +warn_on_empty_key_range(#view_query_args{ + start_key=StartKey, end_key=EndKey, direction=Dir}) -> + case {Dir, couch_view:less_json(StartKey, EndKey)} of + {fwd, false} -> + throw({query_parse_error, + <<"No rows can match your key range, reverse your ", + "start_key and end_key or set descending=true">>}); + {rev, true} -> + throw({query_parse_error, + <<"No rows can match your key range, reverse your ", + "start_key and end_key or set descending=false">>}); + _ -> ok + end. 
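Despite its name, warn_on_empty_key_range/1 throws rather than warns: it rejects key ranges that can never match given the sort direction. Illustrative query strings (keys are JSON-encoded):

    %% ?start_key="b"&end_key="a"                 -> query_parse_error
    %% ?start_key="b"&end_key="a"&descending=true -> accepted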
+ validate_view_query(start_key, Value, Args) -> case Args#view_query_args.multi_get of - true -> - Msg = <<"Query parameter `start_key` is " - "not compatible with multi-get">>, - throw({query_parse_error, Msg}); - _ -> - Args#view_query_args{start_key=Value} + true -> + Msg = <<"Query parameter `start_key` is " + "not compatible with multi-get">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{start_key=Value} end; validate_view_query(start_docid, Value, Args) -> Args#view_query_args{start_docid=Value}; validate_view_query(end_key, Value, Args) -> case Args#view_query_args.multi_get of - true-> - Msg = <<"Query parameter `end_key` is " - "not compatible with multi-get">>, - throw({query_parse_error, Msg}); - _ -> - Args#view_query_args{end_key=Value} + true-> + Msg = <<"Query parameter `end_key` is " + "not compatible with multi-get">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{end_key=Value} end; validate_view_query(end_docid, Value, Args) -> Args#view_query_args{end_docid=Value}; @@ -326,19 +372,23 @@ validate_view_query(limit, Value, Args) -> Args#view_query_args{limit=Value}; validate_view_query(list, Value, Args) -> Args#view_query_args{list=Value}; +validate_view_query(stale, ok, Args) -> + Args#view_query_args{stale=ok}; +validate_view_query(stale, update_after, Args) -> + Args#view_query_args{stale=update_after}; validate_view_query(stale, _, Args) -> Args; validate_view_query(descending, true, Args) -> case Args#view_query_args.direction of - rev -> Args; % Already reversed - fwd -> - Args#view_query_args{ - direction = rev, - start_docid = - reverse_key_default(Args#view_query_args.start_docid), - end_docid = - reverse_key_default(Args#view_query_args.end_docid) - } + rev -> Args; % Already reversed + fwd -> + Args#view_query_args{ + direction = rev, + start_docid = + reverse_key_default(Args#view_query_args.start_docid), + end_docid = + reverse_key_default(Args#view_query_args.end_docid) + } end; validate_view_query(descending, false, Args) -> Args; % Ignore default condition @@ -346,38 +396,41 @@ validate_view_query(skip, Value, Args) -> Args#view_query_args{skip=Value}; validate_view_query(group_level, Value, Args) -> case Args#view_query_args.view_type of - reduce -> - Args#view_query_args{group_level=Value}; - _ -> - Msg = <<"Invalid URL parameter 'group' or " - " 'group_level' for non-reduce view.">>, - throw({query_parse_error, Msg}) + reduce -> + Args#view_query_args{group_level=Value}; + _ -> + Msg = <<"Invalid URL parameter 'group' or " + " 'group_level' for non-reduce view.">>, + throw({query_parse_error, Msg}) end; validate_view_query(inclusive_end, Value, Args) -> Args#view_query_args{inclusive_end=Value}; +validate_view_query(reduce, false, Args) -> + Args; validate_view_query(reduce, _, Args) -> case Args#view_query_args.view_type of - map -> - Msg = <<"Invalid URL parameter `reduce` for map view.">>, - throw({query_parse_error, Msg}); - _ -> - Args + map -> + Msg = <<"Invalid URL parameter `reduce` for map view.">>, + throw({query_parse_error, Msg}); + _ -> + Args end; validate_view_query(include_docs, true, Args) -> case Args#view_query_args.view_type of - reduce -> - Msg = <<"Query parameter `include_docs` " - "is invalid for reduce views.">>, - throw({query_parse_error, Msg}); - _ -> - Args#view_query_args{include_docs=true} + reduce -> + Msg = <<"Query parameter `include_docs` " + "is invalid for reduce views.">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{include_docs=true} end; +% Use the 
view_query_args record's default value validate_view_query(include_docs, _Value, Args) -> Args; validate_view_query(extra, _Value, Args) -> Args. -make_view_fold_fun(Req, QueryArgs, Etag, Db, TotalViewCount, HelperFuns) -> +make_view_fold_fun(Req, QueryArgs, Etag, Db, UpdateSeq, TotalViewCount, HelperFuns) -> #view_fold_helper_funs{ start_response = StartRespFun, send_row = SendRowFun, @@ -388,7 +441,8 @@ make_view_fold_fun(Req, QueryArgs, Etag, Db, TotalViewCount, HelperFuns) -> include_docs = IncludeDocs } = QueryArgs, - fun({{Key, DocId}, Value}, OffsetReds, {AccLimit, AccSkip, Resp, RowFunAcc}) -> + fun({{Key, DocId}, Value}, OffsetReds, + {AccLimit, AccSkip, Resp, RowFunAcc}) -> case {AccLimit, AccSkip, Resp} of {0, _, _} -> % we've done "limit" rows, stop foldling @@ -400,7 +454,7 @@ make_view_fold_fun(Req, QueryArgs, Etag, Db, TotalViewCount, HelperFuns) -> % rendering the first row, first we start the response Offset = ReduceCountFun(OffsetReds), {ok, Resp2, RowFunAcc0} = StartRespFun(Req, Etag, - TotalViewCount, Offset, RowFunAcc), + TotalViewCount, Offset, RowFunAcc, UpdateSeq), {Go, RowFunAcc2} = SendRowFun(Resp2, Db, {{Key, DocId}, Value}, IncludeDocs, RowFunAcc0), {Go, {AccLimit - 1, 0, Resp2, RowFunAcc2}}; @@ -412,7 +466,7 @@ make_view_fold_fun(Req, QueryArgs, Etag, Db, TotalViewCount, HelperFuns) -> end end. -make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, HelperFuns) -> +make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, UpdateSeq, HelperFuns) -> #reduce_fold_helper_funs{ start_response = StartRespFun, send_row = SendRowFun @@ -438,7 +492,7 @@ make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, HelperFuns) -> (_Key, Red, {AccLimit, 0, undefined, RowAcc0}) when GroupLevel == 0 -> % we haven't started responding yet and group=false - {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0), + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), {Go, RowAcc2} = SendRowFun(Resp2, {null, Red}, RowAcc), {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; (_Key, Red, {AccLimit, 0, Resp, RowAcc}) when GroupLevel == 0 -> @@ -449,18 +503,20 @@ make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, HelperFuns) -> (Key, Red, {AccLimit, 0, undefined, RowAcc0}) when is_integer(GroupLevel), is_list(Key) -> % group_level and we haven't responded yet - {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0), - {Go, RowAcc2} = SendRowFun(Resp2, {lists:sublist(Key, GroupLevel), Red}, RowAcc), + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, + {lists:sublist(Key, GroupLevel), Red}, RowAcc), {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; (Key, Red, {AccLimit, 0, Resp, RowAcc}) when is_integer(GroupLevel), is_list(Key) -> % group_level and we've already started the response - {Go, RowAcc2} = SendRowFun(Resp, {lists:sublist(Key, GroupLevel), Red}, RowAcc), + {Go, RowAcc2} = SendRowFun(Resp, + {lists:sublist(Key, GroupLevel), Red}, RowAcc), {Go, {AccLimit - 1, 0, Resp, RowAcc2}}; (Key, Red, {AccLimit, 0, undefined, RowAcc0}) -> % group=true and we haven't responded yet - {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0), + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), {Go, RowAcc2} = SendRowFun(Resp2, {Key, Red}, RowAcc), {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; (Key, Red, {AccLimit, 0, Resp, RowAcc}) -> @@ -470,13 +526,13 @@ make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, HelperFuns) -> end, {ok, GroupRowsFun, RespFun}. 
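For orientation, the accumulator threaded through both fold funs above has the shape sketched below; this is a reading aid, not new behaviour:

    %% {Limit, Skip, Resp, RowAcc}
    %% Skip counts down before any row is sent, Limit counts down to 0
    %% (which stops the fold), and Resp stays undefined until the first
    %% emitted row opens the chunked response via start_response.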
-apply_default_helper_funs(#view_fold_helper_funs{ - start_response = StartResp, - send_row = SendRow -}=Helpers) -> - +apply_default_helper_funs( + #view_fold_helper_funs{ + start_response = StartResp, + send_row = SendRow + }=Helpers) -> StartResp2 = case StartResp of - undefined -> fun json_view_start_resp/5; + undefined -> fun json_view_start_resp/6; _ -> StartResp end, @@ -491,12 +547,13 @@ apply_default_helper_funs(#view_fold_helper_funs{ }; -apply_default_helper_funs(#reduce_fold_helper_funs{ - start_response = StartResp, - send_row = SendRow -}=Helpers) -> +apply_default_helper_funs( + #reduce_fold_helper_funs{ + start_response = StartResp, + send_row = SendRow + }=Helpers) -> StartResp2 = case StartResp of - undefined -> fun json_reduce_start_resp/3; + undefined -> fun json_reduce_start_resp/4; _ -> StartResp end, @@ -511,7 +568,7 @@ apply_default_helper_funs(#reduce_fold_helper_funs{ }. make_key_options(#view_query_args{direction = Dir}=QueryArgs) -> - [{dir,Dir} | make_start_key_option(QueryArgs) ++ + [{dir,Dir} | make_start_key_option(QueryArgs) ++ make_end_key_option(QueryArgs)]. make_start_key_option( @@ -538,10 +595,19 @@ make_end_key_option( inclusive_end = false}) -> [{end_key_gt, {EndKey,reverse_key_default(EndDocId)}}]. -json_view_start_resp(Req, Etag, TotalViewCount, Offset, _Acc) -> +json_view_start_resp(Req, Etag, TotalViewCount, Offset, _Acc, UpdateSeq) -> {ok, Resp} = start_json_response(Req, 200, [{"Etag", Etag}]), - BeginBody = io_lib:format("{\"total_rows\":~w,\"offset\":~w,\"rows\":[\r\n", - [TotalViewCount, Offset]), + BeginBody = case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + io_lib:format( + "{\"total_rows\":~w,\"update_seq\":~w," + "\"offset\":~w,\"rows\":[\r\n", + [TotalViewCount, UpdateSeq, Offset]); + _Else -> + io_lib:format( + "{\"total_rows\":~w,\"offset\":~w,\"rows\":[\r\n", + [TotalViewCount, Offset]) + end, {ok, Resp, BeginBody}. send_json_view_row(Resp, Db, {{Key, DocId}, Value}, IncludeDocs, RowFront) -> @@ -549,37 +615,41 @@ send_json_view_row(Resp, Db, {{Key, DocId}, Value}, IncludeDocs, RowFront) -> send_chunk(Resp, RowFront ++ ?JSON_ENCODE(JsonObj)), {ok, ",\r\n"}. -json_reduce_start_resp(Req, Etag, _Acc0) -> +json_reduce_start_resp(Req, Etag, _Acc0, UpdateSeq) -> {ok, Resp} = start_json_response(Req, 200, [{"Etag", Etag}]), - {ok, Resp, "{\"rows\":[\r\n"}. + case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + {ok, Resp, io_lib:format("{\"update_seq\":~w,\"rows\":[\r\n",[UpdateSeq])}; + _Else -> + {ok, Resp, "{\"rows\":[\r\n"} + end. send_json_reduce_row(Resp, {Key, Value}, RowFront) -> send_chunk(Resp, RowFront ++ ?JSON_ENCODE({[{key, Key}, {value, Value}]})), {ok, ",\r\n"}. -view_group_etag(Group, Db) -> - view_group_etag(Group, Db, nil). +view_etag(Db, Group, View) -> + view_etag(Db, Group, View, nil). -view_group_etag(#group{sig=Sig,current_seq=CurrentSeq}, _Db, Extra) -> - % ?LOG_ERROR("Group ~p",[Group]), - % This is not as granular as it could be. - % If there are updates to the db that do not effect the view index, - % they will change the Etag. For more granular Etags we'd need to keep - % track of the last Db seq that caused an index change. - couch_httpd:make_etag({Sig, CurrentSeq, Extra}). 
+view_etag(Db, Group, {reduce, _, _, View}, Extra) -> + view_etag(Db, Group, View, Extra); +view_etag(Db, Group, {temp_reduce, View}, Extra) -> + view_etag(Db, Group, View, Extra); +view_etag(_Db, #group{sig=Sig}, #view{update_seq=UpdateSeq, purge_seq=PurgeSeq}, Extra) -> + couch_httpd:make_etag({Sig, UpdateSeq, PurgeSeq, Extra}). % the view row has an error view_row_obj(_Db, {{Key, error}, Value}, _IncludeDocs) -> {[{key, Key}, {error, Value}]}; % include docs in the view output view_row_obj(Db, {{Key, DocId}, {Props}}, true) -> - Rev = case proplists:get_value(<<"_rev">>, Props) of + Rev = case couch_util:get_value(<<"_rev">>, Props) of undefined -> nil; Rev0 -> couch_doc:parse_rev(Rev0) end, - IncludeId = proplists:get_value(<<"_id">>, Props, DocId), + IncludeId = couch_util:get_value(<<"_id">>, Props, DocId), view_row_with_doc(Db, {{Key, DocId}, {Props}}, {IncludeId, Rev}); view_row_obj(Db, {{Key, DocId}, Value}, true) -> view_row_with_doc(Db, {{Key, DocId}, Value}, {DocId, nil}); @@ -593,15 +663,17 @@ view_row_with_doc(Db, {{Key, DocId}, Value}, IdRev) -> doc_member(Db, {DocId, Rev}) -> ?LOG_DEBUG("Include Doc: ~p ~p", [DocId, Rev]), case (catch couch_httpd_db:couch_doc_open(Db, DocId, Rev, [])) of - #doc{} = Doc -> - JsonDoc = couch_doc:to_json_obj(Doc, []), - [{doc, JsonDoc}]; - _Else -> - [{doc, null}] + #doc{} = Doc -> + JsonDoc = couch_doc:to_json_obj(Doc, []), + [{doc, JsonDoc}]; + _Else -> + [{doc, null}] end. - finish_view_fold(Req, TotalRows, Offset, FoldResult) -> + finish_view_fold(Req, TotalRows, Offset, FoldResult, []). + +finish_view_fold(Req, TotalRows, Offset, FoldResult, Fields) -> case FoldResult of {_, _, undefined, _} -> % nothing found in the view or keys, nothing has been returned @@ -610,7 +682,7 @@ finish_view_fold(Req, TotalRows, Offset, FoldResult) -> {total_rows, TotalRows}, {offset, Offset}, {rows, []} - ]}); + ] ++ Fields}); {_, _, Resp, _} -> % end the view send_chunk(Resp, "\r\n]}"), @@ -618,11 +690,14 @@ finish_view_fold(Req, TotalRows, Offset, FoldResult) -> end. finish_reduce_fold(Req, Resp) -> + finish_reduce_fold(Req, Resp, []). + +finish_reduce_fold(Req, Resp, Fields) -> case Resp of undefined -> send_json(Req, 200, {[ {rows, []} - ]}); + ] ++ Fields}); Resp -> send_chunk(Resp, "\r\n]}"), end_json_response(Resp) @@ -630,11 +705,11 @@ finish_reduce_fold(Req, Resp) -> parse_bool_param(Val) -> case string:to_lower(Val) of - "true" -> true; - "false" -> false; - _ -> - Msg = io_lib:format("Invalid boolean parameter: ~p", [Val]), - throw({query_parse_error, ?l2b(Msg)}) + "true" -> true; + "false" -> false; + _ -> + Msg = io_lib:format("Invalid boolean parameter: ~p", [Val]), + throw({query_parse_error, ?l2b(Msg)}) end. parse_int_param(Val) -> diff --git a/src/couchdb/couch_js_functions.hrl b/src/couchdb/couch_js_functions.hrl new file mode 100644 index 00000000..67f06686 --- /dev/null +++ b/src/couchdb/couch_js_functions.hrl @@ -0,0 +1,148 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. 
+ +-define(AUTH_DB_DOC_VALIDATE_FUNCTION, <<" + function(newDoc, oldDoc, userCtx) { + if (newDoc._deleted === true) { + // allow deletes by admins and matching users + // without checking the other fields + if ((userCtx.roles.indexOf('_admin') !== -1) || + (userCtx.name == oldDoc.name)) { + return; + } else { + throw({forbidden: 'Only admins may delete other user docs.'}); + } + } + + if ((oldDoc && oldDoc.type !== 'user') || newDoc.type !== 'user') { + throw({forbidden : 'doc.type must be user'}); + } // we only allow user docs for now + + if (!newDoc.name) { + throw({forbidden: 'doc.name is required'}); + } + + if (newDoc.roles && !isArray(newDoc.roles)) { + throw({forbidden: 'doc.roles must be an array'}); + } + + if (newDoc._id !== ('org.couchdb.user:' + newDoc.name)) { + throw({ + forbidden: 'Doc ID must be of the form org.couchdb.user:name' + }); + } + + if (oldDoc) { // validate all updates + if (oldDoc.name !== newDoc.name) { + throw({forbidden: 'Usernames can not be changed.'}); + } + } + + if (newDoc.password_sha && !newDoc.salt) { + throw({ + forbidden: 'Users with password_sha must have a salt.' + + 'See /_utils/script/couch.js for example code.' + }); + } + + if (userCtx.roles.indexOf('_admin') === -1) { + if (oldDoc) { // validate non-admin updates + if (userCtx.name !== newDoc.name) { + throw({ + forbidden: 'You may only update your own user document.' + }); + } + // validate role updates + var oldRoles = oldDoc.roles.sort(); + var newRoles = newDoc.roles.sort(); + + if (oldRoles.length !== newRoles.length) { + throw({forbidden: 'Only _admin may edit roles'}); + } + + for (var i = 0; i < oldRoles.length; i++) { + if (oldRoles[i] !== newRoles[i]) { + throw({forbidden: 'Only _admin may edit roles'}); + } + } + } else if (newDoc.roles.length > 0) { + throw({forbidden: 'Only _admin may set roles'}); + } + } + + // no system roles in users db + for (var i = 0; i < newDoc.roles.length; i++) { + if (newDoc.roles[i][0] === '_') { + throw({ + forbidden: + 'No system roles (starting with underscore) in users db.' + }); + } + } + + // no system names as names + if (newDoc.name[0] === '_') { + throw({forbidden: 'Username may not start with underscore.'}); + } + } +">>). 
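For reference, a minimal user document that passes the validation function above; the salt and password_sha values are placeholders, not real credentials:

    {
      "_id": "org.couchdb.user:jan",
      "type": "user",
      "name": "jan",
      "roles": [],
      "salt": "<hex salt, placeholder>",
      "password_sha": "<sha1(password + salt), placeholder>"
    }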
+ + +-define(REP_DB_DOC_VALIDATE_FUN, <<" + function(newDoc, oldDoc, userCtx) { + function reportError(error_msg) { + log('Error writing document `' + newDoc._id + + '` to replicator DB: ' + error_msg); + throw({forbidden: error_msg}); + } + + var isReplicator = (userCtx.roles.indexOf('_replicator') >= 0); + if (oldDoc && !newDoc._deleted && !isReplicator) { + reportError('Only the replicator can edit replication documents.'); + } + + if (newDoc.user_ctx) { + var user_ctx = newDoc.user_ctx; + + if (typeof user_ctx !== 'object') { + reportError('The user_ctx property must be an object.'); + } + + if (!(user_ctx.name === null || + (typeof user_ctx.name === 'undefined') || + ((typeof user_ctx.name === 'string') && + user_ctx.name.length > 0))) { + reportError('The name property of the user_ctx must be a ' + + 'non-empty string.'); + } + + if (user_ctx.roles && !isArray(user_ctx.roles)) { + reportError('The roles property of the user_ctx must be ' + + 'an array of strings.'); + } + + if (user_ctx.roles) { + for (var i = 0; i < user_ctx.roles.length; i++) { + var role = user_ctx.roles[i]; + + if (typeof role !== 'string' || role.length === 0) { + reportError('Each role must be a non-empty string.'); + } + if (role[0] === '_') { + reportError('System roles (starting with underscore) ' + + 'are not allowed.'); + } + } + } + } + } +">>). diff --git a/src/couchdb/couch_key_tree.erl b/src/couchdb/couch_key_tree.erl index d5944119..2e3b3e3c 100644 --- a/src/couchdb/couch_key_tree.erl +++ b/src/couchdb/couch_key_tree.erl @@ -16,100 +16,97 @@ -export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2, get_all_leafs_full/1,stem/2,map_leafs/2]). -% a key tree looks like this: -% Tree -> [] or [{Key, Value, ChildTree} | SiblingTree] -% ChildTree -> Tree -% SiblingTree -> [] or [{SiblingKey, Value, Tree} | Tree] -% And each Key < SiblingKey - +% Tree::term() is really a tree(), but we don't want to require R13B04 yet +-type branch() :: {Key::term(), Value::term(), Tree::term()}. +-type path() :: {Start::pos_integer(), branch()}. +-type tree() :: [branch()]. % sorted by key % partial trees arranged by how much they are cut off. -merge(A, B) -> - {Merged, HasConflicts} = - lists:foldl( - fun(InsertTree, {AccTrees, AccConflicts}) -> - {ok, Merged, Conflicts} = merge_one(AccTrees, InsertTree, [], false), - {Merged, Conflicts or AccConflicts} - end, - {A, false}, B), - if HasConflicts or - ((length(Merged) =/= length(A)) and (length(Merged) =/= length(B))) -> +-spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}. +merge(Paths, Path) -> + {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false), + if HasConflicts -> + Conflicts = conflicts; + (length(Merged) =/= length(Paths)) and (length(Merged) =/= 1) -> Conflicts = conflicts; true -> Conflicts = no_conflicts end, {lists:sort(Merged), Conflicts}. +-spec merge_one(Original::[path()], Inserted::path(), [path()], boolean()) -> + {ok, Merged::[path()], NewConflicts::boolean()}. 
merge_one([], Insert, OutAcc, ConflictsAcc) -> {ok, [Insert | OutAcc], ConflictsAcc}; -merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, OutAcc, ConflictsAcc) -> - if Start =< StartInsert -> - StartA = Start, - StartB = StartInsert, - TreeA = Tree, - TreeB = TreeInsert; - true -> - StartB = Start, - StartA = StartInsert, - TreeB = Tree, - TreeA = TreeInsert - end, - case merge_at([TreeA], StartB - StartA, TreeB) of - {ok, [CombinedTrees], Conflicts} -> - merge_one(Rest, {StartA, CombinedTrees}, OutAcc, Conflicts or ConflictsAcc); +merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, Acc, HasConflicts) -> + case merge_at([Tree], StartInsert - Start, [TreeInsert]) of + {ok, [Merged], Conflicts} -> + MergedStart = lists:min([Start, StartInsert]), + merge_one(Rest, {MergedStart, Merged}, Acc, Conflicts or HasConflicts); no -> - merge_one(Rest, {StartB, TreeB}, [{StartA, TreeA} | OutAcc], ConflictsAcc) + AccOut = [{Start, Tree} | Acc], + merge_one(Rest, {StartInsert, TreeInsert}, AccOut, HasConflicts) end. +-spec merge_at(tree(), Place::integer(), tree()) -> + {ok, Merged::tree(), HasConflicts::boolean()} | no. +merge_at(_Ours, _Place, []) -> + no; merge_at([], _Place, _Insert) -> no; -merge_at([{Key, Value, SubTree}|Sibs], 0, {InsertKey, InsertValue, InsertSubTree}) -> - if Key == InsertKey -> - {Merge, Conflicts} = merge_simple(SubTree, InsertSubTree), - {ok, [{Key, Value, Merge} | Sibs], Conflicts}; - true -> - case merge_at(Sibs, 0, {InsertKey, InsertValue, InsertSubTree}) of - {ok, Merged, Conflicts} -> - {ok, [{Key, Value, SubTree} | Merged], Conflicts}; - no -> - no - end - end; -merge_at([{Key, Value, SubTree}|Sibs], Place, Insert) -> - case merge_at(SubTree, Place - 1,Insert) of +merge_at([{Key, Value, SubTree}|Sibs], Place, InsertTree) when Place > 0 -> + % inserted starts later than committed, need to drill into committed subtree + case merge_at(SubTree, Place - 1, InsertTree) of {ok, Merged, Conflicts} -> {ok, [{Key, Value, Merged} | Sibs], Conflicts}; no -> - case merge_at(Sibs, Place, Insert) of + case merge_at(Sibs, Place, InsertTree) of {ok, Merged, Conflicts} -> {ok, [{Key, Value, SubTree} | Merged], Conflicts}; no -> no end + end; +merge_at(OurTree, Place, [{Key, Value, SubTree}]) when Place < 0 -> + % inserted starts earlier than committed, need to drill into insert subtree + case merge_at(OurTree, Place + 1, SubTree) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, Merged}], Conflicts}; + no -> + no + end; +merge_at([{Key, Value, SubTree}|Sibs], 0, [{Key, _Value, InsertSubTree}]) -> + {Merged, Conflicts} = merge_simple(SubTree, InsertSubTree), + {ok, [{Key, Value, Merged} | Sibs], Conflicts}; +merge_at([{OurKey, _, _} | _], 0, [{Key, _, _}]) when OurKey > Key -> + % siblings keys are ordered, no point in continuing + no; +merge_at([Tree | Sibs], 0, InsertTree) -> + case merge_at(Sibs, 0, InsertTree) of + {ok, Merged, Conflicts} -> + {ok, [Tree | Merged], Conflicts}; + no -> + no end. % key tree functions + +-spec merge_simple(tree(), tree()) -> {Merged::tree(), NewConflicts::boolean()}. 
merge_simple([], B) -> {B, false}; merge_simple(A, []) -> {A, false}; -merge_simple([ATree | ANextTree], [BTree | BNextTree]) -> - {AKey, AValue, ASubTree} = ATree, - {BKey, _BValue, BSubTree} = BTree, - if - AKey == BKey -> - %same key - {MergedSubTree, Conflict1} = merge_simple(ASubTree, BSubTree), - {MergedNextTree, Conflict2} = merge_simple(ANextTree, BNextTree), - {[{AKey, AValue, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; - AKey < BKey -> - {MTree, _} = merge_simple(ANextTree, [BTree | BNextTree]), - {[ATree | MTree], true}; - true -> - {MTree, _} = merge_simple([ATree | ANextTree], BNextTree), - {[BTree | MTree], true} - end. +merge_simple([{Key, Value, SubA} | NextA], [{Key, _, SubB} | NextB]) -> + {MergedSubTree, Conflict1} = merge_simple(SubA, SubB), + {MergedNextTree, Conflict2} = merge_simple(NextA, NextB), + {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; +merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B -> + {Merged, _} = merge_simple(Next, Insert), + {[Tree | Merged], true}; +merge_simple(Ours, [Tree | Next]) -> + {Merged, _} = merge_simple(Ours, Next), + {[Tree | Merged], true}. find_missing(_Tree, []) -> []; @@ -159,7 +156,7 @@ remove_leafs(Trees, Keys) -> fun({PathPos, Path},TreeAcc) -> [SingleTree] = lists:foldl( fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), - {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]), + {NewTrees, _} = merge(TreeAcc, {PathPos + 1 - length(Path), SingleTree}), NewTrees end, [], FilteredPaths), {NewTree, RemovedKeys}. @@ -290,7 +287,7 @@ map(Fun, [{Pos, Tree}|Rest]) -> map_simple(_Fun, _Pos, []) -> []; map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> - Value2 = Fun({Pos, Key}, Value, + Value2 = Fun({Pos, Key}, Value, if SubTree == [] -> leaf; true -> branch end), [{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)]. @@ -321,7 +318,7 @@ stem(Trees, Limit) -> fun({PathPos, Path},TreeAcc) -> [SingleTree] = lists:foldl( fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), - {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]), + {NewTrees, _} = merge(TreeAcc, {PathPos + 1 - length(Path), SingleTree}), NewTrees end, [], Paths2). diff --git a/src/couchdb/couch_log.erl b/src/couchdb/couch_log.erl index 5a45c207..dfd2d178 100644 --- a/src/couchdb/couch_log.erl +++ b/src/couchdb/couch_log.erl @@ -50,14 +50,23 @@ init([]) -> fun("log", "file") -> ?MODULE:stop(); ("log", "level") -> + ?MODULE:stop(); + ("log", "include_sasl") -> ?MODULE:stop() end), Filename = couch_config:get("log", "file", "couchdb.log"), - Level = couch_config:get("log", "level", "info"), + Level = level_integer(list_to_atom(couch_config:get("log", "level", "info"))), + Sasl = list_to_atom(couch_config:get("log", "include_sasl", "true")), + + case ets:info(?MODULE) of + undefined -> ets:new(?MODULE, [named_table]); + _ -> ok + end, + ets:insert(?MODULE, {level, Level}), {ok, Fd} = file:open(Filename, [append]), - {ok, {Fd, level_integer(list_to_atom(Level))}}. + {ok, {Fd, Level, Sasl}}. debug_on() -> get_level_integer() =< ?LEVEL_DEBUG. @@ -72,29 +81,35 @@ get_level() -> level_atom(get_level_integer()). get_level_integer() -> - catch gen_event:call(error_logger, couch_log, get_level_integer). + try + ets:lookup_element(?MODULE, level, 2) + catch error:badarg -> + ?LEVEL_ERROR + end. set_level_integer(Int) -> gen_event:call(error_logger, couch_log, {set_level_integer, Int}). 
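The ets table turns the hot-path level check into a local lookup instead of a synchronous call into the error_logger gen_event, so disabled ?LOG_DEBUG/?LOG_INFO macros stay cheap. A shell sketch with the default [log] level = info:

    1> couch_log:debug_on().
    false
    2> couch_log:get_level().
    info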
-handle_event({error_report, _, {Pid, couch_error, {Format, Args}}}, {Fd, _LogLevel}=State) -> - log(Fd, Pid, error, Format, Args), - {ok, State}; -handle_event({error_report, _, {Pid, _, _}}=Event, {Fd, _LogLevel}=State) -> - log(Fd, Pid, error, "~p", [Event]), - {ok, State}; -handle_event({error, _, {Pid, Format, Args}}, {Fd, _LogLevel}=State) -> +handle_event({Pid, couch_error, {Format, Args}}, {Fd, _LogLevel, _Sasl}=State) -> log(Fd, Pid, error, Format, Args), {ok, State}; -handle_event({info_report, _, {Pid, couch_info, {Format, Args}}}, {Fd, LogLevel}=State) +handle_event({Pid, couch_info, {Format, Args}}, {Fd, LogLevel, _Sasl}=State) when LogLevel =< ?LEVEL_INFO -> log(Fd, Pid, info, Format, Args), {ok, State}; -handle_event({info_report, _, {Pid, couch_debug, {Format, Args}}}, {Fd, LogLevel}=State) +handle_event({Pid, couch_debug, {Format, Args}}, {Fd, LogLevel, _Sasl}=State) when LogLevel =< ?LEVEL_DEBUG -> log(Fd, Pid, debug, Format, Args), {ok, State}; -handle_event({_, _, {Pid, _, _}}=Event, {Fd, LogLevel}=State) +handle_event({error_report, _, {Pid, _, _}}=Event, {Fd, _LogLevel, Sasl}=State) +when Sasl =/= false -> + log(Fd, Pid, error, "~p", [Event]), + {ok, State}; +handle_event({error, _, {Pid, Format, Args}}, {Fd, _LogLevel, Sasl}=State) +when Sasl =/= false -> + log(Fd, Pid, error, Format, Args), + {ok, State}; +handle_event({_, _, {Pid, _, _}}=Event, {Fd, LogLevel, _Sasl}=State) when LogLevel =< ?LEVEL_TMI -> % log every remaining event if tmi! log(Fd, Pid, tmi, "~p", [Event]), @@ -102,10 +117,9 @@ when LogLevel =< ?LEVEL_TMI -> handle_event(_Event, State) -> {ok, State}. -handle_call(get_level_integer, {_Fd, LogLevel}=State) -> - {ok, LogLevel, State}; -handle_call({set_level_integer, NewLevel}, {Fd, _LogLevel}) -> - {ok, ok, {Fd, NewLevel}}. +handle_call({set_level_integer, NewLevel}, {Fd, _LogLevel, Sasl}) -> + ets:insert(?MODULE, {level, NewLevel}), + {ok, ok, {Fd, NewLevel, Sasl}}. handle_info(_Info, State) -> {ok, State}. @@ -113,7 +127,7 @@ handle_info(_Info, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -terminate(_Arg, {Fd, _LoggingLevel}) -> +terminate(_Arg, {Fd, _LoggingLevel, _Sasl}) -> file:close(Fd). log(Fd, Pid, Level, Format, Args) -> @@ -121,11 +135,11 @@ log(Fd, Pid, Level, Format, Args) -> ok = io:format("[~s] [~p] ~s~n", [Level, Pid, Msg]), % dump to console too Msg2 = re:replace(lists:flatten(Msg),"\\r\\n|\\r|\\n", "\r\n", [global, {return, list}]), - ok = io:format(Fd, "[~s] [~s] [~p] ~s\r~n\r~n", [httpd_util:rfc1123_date(), Level, Pid, Msg2]). + ok = io:format(Fd, "[~s] [~s] [~p] ~s\r~n", [httpd_util:rfc1123_date(), Level, Pid, Msg2]). read(Bytes, Offset) -> LogFileName = couch_config:get("log", "file"), - LogFileSize = couch_util:file_read_size(LogFileName), + LogFileSize = filelib:file_size(LogFileName), {ok, Fd} = file:open(LogFileName, [read]), Start = lists:max([LogFileSize - Bytes, 0]) + Offset, diff --git a/src/couchdb/couch_native_process.erl b/src/couchdb/couch_native_process.erl index 65e4e131..b512f712 100644 --- a/src/couchdb/couch_native_process.erl +++ b/src/couchdb/couch_native_process.erl @@ -1,16 +1,16 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); -% you may not use this file except in compliance with the License. +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
% % You may obtain a copy of the License at % http://www.apache.org/licenses/LICENSE-2.0 % -% Unless required by applicable law or agreed to in writing, -% software distributed under the License is distributed on an -% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -% either express or implied. +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +% either express or implied. % % See the License for the specific language governing permissions -% and limitations under the License. +% and limitations under the License. % % This file drew much inspiration from erlview, which was written by and % copyright Michael McDaniel [http://autosys.us], and is also under APL 2.0 @@ -25,14 +25,14 @@ % % fun({Doc}) -> % % Below, we emit a single record - the _id as key, null as value -% DocId = proplists:get_value(Doc, <<"_id">>, null), +% DocId = couch_util:get_value(Doc, <<"_id">>, null), % Emit(DocId, null) % end. % % which should be roughly the same as the javascript: % emit(doc._id, null); % -% This module exposes enough functions such that a native erlang server can +% This module exposes enough functions such that a native erlang server can % act as a fully-fleged view server, but no 'helper' functions specifically % for simplifying your erlang view code. It is expected other third-party % extensions will evolve which offer useful layers on top of this view server @@ -40,7 +40,8 @@ -module(couch_native_process). -behaviour(gen_server). --export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2]). +-export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, + handle_info/2]). -export([set_timeout/2, prompt/2]). -define(STATE, native_proc_state). @@ -76,23 +77,27 @@ handle_call({prompt, Data}, _From, State) -> throw:{error, Why} -> {State, [<<"error">>, Why, Why]} end, - + case Resp of {error, Reason} -> Msg = io_lib:format("couch native server error: ~p", [Reason]), {reply, [<<"error">>, <<"native_query_server">>, list_to_binary(Msg)], NewState}; [<<"error">> | Rest] -> - Msg = io_lib:format("couch native server error: ~p", [Rest]), + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) {reply, [<<"error">> | Rest], NewState}; [<<"fatal">> | Rest] -> - Msg = io_lib:format("couch native server error: ~p", [Rest]), + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) {stop, fatal, [<<"error">> | Rest], NewState}; Resp -> {reply, Resp, NewState} end. -handle_cast(_Msg, State) -> {noreply, State}. -handle_info(_Msg, State) -> {noreply, State}. +handle_cast(foo, State) -> {noreply, State}. +handle_info({'EXIT',_,normal}, State) -> {noreply, State}; +handle_info({'EXIT',_,Reason}, State) -> + {stop, Reason, State}. terminate(_Reason, _State) -> ok. code_change(_OldVersion, State, _Extra) -> {ok, State}. @@ -168,12 +173,12 @@ ddoc(State, {DDoc}, [FunPath, Args]) -> % load fun from the FunPath BFun = lists:foldl(fun (Key, {Props}) when is_list(Props) -> - proplists:get_value(Key, Props, nil); - (Key, Fun) when is_binary(Fun) -> + couch_util:get_value(Key, Props, nil); + (_Key, Fun) when is_binary(Fun) -> Fun; - (Key, nil) -> + (_Key, nil) -> throw({error, not_found}); - (Key, Fun) -> + (_Key, _Fun) -> throw({error, malformed_ddoc}) end, {DDoc}, FunPath), ddoc(State, makefun(State, BFun, {DDoc}), FunPath, Args). 
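As context for the module above: the native Erlang view server is opt-in and is conventionally wired up through the ini as below (treat the exact stanza as an assumption; it follows the usual query-server configuration convention):

    [native_query_servers]
    erlang = {couch_native_process, start_link, []}

Design documents declaring "language": "erlang" are then dispatched to this process.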
@@ -238,7 +243,7 @@ load_ddoc(DDocs, DDocId) -> try dict:fetch(DDocId, DDocs) of {DDoc} -> {DDoc} catch - _:Else -> throw({error, ?l2b(io_lib:format("Native Query Server missing DDoc with Id: ~s",[DDocId]))}) + _:_Else -> throw({error, ?l2b(io_lib:format("Native Query Server missing DDoc with Id: ~s",[DDocId]))}) end. bindings(State, Sig) -> @@ -300,7 +305,7 @@ bindings(State, Sig, DDoc) -> {'FoldRows', FoldRows} ], case DDoc of - {Props} -> + {_Props} -> Bindings ++ [{'DDoc', DDoc}]; _Else -> Bindings end. @@ -308,11 +313,11 @@ bindings(State, Sig, DDoc) -> % thanks to erlview, via: % http://erlang.org/pipermail/erlang-questions/2003-November/010544.html makefun(State, Source) -> - Sig = erlang:md5(Source), + Sig = couch_util:md5(Source), BindFuns = bindings(State, Sig), {Sig, makefun(State, Source, BindFuns)}. makefun(State, Source, {DDoc}) -> - Sig = erlang:md5(lists:flatten([Source, term_to_binary(DDoc)])), + Sig = couch_util:md5(lists:flatten([Source, term_to_binary(DDoc)])), BindFuns = bindings(State, Sig, {DDoc}), {Sig, makefun(State, Source, BindFuns)}; makefun(_State, Source, BindFuns) when is_list(BindFuns) -> @@ -365,7 +370,7 @@ start_list_resp(Self, Sig) -> undefined -> {[{<<"headers">>, {[]}}]}; CurrHdrs -> CurrHdrs end, - Chunks = + Chunks = case erlang:get(Sig) of undefined -> []; CurrChunks -> CurrChunks diff --git a/src/couchdb/couch_os_daemons.erl b/src/couchdb/couch_os_daemons.erl new file mode 100644 index 00000000..bdd39997 --- /dev/null +++ b/src/couchdb/couch_os_daemons.erl @@ -0,0 +1,363 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_os_daemons). +-behaviour(gen_server). + +-export([start_link/0, info/0, info/1, config_change/2]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-include("couch_db.hrl"). + +-record(daemon, { + port, + name, + cmd, + kill, + status=running, + cfg_patterns=[], + errors=[], + buf=[] +}). + +-define(PORT_OPTIONS, [stream, {line, 1024}, binary, exit_status, hide]). +-define(TIMEOUT, 5000). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +info() -> + info([]). + +info(Options) -> + gen_server:call(?MODULE, {daemon_info, Options}). + +config_change(Section, Key) -> + gen_server:cast(?MODULE, {config_change, Section, Key}). + +init(_) -> + process_flag(trap_exit, true), + ok = couch_config:register(fun couch_os_daemons:config_change/2), + Table = ets:new(?MODULE, [protected, set, {keypos, #daemon.port}]), + reload_daemons(Table), + {ok, Table}. + +terminate(_Reason, Table) -> + [stop_port(D) || D <- ets:tab2list(Table)], + ok. + +handle_call({daemon_info, Options}, _From, Table) when is_list(Options) -> + case lists:member(table, Options) of + true -> + {reply, {ok, ets:tab2list(Table)}, Table}; + _ -> + {reply, {ok, Table}, Table} + end; +handle_call(Msg, From, Table) -> + ?LOG_ERROR("Unknown call message to ~p from ~p: ~p", [?MODULE, From, Msg]), + {stop, error, Table}. 
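+
+% Overview of the daemon lifecycle handled below: a #daemon{} record stays
+% status=running until its config entry disappears (stopping), one of its
+% registered config patterns changes (restarting), or it crashes too often
+% (halted, see should_halt/1). A halted daemon is not rebooted until the
+% configuration changes again.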
+
+handle_cast({config_change, Sect, Key}, Table) ->
+    restart_daemons(Table, Sect, Key),
+    case Sect of
+        "os_daemons" -> reload_daemons(Table);
+        _ -> ok
+    end,
+    {noreply, Table};
+handle_cast(stop, Table) ->
+    {stop, normal, Table};
+handle_cast(Msg, Table) ->
+    ?LOG_ERROR("Unknown cast message to ~p: ~p", [?MODULE, Msg]),
+    {stop, error, Table}.
+
+handle_info({'EXIT', Port, Reason}, Table) ->
+    case ets:lookup(Table, Port) of
+        [] ->
+            ?LOG_INFO("Port ~p exited after stopping: ~p~n", [Port, Reason]);
+        [#daemon{status=stopping}] ->
+            true = ets:delete(Table, Port);
+        [#daemon{name=Name, status=restarting, errors=Errs}=D] ->
+            ?LOG_INFO("Daemon ~p restarting after config change.", [Name]),
+            true = ets:delete(Table, Port),
+            {ok, Port2} = start_port(D#daemon.cmd),
+            true = ets:insert(Table, D#daemon{
+                port=Port2, status=running, kill=undefined, errors=Errs, buf=[]
+            });
+        [#daemon{name=Name, status=halted}] ->
+            ?LOG_ERROR("Halted daemon process: ~p", [Name]);
+        [D] ->
+            ?LOG_ERROR("Invalid port state at exit: ~p", [D])
+    end,
+    {noreply, Table};
+handle_info({Port, closed}, Table) ->
+    handle_info({Port, {exit_status, closed}}, Table);
+handle_info({Port, {exit_status, Status}}, Table) ->
+    case ets:lookup(Table, Port) of
+        [] ->
+            ?LOG_ERROR("Unknown port ~p exiting ~p", [Port, Status]),
+            {stop, {error, unknown_port_died, Status}, Table};
+        [#daemon{name=Name, status=restarting, errors=Errors}=D] ->
+            ?LOG_INFO("Daemon ~p restarting after config change.", [Name]),
+            true = ets:delete(Table, Port),
+            {ok, Port2} = start_port(D#daemon.cmd),
+            true = ets:insert(Table, D#daemon{
+                port=Port2, kill=undefined, errors=Errors, buf=[]
+            }),
+            {noreply, Table};
+        [#daemon{status=stopping}=D] ->
+            % The configuration changed and this daemon is no
+            % longer needed.
+            ?LOG_DEBUG("Port ~p shut down.", [D#daemon.name]),
+            true = ets:delete(Table, Port),
+            {noreply, Table};
+        [D] ->
+            % Port died for an unknown reason. Check to see if it has
+            % died too many times or if we should boot it back up.
+            case should_halt([now() | D#daemon.errors]) of
+                {true, _} ->
+                    % Halting the process. We won't try to reboot
+                    % until the configuration changes.
+                    Fmt = "Daemon ~p halted with exit_status ~p",
+                    ?LOG_ERROR(Fmt, [D#daemon.name, Status]),
+                    D2 = D#daemon{status=halted, errors=nil, buf=nil},
+                    true = ets:insert(Table, D2),
+                    {noreply, Table};
+                {false, Errors} ->
+                    % We're guessing it was a random error, and this daemon
+                    % has behaved well so far, so we'll give it another chance.
+                    Fmt = "Daemon ~p is being rebooted after exit_status ~p",
+                    ?LOG_INFO(Fmt, [D#daemon.name, Status]),
+                    true = ets:delete(Table, Port),
+                    {ok, Port2} = start_port(D#daemon.cmd),
+                    true = ets:insert(Table, D#daemon{
+                        port=Port2, kill=undefined, errors=Errors, buf=[]
+                    }),
+                    {noreply, Table}
+            end;
+        _Else ->
+            throw(error)
+    end;
+handle_info({Port, {data, {noeol, Data}}}, Table) ->
+    [#daemon{buf=Buf}=D] = ets:lookup(Table, Port),
+    true = ets:insert(Table, D#daemon{buf=[Data | Buf]}),
+    {noreply, Table};
+handle_info({Port, {data, {eol, Data}}}, Table) ->
+    [#daemon{buf=Buf}=D] = ets:lookup(Table, Port),
+    Line = lists:reverse(Buf, Data),
+    % The first line echoed back is the kill command
+    % for when we go to get rid of the port. Lines after
+    % that are considered part of the stdio API.
+    case D#daemon.kill of
+        undefined ->
+            true = ets:insert(Table, D#daemon{kill=?b2l(Line), buf=[]});
+        _Else ->
+            D2 = case (catch ?JSON_DECODE(Line)) of
+                {invalid_json, Rejected} ->
+                    ?LOG_ERROR("Ignoring OS daemon request: ~p", [Rejected]),
+                    D;
+                JSON ->
+                    {ok, D3} = handle_port_message(D, JSON),
+                    D3
+            end,
+            true = ets:insert(Table, D2#daemon{buf=[]})
+    end,
+    {noreply, Table};
+handle_info({Port, Error}, Table) ->
+    ?LOG_ERROR("Unexpected message from port ~p: ~p", [Port, Error]),
+    [D] = ets:lookup(Table, Port),
+    stop_port(D),
+    true = ets:insert(Table, D#daemon{status=restarting, buf=nil}),
+    {noreply, Table};
+handle_info(Msg, Table) ->
+    ?LOG_ERROR("Unexpected info message to ~p: ~p", [?MODULE, Msg]),
+    {stop, error, Table}.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+% Internal API
+
+%
+% Port management helpers
+%
+
+start_port(Command) ->
+    PrivDir = couch_util:priv_dir(),
+    Spawnkiller = filename:join(PrivDir, "couchspawnkillable"),
+    Port = open_port({spawn, Spawnkiller ++ " " ++ Command}, ?PORT_OPTIONS),
+    {ok, Port}.
+
+
+stop_port(#daemon{port=Port, kill=undefined}=D) ->
+    ?LOG_ERROR("Stopping daemon without a kill command: ~p", [D#daemon.name]),
+    catch port_close(Port);
+stop_port(#daemon{port=Port}=D) ->
+    ?LOG_DEBUG("Stopping daemon: ~p", [D#daemon.name]),
+    os:cmd(D#daemon.kill),
+    catch port_close(Port).
+
+
+handle_port_message(#daemon{port=Port}=Daemon, [<<"get">>, Section]) ->
+    KVs = couch_config:get(Section),
+    Data = lists:map(fun({K, V}) -> {?l2b(K), ?l2b(V)} end, KVs),
+    Json = iolist_to_binary(?JSON_ENCODE({Data})),
+    port_command(Port, <<Json/binary, "\n">>),
+    {ok, Daemon};
+handle_port_message(#daemon{port=Port}=Daemon, [<<"get">>, Section, Key]) ->
+    Value = case couch_config:get(Section, Key, null) of
+        null -> null;
+        String -> ?l2b(String)
+    end,
+    Json = iolist_to_binary(?JSON_ENCODE(Value)),
+    port_command(Port, <<Json/binary, "\n">>),
+    {ok, Daemon};
+handle_port_message(Daemon, [<<"register">>, Sec]) when is_binary(Sec) ->
+    Patterns = lists:usort(Daemon#daemon.cfg_patterns ++ [{?b2l(Sec)}]),
+    {ok, Daemon#daemon{cfg_patterns=Patterns}};
+handle_port_message(Daemon, [<<"register">>, Sec, Key])
+        when is_binary(Sec) andalso is_binary(Key) ->
+    Pattern = {?b2l(Sec), ?b2l(Key)},
+    Patterns = lists:usort(Daemon#daemon.cfg_patterns ++ [Pattern]),
+    {ok, Daemon#daemon{cfg_patterns=Patterns}};
+handle_port_message(#daemon{name=Name}=Daemon, [<<"log">>, Msg]) ->
+    handle_log_message(Name, Msg, <<"info">>),
+    {ok, Daemon};
+handle_port_message(#daemon{name=Name}=Daemon, [<<"log">>, Msg, {Opts}]) ->
+    Level = couch_util:get_value(<<"level">>, Opts, <<"info">>),
+    handle_log_message(Name, Msg, Level),
+    {ok, Daemon};
+handle_port_message(#daemon{name=Name}=Daemon, Else) ->
+    ?LOG_ERROR("Daemon ~p made invalid request: ~p", [Name, Else]),
+    {ok, Daemon}.
+
+
+handle_log_message(Name, Msg, _Level) when not is_binary(Msg) ->
+    ?LOG_ERROR("Invalid log message from daemon ~p: ~p", [Name, Msg]);
+handle_log_message(Name, Msg, <<"debug">>) ->
+    ?LOG_DEBUG("Daemon ~p :: ~s", [Name, ?b2l(Msg)]);
+handle_log_message(Name, Msg, <<"info">>) ->
+    ?LOG_INFO("Daemon ~p :: ~s", [Name, ?b2l(Msg)]);
+handle_log_message(Name, Msg, <<"error">>) ->
+    ?LOG_ERROR("Daemon: ~p :: ~s", [Name, ?b2l(Msg)]);
+handle_log_message(Name, Msg, Level) ->
+    ?LOG_ERROR("Invalid log level from daemon: ~p", [Level]),
+    ?LOG_INFO("Daemon: ~p :: ~s", [Name, ?b2l(Msg)]).
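+
+% For illustration, a hypothetical exchange over the line-oriented stdio
+% API handled above. Every message is a single newline-terminated JSON
+% line, and the daemon's first output line is stored as its kill command:
+%
+%   daemon  -> couchdb : ["get", "httpd", "port"]
+%   couchdb -> daemon  : "5984"
+%   daemon  -> couchdb : ["register", "os_daemons"]
+%   daemon  -> couchdb : ["log", "started", {"level": "debug"}]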
+ +% +% Daemon management helpers +% + +reload_daemons(Table) -> + % List of daemons we want to have running. + Configured = lists:sort(couch_config:get("os_daemons")), + + % Remove records for daemons that were halted. + MSpecHalted = #daemon{name='$1', cmd='$2', status=halted, _='_'}, + Halted = lists:sort([{N, C} || [N, C] <- ets:match(Table, MSpecHalted)]), + ok = stop_os_daemons(Table, find_to_stop(Configured, Halted, [])), + + % Stop daemons that are running + % Start newly configured daemons + MSpecRunning = #daemon{name='$1', cmd='$2', status=running, _='_'}, + Running = lists:sort([{N, C} || [N, C] <- ets:match(Table, MSpecRunning)]), + ok = stop_os_daemons(Table, find_to_stop(Configured, Running, [])), + ok = boot_os_daemons(Table, find_to_boot(Configured, Running, [])), + ok. + + +restart_daemons(Table, Sect, Key) -> + restart_daemons(Table, Sect, Key, ets:first(Table)). + +restart_daemons(_, _, _, '$end_of_table') -> + ok; +restart_daemons(Table, Sect, Key, Port) -> + [D] = ets:lookup(Table, Port), + HasSect = lists:member({Sect}, D#daemon.cfg_patterns), + HasKey = lists:member({Sect, Key}, D#daemon.cfg_patterns), + case HasSect or HasKey of + true -> + stop_port(D), + D2 = D#daemon{status=restarting, buf=nil}, + true = ets:insert(Table, D2); + _ -> + ok + end, + restart_daemons(Table, Sect, Key, ets:next(Table, Port)). + + +stop_os_daemons(_Table, []) -> + ok; +stop_os_daemons(Table, [{Name, Cmd} | Rest]) -> + [[Port]] = ets:match(Table, #daemon{port='$1', name=Name, cmd=Cmd, _='_'}), + [D] = ets:lookup(Table, Port), + case D#daemon.status of + halted -> + ets:delete(Table, Port); + _ -> + stop_port(D), + D2 = D#daemon{status=stopping, errors=nil, buf=nil}, + true = ets:insert(Table, D2) + end, + stop_os_daemons(Table, Rest). + +boot_os_daemons(_Table, []) -> + ok; +boot_os_daemons(Table, [{Name, Cmd} | Rest]) -> + {ok, Port} = start_port(Cmd), + true = ets:insert(Table, #daemon{port=Port, name=Name, cmd=Cmd}), + boot_os_daemons(Table, Rest). + +% Elements unique to the configured set need to be booted. +find_to_boot([], _Rest, Acc) -> + % Nothing else configured. + Acc; +find_to_boot([D | R1], [D | R2], Acc) -> + % Elements are equal, daemon already running. + find_to_boot(R1, R2, Acc); +find_to_boot([D1 | R1], [D2 | _]=A2, Acc) when D1 < D2 -> + find_to_boot(R1, A2, [D1 | Acc]); +find_to_boot(A1, [_ | R2], Acc) -> + find_to_boot(A1, R2, Acc); +find_to_boot(Rest, [], Acc) -> + % No more candidates for already running. Boot all. + Rest ++ Acc. + +% Elements unique to the running set need to be killed. +find_to_stop([], Rest, Acc) -> + % The rest haven't been found, so they must all + % be ready to die. + Rest ++ Acc; +find_to_stop([D | R1], [D | R2], Acc) -> + % Elements are equal, daemon already running. + find_to_stop(R1, R2, Acc); +find_to_stop([D1 | R1], [D2 | _]=A2, Acc) when D1 < D2 -> + find_to_stop(R1, A2, Acc); +find_to_stop(A1, [D2 | R2], Acc) -> + find_to_stop(A1, R2, [D2 | Acc]); +find_to_stop(_, [], Acc) -> + % No more running daemons to worry about. + Acc. + +should_halt(Errors) -> + RetryTimeCfg = couch_config:get("os_daemon_settings", "retry_time", "5"), + RetryTime = list_to_integer(RetryTimeCfg), + + Now = now(), + RecentErrors = lists:filter(fun(Time) -> + timer:now_diff(Now, Time) =< RetryTime * 1000000 + end, Errors), + + RetryCfg = couch_config:get("os_daemon_settings", "max_retries", "3"), + Retries = list_to_integer(RetryCfg), + + {length(RecentErrors) >= Retries, RecentErrors}. 
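+
+% For illustration, a hypothetical local.ini snippet driving this module;
+% the "os_daemons" section is read by reload_daemons/1 above, and the
+% settings shown equal the defaults used by should_halt/1:
+%
+%   [os_daemons]
+%   my_daemon = /usr/local/bin/my_daemon.sh
+%
+%   [os_daemon_settings]
+%   max_retries = 3
+%   retry_time = 5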
diff --git a/src/couchdb/couch_os_process.erl b/src/couchdb/couch_os_process.erl
index 5ac13715..5776776b 100644
--- a/src/couchdb/couch_os_process.erl
+++ b/src/couchdb/couch_os_process.erl
@@ -93,10 +93,10 @@ readjson(OsProc) when is_record(OsProc, os_proc) ->
         ?LOG_INFO("OS Process ~p Log :: ~s", [OsProc#os_proc.port, Msg]),
         readjson(OsProc);
     [<<"error">>, Id, Reason] ->
-        throw({list_to_atom(binary_to_list(Id)),Reason});
+        throw({couch_util:to_existing_atom(Id),Reason});
     [<<"fatal">>, Id, Reason] ->
         ?LOG_INFO("OS Process ~p Fatal Error :: ~s ~p",[OsProc#os_proc.port, Id, Reason]),
-        throw({list_to_atom(binary_to_list(Id)),Reason});
+        throw({couch_util:to_existing_atom(Id),Reason});
     Result ->
         Result
     end.
@@ -104,6 +104,7 @@ readjson(OsProc) when is_record(OsProc, os_proc) ->
 
 % gen_server API
 init([Command, Options, PortOptions]) ->
+    process_flag(trap_exit, true),
     PrivDir = couch_util:priv_dir(),
     Spawnkiller = filename:join(PrivDir, "couchspawnkillable"),
     BaseProc = #os_proc{
diff --git a/src/couchdb/couch_query_servers.erl b/src/couchdb/couch_query_servers.erl
index 30f4c4c7..b0e46937 100644
--- a/src/couchdb/couch_query_servers.erl
+++ b/src/couchdb/couch_query_servers.erl
@@ -15,9 +15,9 @@
 
 -export([start_link/0]).
 
--export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2,code_change/3,stop/0]).
--export([start_doc_map/2, map_docs/2, stop_doc_map/1]).
--export([reduce/3, rereduce/3,validate_doc_update/4]).
+-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2,code_change/3]).
+-export([start_doc_map/3, map_docs/2, stop_doc_map/1]).
+-export([reduce/3, rereduce/3,validate_doc_update/5]).
 -export([filter_docs/5]).
 -export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]).
 
@@ -35,14 +35,25 @@
     stop_fun
 }).
 
+-record(qserver, {
+    langs, % Keyed by language name, value is {Mod,Func,Arg}
+    pid_procs, % Keyed by PID, value is a #proc record.
+    lang_procs, % Keyed by language name, value is a #proc record
+    lang_limits, % Keyed by language name, value is {Lang, Limit, Current}
+    waitlist = [],
+    config
+}).
+
 start_link() ->
     gen_server:start_link({local, couch_query_servers}, couch_query_servers, [], []).
 
-stop() ->
-    exit(whereis(couch_query_servers), normal).
-
-start_doc_map(Lang, Functions) ->
+start_doc_map(Lang, Functions, Lib) ->
     Proc = get_os_process(Lang),
+    case Lib of
+    {[]} -> ok;
+    Lib ->
+        true = proc_prompt(Proc, [<<"add_lib">>, Lib])
+    end,
     lists:foreach(fun(FunctionSource) ->
         true = proc_prompt(Proc, [<<"add_fun">>, FunctionSource])
     end, Functions),
@@ -93,7 +104,7 @@ group_reductions_results(List) ->
 rereduce(_Lang, [], _ReducedValues) ->
     {ok, []};
 rereduce(Lang, RedSrcs, ReducedValues) ->
-    Grouped = group_reductions_results(ReducedValues), 
+    Grouped = group_reductions_results(ReducedValues),
     Results = lists:zipwith(
         fun
         (<<"_", _/binary>> = FunSrc, Values) ->
@@ -133,8 +144,6 @@ os_reduce(Lang, OsRedSrcs, KVs) ->
     end,
     {ok, OsResults}.
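+
+% For illustration: given map values 1, 2 and 3, the builtin reducers
+% defined below return 6 for _sum, 3 for _count, and
+% {"sum":6,"count":3,"min":1,"max":3,"sumsqr":14} for _stats; _sum also
+% adds lists of numbers elementwise, so values [1,2] and [3,4] sum to [4,6].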
-os_rereduce(_Lang, [], _KVs) -> - {ok, []}; os_rereduce(Lang, OsRedSrcs, KVs) -> Proc = get_os_process(Lang), try proc_prompt(Proc, [<<"rereduce">>, OsRedSrcs, KVs]) of @@ -146,30 +155,68 @@ os_rereduce(Lang, OsRedSrcs, KVs) -> builtin_reduce(_Re, [], _KVs, Acc) -> {ok, lists:reverse(Acc)}; -builtin_reduce(Re, [<<"_sum">>|BuiltinReds], KVs, Acc) -> +builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> Sum = builtin_sum_rows(KVs), builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]); -builtin_reduce(reduce, [<<"_count">>|BuiltinReds], KVs, Acc) -> +builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> Count = length(KVs), builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]); -builtin_reduce(rereduce, [<<"_count">>|BuiltinReds], KVs, Acc) -> +builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> Count = builtin_sum_rows(KVs), - builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]). + builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) -> + Stats = builtin_stats(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]). builtin_sum_rows(KVs) -> lists:foldl(fun - ([_Key, Value], Acc) when is_number(Value) -> + ([_Key, Value], Acc) when is_number(Value), is_number(Acc) -> Acc + Value; + ([_Key, Value], Acc) when is_list(Value), is_list(Acc) -> + sum_terms(Acc, Value); + ([_Key, Value], Acc) when is_number(Value), is_list(Acc) -> + sum_terms(Acc, [Value]); + ([_Key, Value], Acc) when is_list(Value), is_number(Acc) -> + sum_terms([Acc], Value); (_Else, _Acc) -> - throw({invalid_value, <<"builtin _sum function requires map values to be numbers">>}) + throw({invalid_value, <<"builtin _sum function requires map values to be numbers or lists of numbers">>}) end, 0, KVs). +sum_terms([], []) -> + []; +sum_terms([_|_]=Xs, []) -> + Xs; +sum_terms([], [_|_]=Ys) -> + Ys; +sum_terms([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) -> + [X+Y | sum_terms(Xs,Ys)]; +sum_terms(_, _) -> + throw({invalid_value, <<"builtin _sum function requires map values to be numbers or lists of numbers">>}). + +builtin_stats(reduce, [[_,First]|Rest]) when is_number(First) -> + Stats = lists:foldl(fun([_K,V], {S,C,Mi,Ma,Sq}) when is_number(V) -> + {S+V, C+1, lists:min([Mi, V]), lists:max([Ma, V]), Sq+(V*V)}; + (_, _) -> + throw({invalid_value, + <<"builtin _stats function requires map values to be numbers">>}) + end, {First,1,First,First,First*First}, Rest), + {Sum, Cnt, Min, Max, Sqr} = Stats, + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]}; + +builtin_stats(rereduce, [[_,First]|Rest]) -> + {[{sum,Sum0}, {count,Cnt0}, {min,Min0}, {max,Max0}, {sumsqr,Sqr0}]} = First, + Stats = lists:foldl(fun([_K,Red], {S,C,Mi,Ma,Sq}) -> + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]} = Red, + {Sum+S, Cnt+C, lists:min([Min, Mi]), lists:max([Max, Ma]), Sqr+Sq} + end, {Sum0,Cnt0,Min0,Max0,Sqr0}, Rest), + {Sum, Cnt, Min, Max, Sqr} = Stats, + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]}. % use the function stored in ddoc.validate_doc_update to test an update. 
-validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx) -> +validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]), - JsonDiskDoc = json_doc(DiskDoc), - case ddoc_prompt(DDoc, [<<"validate_doc_update">>], [JsonEditDoc, JsonDiskDoc, Ctx]) of + JsonDiskDoc = json_doc(DiskDoc), + case ddoc_prompt(DDoc, [<<"validate_doc_update">>], [JsonEditDoc, JsonDiskDoc, Ctx, SecObj]) of 1 -> ok; {[{<<"forbidden">>, Message}]} -> @@ -183,13 +230,17 @@ json_doc(Doc) -> couch_doc:to_json_obj(Doc, [revs]). filter_docs(Req, Db, DDoc, FName, Docs) -> - JsonReq = couch_httpd_external:json_req_obj(Req, Db), + JsonReq = case Req of + {json_req, JsonObj} -> + JsonObj; + #httpd{} = HttpReq -> + couch_httpd_external:json_req_obj(HttpReq, Db) + end, JsonDocs = [couch_doc:to_json_obj(Doc, [revs]) || Doc <- Docs], - JsonCtx = couch_util:json_user_ctx(Db), - [true, Passes] = ddoc_prompt(DDoc, [<<"filters">>, FName], [JsonDocs, JsonReq, JsonCtx]), + [true, Passes] = ddoc_prompt(DDoc, [<<"filters">>, FName], [JsonDocs, JsonReq]), {ok, Passes}. -ddoc_proc_prompt({Proc, DDocId}, FunPath, Args) -> +ddoc_proc_prompt({Proc, DDocId}, FunPath, Args) -> proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]). ddoc_prompt(DDoc, FunPath, Args) -> @@ -214,100 +265,105 @@ init([]) -> ok = couch_config:register( fun("query_servers" ++ _, _) -> - ?MODULE:stop() + supervisor:terminate_child(couch_secondary_services, query_servers), + supervisor:restart_child(couch_secondary_services, query_servers) end), ok = couch_config:register( fun("native_query_servers" ++ _, _) -> - ?MODULE:stop() + supervisor:terminate_child(couch_secondary_services, query_servers), + [supervisor:restart_child(couch_secondary_services, query_servers)] + end), + ok = couch_config:register( + fun("query_server_config" ++ _, _) -> + supervisor:terminate_child(couch_secondary_services, query_servers), + supervisor:restart_child(couch_secondary_services, query_servers) end), Langs = ets:new(couch_query_server_langs, [set, private]), + LangLimits = ets:new(couch_query_server_lang_limits, [set, private]), PidProcs = ets:new(couch_query_server_pid_langs, [set, private]), LangProcs = ets:new(couch_query_server_procs, [set, private]), - InUse = ets:new(couch_query_server_used, [set, private]), + + ProcTimeout = list_to_integer(couch_config:get( + "couchdb", "os_process_timeout", "5000")), + ReduceLimit = list_to_atom( + couch_config:get("query_server_config","reduce_limit","true")), + OsProcLimit = list_to_integer( + couch_config:get("query_server_config","os_process_limit","10")), + % 'query_servers' specifies an OS command-line to execute. lists:foreach(fun({Lang, Command}) -> + true = ets:insert(LangLimits, {?l2b(Lang), OsProcLimit, 0}), true = ets:insert(Langs, {?l2b(Lang), couch_os_process, start_link, [Command]}) end, couch_config:get("query_servers")), % 'native_query_servers' specifies a {Module, Func, Arg} tuple. lists:foreach(fun({Lang, SpecStr}) -> {ok, {Mod, Fun, SpecArg}} = couch_util:parse_term(SpecStr), + true = ets:insert(LangLimits, {?l2b(Lang), 0, 0}), % 0 means no limit true = ets:insert(Langs, {?l2b(Lang), Mod, Fun, SpecArg}) end, couch_config:get("native_query_servers")), - process_flag(trap_exit, true), - {ok, {Langs, % Keyed by language name, value is {Mod,Func,Arg} - PidProcs, % Keyed by PID, valus is a #proc record. - LangProcs, % Keyed by language name, value is a #proc record - InUse % Keyed by PID, value is #proc record. - }}. 
-terminate(_Reason, _Server) ->
+
+    process_flag(trap_exit, true),
+    {ok, #qserver{
+        langs = Langs, % Keyed by language name, value is {Mod,Func,Arg}
+        pid_procs = PidProcs, % Keyed by PID, value is a #proc record.
+        lang_procs = LangProcs, % Keyed by language name, value is a #proc record
+        lang_limits = LangLimits, % Keyed by language name, value is {Lang, Limit, Current}
+        config = {[{<<"reduce_limit">>, ReduceLimit},{<<"timeout">>, ProcTimeout}]}
+    }}.
+
+terminate(_Reason, #qserver{pid_procs=PidProcs}) ->
+    [couch_util:shutdown_sync(P) || {P,_} <- ets:tab2list(PidProcs)],
     ok.
 
-handle_call({get_proc, #doc{body={Props}}=DDoc, DDocKey}, _From, {Langs, PidProcs, LangProcs, InUse}=Server) ->
-    % Note to future self. Add max process limit.
-    Lang = proplists:get_value(<<"language">>, Props, <<"javascript">>),
-    case ets:lookup(LangProcs, Lang) of
-    [{Lang, [P|Rest]}] ->
-        % find a proc in the set that has the DDoc
-        case proc_with_ddoc(DDoc, DDocKey, [P|Rest]) of
-        {ok, Proc} ->
-            % looks like the proc isn't getting dropped from the list.
-            % we need to change this to take a fun for equality checking
-            % so we can do a comparison on portnum
-            rem_from_list(LangProcs, Lang, Proc),
-            add_to_list(InUse, Lang, Proc),
-            {reply, {ok, Proc, get_query_server_config()}, Server};
-        Error ->
-            {reply, Error, Server}
-        end;
-    _ ->
-        case (catch new_process(Langs, Lang)) of
-        {ok, Proc} ->
-            add_value(PidProcs, Proc#proc.pid, Proc),
-            case proc_with_ddoc(DDoc, DDocKey, [Proc]) of
-            {ok, Proc2} ->
-                rem_from_list(LangProcs, Lang, Proc),
-                add_to_list(InUse, Lang, Proc2),
-                {reply, {ok, Proc2, get_query_server_config()}, Server};
-            Error ->
-                {reply, Error, Server}
-            end;
-        Error ->
-            {reply, Error, Server}
-        end
+handle_call({get_proc, #doc{body={Props}}=DDoc, DDocKey}, From, Server) ->
+    Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>),
+    case lang_proc(Lang, Server, fun(Procs) ->
+            % find a proc in the set that has the DDoc
+            proc_with_ddoc(DDoc, DDocKey, Procs)
+        end) of
+    {ok, Proc} ->
+        {reply, {ok, Proc, Server#qserver.config}, Server};
+    wait ->
+        {noreply, add_to_waitlist({DDoc, DDocKey}, From, Server)};
+    Error ->
+        {reply, Error, Server}
     end;
-handle_call({get_proc, Lang}, _From, {Langs, PidProcs, LangProcs, InUse}=Server) ->
-    % Note to future self. Add max process limit.
-    case ets:lookup(LangProcs, Lang) of
-    [{Lang, [Proc|_]}] ->
-        add_value(PidProcs, Proc#proc.pid, Proc),
-        rem_from_list(LangProcs, Lang, Proc),
-        add_to_list(InUse, Lang, Proc),
-        {reply, {ok, Proc, get_query_server_config()}, Server};
-    _ ->
-        case (catch new_process(Langs, Lang)) of
-        {ok, Proc} ->
-            add_value(PidProcs, Proc#proc.pid, Proc),
-            add_to_list(InUse, Lang, Proc),
-            {reply, {ok, Proc, get_query_server_config()}, Server};
-        Error ->
-            {reply, Error, Server}
-        end
+handle_call({get_proc, Lang}, From, Server) ->
+    case lang_proc(Lang, Server, fun([P|_Procs]) ->
+            {ok, P}
+        end) of
+    {ok, Proc} ->
+        {reply, {ok, Proc, Server#qserver.config}, Server};
+    wait ->
+        {noreply, add_to_waitlist({Lang}, From, Server)};
+    Error ->
+        {reply, Error, Server}
     end;
-handle_call({ret_proc, Proc}, _From, {_, _, LangProcs, InUse}=Server) ->
+handle_call({unlink_proc, Pid}, _From, #qserver{pid_procs=PidProcs}=Server) ->
+    rem_value(PidProcs, Pid),
+    unlink(Pid),
+    {reply, ok, Server};
+handle_call({ret_proc, Proc}, _From, #qserver{
+        pid_procs=PidProcs,
+        lang_procs=LangProcs}=Server) ->
     % Along with max process limit, here we should check
     % if we're over the limit and discard when we are.
+ add_value(PidProcs, Proc#proc.pid, Proc), add_to_list(LangProcs, Proc#proc.lang, Proc), - rem_from_list(InUse, Proc#proc.lang, Proc), - {reply, true, Server}. + link(Proc#proc.pid), + {reply, true, service_waitlist(Server)}. handle_cast(_Whatever, Server) -> {noreply, Server}. -handle_info({'EXIT', Pid, Status}, {_, PidProcs, LangProcs, InUse}=Server) -> +handle_info({'EXIT', Pid, Status}, #qserver{ + pid_procs=PidProcs, + lang_procs=LangProcs, + lang_limits=LangLimits}=Server) -> case ets:lookup(PidProcs, Pid) of [{Pid, Proc}] -> case Status of @@ -316,11 +372,16 @@ handle_info({'EXIT', Pid, Status}, {_, PidProcs, LangProcs, InUse}=Server) -> end, rem_value(PidProcs, Pid), catch rem_from_list(LangProcs, Proc#proc.lang, Proc), - catch rem_from_list(InUse, Proc#proc.lang, Proc), - {noreply, Server}; + [{Lang, Lim, Current}] = ets:lookup(LangLimits, Proc#proc.lang), + true = ets:insert(LangLimits, {Lang, Lim, Current-1}), + {noreply, service_waitlist(Server)}; [] -> - ?LOG_DEBUG("Unknown linked process died: ~p (reason: ~p)", [Pid, Status]), - {stop, Status, Server} + case Status of + normal -> + {noreply, Server}; + _ -> + {stop, Status, Server} + end end. code_change(_OldVsn, State, _Extra) -> @@ -328,27 +389,94 @@ code_change(_OldVsn, State, _Extra) -> % Private API -get_query_server_config() -> - ReduceLimit = list_to_atom( - couch_config:get("query_server_config","reduce_limit","true")), - {[{<<"reduce_limit">>, ReduceLimit}]}. - -new_process(Langs, Lang) -> - case ets:lookup(Langs, Lang) of - [{Lang, Mod, Func, Arg}] -> - {ok, Pid} = apply(Mod, Func, Arg), - {ok, #proc{lang=Lang, - pid=Pid, - % Called via proc_prompt, proc_set_timeout, and proc_stop - prompt_fun={Mod, prompt}, - set_timeout_fun={Mod, set_timeout}, - stop_fun={Mod, stop}}}; +add_to_waitlist(Info, From, #qserver{waitlist=Waitlist}=Server) -> + Server#qserver{waitlist=[{Info, From}|Waitlist]}. + +service_waitlist(#qserver{waitlist=[]}=Server) -> + Server; +service_waitlist(#qserver{waitlist=Waitlist}=Server) -> + [Oldest|RevWList] = lists:reverse(Waitlist), + case service_waiting(Oldest, Server) of + ok -> + Server#qserver{waitlist=lists:reverse(RevWList)}; + wait -> + Server#qserver{waitlist=Waitlist} + end. + +% todo get rid of duplication +service_waiting({{#doc{body={Props}}=DDoc, DDocKey}, From}, Server) -> + Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + case lang_proc(Lang, Server, fun(Procs) -> + % find a proc in the set that has the DDoc + proc_with_ddoc(DDoc, DDocKey, Procs) + end) of + {ok, Proc} -> + gen_server:reply(From, {ok, Proc, Server#qserver.config}), + ok; + wait -> % this should never happen + wait; + Error -> + gen_server:reply(From, Error), + ok + end; +service_waiting({{Lang}, From}, Server) -> + case lang_proc(Lang, Server, fun([P|_Procs]) -> + {ok, P} + end) of + {ok, Proc} -> + gen_server:reply(From, {ok, Proc, Server#qserver.config}), + ok; + wait -> % this should never happen + wait; + Error -> + gen_server:reply(From, Error), + ok + end. + +lang_proc(Lang, #qserver{ + langs=Langs, + pid_procs=PidProcs, + lang_procs=LangProcs, + lang_limits=LangLimits}, PickFun) -> + % Note to future self. Add max process limit. 
+ case ets:lookup(LangProcs, Lang) of + [{Lang, [P|Procs]}] -> + {ok, Proc} = PickFun([P|Procs]), + rem_from_list(LangProcs, Lang, Proc), + {ok, Proc}; _ -> - {unknown_query_language, Lang} + case (catch new_process(Langs, LangLimits, Lang)) of + {ok, Proc} -> + add_value(PidProcs, Proc#proc.pid, Proc), + PickFun([Proc]); + ErrorOrWait -> + ErrorOrWait + end + end. + +new_process(Langs, LangLimits, Lang) -> + [{Lang, Lim, Current}] = ets:lookup(LangLimits, Lang), + if (Lim == 0) or (Current < Lim) -> % Lim == 0 means no limit + % we are below the limit for our language, make a new one + case ets:lookup(Langs, Lang) of + [{Lang, Mod, Func, Arg}] -> + {ok, Pid} = apply(Mod, Func, Arg), + true = ets:insert(LangLimits, {Lang, Lim, Current+1}), + {ok, #proc{lang=Lang, + pid=Pid, + % Called via proc_prompt, proc_set_timeout, and proc_stop + prompt_fun={Mod, prompt}, + set_timeout_fun={Mod, set_timeout}, + stop_fun={Mod, stop}}}; + _ -> + {unknown_query_language, Lang} + end; + true -> + wait end. proc_with_ddoc(DDoc, DDocKey, LangProcs) -> - DDocProcs = lists:filter(fun(#proc{ddoc_keys=Keys}) -> + DDocProcs = lists:filter(fun(#proc{ddoc_keys=Keys}) -> lists:any(fun(Key) -> Key == DDocKey end, Keys) @@ -390,13 +518,13 @@ teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc{ddoc_keys=Keys}=Proc) -> get_ddoc_process(#doc{} = DDoc, DDocKey) -> % remove this case statement case gen_server:call(couch_query_servers, {get_proc, DDoc, DDocKey}) of - {ok, Proc, QueryConfig} -> + {ok, Proc, {QueryConfig}} -> % process knows the ddoc - case (catch proc_prompt(Proc, [<<"reset">>, QueryConfig])) of + case (catch proc_prompt(Proc, [<<"reset">>, {QueryConfig}])) of true -> - proc_set_timeout(Proc, list_to_integer(couch_config:get( - "couchdb", "os_process_timeout", "5000"))), + proc_set_timeout(Proc, couch_util:get_value(<<"timeout">>, QueryConfig)), link(Proc#proc.pid), + gen_server:call(couch_query_servers, {unlink_proc, Proc#proc.pid}), Proc; _ -> catch proc_stop(Proc), @@ -406,19 +534,14 @@ get_ddoc_process(#doc{} = DDoc, DDocKey) -> throw(Error) end. -ret_ddoc_process(Proc) -> - true = gen_server:call(couch_query_servers, {ret_proc, Proc}), - catch unlink(Proc#proc.pid), - ok. - get_os_process(Lang) -> case gen_server:call(couch_query_servers, {get_proc, Lang}) of - {ok, Proc, QueryConfig} -> - case (catch proc_prompt(Proc, [<<"reset">>, QueryConfig])) of + {ok, Proc, {QueryConfig}} -> + case (catch proc_prompt(Proc, [<<"reset">>, {QueryConfig}])) of true -> - proc_set_timeout(Proc, list_to_integer(couch_config:get( - "couchdb", "os_process_timeout", "5000"))), + proc_set_timeout(Proc, couch_util:get_value(<<"timeout">>, QueryConfig)), link(Proc#proc.pid), + gen_server:call(couch_query_servers, {unlink_proc, Proc#proc.pid}), Proc; _ -> catch proc_stop(Proc), diff --git a/src/couchdb/couch_ref_counter.erl b/src/couchdb/couch_ref_counter.erl index 5c8e7437..5a111ab6 100644 --- a/src/couchdb/couch_ref_counter.erl +++ b/src/couchdb/couch_ref_counter.erl @@ -24,7 +24,7 @@ drop(RefCounterPid) -> drop(RefCounterPid, self()). drop(RefCounterPid, Pid) -> - gen_server:cast(RefCounterPid, {drop, Pid}). + gen_server:call(RefCounterPid, {drop, Pid}). add(RefCounterPid) -> @@ -40,17 +40,18 @@ count(RefCounterPid) -> -record(srv, { - referrers=dict:new() % a dict of each ref counting proc. + referrers=dict:new(), % a dict of each ref counting proc. + child_procs=[] }). 
init({Pid, ChildProcs}) -> [link(ChildProc) || ChildProc <- ChildProcs], Referrers = dict:from_list([{Pid, {erlang:monitor(process, Pid), 1}}]), - {ok, #srv{referrers=Referrers}}. + {ok, #srv{referrers=Referrers, child_procs=ChildProcs}}. -terminate(Reason, _Srv) -> - couch_util:terminate_linked(Reason), +terminate(_Reason, #srv{child_procs=ChildProcs}) -> + [couch_util:shutdown_sync(Pid) || Pid <- ChildProcs], ok. @@ -65,10 +66,8 @@ handle_call({add, Pid},_From, #srv{referrers=Referrers}=Srv) -> {reply, ok, Srv#srv{referrers=Referrers2}}; handle_call(count, _From, Srv) -> {monitors, Monitors} = process_info(self(), monitors), - {reply, length(Monitors), Srv}. - - -handle_cast({drop, Pid}, #srv{referrers=Referrers}=Srv) -> + {reply, length(Monitors), Srv}; +handle_call({drop, Pid}, _From, #srv{referrers=Referrers}=Srv) -> Referrers2 = case dict:find(Pid, Referrers) of {ok, {MonRef, 1}} -> @@ -79,7 +78,16 @@ handle_cast({drop, Pid}, #srv{referrers=Referrers}=Srv) -> error -> Referrers end, - maybe_close_async(Srv#srv{referrers=Referrers2}). + Srv2 = Srv#srv{referrers=Referrers2}, + case should_close() of + true -> + {stop,normal,ok,Srv2}; + false -> + {reply, ok, Srv2} + end. + +handle_cast(Msg, _Srv)-> + exit({unknown_msg,Msg}). code_change(_OldVsn, State, _Extra) -> @@ -87,21 +95,17 @@ code_change(_OldVsn, State, _Extra) -> handle_info({'DOWN', MonRef, _, Pid, _}, #srv{referrers=Referrers}=Srv) -> {ok, {MonRef, _RefCount}} = dict:find(Pid, Referrers), - maybe_close_async(Srv#srv{referrers=dict:erase(Pid, Referrers)}). + Srv2 = Srv#srv{referrers=dict:erase(Pid, Referrers)}, + case should_close() of + true -> + {stop,normal,Srv2}; + false -> + {noreply,Srv2} + end. should_close() -> case process_info(self(), monitors) of - {monitors, []} -> - true; - _ -> - false - end. - -maybe_close_async(Srv) -> - case should_close() of - true -> - {stop,normal,Srv}; - false -> - {noreply,Srv} + {monitors, []} -> true; + _ -> false end. diff --git a/src/couchdb/couch_rep.erl b/src/couchdb/couch_rep.erl index 98413f23..04051b8e 100644 --- a/src/couchdb/couch_rep.erl +++ b/src/couchdb/couch_rep.erl @@ -12,12 +12,18 @@ -module(couch_rep). -behaviour(gen_server). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([replicate/2, checkpoint/1]). +-export([ensure_rep_db_exists/0, make_replication_id/2]). +-export([start_replication/3, end_replication/1, get_result/4]). +-export([update_rep_doc/2]). -include("couch_db.hrl"). +-include("couch_js_functions.hrl"). + +-define(REP_ID_VERSION, 2). -record(state, { changes_feed, @@ -34,6 +40,7 @@ start_seq, history, + session_id, source_log, target_log, rep_starttime, @@ -45,7 +52,10 @@ complete = false, committed_seq = 0, - stats = nil + stats = nil, + rep_doc = nil, + source_db_update_notifier = nil, + target_db_update_notifier = nil }). 
%% convenience function to do a simple replication from the shell @@ -58,70 +68,95 @@ replicate(Source, Target) when is_binary(Source), is_binary(Target) -> %% function handling POST to _replicate replicate({Props}=PostBody, UserCtx) -> - {BaseId, Extension} = make_replication_id(PostBody, UserCtx), - Replicator = {BaseId ++ Extension, - {gen_server, start_link, [?MODULE, [BaseId, PostBody, UserCtx], []]}, + RepId = make_replication_id(PostBody, UserCtx), + case couch_util:get_value(<<"cancel">>, Props, false) of + true -> + end_replication(RepId); + false -> + Server = start_replication(PostBody, RepId, UserCtx), + get_result(Server, RepId, PostBody, UserCtx) + end. + +end_replication({BaseId, Extension}) -> + RepId = BaseId ++ Extension, + case supervisor:terminate_child(couch_rep_sup, RepId) of + {error, not_found} = R -> + R; + ok -> + ok = supervisor:delete_child(couch_rep_sup, RepId), + {ok, {cancelled, ?l2b(BaseId)}} + end. + +start_replication(RepDoc, {BaseId, Extension}, UserCtx) -> + Replicator = { + BaseId ++ Extension, + {gen_server, start_link, + [?MODULE, [BaseId, RepDoc, UserCtx], []]}, temporary, 1, worker, [?MODULE] }, + start_replication_server(Replicator). - Server = start_replication_server(Replicator), +checkpoint(Server) -> + gen_server:cast(Server, do_checkpoint). - case proplists:get_value(<<"continuous">>, Props, false) of +get_result(Server, {BaseId, _Extension}, {Props} = PostBody, UserCtx) -> + case couch_util:get_value(<<"continuous">>, Props, false) of true -> {ok, {continuous, ?l2b(BaseId)}}; false -> - get_result(Server, PostBody, UserCtx) + try gen_server:call(Server, get_result, infinity) of + retry -> replicate(PostBody, UserCtx); + Else -> Else + catch + exit:{noproc, {gen_server, call, [Server, get_result, infinity]}} -> + %% oops, this replication just finished -- restart it. + replicate(PostBody, UserCtx); + exit:{normal, {gen_server, call, [Server, get_result, infinity]}} -> + %% we made the call during terminate + replicate(PostBody, UserCtx) + end end. -checkpoint(Server) -> - gen_server:cast(Server, do_checkpoint). - -get_result(Server, PostBody, UserCtx) -> - try gen_server:call(Server, get_result, infinity) of - retry -> replicate(PostBody, UserCtx); - Else -> Else +init(InitArgs) -> + try + do_init(InitArgs) catch - exit:{noproc, {gen_server, call, [Server, get_result , infinity]}} -> - %% oops, this replication just finished -- restart it. - replicate(PostBody, UserCtx); - exit:{normal, {gen_server, call, [Server, get_result , infinity]}} -> - %% we made the call during terminate - replicate(PostBody, UserCtx) + throw:Error -> + {stop, Error} end. -init(InitArgs) -> - try do_init(InitArgs) - catch throw:{db_not_found, DbUrl} -> {stop, {db_not_found, DbUrl}} end. 
- -do_init([RepId, {PostProps}, UserCtx] = InitArgs) -> +do_init([RepId, {PostProps} = RepDoc, UserCtx] = InitArgs) -> process_flag(trap_exit, true), - SourceProps = proplists:get_value(<<"source">>, PostProps), - TargetProps = proplists:get_value(<<"target">>, PostProps), + SourceProps = couch_util:get_value(<<"source">>, PostProps), + TargetProps = couch_util:get_value(<<"target">>, PostProps), - Continuous = proplists:get_value(<<"continuous">>, PostProps, false), - CreateTarget = proplists:get_value(<<"create_target">>, PostProps, false), + Continuous = couch_util:get_value(<<"continuous">>, PostProps, false), + CreateTarget = couch_util:get_value(<<"create_target">>, PostProps, false), - Source = open_db(SourceProps, UserCtx), - Target = open_db(TargetProps, UserCtx, CreateTarget), - - SourceLog = open_replication_log(Source, RepId), - TargetLog = open_replication_log(Target, RepId), + ProxyParams = parse_proxy_params( + couch_util:get_value(<<"proxy">>, PostProps, [])), + Source = open_db(SourceProps, UserCtx, ProxyParams), + Target = open_db(TargetProps, UserCtx, ProxyParams, CreateTarget), SourceInfo = dbinfo(Source), TargetInfo = dbinfo(Target), - + + maybe_set_triggered(RepDoc, RepId), + + [SourceLog, TargetLog] = find_replication_logs( + [Source, Target], RepId, {PostProps}, UserCtx), {StartSeq, History} = compare_replication_logs(SourceLog, TargetLog), {ok, ChangesFeed} = - couch_rep_changes_feed:start_link(self(), Source, StartSeq, PostProps), + couch_rep_changes_feed:start_link(self(), Source, StartSeq, PostProps), {ok, MissingRevs} = - couch_rep_missing_revs:start_link(self(), Target, ChangesFeed, PostProps), + couch_rep_missing_revs:start_link(self(), Target, ChangesFeed, PostProps), {ok, Reader} = - couch_rep_reader:start_link(self(), Source, MissingRevs, PostProps), + couch_rep_reader:start_link(self(), Source, MissingRevs, PostProps), {ok, Writer} = couch_rep_writer:start_link(self(), Target, Reader, PostProps), @@ -152,11 +187,15 @@ do_init([RepId, {PostProps}, UserCtx] = InitArgs) -> start_seq = StartSeq, history = History, + session_id = couch_uuids:random(), source_log = SourceLog, target_log = TargetLog, rep_starttime = httpd_util:rfc1123_date(), - src_starttime = proplists:get_value(instance_start_time, SourceInfo), - tgt_starttime = proplists:get_value(instance_start_time, TargetInfo) + src_starttime = couch_util:get_value(instance_start_time, SourceInfo), + tgt_starttime = couch_util:get_value(instance_start_time, TargetInfo), + rep_doc = RepDoc, + source_db_update_notifier = source_db_update_notifier(Source), + target_db_update_notifier = target_db_update_notifier(Target) }, {ok, State}. @@ -164,7 +203,21 @@ handle_call(get_result, From, #state{complete=true, listeners=[]} = State) -> {stop, normal, State#state{listeners=[From]}}; handle_call(get_result, From, State) -> Listeners = State#state.listeners, - {noreply, State#state{listeners=[From|Listeners]}}. + {noreply, State#state{listeners=[From|Listeners]}}; + +handle_call(get_source_db, _From, #state{source = Source} = State) -> + {reply, {ok, Source}, State}; + +handle_call(get_target_db, _From, #state{target = Target} = State) -> + {reply, {ok, Target}, State}. 
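+
+% For illustration, a hypothetical _replicate request body as consumed by
+% do_init/1 above (source and target are required, the rest optional):
+%
+%   {
+%       "source": "http://example.net/sourcedb",
+%       "target": "targetdb",
+%       "continuous": true,
+%       "create_target": true,
+%       "proxy": "http://proxy.example.net:8080"
+%   }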
+ +handle_cast(reopen_source_db, #state{source = Source} = State) -> + {ok, NewSource} = couch_db:reopen(Source), + {noreply, State#state{source = NewSource}}; + +handle_cast(reopen_target_db, #state{target = Target} = State) -> + {ok, NewTarget} = couch_db:reopen(Target), + {noreply, State#state{target = NewTarget}}; handle_cast(do_checkpoint, State) -> {noreply, do_checkpoint(State)}; @@ -213,23 +266,26 @@ handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}. terminate(normal, #state{checkpoint_scheduled=nil} = State) -> - do_terminate(State); + do_terminate(State), + update_rep_doc( + State#state.rep_doc, [{<<"_replication_state">>, <<"completed">>}]); terminate(normal, State) -> timer:cancel(State#state.checkpoint_scheduled), - do_terminate(do_checkpoint(State)); + do_terminate(do_checkpoint(State)), + update_rep_doc( + State#state.rep_doc, [{<<"_replication_state">>, <<"completed">>}]); -terminate(Reason, State) -> - #state{ - listeners = Listeners, - source = Source, - target = Target, - stats = Stats - } = State, +terminate(shutdown, #state{listeners = Listeners} = State) -> + % continuous replication stopped + [gen_server:reply(L, {ok, stopped}) || L <- Listeners], + terminate_cleanup(State); + +terminate(Reason, #state{listeners = Listeners} = State) -> [gen_server:reply(L, {error, Reason}) || L <- Listeners], - ets:delete(Stats), - close_db(Target), - close_db(Source). + terminate_cleanup(State), + update_rep_doc( + State#state.rep_doc, [{<<"_replication_state">>, <<"error">>}]). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -254,29 +310,37 @@ start_replication_server(Replicator) -> {error, {already_started, Pid}} = supervisor:start_child(couch_rep_sup, Replicator), ?LOG_DEBUG("replication ~p already running at ~p", [RepId, Pid]), - Pid + Pid; + {error, {db_not_found, DbUrl}} -> + throw({db_not_found, <<"could not open ", DbUrl/binary>>}); + {error, {unauthorized, DbUrl}} -> + throw({unauthorized, + <<"unauthorized to access database ", DbUrl/binary>>}) end; {error, {already_started, Pid}} -> ?LOG_DEBUG("replication ~p already running at ~p", [RepId, Pid]), Pid; {error, {{db_not_found, DbUrl}, _}} -> - throw({db_not_found, <<"could not open ", DbUrl/binary>>}) + throw({db_not_found, <<"could not open ", DbUrl/binary>>}); + {error, {{unauthorized, DbUrl}, _}} -> + throw({unauthorized, + <<"unauthorized to access database ", DbUrl/binary>>}) end. compare_replication_logs(SrcDoc, TgtDoc) -> #doc{body={RepRecProps}} = SrcDoc, #doc{body={RepRecPropsTgt}} = TgtDoc, - case proplists:get_value(<<"session_id">>, RepRecProps) == - proplists:get_value(<<"session_id">>, RepRecPropsTgt) of + case couch_util:get_value(<<"session_id">>, RepRecProps) == + couch_util:get_value(<<"session_id">>, RepRecPropsTgt) of true -> % if the records have the same session id, % then we have a valid replication history - OldSeqNum = proplists:get_value(<<"source_last_seq">>, RepRecProps, 0), - OldHistory = proplists:get_value(<<"history">>, RepRecProps, []), + OldSeqNum = couch_util:get_value(<<"source_last_seq">>, RepRecProps, 0), + OldHistory = couch_util:get_value(<<"history">>, RepRecProps, []), {OldSeqNum, OldHistory}; false -> - SourceHistory = proplists:get_value(<<"history">>, RepRecProps, []), - TargetHistory = proplists:get_value(<<"history">>, RepRecPropsTgt, []), + SourceHistory = couch_util:get_value(<<"history">>, RepRecProps, []), + TargetHistory = couch_util:get_value(<<"history">>, RepRecPropsTgt, []), ?LOG_INFO("Replication records differ. 
" "Scanning histories to find a common ancestor.", []), ?LOG_DEBUG("Record on source:~p~nRecord on target:~p~n", @@ -288,18 +352,18 @@ compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> ?LOG_INFO("no common ancestry -- performing full replication", []), {0, []}; compare_rep_history([{S}|SourceRest], [{T}|TargetRest]=Target) -> - SourceId = proplists:get_value(<<"session_id">>, S), + SourceId = couch_util:get_value(<<"session_id">>, S), case has_session_id(SourceId, Target) of true -> - RecordSeqNum = proplists:get_value(<<"recorded_seq">>, S, 0), + RecordSeqNum = couch_util:get_value(<<"recorded_seq">>, S, 0), ?LOG_INFO("found a common replication record with source_seq ~p", [RecordSeqNum]), {RecordSeqNum, SourceRest}; false -> - TargetId = proplists:get_value(<<"session_id">>, T), + TargetId = couch_util:get_value(<<"session_id">>, T), case has_session_id(TargetId, SourceRest) of true -> - RecordSeqNum = proplists:get_value(<<"recorded_seq">>, T, 0), + RecordSeqNum = couch_util:get_value(<<"recorded_seq">>, T, 0), ?LOG_INFO("found a common replication record with source_seq ~p", [RecordSeqNum]), {RecordSeqNum, TargetRest}; @@ -314,13 +378,13 @@ close_db(Db) -> couch_db:close(Db). dbname(#http_db{url = Url}) -> - Url; + couch_util:url_strip_password(Url); dbname(#db{name = Name}) -> Name. dbinfo(#http_db{} = Db) -> {DbProps} = couch_rep_httpc:request(Db), - [{list_to_atom(?b2l(K)), V} || {K,V} <- DbProps]; + [{list_to_existing_atom(?b2l(K)), V} || {K,V} <- DbProps]; dbinfo(Db) -> {ok, Info} = couch_db:get_db_info(Db), Info. @@ -331,14 +395,9 @@ do_terminate(State) -> committed_seq = NewSeq, listeners = Listeners, source = Source, - target = Target, continuous = Continuous, - stats = Stats, source_log = #doc{body={OldHistory}} } = State, - couch_task_status:update("Finishing"), - ets:delete(Stats), - close_db(Target), NewRepHistory = case CheckpointHistory of nil -> @@ -366,22 +425,35 @@ do_terminate(State) -> false -> [gen_server:reply(R, retry) || R <- OtherListeners] end, - close_db(Source). + couch_task_status:update("Finishing"), + terminate_cleanup(State). + +terminate_cleanup(State) -> + close_db(State#state.source), + close_db(State#state.target), + stop_db_update_notifier(State#state.source_db_update_notifier), + stop_db_update_notifier(State#state.target_db_update_notifier), + ets:delete(State#state.stats). + +stop_db_update_notifier(nil) -> + ok; +stop_db_update_notifier(Notifier) -> + couch_db_update_notifier:stop(Notifier). has_session_id(_SessionId, []) -> false; has_session_id(SessionId, [{Props} | Rest]) -> - case proplists:get_value(<<"session_id">>, Props, nil) of + case couch_util:get_value(<<"session_id">>, Props, nil) of SessionId -> true; _Else -> has_session_id(SessionId, Rest) end. -maybe_append_options(Options, Props) -> +maybe_append_options(Options, {Props}) -> lists:foldl(fun(Option, Acc) -> - Acc ++ - case proplists:get_value(Option, Props, false) of + Acc ++ + case couch_util:get_value(Option, Props, false) of true -> "+" ++ ?b2l(Option); false -> @@ -389,25 +461,67 @@ maybe_append_options(Options, Props) -> end end, [], Options). 
-make_replication_id({Props}, UserCtx) -> - %% funky algorithm to preserve backwards compatibility - {ok, HostName} = inet:gethostname(), - % Port = mochiweb_socket_server:get(couch_httpd, port), - Src = get_rep_endpoint(UserCtx, proplists:get_value(<<"source">>, Props)), - Tgt = get_rep_endpoint(UserCtx, proplists:get_value(<<"target">>, Props)), - Base = couch_util:to_hex(erlang:md5(term_to_binary([HostName, Src, Tgt]))), +make_replication_id(RepProps, UserCtx) -> + BaseId = make_replication_id(RepProps, UserCtx, ?REP_ID_VERSION), Extension = maybe_append_options( - [<<"continuous">>, <<"create_target">>], Props), - {Base, Extension}. + [<<"continuous">>, <<"create_target">>], RepProps), + {BaseId, Extension}. + +% Versioned clauses for generating replication ids +% If a change is made to how replications are identified +% add a new clause and increase ?REP_ID_VERSION at the top +make_replication_id({Props}, UserCtx, 2) -> + {ok, HostName} = inet:gethostname(), + Port = mochiweb_socket_server:get(couch_httpd, port), + Src = get_rep_endpoint(UserCtx, couch_util:get_value(<<"source">>, Props)), + Tgt = get_rep_endpoint(UserCtx, couch_util:get_value(<<"target">>, Props)), + maybe_append_filters({Props}, [HostName, Port, Src, Tgt], UserCtx); +make_replication_id({Props}, UserCtx, 1) -> + {ok, HostName} = inet:gethostname(), + Src = get_rep_endpoint(UserCtx, couch_util:get_value(<<"source">>, Props)), + Tgt = get_rep_endpoint(UserCtx, couch_util:get_value(<<"target">>, Props)), + maybe_append_filters({Props}, [HostName, Src, Tgt], UserCtx). + +maybe_append_filters({Props}, Base, UserCtx) -> + Base2 = Base ++ + case couch_util:get_value(<<"filter">>, Props) of + undefined -> + case couch_util:get_value(<<"doc_ids">>, Props) of + undefined -> + []; + DocIds -> + [DocIds] + end; + Filter -> + [filter_code(Filter, Props, UserCtx), + couch_util:get_value(<<"query_params">>, Props, {[]})] + end, + couch_util:to_hex(couch_util:md5(term_to_binary(Base2))). + +filter_code(Filter, Props, UserCtx) -> + {match, [DDocName, FilterName]} = + re:run(Filter, "(.*?)/(.*)", [{capture, [1, 2], binary}]), + ProxyParams = parse_proxy_params( + couch_util:get_value(<<"proxy">>, Props, [])), + Source = open_db( + couch_util:get_value(<<"source">>, Props), UserCtx, ProxyParams), + try + {ok, DDoc} = open_doc(Source, <<"_design/", DDocName/binary>>), + Code = couch_util:get_nested_json_value( + DDoc#doc.body, [<<"filters">>, FilterName]), + re:replace(Code, "^\s*(.*?)\s*$", "\\1", [{return, binary}]) + after + close_db(Source) + end. maybe_add_trailing_slash(Url) -> re:replace(Url, "[^/]$", "&/", [{return, list}]). get_rep_endpoint(_UserCtx, {Props}) -> - Url = maybe_add_trailing_slash(proplists:get_value(<<"url">>, Props)), - {BinHeaders} = proplists:get_value(<<"headers">>, Props, {[]}), - {Auth} = proplists:get_value(<<"auth">>, Props, {[]}), - case proplists:get_value(<<"oauth">>, Auth) of + Url = maybe_add_trailing_slash(couch_util:get_value(<<"url">>, Props)), + {BinHeaders} = couch_util:get_value(<<"headers">>, Props, {[]}), + {Auth} = couch_util:get_value(<<"auth">>, Props, {[]}), + case couch_util:get_value(<<"oauth">>, Auth) of undefined -> {remote, Url, [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders]}; {OAuth} -> @@ -420,60 +534,97 @@ get_rep_endpoint(_UserCtx, <<"https://",_/binary>>=Url) -> get_rep_endpoint(UserCtx, <<DbName/binary>>) -> {local, DbName, UserCtx}. 
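+
+% For illustration, a hypothetical filtered replication
+%
+%   {"source": "http://example.net/db", "target": "db_copy",
+%    "filter": "mydesign/myfilter", "query_params": {"key": "value"}}
+%
+% hashes [HostName, Port, Src, Tgt, FilterCode, QueryParams] into its base
+% id, so the same endpoints with a different filter replicate separately.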
-open_replication_log(#http_db{}=Db, RepId) -> - DocId = ?LOCAL_DOC_PREFIX ++ RepId, - Req = Db#http_db{resource=couch_util:url_encode(DocId)}, +find_replication_logs(DbList, RepId, RepProps, UserCtx) -> + LogId = ?l2b(?LOCAL_DOC_PREFIX ++ RepId), + fold_replication_logs(DbList, ?REP_ID_VERSION, + LogId, LogId, RepProps, UserCtx, []). + +% Accumulate the replication logs +% Falls back to older log document ids and migrates them +fold_replication_logs([], _Vsn, _LogId, _NewId, _RepProps, _UserCtx, Acc) -> + lists:reverse(Acc); +fold_replication_logs([Db|Rest]=Dbs, Vsn, LogId, NewId, + RepProps, UserCtx, Acc) -> + case open_replication_log(Db, LogId) of + {error, not_found} when Vsn > 1 -> + OldRepId = make_replication_id(RepProps, UserCtx, Vsn - 1), + fold_replication_logs(Dbs, Vsn - 1, + ?l2b(?LOCAL_DOC_PREFIX ++ OldRepId), NewId, RepProps, UserCtx, Acc); + {error, not_found} -> + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, + RepProps, UserCtx, [#doc{id=NewId}|Acc]); + {ok, Doc} when LogId =:= NewId -> + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, + RepProps, UserCtx, [Doc|Acc]); + {ok, Doc} -> + MigratedLog = #doc{id=NewId,body=Doc#doc.body}, + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, + RepProps, UserCtx, [MigratedLog|Acc]) + end. + +open_replication_log(Db, DocId) -> + case open_doc(Db, DocId) of + {ok, Doc} -> + ?LOG_DEBUG("found a replication log for ~s", [dbname(Db)]), + {ok, Doc}; + _ -> + ?LOG_DEBUG("didn't find a replication log for ~s", [dbname(Db)]), + {error, not_found} + end. + +open_doc(#http_db{} = Db, DocId) -> + Req = Db#http_db{resource = couch_util:encode_doc_id(DocId)}, case couch_rep_httpc:request(Req) of {[{<<"error">>, _}, {<<"reason">>, _}]} -> - ?LOG_DEBUG("didn't find a replication log for ~s", [Db#http_db.url]), - #doc{id=?l2b(DocId)}; + {error, not_found}; Doc -> - ?LOG_DEBUG("found a replication log for ~s", [Db#http_db.url]), - couch_doc:from_json_obj(Doc) + {ok, couch_doc:from_json_obj(Doc)} end; -open_replication_log(Db, RepId) -> - DocId = ?l2b(?LOCAL_DOC_PREFIX ++ RepId), - case couch_db:open_doc(Db, DocId, []) of - {ok, Doc} -> - ?LOG_DEBUG("found a replication log for ~s", [Db#db.name]), - Doc; - _ -> - ?LOG_DEBUG("didn't find a replication log for ~s", [Db#db.name]), - #doc{id=DocId} - end. +open_doc(Db, DocId) -> + couch_db:open_doc(Db, DocId). -open_db(Props, UserCtx) -> - open_db(Props, UserCtx, false). +open_db(Props, UserCtx, ProxyParams) -> + open_db(Props, UserCtx, ProxyParams, false). 
-open_db({Props}, _UserCtx, CreateTarget) -> - Url = maybe_add_trailing_slash(proplists:get_value(<<"url">>, Props)), - {AuthProps} = proplists:get_value(<<"auth">>, Props, {[]}), - {BinHeaders} = proplists:get_value(<<"headers">>, Props, {[]}), +open_db({Props}, _UserCtx, ProxyParams, CreateTarget) -> + Url = maybe_add_trailing_slash(couch_util:get_value(<<"url">>, Props)), + {AuthProps} = couch_util:get_value(<<"auth">>, Props, {[]}), + {BinHeaders} = couch_util:get_value(<<"headers">>, Props, {[]}), Headers = [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders], DefaultHeaders = (#http_db{})#http_db.headers, - Db = #http_db{ + Db1 = #http_db{ url = Url, auth = AuthProps, headers = lists:ukeymerge(1, Headers, DefaultHeaders) }, + Db = Db1#http_db{ + options = Db1#http_db.options ++ ProxyParams ++ + couch_rep_httpc:ssl_options(Db1) + }, couch_rep_httpc:db_exists(Db, CreateTarget); -open_db(<<"http://",_/binary>>=Url, _, CreateTarget) -> - open_db({[{<<"url">>,Url}]}, [], CreateTarget); -open_db(<<"https://",_/binary>>=Url, _, CreateTarget) -> - open_db({[{<<"url">>,Url}]}, [], CreateTarget); -open_db(<<DbName/binary>>, UserCtx, CreateTarget) -> - case CreateTarget of - true -> - ok = couch_httpd:verify_is_server_admin(UserCtx), - couch_server:create(DbName, [{user_ctx, UserCtx}]); - false -> ok - end, - - case couch_db:open(DbName, [{user_ctx, UserCtx}]) of - {ok, Db} -> - couch_db:monitor(Db), - Db; - {not_found, no_db_file} -> throw({db_not_found, DbName}) +open_db(<<"http://",_/binary>>=Url, _, ProxyParams, CreateTarget) -> + open_db({[{<<"url">>,Url}]}, [], ProxyParams, CreateTarget); +open_db(<<"https://",_/binary>>=Url, _, ProxyParams, CreateTarget) -> + open_db({[{<<"url">>,Url}]}, [], ProxyParams, CreateTarget); +open_db(<<DbName/binary>>, UserCtx, _ProxyParams, CreateTarget) -> + try + case CreateTarget of + true -> + ok = couch_httpd:verify_is_server_admin(UserCtx), + couch_server:create(DbName, [{user_ctx, UserCtx}]); + false -> + ok + end, + + case couch_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + couch_db:monitor(Db), + Db; + {not_found, no_db_file} -> + throw({db_not_found, DbName}) + end + catch throw:{unauthorized, _} -> + throw({unauthorized, DbName}) end. 
schedule_checkpoint(#state{checkpoint_scheduled = nil} = State) -> @@ -495,38 +646,59 @@ do_checkpoint(State) -> committed_seq = NewSeqNum, start_seq = StartSeqNum, history = OldHistory, + session_id = SessionId, source_log = SourceLog, target_log = TargetLog, rep_starttime = ReplicationStartTime, src_starttime = SrcInstanceStartTime, tgt_starttime = TgtInstanceStartTime, - stats = Stats + stats = Stats, + rep_doc = {RepDoc} } = State, case commit_to_both(Source, Target, NewSeqNum) of {SrcInstanceStartTime, TgtInstanceStartTime} -> ?LOG_INFO("recording a checkpoint for ~s -> ~s at source update_seq ~p", [dbname(Source), dbname(Target), NewSeqNum]), - SessionId = couch_uuids:random(), + EndTime = ?l2b(httpd_util:rfc1123_date()), + StartTime = ?l2b(ReplicationStartTime), + DocsRead = ets:lookup_element(Stats, docs_read, 2), + DocsWritten = ets:lookup_element(Stats, docs_written, 2), + DocWriteFailures = ets:lookup_element(Stats, doc_write_failures, 2), NewHistoryEntry = {[ {<<"session_id">>, SessionId}, - {<<"start_time">>, list_to_binary(ReplicationStartTime)}, - {<<"end_time">>, list_to_binary(httpd_util:rfc1123_date())}, + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, {<<"start_last_seq">>, StartSeqNum}, {<<"end_last_seq">>, NewSeqNum}, {<<"recorded_seq">>, NewSeqNum}, {<<"missing_checked">>, ets:lookup_element(Stats, total_revs, 2)}, {<<"missing_found">>, ets:lookup_element(Stats, missing_revs, 2)}, - {<<"docs_read">>, ets:lookup_element(Stats, docs_read, 2)}, - {<<"docs_written">>, ets:lookup_element(Stats, docs_written, 2)}, - {<<"doc_write_failures">>, - ets:lookup_element(Stats, doc_write_failures, 2)} + {<<"docs_read">>, DocsRead}, + {<<"docs_written">>, DocsWritten}, + {<<"doc_write_failures">>, DocWriteFailures} ]}, - % limit history to 50 entries - NewRepHistory = {[ + BaseHistory = [ {<<"session_id">>, SessionId}, - {<<"source_last_seq">>, NewSeqNum}, - {<<"history">>, lists:sublist([NewHistoryEntry | OldHistory], 50)} - ]}, + {<<"source_last_seq">>, NewSeqNum} + ] ++ case couch_util:get_value(<<"doc_ids">>, RepDoc) of + undefined -> + []; + DocIds when is_list(DocIds) -> + % backwards compatibility with the result of a replication by + % doc IDs in versions 0.11.x and 1.0.x + [ + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"docs_read">>, DocsRead}, + {<<"docs_written">>, DocsWritten}, + {<<"doc_write_failures">>, DocWriteFailures} + ] + end, + % limit history to 50 entries + NewRepHistory = { + BaseHistory ++ + [{<<"history">>, lists:sublist([NewHistoryEntry | OldHistory], 50)}] + }, try {SrcRevPos,SrcRevId} = @@ -559,7 +731,7 @@ do_checkpoint(State) -> close_db(Target), close_db(Source), {ok, NewState} = init(State#state.init_args), - NewState + NewState#state{listeners=State#state.listeners} end. commit_to_both(Source, Target, RequiredSeq) -> @@ -580,17 +752,22 @@ commit_to_both(Source, Target, RequiredSeq) -> end, {SourceStartTime, TargetStartTime}. 
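%% Illustrative sketch: the checkpoint document keeps only the 50 most
%% recent session entries, newest first, which is exactly what
%% lists:sublist/2 over a prepended list yields — older sessions simply
%% fall off the end.
keep_history(NewEntry, OldHistory) ->
    lists:sublist([NewEntry | OldHistory], 50).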
-ensure_full_commit(#http_db{} = Target) -> +ensure_full_commit(#http_db{headers = Headers} = Target) -> + Headers1 = [ + {"Content-Length", 0} | + couch_util:proplist_apply_field( + {"Content-Type", "application/json"}, Headers) + ], Req = Target#http_db{ resource = "_ensure_full_commit", method = post, - body = true + headers = Headers1 }, {ResultProps} = couch_rep_httpc:request(Req), - true = proplists:get_value(<<"ok">>, ResultProps), - proplists:get_value(<<"instance_start_time">>, ResultProps); + true = couch_util:get_value(<<"ok">>, ResultProps), + couch_util:get_value(<<"instance_start_time">>, ResultProps); ensure_full_commit(Target) -> - {ok, NewDb} = couch_db:open(Target#db.name, []), + {ok, NewDb} = couch_db:open_int(Target#db.name, []), UpdateSeq = couch_db:get_update_seq(Target), CommitSeq = couch_db:get_committed_update_seq(NewDb), InstanceStartTime = NewDb#db.instance_start_time, @@ -605,20 +782,25 @@ ensure_full_commit(Target) -> InstanceStartTime end. -ensure_full_commit(#http_db{} = Source, RequiredSeq) -> +ensure_full_commit(#http_db{headers = Headers} = Source, RequiredSeq) -> + Headers1 = [ + {"Content-Length", 0} | + couch_util:proplist_apply_field( + {"Content-Type", "application/json"}, Headers) + ], Req = Source#http_db{ resource = "_ensure_full_commit", method = post, - body = true, - qs = [{seq, RequiredSeq}] + qs = [{seq, RequiredSeq}], + headers = Headers1 }, {ResultProps} = couch_rep_httpc:request(Req), - case proplists:get_value(<<"ok">>, ResultProps) of + case couch_util:get_value(<<"ok">>, ResultProps) of true -> - proplists:get_value(<<"instance_start_time">>, ResultProps); + couch_util:get_value(<<"instance_start_time">>, ResultProps); undefined -> nil end; ensure_full_commit(Source, RequiredSeq) -> - {ok, NewDb} = couch_db:open(Source#db.name, []), + {ok, NewDb} = couch_db:open_int(Source#db.name, []), CommitSeq = couch_db:get_committed_update_seq(NewDb), InstanceStartTime = NewDb#db.instance_start_time, couch_db:close(NewDb), @@ -632,15 +814,15 @@ ensure_full_commit(Source, RequiredSeq) -> InstanceStartTime end. -update_local_doc(#http_db{} = Db, #doc{id=DocId} = Doc) -> +update_local_doc(#http_db{} = Db, Doc) -> Req = Db#http_db{ - resource = couch_util:url_encode(DocId), + resource = couch_util:encode_doc_id(Doc), method = put, body = couch_doc:to_json_obj(Doc, [attachments]), headers = [{"x-couch-full-commit", "false"} | Db#http_db.headers] }, {ResponseMembers} = couch_rep_httpc:request(Req), - Rev = proplists:get_value(<<"rev">>, ResponseMembers), + Rev = couch_util:get_value(<<"rev">>, ResponseMembers), couch_doc:parse_rev(Rev); update_local_doc(Db, Doc) -> {ok, Result} = couch_db:update_doc(Db, Doc, [delay_commit]), @@ -649,7 +831,129 @@ update_local_doc(Db, Doc) -> up_to_date(#http_db{}, _Seq) -> true; up_to_date(Source, Seq) -> - {ok, NewDb} = couch_db:open(Source#db.name, []), + {ok, NewDb} = couch_db:open_int(Source#db.name, []), T = NewDb#db.update_seq == Seq, couch_db:close(NewDb), T. + +parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl) -> + parse_proxy_params(?b2l(ProxyUrl)); +parse_proxy_params([]) -> + []; +parse_proxy_params(ProxyUrl) -> + {url, _, Base, Port, User, Passwd, _Path, _Proto} = + ibrowse_lib:parse_url(ProxyUrl), + [{proxy_host, Base}, {proxy_port, Port}] ++ + case is_list(User) andalso is_list(Passwd) of + false -> + []; + true -> + [{proxy_user, User}, {proxy_password, Passwd}] + end. 
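%% Illustrative expected result of parse_proxy_params/1 above, assuming
%% ibrowse's URL parser; the credential pair is emitted only when both a
%% user and a password are present in the proxy URL:
%%   parse_proxy_params("http://joe:secret@proxy.local:8080") =>
%%       [{proxy_host, "proxy.local"}, {proxy_port, 8080},
%%        {proxy_user, "joe"}, {proxy_password, "secret"}]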
+ +update_rep_doc({Props} = _RepDoc, KVs) -> + case couch_util:get_value(<<"_id">>, Props) of + undefined -> + % replication triggered by POSTing to _replicate/ + ok; + RepDocId -> + % replication triggered by adding a Rep Doc to the replicator DB + {ok, RepDb} = ensure_rep_db_exists(), + case couch_db:open_doc(RepDb, RepDocId, []) of + {ok, LatestRepDoc} -> + update_rep_doc(RepDb, LatestRepDoc, KVs); + _ -> + ok + end, + couch_db:close(RepDb) + end. + +update_rep_doc(RepDb, #doc{body = {RepDocBody}} = RepDoc, KVs) -> + NewRepDocBody = lists:foldl( + fun({<<"_replication_state">> = K, _V} = KV, Body) -> + Body1 = lists:keystore(K, 1, Body, KV), + {Mega, Secs, _} = erlang:now(), + UnixTime = Mega * 1000000 + Secs, + lists:keystore( + <<"_replication_state_time">>, 1, + Body1, {<<"_replication_state_time">>, UnixTime}); + ({K, _V} = KV, Body) -> + lists:keystore(K, 1, Body, KV) + end, + RepDocBody, + KVs + ), + % might not succeed - when the replication doc is deleted right + % before this update (not an error) + couch_db:update_doc( + RepDb, + RepDoc#doc{body = {NewRepDocBody}}, + [] + ). + +maybe_set_triggered({RepProps} = RepDoc, RepId) -> + case couch_util:get_value(<<"_replication_state">>, RepProps) of + <<"triggered">> -> + ok; + _ -> + update_rep_doc( + RepDoc, + [ + {<<"_replication_state">>, <<"triggered">>}, + {<<"_replication_id">>, ?l2b(RepId)} + ] + ) + end. + +ensure_rep_db_exists() -> + DbName = ?l2b(couch_config:get("replicator", "db", "_replicator")), + Opts = [ + {user_ctx, #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}}, + sys_db + ], + case couch_db:open(DbName, Opts) of + {ok, Db} -> + Db; + _Error -> + {ok, Db} = couch_db:create(DbName, Opts) + end, + ok = ensure_rep_ddoc_exists(Db, <<"_design/_replicator">>), + {ok, Db}. + +ensure_rep_ddoc_exists(RepDb, DDocID) -> + case couch_db:open_doc(RepDb, DDocID, []) of + {ok, _Doc} -> + ok; + _ -> + DDoc = couch_doc:from_json_obj({[ + {<<"_id">>, DDocID}, + {<<"language">>, <<"javascript">>}, + {<<"validate_doc_update">>, ?REP_DB_DOC_VALIDATE_FUN} + ]}), + {ok, _Rev} = couch_db:update_doc(RepDb, DDoc, []) + end, + ok. + +source_db_update_notifier(#db{name = DbName}) -> + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({compacted, DbName1}) when DbName1 =:= DbName -> + ok = gen_server:cast(Server, reopen_source_db); + (_) -> + ok + end), + Notifier; +source_db_update_notifier(_) -> + nil. + +target_db_update_notifier(#db{name = DbName}) -> + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({compacted, DbName1}) when DbName1 =:= DbName -> + ok = gen_server:cast(Server, reopen_target_db); + (_) -> + ok + end), + Notifier; +target_db_update_notifier(_) -> + nil. diff --git a/src/couchdb/couch_rep_att.erl b/src/couchdb/couch_rep_att.erl index 6b576a01..6bb993a8 100644 --- a/src/couchdb/couch_rep_att.erl +++ b/src/couchdb/couch_rep_att.erl @@ -29,11 +29,13 @@ convert_stub(#att{data=stub, name=Name} = Attachment, Attachment#att{data=RcvFun}. 
cleanup() -> - receive + receive {ibrowse_async_response, _, _} -> %% TODO maybe log, didn't expect to have data here cleanup(); - {ibrowse_async_response_end, _} -> + {ibrowse_async_response_end, _} -> + cleanup(); + {ibrowse_async_headers, _, _, _} -> cleanup() after 0 -> erase(), @@ -43,13 +45,27 @@ cleanup() -> % internal funs attachment_receiver(Ref, Request) -> - case get(Ref) of + try case get(Ref) of undefined -> {ReqId, ContentEncoding} = start_http_request(Request), put(Ref, {ReqId, ContentEncoding}), receive_data(Ref, ReqId, ContentEncoding); {ReqId, ContentEncoding} -> receive_data(Ref, ReqId, ContentEncoding) + end + catch + throw:{attachment_request_failed, _} -> + case {Request#http_db.retries, Request#http_db.pause} of + {0, _} -> + ?LOG_INFO("request for ~p failed", [Request#http_db.resource]), + throw({attachment_request_failed, max_retries_reached}); + {N, Pause} when N > 0 -> + ?LOG_INFO("request for ~p timed out, retrying in ~p seconds", + [Request#http_db.resource, Pause/1000]), + timer:sleep(Pause), + cleanup(), + attachment_receiver(Ref, Request#http_db{retries = N-1}) + end end. receive_data(Ref, ReqId, ContentEncoding) -> @@ -63,14 +79,12 @@ receive_data(Ref, ReqId, ContentEncoding) -> throw({attachment_request_failed, Err}); {ibrowse_async_response, ReqId, Data} -> % ?LOG_DEBUG("got ~p bytes for ~p", [size(Data), ReqId]), - if ContentEncoding =:= "gzip" -> - zlib:gunzip(Data); - true -> - Data - end; + Data; {ibrowse_async_response_end, ReqId} -> ?LOG_ERROR("streaming att. ended but more data requested ~p", [ReqId]), throw({attachment_request_failed, premature_end}) + after 31000 -> + throw({attachment_request_failed, timeout}) end. start_http_request(Req) -> @@ -84,14 +98,15 @@ start_http_request(Req) -> {ok, ContentEncoding, NewReqId} -> {NewReqId, ContentEncoding} end + after 10000 -> + throw({attachment_request_failed, timeout}) end. validate_headers(_Req, 200, Headers) -> MochiHeaders = mochiweb_headers:make(Headers), {ok, mochiweb_headers:get_value("Content-Encoding", MochiHeaders)}; validate_headers(Req, Code, Headers) when Code > 299, Code < 400 -> - Url = mochiweb_headers:get_value("Location",mochiweb_headers:make(Headers)), - NewReq = couch_rep_httpc:redirected_request(Req, Url), + NewReq = couch_rep_httpc:redirected_request(Code, Headers, Req), {ibrowse_req_id, ReqId} = couch_rep_httpc:request(NewReq), receive {ibrowse_async_headers, ReqId, NewCode, NewHeaders} -> {ok, Encoding} = validate_headers(NewReq, list_to_integer(NewCode), diff --git a/src/couchdb/couch_rep_changes_feed.erl b/src/couchdb/couch_rep_changes_feed.erl index cdfed6a0..246e82c0 100644 --- a/src/couchdb/couch_rep_changes_feed.erl +++ b/src/couchdb/couch_rep_changes_feed.erl @@ -12,12 +12,13 @@ -module(couch_rep_changes_feed). -behaviour(gen_server). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([start_link/4, next/1, stop/1]). -define(BUFFER_SIZE, 1000). +-define(DOC_IDS_FILTER_NAME, "_doc_ids"). -include("couch_db.hrl"). -include("../ibrowse/ibrowse.hrl"). @@ -36,6 +37,11 @@ rows = queue:new() }). +-import(couch_util, [ + get_value/2, + get_value/3 +]). + start_link(Parent, Source, StartSeq, PostProps) -> gen_server:start_link(?MODULE, [Parent, Source, StartSeq, PostProps], []). @@ -43,44 +49,64 @@ next(Server) -> gen_server:call(Server, next_changes, infinity). stop(Server) -> - gen_server:call(Server, stop). + catch gen_server:call(Server, stop), + ok. 
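%% Illustrative sketch, not part of this patch: the bounded-retry pattern
%% attachment_receiver/2 applies above, reduced to its core. Fetch is any
%% fun that returns a result or throws {request_failed, Reason}; the pause
%% here is fixed, matching the attachment path (the generic httpc path
%% doubles it instead).
with_retries(_Fetch, 0, _Pause) ->
    throw({request_failed, max_retries_reached});
with_retries(Fetch, N, Pause) when N > 0 ->
    try
        Fetch()
    catch
        throw:{request_failed, _} ->
            timer:sleep(Pause),
            with_retries(Fetch, N - 1, Pause)
    end.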
-init([_Parent, #http_db{}=Source, Since, PostProps] = Args) -> +init([Parent, #http_db{headers = Headers0} = Source, Since, PostProps]) -> process_flag(trap_exit, true), - Feed = case proplists:get_value(<<"continuous">>, PostProps, false) of + Feed = case get_value(<<"continuous">>, PostProps, false) of false -> normal; true -> continuous end, + BaseQS = [ + {"style", all_docs}, + {"heartbeat", 10000}, + {"since", Since}, + {"feed", Feed} + ], + {QS, Method, Body, Headers} = case get_value(<<"doc_ids">>, PostProps) of + undefined -> + {maybe_add_filter_qs_params(PostProps, BaseQS), get, nil, Headers0}; + DocIds when is_list(DocIds) -> + Headers1 = [{"Content-Type", "application/json"} | Headers0], + QS1 = [{"filter", ?l2b(?DOC_IDS_FILTER_NAME)} | BaseQS], + {QS1, post, {[{<<"doc_ids">>, DocIds}]}, Headers1} + end, Pid = couch_rep_httpc:spawn_link_worker_process(Source), Req = Source#http_db{ + method = Method, + body = Body, resource = "_changes", - qs = [{style, all_docs}, {heartbeat, 10000}, {since, Since}, - {feed, Feed}], + qs = QS, conn = Pid, - options = [{stream_to, {self(), once}}, {response_format, binary}, - {inactivity_timeout, 31000}], % miss 3 heartbeats, assume death - headers = Source#http_db.headers -- [{"Accept-Encoding", "gzip"}] + options = [{stream_to, {self(), once}}] ++ + lists:keydelete(inactivity_timeout, 1, Source#http_db.options), + headers = Headers -- [{"Accept-Encoding", "gzip"}] }, {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req), + Args = [Parent, Req, Since, PostProps], receive {ibrowse_async_headers, ReqId, "200", _} -> ibrowse:stream_next(ReqId), {ok, #state{conn=Pid, last_seq=Since, reqid=ReqId, init_args=Args}}; - {ibrowse_async_headers, ReqId, Code, Hdrs} when Code=="301"; Code=="302" -> - catch ibrowse:stop_worker_process(Pid), - Url2 = mochiweb_headers:get_value("Location", mochiweb_headers:make(Hdrs)), - %% TODO use couch_httpc:request instead of start_http_request - {Pid2, ReqId2} = start_http_request(Url2), + {ibrowse_async_headers, ReqId, Code, Hdrs} + when Code =:= "301"; Code =:= "302"; Code =:= "303" -> + stop_link_worker(Pid), + Req2 = couch_rep_httpc:redirected_request(Code, Hdrs, Req), + Pid2 = couch_rep_httpc:spawn_link_worker_process(Req2), + Req3 = Req2#http_db{conn = Pid2}, + {ibrowse_req_id, ReqId2} = couch_rep_httpc:request(Req3), + Args2 = [Parent, Req3, Since, PostProps], receive {ibrowse_async_headers, ReqId2, "200", _} -> - {ok, #state{conn=Pid2, last_seq=Since, reqid=ReqId2, init_args=Args}} + {ok, #state{conn=Pid2, last_seq=Since, reqid=ReqId2, init_args=Args2}} after 30000 -> {stop, changes_timeout} end; {ibrowse_async_headers, ReqId, "404", _} -> - catch ibrowse:stop_worker_process(Pid), + stop_link_worker(Pid), ?LOG_INFO("source doesn't have _changes, trying _all_docs_by_seq", []), Self = self(), BySeqPid = spawn_link(fun() -> by_seq_loop(Self, Source, Since) end), @@ -94,20 +120,81 @@ init([_Parent, #http_db{}=Source, Since, PostProps] = Args) -> init([_Parent, Source, Since, PostProps] = InitArgs) -> process_flag(trap_exit, true), Server = self(), - ChangesPid = - case proplists:get_value(<<"continuous">>, PostProps, false) of - false -> - spawn_link(fun() -> send_local_changes_once(Server, Source, Since) end); - true -> - spawn_link(fun() -> - Self = self(), - {ok, _} = couch_db_update_notifier:start_link(fun(Msg) -> - local_update_notification(Self, Source#db.name, Msg) end), - send_local_changes_forever(Server, Source, Since) - end) + Filter = case get_value(<<"doc_ids">>, PostProps) of + undefined -> + 
?b2l(get_value(<<"filter">>, PostProps, <<>>)); + DocIds when is_list(DocIds) -> + ?DOC_IDS_FILTER_NAME end, + ChangesArgs = #changes_args{ + style = all_docs, + since = Since, + filter = Filter, + feed = case get_value(<<"continuous">>, PostProps, false) of + true -> + "continuous"; + false -> + "normal" + end, + timeout = infinity + }, + ChangesPid = spawn_link(fun() -> + ChangesFeedFun = couch_changes:handle_changes( + ChangesArgs, + {json_req, filter_json_req(Filter, Source, PostProps)}, + Source + ), + ChangesFeedFun(fun({change, Change, _}, _) -> + gen_server:call(Server, {add_change, Change}, infinity); + (_, _) -> + ok + end) + end), {ok, #state{changes_loop=ChangesPid, init_args=InitArgs}}. +maybe_add_filter_qs_params(PostProps, BaseQS) -> + case get_value(<<"filter">>, PostProps) of + undefined -> + BaseQS; + FilterName -> + {Params} = get_value(<<"query_params">>, PostProps, {[]}), + lists:foldr( + fun({K, V}, QSAcc) -> + Ks = couch_util:to_list(K), + case proplists:is_defined(Ks, QSAcc) of + true -> + QSAcc; + false -> + [{Ks, V} | QSAcc] + end + end, + [{"filter", FilterName} | BaseQS], + Params + ) + end. + +filter_json_req([], _Db, _PostProps) -> + {[]}; +filter_json_req(?DOC_IDS_FILTER_NAME, _Db, PostProps) -> + {[{<<"doc_ids">>, get_value(<<"doc_ids">>, PostProps)}]}; +filter_json_req(FilterName, Db, PostProps) -> + {Query} = get_value(<<"query_params">>, PostProps, {[]}), + {ok, Info} = couch_db:get_db_info(Db), + % simulate a request to db_name/_changes + {[ + {<<"info">>, {Info}}, + {<<"id">>, null}, + {<<"method">>, 'GET'}, + {<<"path">>, [couch_db:name(Db), <<"_changes">>]}, + {<<"query">>, {[{<<"filter">>, FilterName} | Query]}}, + {<<"headers">>, []}, + {<<"body">>, []}, + {<<"peer">>, <<"replicator">>}, + {<<"form">>, []}, + {<<"cookie">>, []}, + {<<"userCtx">>, couch_util:json_user_ctx(Db)} + ]}. + handle_call({add_change, Row}, From, State) -> handle_add_change(Row, From, State); @@ -123,7 +210,7 @@ handle_cast(_Msg, State) -> handle_info({ibrowse_async_headers, Id, Code, Hdrs}, #state{reqid=Id}=State) -> handle_headers(list_to_integer(Code), Hdrs, State); -handle_info({ibrowse_async_response, Id, {error,connection_closed}}, +handle_info({ibrowse_async_response, Id, {error, sel_conn_closed}}, #state{reqid=Id}=State) -> handle_retry(State); @@ -144,9 +231,20 @@ handle_info({'EXIT', From, Reason}, #state{changes_loop=From} = State) -> ?LOG_ERROR("changes_loop died with reason ~p", [Reason]), {stop, changes_loop_died, State}; -handle_info(Msg, State) -> - ?LOG_DEBUG("unexpected message at changes_feed ~p", [Msg]), - {noreply, State}. +handle_info({'EXIT', From, Reason}, State) -> + ?LOG_ERROR("changes loop, process ~p died with reason ~p", [From, Reason]), + {stop, {From, Reason}, State}; + +handle_info(Msg, #state{init_args = InitArgs} = State) -> + case Msg of + changes_timeout -> + [_, #http_db{url = Url} | _] = InitArgs, + ?LOG_ERROR("changes loop timeout, no data received from ~s", + [couch_util:url_strip_password(Url)]); + _ -> + ?LOG_ERROR("changes loop received unexpected message ~p", [Msg]) + end, + {stop, Msg, State}. terminate(_Reason, State) -> #state{ @@ -154,8 +252,7 @@ terminate(_Reason, State) -> conn = Conn } = State, if is_pid(ChangesPid) -> exit(ChangesPid, stop); true -> ok end, - if is_pid(Conn) -> catch ibrowse:stop_worker_process(Conn); true -> ok end, - ok. + stop_link_worker(Conn). code_change(_OldVsn, State, _Extra) -> {ok, State}. 
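%% Illustrative sketch of the merge rule in maybe_add_filter_qs_params/2
%% above: user-supplied query_params are appended, but a key already
%% present in the base query string (the filter name, or a reserved key
%% such as "since") is never overridden. Binary keys are assumed here, as
%% in the patch.
merge_qs(BaseQS, Params) ->
    lists:foldr(
        fun({K, V}, Acc) ->
            Ks = binary_to_list(K),
            case proplists:is_defined(Ks, Acc) of
                true -> Acc;
                false -> [{Ks, V} | Acc]
            end
        end, BaseQS, Params).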
@@ -196,12 +293,16 @@ handle_next_changes(_From, State) -> handle_headers(200, _, State) -> maybe_stream_next(State), {noreply, State}; -handle_headers(301, Hdrs, State) -> - catch ibrowse:stop_worker_process(State#state.conn), - Url = mochiweb_headers:get_value("Location", mochiweb_headers:make(Hdrs)), - %% TODO use couch_httpc:request instead of start_http_request - {Pid, ReqId} = start_http_request(Url), - {noreply, State#state{conn=Pid, reqid=ReqId}}; +handle_headers(Code, Hdrs, #state{init_args = InitArgs} = State) + when Code =:= 301 ; Code =:= 302 ; Code =:= 303 -> + stop_link_worker(State#state.conn), + [Parent, Source, Since, PostProps] = InitArgs, + Source2 = couch_rep_httpc:redirected_request(Code, Hdrs, Source), + Pid2 = couch_rep_httpc:spawn_link_worker_process(Source2), + Source3 = Source2#http_db{conn = Pid2}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Source3), + InitArgs2 = [Parent, Source3, Since, PostProps], + {noreply, State#state{conn=Pid2, reqid=ReqId, init_args=InitArgs2}}; handle_headers(Code, Hdrs, State) -> ?LOG_ERROR("replicator changes feed failed with code ~s and Headers ~n~p", [Code,Hdrs]), @@ -212,12 +313,10 @@ handle_messages([], State) -> {noreply, State}; handle_messages([<<"{\"results\":[">>|Rest], State) -> handle_messages(Rest, State); -handle_messages([<<"]">>, <<"\"last_seq\":", LastSeqStr/binary>>], State) -> - LastSeq = list_to_integer(?b2l(hd(re:split(LastSeqStr, "}")))), - handle_feed_completion(State#state{last_seq = LastSeq}); -handle_messages([<<"{\"last_seq\":", LastSeqStr/binary>>], State) -> - LastSeq = list_to_integer(?b2l(hd(re:split(LastSeqStr, "}")))), - handle_feed_completion(State#state{last_seq = LastSeq}); +handle_messages([<<"]">>, <<"\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([<<"{\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); handle_messages([Chunk|Rest], State) -> #state{ count = Count, @@ -230,7 +329,7 @@ handle_messages([Chunk|Rest], State) -> #state{reply_to=nil} -> State#state{ count = Count+1, - last_seq = proplists:get_value(<<"seq">>, Props), + last_seq = couch_util:get_value(<<"seq">>, Props), partial_chunk = <<>>, rows=queue:in(Row,Rows) }; @@ -277,16 +376,16 @@ by_seq_loop(Server, Source, StartSeq) -> qs = [{limit, 1000}, {startkey, StartSeq}] }, {Results} = couch_rep_httpc:request(Req), - Rows = proplists:get_value(<<"rows">>, Results), + Rows = couch_util:get_value(<<"rows">>, Results), if Rows =:= [] -> exit(normal); true -> ok end, EndSeq = lists:foldl(fun({RowInfoList}, _) -> - Id = proplists:get_value(<<"id">>, RowInfoList), - Seq = proplists:get_value(<<"key">>, RowInfoList), - {RowProps} = proplists:get_value(<<"value">>, RowInfoList), + Id = couch_util:get_value(<<"id">>, RowInfoList), + Seq = couch_util:get_value(<<"key">>, RowInfoList), + {RowProps} = couch_util:get_value(<<"value">>, RowInfoList), RawRevs = [ - proplists:get_value(<<"rev">>, RowProps), - proplists:get_value(<<"conflicts">>, RowProps, []), - proplists:get_value(<<"deleted_conflicts">>, RowProps, []) + couch_util:get_value(<<"rev">>, RowProps), + couch_util:get_value(<<"conflicts">>, RowProps, []), + couch_util:get_value(<<"deleted_conflicts">>, RowProps, []) ], ParsedRevs = couch_doc:parse_revs(lists:flatten(RawRevs)), Change = {[ @@ -302,73 +401,21 @@ by_seq_loop(Server, Source, StartSeq) -> decode_row(<<",", Rest/binary>>) -> decode_row(Rest); decode_row(Row) -> - {Props} = ?JSON_DECODE(Row), - % [Seq, Id, {<<"changes">>,C}] - Seq = 
proplists:get_value(<<"seq">>, Props), - Id = proplists:get_value(<<"id">>, Props), - C = proplists:get_value(<<"changes">>, Props), - C2 = [{[{<<"rev">>,couch_doc:parse_rev(R)}]} || {[{<<"rev">>,R}]} <- C], - {[{<<"seq">>, Seq}, {<<"id">>,Id}, {<<"changes">>,C2}]}. - -flush_updated_messages() -> - receive updated -> flush_updated_messages() - after 0 -> ok - end. - -local_update_notification(Self, DbName, {updated, DbName}) -> - Self ! updated; -local_update_notification(_, _, _) -> - ok. + ?JSON_DECODE(Row). maybe_stream_next(#state{reqid=nil}) -> ok; maybe_stream_next(#state{complete=false, count=N} = S) when N < ?BUFFER_SIZE -> + timer:cancel(get(timeout)), + {ok, Timeout} = timer:send_after(31000, changes_timeout), + put(timeout, Timeout), ibrowse:stream_next(S#state.reqid); maybe_stream_next(_) -> - ok. - -send_local_changes_forever(Server, Db, Since) -> - #db{name = DbName, user_ctx = UserCtx} = Db, - {ok, NewSeq} = send_local_changes_once(Server, Db, Since), - couch_db:close(Db), - ok = wait_db_updated(), - {ok, NewDb} = couch_db:open(DbName, [{user_ctx, UserCtx}]), - send_local_changes_forever(Server, NewDb, NewSeq). - -send_local_changes_once(Server, Db, Since) -> - FilterFun = - fun(#doc_info{revs=[#rev_info{rev=Rev}|_]}) -> - {[{<<"rev">>, Rev}]} - end, + timer:cancel(get(timeout)). - ChangesFun = - fun([#doc_info{id=Id, high_seq=Seq}|_]=DocInfos, _) -> - Results0 = [FilterFun(DocInfo) || DocInfo <- DocInfos], - Results = [Result || Result <- Results0, Result /= null], - if Results /= [] -> - Change = {[{<<"seq">>,Seq}, {<<"id">>,Id}, {<<"changes">>,Results}]}, - gen_server:call(Server, {add_change, Change}, infinity); - true -> - ok - end, - {ok, Seq} - end, - - couch_db:changes_since(Db, all_docs, Since, ChangesFun, Since). - -start_http_request(RawUrl) -> - Url = ibrowse_lib:parse_url(RawUrl), - {ok, Pid} = ibrowse:spawn_link_worker_process(Url#url.host, Url#url.port), - Opts = [ - {stream_to, {self(), once}}, - {inactivity_timeout, 31000}, - {response_format, binary} - ], - {ibrowse_req_id, Id} = - ibrowse:send_req_direct(Pid, RawUrl, [], get, [], Opts, infinity), - {Pid, Id}. - -wait_db_updated() -> - receive updated -> - flush_updated_messages() - end. +stop_link_worker(Conn) when is_pid(Conn) -> + unlink(Conn), + receive {'EXIT', Conn, _} -> ok after 0 -> ok end, + catch ibrowse:stop_worker_process(Conn); +stop_link_worker(_) -> + ok. diff --git a/src/couchdb/couch_rep_db_listener.erl b/src/couchdb/couch_rep_db_listener.erl new file mode 100644 index 00000000..6f1f0443 --- /dev/null +++ b/src/couchdb/couch_rep_db_listener.erl @@ -0,0 +1,311 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_db_listener). +-behaviour(gen_server). + +-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]). +-export([code_change/3, terminate/2]). + +-include("couch_db.hrl"). + +-define(DOC_TO_REP_ID_MAP, rep_doc_id_to_rep_id). +-define(REP_ID_TO_DOC_ID_MAP, rep_id_to_rep_doc_id). 
+ +-record(state, { + changes_feed_loop = nil, + changes_queue = nil, + changes_processor = nil, + db_notifier = nil +}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init(_) -> + process_flag(trap_exit, true), + {ok, Queue} = couch_work_queue:new( + [{max_size, 1024 * 1024}, {max_items, 1000}]), + {ok, Processor} = changes_processor(Queue), + {ok, Loop} = changes_feed_loop(Queue), + Server = self(), + ok = couch_config:register( + fun("replicator", "db") -> + ok = gen_server:cast(Server, rep_db_changed) + end + ), + {ok, #state{ + changes_feed_loop = Loop, + changes_queue = Queue, + changes_processor = Processor, + db_notifier = db_update_notifier()} + }. + + +handle_call(Msg, From, State) -> + ?LOG_ERROR("Replicator DB listener received unexpected call ~p from ~p", + [Msg, From]), + {stop, {error, {unexpected_call, Msg}}, State}. + + +handle_cast(rep_db_changed, State) -> + #state{ + changes_feed_loop = Loop, + changes_queue = Queue + } = State, + catch unlink(Loop), + catch exit(Loop, rep_db_changed), + couch_work_queue:queue(Queue, stop_all_replications), + {ok, NewLoop} = changes_feed_loop(Queue), + {noreply, State#state{changes_feed_loop = NewLoop}}; + +handle_cast(rep_db_created, #state{changes_feed_loop = Loop} = State) -> + catch unlink(Loop), + catch exit(Loop, rep_db_changed), + {ok, NewLoop} = changes_feed_loop(State#state.changes_queue), + {noreply, State#state{changes_feed_loop = NewLoop}}; + +handle_cast(Msg, State) -> + ?LOG_ERROR("Replicator DB listener received unexpected cast ~p", [Msg]), + {stop, {error, {unexpected_cast, Msg}}, State}. + +handle_info({'EXIT', From, normal}, #state{changes_feed_loop = From} = State) -> + % replicator DB deleted + couch_work_queue:queue(State#state.changes_queue, stop_all_replications), + {noreply, State#state{changes_feed_loop = nil}}; + +handle_info({'EXIT', From, Reason}, #state{db_notifier = From} = State) -> + ?LOG_ERROR("Database update notifier died. Reason: ~p", [Reason]), + {stop, {db_update_notifier_died, Reason}, State}; + +handle_info({'EXIT', From, Reason}, #state{changes_processor = From} = State) -> + ?LOG_ERROR("Replicator DB changes processor died. Reason: ~p", [Reason]), + {stop, {rep_db_changes_processor_died, Reason}, State}. + + +terminate(_Reason, State) -> + #state{ + changes_feed_loop = Loop, + changes_queue = Queue + } = State, + exit(Loop, stop), + % closing the queue will cause changes_processor to shutdown + couch_work_queue:close(Queue), + ok. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +changes_feed_loop(ChangesQueue) -> + {ok, RepDb} = couch_rep:ensure_rep_db_exists(), + Pid = spawn_link( + fun() -> + ChangesFeedFun = couch_changes:handle_changes( + #changes_args{ + include_docs = true, + feed = "continuous", + timeout = infinity, + db_open_options = [sys_db] + }, + {json_req, null}, + RepDb + ), + ChangesFeedFun( + fun({change, Change, _}, _) -> + case has_valid_rep_id(Change) of + true -> + couch_work_queue:queue(ChangesQueue, Change); + false -> + ok + end; + (_, _) -> + ok + end + ) + end + ), + couch_db:close(RepDb), + {ok, Pid}. + + +db_update_notifier() -> + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({created, DbName}) -> + case ?l2b(couch_config:get("replicator", "db", "_replicator")) of + DbName -> + ok = gen_server:cast(Server, rep_db_created); + _ -> + ok + end; + (_) -> + ok + end + ), + Notifier. 
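%% Illustrative sketch of the hand-off pattern used here: the changes feed
%% loop above only enqueues rows, and the processor below drains batches
%% until the queue is closed (couch_work_queue API exactly as used in this
%% patch: dequeue/1 returns closed or {ok, Batch}).
drain(Q, Fun) ->
    case couch_work_queue:dequeue(Q) of
        closed ->
            ok;
        {ok, Batch} ->
            lists:foreach(Fun, Batch),
            drain(Q, Fun)
    end.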
+ + +changes_processor(ChangesQueue) -> + Pid = spawn_link( + fun() -> + ets:new(?DOC_TO_REP_ID_MAP, [named_table, set, private]), + ets:new(?REP_ID_TO_DOC_ID_MAP, [named_table, set, private]), + consume_changes(ChangesQueue), + true = ets:delete(?REP_ID_TO_DOC_ID_MAP), + true = ets:delete(?DOC_TO_REP_ID_MAP) + end + ), + {ok, Pid}. + + +consume_changes(ChangesQueue) -> + case couch_work_queue:dequeue(ChangesQueue) of + closed -> + ok; + {ok, Changes} -> + lists:foreach(fun process_change/1, Changes), + consume_changes(ChangesQueue) + end. + + +has_valid_rep_id({Change}) -> + has_valid_rep_id(couch_util:get_value(<<"id">>, Change)); +has_valid_rep_id(<<?DESIGN_DOC_PREFIX, _Rest/binary>>) -> + false; +has_valid_rep_id(_Else) -> + true. + +process_change(stop_all_replications) -> + ?LOG_INFO("Stopping all ongoing replications because the replicator DB " + "was deleted or changed", []), + stop_all_replications(); + +process_change({Change}) -> + {RepProps} = JsonRepDoc = couch_util:get_value(doc, Change), + DocId = couch_util:get_value(<<"_id">>, RepProps), + case couch_util:get_value(<<"deleted">>, Change, false) of + true -> + rep_doc_deleted(DocId); + false -> + case couch_util:get_value(<<"_replication_state">>, RepProps) of + <<"completed">> -> + replication_complete(DocId); + <<"error">> -> + stop_replication(DocId); + <<"triggered">> -> + maybe_start_replication(DocId, JsonRepDoc); + undefined -> + maybe_start_replication(DocId, JsonRepDoc); + _ -> + ?LOG_ERROR("Invalid value for the `_replication_state` property" + " of the replication document `~s`", [DocId]) + end + end, + ok. + + +rep_user_ctx({RepDoc}) -> + case couch_util:get_value(<<"user_ctx">>, RepDoc) of + undefined -> + #user_ctx{roles = [<<"_admin">>]}; + {UserCtx} -> + #user_ctx{ + name = couch_util:get_value(<<"name">>, UserCtx, null), + roles = couch_util:get_value(<<"roles">>, UserCtx, []) + } + end. + + +maybe_start_replication(DocId, JsonRepDoc) -> + UserCtx = rep_user_ctx(JsonRepDoc), + {BaseId, _} = RepId = couch_rep:make_replication_id(JsonRepDoc, UserCtx), + case ets:lookup(?REP_ID_TO_DOC_ID_MAP, BaseId) of + [] -> + true = ets:insert(?REP_ID_TO_DOC_ID_MAP, {BaseId, DocId}), + true = ets:insert(?DOC_TO_REP_ID_MAP, {DocId, RepId}), + spawn_link(fun() -> start_replication(JsonRepDoc, RepId, UserCtx) end); + [{BaseId, DocId}] -> + ok; + [{BaseId, OtherDocId}] -> + maybe_tag_rep_doc(DocId, JsonRepDoc, ?l2b(BaseId), OtherDocId) + end. + + +maybe_tag_rep_doc(DocId, {Props} = JsonRepDoc, RepId, OtherDocId) -> + case couch_util:get_value(<<"_replication_id">>, Props) of + RepId -> + ok; + _ -> + ?LOG_INFO("The replication specified by the document `~s` was already" + " triggered by the document `~s`", [DocId, OtherDocId]), + couch_rep:update_rep_doc(JsonRepDoc, [{<<"_replication_id">>, RepId}]) + end. + + + +start_replication({RepProps} = RepDoc, {Base, Ext} = RepId, UserCtx) -> + case (catch couch_rep:start_replication(RepDoc, RepId, UserCtx)) of + RepPid when is_pid(RepPid) -> + ?LOG_INFO("Document `~s` triggered replication `~s`", + [couch_util:get_value(<<"_id">>, RepProps), Base ++ Ext]), + couch_rep:get_result(RepPid, RepId, RepDoc, UserCtx); + Error -> + couch_rep:update_rep_doc( + RepDoc, + [ + {<<"_replication_state">>, <<"error">>}, + {<<"_replication_id">>, ?l2b(Base)} + ] + ), + ?LOG_ERROR("Error starting replication `~s`: ~p", [Base ++ Ext, Error]) + end. 
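%% Illustrative sketch, not part of this patch, of the two-table
%% bookkeeping above (table names here are hypothetical, and both named
%% sets are assumed to exist): the forward map finds a document's
%% replication, and the inverse map detects, in a single lookup, a second
%% document that describes an already-running replication.
track(DocId, RepId) ->
    case ets:lookup(rep_to_doc, RepId) of
        [] ->
            true = ets:insert(rep_to_doc, {RepId, DocId}),
            true = ets:insert(doc_to_rep, {DocId, RepId}),
            start_it;
        [{RepId, DocId}] ->
            %% bound-variable match: same doc already triggered it
            already_triggered;
        [{RepId, OtherDocId}] ->
            {duplicate_of, OtherDocId}
    end.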
+ +rep_doc_deleted(DocId) -> + case stop_replication(DocId) of + {ok, {Base, Ext}} -> + ?LOG_INFO("Stopped replication `~s` because replication document `~s`" + " was deleted", [Base ++ Ext, DocId]); + none -> + ok + end. + +replication_complete(DocId) -> + case stop_replication(DocId) of + {ok, {Base, Ext}} -> + ?LOG_INFO("Replication `~s` finished (triggered by document `~s`)", + [Base ++ Ext, DocId]); + none -> + ok + end. + +stop_replication(DocId) -> + case ets:lookup(?DOC_TO_REP_ID_MAP, DocId) of + [{DocId, {BaseId, _} = RepId}] -> + couch_rep:end_replication(RepId), + true = ets:delete(?REP_ID_TO_DOC_ID_MAP, BaseId), + true = ets:delete(?DOC_TO_REP_ID_MAP, DocId), + {ok, RepId}; + [] -> + none + end. + +stop_all_replications() -> + ets:foldl( + fun({_, RepId}, _) -> couch_rep:end_replication(RepId) end, + ok, + ?DOC_TO_REP_ID_MAP + ), + true = ets:delete_all_objects(?REP_ID_TO_DOC_ID_MAP), + true = ets:delete_all_objects(?DOC_TO_REP_ID_MAP). diff --git a/src/couchdb/couch_rep_httpc.erl b/src/couchdb/couch_rep_httpc.erl index 4944f554..7a5bd18b 100644 --- a/src/couchdb/couch_rep_httpc.erl +++ b/src/couchdb/couch_rep_httpc.erl @@ -14,8 +14,10 @@ -include("couch_db.hrl"). -include("../ibrowse/ibrowse.hrl"). --export([db_exists/1, db_exists/2, full_url/1, request/1, redirected_request/2, - spawn_worker_process/1, spawn_link_worker_process/1]). +-export([db_exists/1, db_exists/2]). +-export([full_url/1, request/1, redirected_request/3]). +-export([spawn_worker_process/1, spawn_link_worker_process/1]). +-export([ssl_options/1]). request(#http_db{} = Req) -> do_request(Req). @@ -34,7 +36,7 @@ do_request(Req) -> qs = QS } = Req, Url = full_url(Req), - Headers = case proplists:get_value(<<"oauth">>, Auth) of + Headers = case couch_util:get_value(<<"oauth">>, Auth) of undefined -> Headers0; {OAuthProps} -> @@ -46,7 +48,7 @@ do_request(Req) -> nil -> []; _Else -> - iolist_to_binary(?JSON_ENCODE(B)) + iolist_to_binary(?JSON_ENCODE(B)) end, Resp = case Conn of nil -> @@ -72,10 +74,11 @@ db_exists(Req, CanonicalUrl, CreateDB) -> #http_db{ auth = Auth, headers = Headers0, + options = Options, url = Url } = Req, HeadersFun = fun(Method) -> - case proplists:get_value(<<"oauth">>, Auth) of + case couch_util:get_value(<<"oauth">>, Auth) of undefined -> Headers0; {OAuthProps} -> @@ -84,25 +87,46 @@ db_exists(Req, CanonicalUrl, CreateDB) -> end, case CreateDB of true -> - catch ibrowse:send_req(Url, HeadersFun(put), put); + Headers = [{"Content-Length", 0} | HeadersFun(put)], + catch ibrowse:send_req(Url, Headers, put, [], Options); _Else -> ok end, - case catch ibrowse:send_req(Url, HeadersFun(head), head) of + case catch ibrowse:send_req(Url, HeadersFun(head), head, [], Options) of {ok, "200", _, _} -> Req#http_db{url = CanonicalUrl}; {ok, "301", RespHeaders, _} -> - MochiHeaders = mochiweb_headers:make(RespHeaders), - RedirectUrl = mochiweb_headers:get_value("Location", MochiHeaders), + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), db_exists(Req#http_db{url = RedirectUrl}, RedirectUrl); {ok, "302", RespHeaders, _} -> - MochiHeaders = mochiweb_headers:make(RespHeaders), - RedirectUrl = mochiweb_headers:get_value("Location", MochiHeaders), + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), db_exists(Req#http_db{url = RedirectUrl}, CanonicalUrl); + {ok, "303", RespHeaders, _} -> + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), + db_exists(Req#http_db{method = get, url = RedirectUrl}, CanonicalUrl); + {ok, "401", _, _} -> + throw({unauthorized, ?l2b(Url)}); Error -> 
?LOG_DEBUG("DB at ~s could not be found because ~p", [Url, Error]), throw({db_not_found, ?l2b(Url)}) end. +redirect_url(RespHeaders, OrigUrl) -> + MochiHeaders = mochiweb_headers:make(RespHeaders), + RedUrl = mochiweb_headers:get_value("Location", MochiHeaders), + #url{ + host = Host, port = Port, + path = Path, protocol = Proto + } = ibrowse_lib:parse_url(RedUrl), + #url{username = User, password = Passwd} = ibrowse_lib:parse_url(OrigUrl), + Creds = case is_list(User) andalso is_list(Passwd) of + true -> + User ++ ":" ++ Passwd ++ "@"; + false -> + [] + end, + atom_to_list(Proto) ++ "://" ++ Creds ++ Host ++ ":" ++ + integer_to_list(Port) ++ Path. + full_url(#http_db{url=Url} = Req) when is_binary(Url) -> full_url(Req#http_db{url = ?b2l(Url)}); @@ -115,7 +139,7 @@ full_url(Req) -> resource = Resource, qs = QS } = Req, - QStr = lists:map(fun({K,V}) -> io_lib:format("~s=~s", + QStr = lists:map(fun({K,V}) -> io_lib:format("~s=~s", [couch_util:to_list(K), couch_util:to_list(V)]) end, QS), lists:flatten([Url, Resource, "?", string:join(QStr, "&")]). @@ -123,10 +147,8 @@ process_response({ok, Status, Headers, Body}, Req) -> Code = list_to_integer(Status), if Code =:= 200; Code =:= 201 -> ?JSON_DECODE(maybe_decompress(Headers, Body)); - Code =:= 301; Code =:= 302 -> - MochiHeaders = mochiweb_headers:make(Headers), - RedirectUrl = mochiweb_headers:get_value("Location", MochiHeaders), - do_request(redirected_request(Req, RedirectUrl)); + Code =:= 301; Code =:= 302 ; Code =:= 303 -> + do_request(redirected_request(Code, Headers, Req)); Code =:= 409 -> throw(conflict); Code >= 400, Code < 500 -> @@ -156,7 +178,7 @@ process_response({error, Reason}, Req) -> pause = Pause } = Req, ShortReason = case Reason of - connection_closed -> + sel_conn_closed -> connection_closed; {'EXIT', {noproc, _}} -> noproc; @@ -175,16 +197,26 @@ process_response({error, Reason}, Req) -> do_request(Req#http_db{retries = Retries-1, pause = 2*Pause}) end. -redirected_request(Req, RedirectUrl) -> +redirected_request(Code, Headers, Req) -> + RedirectUrl = redirect_url(Headers, Req#http_db.url), {Base, QStr, _} = mochiweb_util:urlsplit_path(RedirectUrl), QS = mochiweb_util:parse_qs(QStr), - Hdrs = case proplists:get_value(<<"oauth">>, Req#http_db.auth) of + ReqHeaders = case couch_util:get_value(<<"oauth">>, Req#http_db.auth) of undefined -> Req#http_db.headers; _Else -> lists:keydelete("Authorization", 1, Req#http_db.headers) end, - Req#http_db{url=Base, resource="", qs=QS, headers=Hdrs}. + Req#http_db{ + method = case couch_util:to_integer(Code) of + 303 -> get; + _ -> Req#http_db.method + end, + url = Base, + resource = "", + qs = QS, + headers = ReqHeaders + }. spawn_worker_process(Req) -> Url = ibrowse_lib:parse_url(Req#http_db.url), @@ -192,8 +224,7 @@ spawn_worker_process(Req) -> Pid. spawn_link_worker_process(Req) -> - Url = ibrowse_lib:parse_url(Req#http_db.url), - {ok, Pid} = ibrowse_http_client:start_link(Url), + {ok, Pid} = ibrowse:spawn_link_worker_process(Req#http_db.url), Pid. 
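%% Illustrative expected behaviour of redirect_url/2 above, assuming
%% ibrowse's URL parser: host, port and path come from the Location
%% header, while basic-auth credentials are carried over from the original
%% URL:
%%   redirect_url([{"Location", "http://db2.local:5984/target"}],
%%                "http://joe:secret@db1.local:5984/target")
%%       => "http://joe:secret@db2.local:5984/target"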
maybe_decompress(Headers, Body) -> @@ -209,11 +240,11 @@ oauth_header(Url, QS, Action, Props) -> % erlang-oauth doesn't like iolists QSL = [{couch_util:to_list(K), ?b2l(?l2b(couch_util:to_list(V)))} || {K,V} <- QS], - ConsumerKey = ?b2l(proplists:get_value(<<"consumer_key">>, Props)), - Token = ?b2l(proplists:get_value(<<"token">>, Props)), - TokenSecret = ?b2l(proplists:get_value(<<"token_secret">>, Props)), - ConsumerSecret = ?b2l(proplists:get_value(<<"consumer_secret">>, Props)), - SignatureMethodStr = ?b2l(proplists:get_value(<<"signature_method">>, Props, <<"HMAC-SHA1">>)), + ConsumerKey = ?b2l(couch_util:get_value(<<"consumer_key">>, Props)), + Token = ?b2l(couch_util:get_value(<<"token">>, Props)), + TokenSecret = ?b2l(couch_util:get_value(<<"token_secret">>, Props)), + ConsumerSecret = ?b2l(couch_util:get_value(<<"consumer_secret">>, Props)), + SignatureMethodStr = ?b2l(couch_util:get_value(<<"signature_method">>, Props, <<"HMAC-SHA1">>)), SignatureMethodAtom = case SignatureMethodStr of "PLAINTEXT" -> plaintext; @@ -232,3 +263,35 @@ oauth_header(Url, QS, Action, Props) -> Params = oauth:signed_params(Method, Url, QSL, Consumer, Token, TokenSecret) -- QSL, {"Authorization", "OAuth " ++ oauth_uri:params_to_header_string(Params)}. + +ssl_options(#http_db{url = Url}) -> + case ibrowse_lib:parse_url(Url) of + #url{protocol = https} -> + Depth = list_to_integer( + couch_config:get("replicator", "ssl_certificate_max_depth", "3") + ), + SslOpts = [{depth, Depth} | + case couch_config:get("replicator", "verify_ssl_certificates") of + "true" -> + ssl_verify_options(true); + _ -> + ssl_verify_options(false) + end], + [{is_ssl, true}, {ssl_options, SslOpts}]; + #url{protocol = http} -> + [] + end. + +ssl_verify_options(Value) -> + ssl_verify_options(Value, erlang:system_info(otp_release)). + +ssl_verify_options(true, OTPVersion) when OTPVersion >= "R14" -> + CAFile = couch_config:get("replicator", "ssl_trusted_certificates_file"), + [{verify, verify_peer}, {cacertfile, CAFile}]; +ssl_verify_options(false, OTPVersion) when OTPVersion >= "R14" -> + [{verify, verify_none}]; +ssl_verify_options(true, _OTPVersion) -> + CAFile = couch_config:get("replicator", "ssl_trusted_certificates_file"), + [{verify, 2}, {cacertfile, CAFile}]; +ssl_verify_options(false, _OTPVersion) -> + [{verify, 0}]. diff --git a/src/couchdb/couch_rep_missing_revs.erl b/src/couchdb/couch_rep_missing_revs.erl index 5790dd71..9809ca5e 100644 --- a/src/couchdb/couch_rep_missing_revs.erl +++ b/src/couchdb/couch_rep_missing_revs.erl @@ -12,7 +12,7 @@ -module(couch_rep_missing_revs). -behaviour(gen_server). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([start_link/4, next/1, stop/1]). @@ -24,7 +24,6 @@ -record (state, { changes_loop, changes_from = nil, - target, parent, complete = false, count = 0, @@ -44,11 +43,11 @@ next(Server) -> stop(Server) -> gen_server:call(Server, stop). -init([Parent, Target, ChangesFeed, _PostProps]) -> +init([Parent, _Target, ChangesFeed, _PostProps]) -> process_flag(trap_exit, true), Self = self(), - Pid = spawn_link(fun() -> changes_loop(Self, ChangesFeed, Target) end), - {ok, #state{changes_loop=Pid, target=Target, parent=Parent}}. + Pid = spawn_link(fun() -> changes_loop(Self, ChangesFeed, Parent) end), + {ok, #state{changes_loop=Pid, parent=Parent}}. handle_call({add_missing_revs, {HighSeq, Revs}}, From, State) -> State#state.parent ! 
{update_stats, missing_revs, length(Revs)}, @@ -133,48 +132,57 @@ handle_changes_loop_exit(normal, State) -> handle_changes_loop_exit(Reason, State) -> {stop, Reason, State#state{changes_loop=nil}}. -changes_loop(OurServer, SourceChangesServer, Target) -> +changes_loop(OurServer, SourceChangesServer, Parent) -> case couch_rep_changes_feed:next(SourceChangesServer) of complete -> exit(normal); Changes -> + {ok, Target} = gen_server:call(Parent, get_target_db, infinity), MissingRevs = get_missing_revs(Target, Changes), gen_server:call(OurServer, {add_missing_revs, MissingRevs}, infinity) end, - changes_loop(OurServer, SourceChangesServer, Target). + changes_loop(OurServer, SourceChangesServer, Parent). get_missing_revs(#http_db{}=Target, Changes) -> - Transform = fun({[{<<"seq">>,_}, {<<"id">>,Id}, {<<"changes">>,C}]}) -> - {Id, [couch_doc:rev_to_str(R) || {[{<<"rev">>, R}]} <- C]} end, + Transform = fun({Props}) -> + C = couch_util:get_value(<<"changes">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + {Id, [R || {[{<<"rev">>, R}]} <- C]} + end, IdRevsList = [Transform(Change) || Change <- Changes], SeqDict = changes_dictionary(Changes), - {[{<<"seq">>, HighSeq}, _, _]} = lists:last(Changes), + {LastProps} = lists:last(Changes), + HighSeq = couch_util:get_value(<<"seq">>, LastProps), Request = Target#http_db{ resource = "_missing_revs", method = post, body = {IdRevsList} }, {Resp} = couch_rep_httpc:request(Request), - case proplists:get_value(<<"missing_revs">>, Resp) of + case couch_util:get_value(<<"missing_revs">>, Resp) of {MissingRevs} -> X = [{Id, dict:fetch(Id, SeqDict), couch_doc:parse_revs(RevStrs)} || {Id,RevStrs} <- MissingRevs], {HighSeq, X}; _ -> - exit({target_error, proplists:get_value(<<"error">>, Resp)}) + exit({target_error, couch_util:get_value(<<"error">>, Resp)}) end; get_missing_revs(Target, Changes) -> - Transform = fun({[{<<"seq">>,_}, {<<"id">>,Id}, {<<"changes">>,C}]}) -> - {Id, [R || {[{<<"rev">>, R}]} <- C]} end, + Transform = fun({Props}) -> + C = couch_util:get_value(<<"changes">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + {Id, [couch_doc:parse_rev(R) || {[{<<"rev">>, R}]} <- C]} + end, IdRevsList = [Transform(Change) || Change <- Changes], SeqDict = changes_dictionary(Changes), - {[{<<"seq">>, HighSeq}, _, _]} = lists:last(Changes), + {LastProps} = lists:last(Changes), + HighSeq = couch_util:get_value(<<"seq">>, LastProps), {ok, Results} = couch_db:get_missing_revs(Target, IdRevsList), {HighSeq, [{Id, dict:fetch(Id, SeqDict), Revs} || {Id, Revs, _} <- Results]}. changes_dictionary(ChangeList) -> - KVs = [{proplists:get_value(<<"id">>,C), proplists:get_value(<<"seq">>,C)} + KVs = [{couch_util:get_value(<<"id">>,C), couch_util:get_value(<<"seq">>,C)} || {C} <- ChangeList], dict:from_list(KVs). diff --git a/src/couchdb/couch_rep_reader.erl b/src/couchdb/couch_rep_reader.erl index a66454c8..9252bc8c 100644 --- a/src/couchdb/couch_rep_reader.erl +++ b/src/couchdb/couch_rep_reader.erl @@ -12,12 +12,12 @@ -module(couch_rep_reader). -behaviour(gen_server). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([start_link/4, next/1]). --import(couch_util, [url_encode/1]). +-import(couch_util, [encode_doc_id/1]). -define (BUFFER_SIZE, 1000). -define (MAX_CONCURRENT_REQUESTS, 100). 
@@ -57,7 +57,8 @@ init([Parent, Source, MissingRevs, _PostProps]) -> ibrowse:set_max_pipeline_size(Host, Port, ?MAX_PIPELINE_SIZE); true -> ok end, Self = self(), - ReaderLoop = spawn_link(fun() -> reader_loop(Self, Source, MissingRevs) end), + ReaderLoop = spawn_link( + fun() -> reader_loop(Self, Parent, Source, MissingRevs) end), State = #state{ parent = Parent, source = Source, @@ -98,6 +99,8 @@ code_change(_OldVsn, State, _Extra) -> %internal funs +handle_add_docs(_Seq, [], _From, State) -> + {reply, ok, State}; handle_add_docs(Seq, DocsToAdd, From, #state{reply_to=nil} = State) -> State1 = update_sequence_lists(Seq, State), NewState = State1#state{ @@ -141,9 +144,13 @@ handle_open_remote_doc(Id, Seq, Revs, _, #state{source=#http_db{}} = State) -> {_, _Ref} = spawn_document_request(Source, Id, Seq, Revs), {reply, ok, State#state{monitor_count = Count+1}}. -handle_monitor_down(normal, #state{pending_doc_request=nil, +handle_monitor_down(normal, #state{pending_doc_request=nil, reply_to=nil, monitor_count=1, complete=waiting_on_monitors} = State) -> {noreply, State#state{complete=true, monitor_count=0}}; +handle_monitor_down(normal, #state{pending_doc_request=nil, reply_to=From, + monitor_count=1, complete=waiting_on_monitors} = State) -> + gen_server:reply(From, {complete, calculate_new_high_seq(State)}), + {stop, normal, State#state{complete=true, monitor_count=0}}; handle_monitor_down(normal, #state{pending_doc_request=nil} = State) -> #state{monitor_count = Count} = State, {noreply, State#state{monitor_count = Count-1}}; @@ -210,11 +217,11 @@ update_sequence_lists(Seq, State) -> opened_seqs = Opened }. -open_doc_revs(#http_db{} = DbS, DocId, Revs) -> +open_doc_revs(#http_db{url = Url} = DbS, DocId, Revs) -> %% all this logic just splits up revision lists that are too long for %% MochiWeb into multiple requests - BaseQS = [{revs,true}, {latest,true}], - BaseReq = DbS#http_db{resource=url_encode(DocId), qs=BaseQS}, + BaseQS = [{revs,true}, {latest,true}, {att_encoding_info,true}], + BaseReq = DbS#http_db{resource=encode_doc_id(DocId), qs=BaseQS}, BaseLength = length(couch_rep_httpc:full_url(BaseReq)) + 11, % &open_revs= {RevLists, _, _} = lists:foldl(fun split_revlist/2, @@ -226,15 +233,22 @@ open_doc_revs(#http_db{} = DbS, DocId, Revs) -> JsonResults = lists:flatten([couch_rep_httpc:request(R) || R <- Requests]), Transform = - fun({[{<<"missing">>, Rev}]}) -> - {{not_found, missing}, couch_doc:parse_rev(Rev)}; - ({[{<<"ok">>, Json}]}) -> + fun({[{<<"ok">>, Json}]}, Acc) -> #doc{id=Id, revs=Rev, atts=Atts} = Doc = couch_doc:from_json_obj(Json), - Doc#doc{atts=[couch_rep_att:convert_stub(A, {DbS,Id,Rev}) || A <- Atts]} + Doc1 = Doc#doc{ + atts=[couch_rep_att:convert_stub(A, {DbS,Id,Rev}) || A <- Atts] + }, + [Doc1 | Acc]; + ({ErrorProps}, Acc) -> + Err = couch_util:get_value(<<"error">>, ErrorProps, + ?JSON_ENCODE({ErrorProps})), + ?LOG_ERROR("Replicator: error accessing doc ~s at ~s, reason: ~s", + [DocId, couch_util:url_strip_password(Url), Err]), + Acc end, - [Transform(Result) || Result <- JsonResults]. + lists:reverse(lists:foldl(Transform, [], JsonResults)). 
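%% Illustrative sketch, not part of this patch, of the request-splitting
%% idea behind open_doc_revs/3 above (split_revlist/2 itself is defined
%% elsewhere in this module): rev strings are packed into groups so each
%% resulting URL stays under a length budget.
pack_revs(Revs, Budget) ->
    pack_revs(Revs, Budget, 0, [], []).

pack_revs([], _Budget, _Len, Cur, Groups) ->
    lists:reverse([lists:reverse(Cur) | Groups]);
pack_revs([R | Rest], Budget, Len, Cur, Groups) ->
    L = length(R) + 1,  %% +1 for the list separator
    case Cur =:= [] orelse Len + L =< Budget of
        true ->
            pack_revs(Rest, Budget, Len + L, [R | Cur], Groups);
        false ->
            pack_revs(Rest, Budget, L, [R], [lists:reverse(Cur) | Groups])
    end.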
-reader_loop(ReaderServer, Source, MissingRevsServer) -> +reader_loop(ReaderServer, Parent, Source, MissingRevsServer) -> case couch_rep_missing_revs:next(MissingRevsServer) of complete -> exit(complete); @@ -247,22 +261,23 @@ reader_loop(ReaderServer, Source, MissingRevsServer) -> #http_db{} -> [gen_server:call(ReaderServer, {open_remote_doc, Id, Seq, Revs}, infinity) || {Id,Seq,Revs} <- SortedIdsRevs], - reader_loop(ReaderServer, Source, MissingRevsServer); + reader_loop(ReaderServer, Parent, Source, MissingRevsServer); _Local -> - Source2 = maybe_reopen_db(Source, HighSeq), + {ok, Source1} = gen_server:call(Parent, get_source_db, infinity), + Source2 = maybe_reopen_db(Source1, HighSeq), lists:foreach(fun({Id,Seq,Revs}) -> {ok, Docs} = couch_db:open_doc_revs(Source2, Id, Revs, [latest]), JustTheDocs = [Doc || {ok, Doc} <- Docs], gen_server:call(ReaderServer, {add_docs, Seq, JustTheDocs}, infinity) end, SortedIdsRevs), - reader_loop(ReaderServer, Source2, MissingRevsServer) + couch_db:close(Source2), + reader_loop(ReaderServer, Parent, Source2, MissingRevsServer) end end. maybe_reopen_db(#db{update_seq=OldSeq} = Db, HighSeq) when HighSeq > OldSeq -> {ok, NewDb} = couch_db:open(Db#db.name, [{user_ctx, Db#db.user_ctx}]), - couch_db:close(Db), NewDb; maybe_reopen_db(Db, _HighSeq) -> Db. diff --git a/src/couchdb/couch_rep_writer.erl b/src/couchdb/couch_rep_writer.erl index b86028ce..12d6dec5 100644 --- a/src/couchdb/couch_rep_writer.erl +++ b/src/couchdb/couch_rep_writer.erl @@ -16,16 +16,17 @@ -include("couch_db.hrl"). -start_link(Parent, Target, Reader, _PostProps) -> - {ok, spawn_link(fun() -> writer_loop(Parent, Reader, Target) end)}. +start_link(Parent, _Target, Reader, _PostProps) -> + {ok, spawn_link(fun() -> writer_loop(Parent, Reader) end)}. -writer_loop(Parent, Reader, Target) -> +writer_loop(Parent, Reader) -> case couch_rep_reader:next(Reader) of {complete, FinalSeq} -> Parent ! {writer_checkpoint, FinalSeq}, ok; {HighSeq, Docs} -> DocCount = length(Docs), + {ok, Target} = gen_server:call(Parent, get_target_db, infinity), try write_docs(Target, Docs) of {ok, []} -> Parent ! {update_stats, docs_written, DocCount}; @@ -41,31 +42,124 @@ writer_loop(Parent, Reader, Target) -> Parent ! {writer_checkpoint, HighSeq}, couch_rep_att:cleanup(), couch_util:should_flush(), - writer_loop(Parent, Reader, Target) + writer_loop(Parent, Reader) end. -write_docs(#http_db{headers = Headers} = Db, Docs) -> - JsonDocs = [couch_doc:to_json_obj(Doc, [revs,attachments]) || Doc <- Docs], +write_docs(#http_db{} = Db, Docs) -> + {DocsAtts, DocsNoAtts} = lists:partition( + fun(#doc{atts=[]}) -> false; (_) -> true end, + Docs + ), + ErrorsJson0 = write_bulk_docs(Db, DocsNoAtts), + ErrorsJson = lists:foldl( + fun(Doc, Acc) -> write_multi_part_doc(Db, Doc) ++ Acc end, + ErrorsJson0, + DocsAtts + ), + {ok, ErrorsJson}; +write_docs(Db, Docs) -> + couch_db:update_docs(Db, Docs, [delay_commit], replicated_changes). 
+ +write_bulk_docs(_Db, []) -> + []; +write_bulk_docs(#http_db{headers = Headers} = Db, Docs) -> + JsonDocs = [ + couch_doc:to_json_obj(Doc, [revs, att_gzip_length]) || Doc <- Docs + ], Request = Db#http_db{ resource = "_bulk_docs", method = post, body = {[{new_edits, false}, {docs, JsonDocs}]}, - headers = [{"x-couch-full-commit", "false"} | Headers] + headers = couch_util:proplist_apply_field({"Content-Type", "application/json"}, [{"X-Couch-Full-Commit", "false"} | Headers]) }, ErrorsJson = case couch_rep_httpc:request(Request) of {FailProps} -> - exit({target_error, proplists:get_value(<<"error">>, FailProps)}); + exit({target_error, couch_util:get_value(<<"error">>, FailProps)}); List when is_list(List) -> List end, - ErrorsList = [write_docs_1(V) || V <- ErrorsJson], - {ok, ErrorsList}; -write_docs(Db, Docs) -> - couch_db:update_docs(Db, Docs, [delay_commit], replicated_changes). + [write_docs_1(V) || V <- ErrorsJson]. + +write_multi_part_doc(#http_db{headers=Headers} = Db, #doc{atts=Atts} = Doc) -> + JsonBytes = ?JSON_ENCODE( + couch_doc:to_json_obj( + Doc, + [follows, att_encoding_info, attachments] + ) + ), + Boundary = couch_uuids:random(), + {ContentType, Len} = couch_doc:len_doc_to_multi_part_stream( + Boundary, JsonBytes, Atts, true + ), + StreamerPid = spawn_link( + fun() -> streamer_fun(Boundary, JsonBytes, Atts) end + ), + BodyFun = fun(Acc) -> + DataQueue = case Acc of + nil -> + StreamerPid ! {start, self()}, + receive + {queue, Q} -> + Q + end; + Queue -> + Queue + end, + case couch_work_queue:dequeue(DataQueue) of + closed -> + eof; + {ok, Data} -> + {ok, iolist_to_binary(Data), DataQueue} + end + end, + Request = Db#http_db{ + resource = couch_util:encode_doc_id(Doc), + method = put, + qs = [{new_edits, false}], + body = {BodyFun, nil}, + headers = [ + {"x-couch-full-commit", "false"}, + {"Content-Type", ?b2l(ContentType)}, + {"Content-Length", Len} | Headers + ] + }, + Result = case couch_rep_httpc:request(Request) of + {[{<<"error">>, Error}, {<<"reason">>, Reason}]} -> + {Pos, [RevId | _]} = Doc#doc.revs, + ErrId = couch_util:to_existing_atom(Error), + [{Doc#doc.id, couch_doc:rev_to_str({Pos, RevId})}, {ErrId, Reason}]; + _ -> + [] + end, + StreamerPid ! stop, + Result. + +streamer_fun(Boundary, JsonBytes, Atts) -> + receive + stop -> + ok; + {start, From} -> + % better use a brand new queue, to ensure there's no garbage from + % a previous (failed) iteration + {ok, DataQueue} = couch_work_queue:new( + [{max_size, 1024 * 1024}, {max_items, 1000}]), + From ! {queue, DataQueue}, + couch_doc:doc_to_multi_part_stream( + Boundary, + JsonBytes, + Atts, + fun(Data) -> + couch_work_queue:queue(DataQueue, Data) + end, + true + ), + couch_work_queue:close(DataQueue), + streamer_fun(Boundary, JsonBytes, Atts) + end. write_docs_1({Props}) -> - Id = proplists:get_value(<<"id">>, Props), - Rev = couch_doc:parse_rev(proplists:get_value(<<"rev">>, Props)), - ErrId = couch_util:to_existing_atom(proplists:get_value(<<"error">>, Props)), - Reason = proplists:get_value(<<"reason">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + Rev = couch_doc:parse_rev(couch_util:get_value(<<"rev">>, Props)), + ErrId = couch_util:to_existing_atom(couch_util:get_value(<<"error">>, Props)), + Reason = couch_util:get_value(<<"reason">>, Props), {{Id, Rev}, {ErrId, Reason}}. 
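%% Illustrative sketch of the pull-style request body used above: ibrowse
%% repeatedly applies the body fun to an accumulator, and a return of eof
%% ends the stream. A static chunk list stands in here for the work queue
%% that the streamer process feeds in the patch.
static_body(Chunks) ->
    {fun([]) -> eof;
        ([C | Rest]) -> {ok, C, Rest}
     end, Chunks}.
%% e.g. Body = static_body([<<"{\"docs\":[">>, <<"]}">>])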
diff --git a/src/couchdb/couch_server.erl b/src/couchdb/couch_server.erl index afdf9365..7870d69e 100644 --- a/src/couchdb/couch_server.erl +++ b/src/couchdb/couch_server.erl @@ -51,30 +51,35 @@ sup_start_link() -> gen_server:start_link({local, couch_server}, couch_server, [], []). open(DbName, Options) -> - case gen_server:call(couch_server, {open, DbName, Options}) of - {ok, MainPid} -> - Ctx = proplists:get_value(user_ctx, Options, #user_ctx{}), - couch_db:open_ref_counted(MainPid, Ctx); + case gen_server:call(couch_server, {open, DbName, Options}, infinity) of + {ok, Db} -> + Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + {ok, Db#db{user_ctx=Ctx}}; Error -> Error end. create(DbName, Options) -> - case gen_server:call(couch_server, {create, DbName, Options}) of - {ok, MainPid} -> - Ctx = proplists:get_value(user_ctx, Options, #user_ctx{}), - couch_db:open_ref_counted(MainPid, Ctx); + case gen_server:call(couch_server, {create, DbName, Options}, infinity) of + {ok, Db} -> + Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + {ok, Db#db{user_ctx=Ctx}}; Error -> Error end. delete(DbName, Options) -> - gen_server:call(couch_server, {delete, DbName, Options}). + gen_server:call(couch_server, {delete, DbName, Options}, infinity). check_dbname(#server{dbname_regexp=RegExp}, DbName) -> case re:run(DbName, RegExp, [{capture, none}]) of nomatch -> - {error, illegal_database_name}; + case DbName of + "_users" -> ok; + "_replicator" -> ok; + _Else -> + {error, illegal_database_name} + end; match -> ok end. @@ -104,7 +109,7 @@ hash_admin_passwords(Persist) -> ({User, ClearPassword}) -> Salt = ?b2l(couch_uuids:random()), Hashed = couch_util:to_hex(crypto:sha(ClearPassword ++ Salt)), - couch_config:set("admins", + couch_config:set("admins", User, "-hashed-" ++ Hashed ++ "," ++ Salt, Persist) end, couch_config:get("admins")). @@ -127,6 +132,7 @@ init([]) -> gen_server:call(couch_server, {set_max_dbs_open, list_to_integer(Max)}) end), + ok = couch_file:init_delete_dir(RootDir), hash_admin_passwords(), ok = couch_config:register( fun("admins", _Key, _Value, Persist) -> @@ -137,28 +143,32 @@ init([]) -> ets:new(couch_dbs_by_name, [set, private, named_table]), ets:new(couch_dbs_by_pid, [set, private, named_table]), ets:new(couch_dbs_by_lru, [ordered_set, private, named_table]), + ets:new(couch_sys_dbs, [set, private, named_table]), process_flag(trap_exit, true), {ok, #server{root_dir=RootDir, dbname_regexp=RegExp, max_dbs_open=MaxDbsOpen, start_time=httpd_util:rfc1123_date()}}. -terminate(Reason, _Srv) -> - couch_util:terminate_linked(Reason), +terminate(_Reason, _Srv) -> + [couch_util:shutdown_sync(Pid) || {_, {Pid, _LruTime}} <- + ets:tab2list(couch_dbs_by_name)], ok. all_databases() -> {ok, #server{root_dir=Root}} = gen_server:call(couch_server, get_server), + NormRoot = couch_util:normpath(Root), Filenames = filelib:fold_files(Root, "^[a-z0-9\\_\\$()\\+\\-]*[\\.]couch$", true, fun(Filename, AccIn) -> - case Filename -- Root of + NormFilename = couch_util:normpath(Filename), + case NormFilename -- NormRoot of [$/ | RelativeFilename] -> ok; RelativeFilename -> ok end, [list_to_binary(filename:rootname(RelativeFilename, ".couch")) | AccIn] end, []), - {ok, Filenames}. + {ok, lists:usort(Filenames)}. maybe_close_lru_db(#server{dbs_open=NumOpen, max_dbs_open=MaxOpen}=Server) @@ -173,27 +183,22 @@ maybe_close_lru_db(#server{dbs_open=NumOpen}=Server) -> end. 
try_close_lru(StartTime) -> - LruTime = ets:first(couch_dbs_by_lru), + LruTime = get_lru(), if LruTime > StartTime -> % this means we've looped through all our opened dbs and found them % all in use. {error, all_dbs_active}; true -> [{_, DbName}] = ets:lookup(couch_dbs_by_lru, LruTime), - [{_, {MainPid, LruTime}}] = ets:lookup(couch_dbs_by_name, DbName), + [{_, {opened, MainPid, LruTime}}] = ets:lookup(couch_dbs_by_name, DbName), case couch_db:is_idle(MainPid) of true -> - exit(MainPid, kill), - receive {'EXIT', MainPid, _Reason} -> ok end, - true = ets:delete(couch_dbs_by_lru, LruTime), - true = ets:delete(couch_dbs_by_name, DbName), - true = ets:delete(couch_dbs_by_pid, MainPid), - ok; + ok = shutdown_idle_db(DbName, MainPid, LruTime); false -> % this still has referrers. Go ahead and give it a current lru time % and try the next one in the table. NewLruTime = now(), - true = ets:insert(couch_dbs_by_name, {DbName, {MainPid, NewLruTime}}), + true = ets:insert(couch_dbs_by_name, {DbName, {opened, MainPid, NewLruTime}}), true = ets:insert(couch_dbs_by_pid, {MainPid, DbName}), true = ets:delete(couch_dbs_by_lru, LruTime), true = ets:insert(couch_dbs_by_lru, {NewLruTime, DbName}), @@ -201,98 +206,160 @@ try_close_lru(StartTime) -> end end. +get_lru() -> + get_lru(ets:first(couch_dbs_by_lru)). + +get_lru(LruTime) -> + [{LruTime, DbName}] = ets:lookup(couch_dbs_by_lru, LruTime), + case ets:member(couch_sys_dbs, DbName) of + false -> + LruTime; + true -> + [{_, {opened, MainPid, _}}] = ets:lookup(couch_dbs_by_name, DbName), + case couch_db:is_idle(MainPid) of + true -> + NextLru = ets:next(couch_dbs_by_lru, LruTime), + ok = shutdown_idle_db(DbName, MainPid, LruTime), + get_lru(NextLru); + false -> + get_lru(ets:next(couch_dbs_by_lru, LruTime)) + end + end. + +shutdown_idle_db(DbName, MainPid, LruTime) -> + couch_util:shutdown_sync(MainPid), + true = ets:delete(couch_dbs_by_lru, LruTime), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, MainPid), + true = ets:delete(couch_sys_dbs, DbName), + ok. + +open_async(Server, From, DbName, Filepath, Options) -> + Parent = self(), + Opener = spawn_link(fun() -> + Res = couch_db:start_link(DbName, Filepath, Options), + gen_server:call( + Parent, {open_result, DbName, Res, Options}, infinity + ), + unlink(Parent), + case Res of + {ok, DbReader} -> + unlink(DbReader); + _ -> + ok + end + end), + true = ets:insert(couch_dbs_by_name, {DbName, {opening, Opener, [From]}}), + true = ets:insert(couch_dbs_by_pid, {Opener, DbName}), + DbsOpen = case lists:member(sys_db, Options) of + true -> + true = ets:insert(couch_sys_dbs, {DbName, true}), + Server#server.dbs_open; + false -> + Server#server.dbs_open + 1 + end, + Server#server{dbs_open = DbsOpen}. 
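open_async/5 above makes database opens non-blocking for couch_server: the first caller spawns an opener and is parked in an {opening, Opener, Froms} entry, later callers just join Froms, and the {open_result, ...} clauses further down reply to the whole list at once. The generic shape of that request-coalescing idiom, with illustrative names (tab, do_open/1):

handle_call({open, Key}, From, #state{tab = Tab} = State) ->
    case ets:lookup(Tab, Key) of
        [] ->
            Opener = spawn_link(fun() -> do_open(Key) end),  % hypothetical worker
            true = ets:insert(Tab, {Key, {opening, Opener, [From]}}),
            {noreply, State};    % answered later via gen_server:reply/2
        [{Key, {opening, Opener, Froms}}] ->
            true = ets:insert(Tab, {Key, {opening, Opener, [From | Froms]}}),
            {noreply, State};
        [{Key, {opened, Value}}] ->
            {reply, {ok, Value}, State}
    end.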
+ handle_call({set_max_dbs_open, Max}, _From, Server) -> {reply, ok, Server#server{max_dbs_open=Max}}; handle_call(get_server, _From, Server) -> {reply, {ok, Server}, Server}; -handle_call({open, DbName, Options}, _From, Server) -> +handle_call({open_result, DbName, {ok, OpenedDbPid}, Options}, _From, Server) -> + link(OpenedDbPid), + [{DbName, {opening,Opener,Froms}}] = ets:lookup(couch_dbs_by_name, DbName), + lists:foreach(fun({FromPid,_}=From) -> + gen_server:reply(From, + catch couch_db:open_ref_counted(OpenedDbPid, FromPid)) + end, Froms), + LruTime = now(), + true = ets:insert(couch_dbs_by_name, + {DbName, {opened, OpenedDbPid, LruTime}}), + true = ets:delete(couch_dbs_by_pid, Opener), + true = ets:insert(couch_dbs_by_pid, {OpenedDbPid, DbName}), + true = ets:insert(couch_dbs_by_lru, {LruTime, DbName}), + case lists:member(create, Options) of + true -> + couch_db_update_notifier:notify({created, DbName}); + false -> + ok + end, + {reply, ok, Server}; +handle_call({open_result, DbName, Error, Options}, _From, Server) -> + [{DbName, {opening,Opener,Froms}}] = ets:lookup(couch_dbs_by_name, DbName), + lists:foreach(fun(From) -> + gen_server:reply(From, Error) + end, Froms), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, Opener), + DbsOpen = case lists:member(sys_db, Options) of + true -> + true = ets:delete(couch_sys_dbs, DbName), + Server#server.dbs_open; + false -> + Server#server.dbs_open - 1 + end, + {reply, ok, Server#server{dbs_open = DbsOpen}}; +handle_call({open, DbName, Options}, {FromPid,_}=From, Server) -> LruTime = now(), case ets:lookup(couch_dbs_by_name, DbName) of [] -> - DbNameList = binary_to_list(DbName), - case check_dbname(Server, DbNameList) of - ok -> - case maybe_close_lru_db(Server) of - {ok, Server2} -> - Filepath = get_full_filename(Server, DbNameList), - case couch_db:start_link(DbName, Filepath, Options) of - {ok, MainPid} -> - true = ets:insert(couch_dbs_by_name, {DbName, {MainPid, LruTime}}), - true = ets:insert(couch_dbs_by_pid, {MainPid, DbName}), - true = ets:insert(couch_dbs_by_lru, {LruTime, DbName}), - DbsOpen = Server2#server.dbs_open + 1, - {reply, {ok, MainPid}, - Server2#server{dbs_open=DbsOpen}}; - Error -> - {reply, Error, Server2} - end; - CloseError -> - {reply, CloseError, Server} - end; - Error -> - {reply, Error, Server} - end; - [{_, {MainPid, PrevLruTime}}] -> - true = ets:insert(couch_dbs_by_name, {DbName, {MainPid, LruTime}}), + open_db(DbName, Server, Options, From); + [{_, {opening, Opener, Froms}}] -> + true = ets:insert(couch_dbs_by_name, {DbName, {opening, Opener, [From|Froms]}}), + {noreply, Server}; + [{_, {opened, MainPid, PrevLruTime}}] -> + true = ets:insert(couch_dbs_by_name, {DbName, {opened, MainPid, LruTime}}), true = ets:delete(couch_dbs_by_lru, PrevLruTime), true = ets:insert(couch_dbs_by_lru, {LruTime, DbName}), - {reply, {ok, MainPid}, Server} + {reply, couch_db:open_ref_counted(MainPid, FromPid), Server} end; -handle_call({create, DbName, Options}, _From, Server) -> - DbNameList = binary_to_list(DbName), - case check_dbname(Server, DbNameList) of - ok -> - Filepath = get_full_filename(Server, DbNameList), - - case ets:lookup(couch_dbs_by_name, DbName) of - [] -> - case maybe_close_lru_db(Server) of - {ok, Server2} -> - case couch_db:start_link(DbName, Filepath, [create|Options]) of - {ok, MainPid} -> - LruTime = now(), - true = ets:insert(couch_dbs_by_name, - {DbName, {MainPid, LruTime}}), - true = ets:insert(couch_dbs_by_pid, {MainPid, DbName}), - true = 
ets:insert(couch_dbs_by_lru, {LruTime, DbName}), - DbsOpen = Server2#server.dbs_open + 1, - couch_db_update_notifier:notify({created, DbName}), - {reply, {ok, MainPid}, - Server2#server{dbs_open=DbsOpen}}; - Error -> - {reply, Error, Server2} - end; - CloseError -> - {reply, CloseError, Server} - end; - [_AlreadyRunningDb] -> - {reply, file_exists, Server} - end; - Error -> - {reply, Error, Server} +handle_call({create, DbName, Options}, From, Server) -> + case ets:lookup(couch_dbs_by_name, DbName) of + [] -> + open_db(DbName, Server, [create | Options], From); + [_AlreadyRunningDb] -> + {reply, file_exists, Server} end; handle_call({delete, DbName, _Options}, _From, Server) -> DbNameList = binary_to_list(DbName), case check_dbname(Server, DbNameList) of ok -> FullFilepath = get_full_filename(Server, DbNameList), - Server2 = + UpdateState = case ets:lookup(couch_dbs_by_name, DbName) of - [] -> Server; - [{_, {Pid, LruTime}}] -> - exit(Pid, kill), - receive {'EXIT', Pid, _Reason} -> ok end, + [] -> false; + [{_, {opening, Pid, Froms}}] -> + couch_util:shutdown_sync(Pid), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, Pid), + [gen_server:reply(F, not_found) || F <- Froms], + true; + [{_, {opened, Pid, LruTime}}] -> + couch_util:shutdown_sync(Pid), true = ets:delete(couch_dbs_by_name, DbName), true = ets:delete(couch_dbs_by_pid, Pid), true = ets:delete(couch_dbs_by_lru, LruTime), - Server#server{dbs_open=Server#server.dbs_open - 1} + true + end, + Server2 = case UpdateState of + true -> + DbsOpen = case ets:member(couch_sys_dbs, DbName) of + true -> + true = ets:delete(couch_sys_dbs, DbName), + Server#server.dbs_open; + false -> + Server#server.dbs_open - 1 + end, + Server#server{dbs_open = DbsOpen}; + false -> + Server end, %% Delete any leftover .compact files. If we don't do this a subsequent %% request for this DB will try to open the .compact file and use it. - file:delete(FullFilepath ++ ".compact"), + couch_file:delete(Server#server.root_dir, FullFilepath ++ ".compact"), - case file:delete(FullFilepath) of + case couch_file:delete(Server#server.root_dir, FullFilepath) of ok -> couch_db_update_notifier:notify({deleted, DbName}), {reply, ok, Server2}; @@ -310,15 +377,29 @@ handle_cast(Msg, _Server) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - -handle_info({'EXIT', _Pid, config_change}, _Server) -> - exit(kill); -handle_info({'EXIT', Pid, _Reason}, #server{dbs_open=DbsOpen}=Server) -> - [{Pid, DbName}] = ets:lookup(couch_dbs_by_pid, Pid), - [{DbName, {Pid, LruTime}}] = ets:lookup(couch_dbs_by_name, DbName), - true = ets:delete(couch_dbs_by_pid, Pid), - true = ets:delete(couch_dbs_by_name, DbName), - true = ets:delete(couch_dbs_by_lru, LruTime), - {noreply, Server#server{dbs_open=DbsOpen - 1}}; -handle_info(Info, _Server) -> - exit({unknown_message, Info}). + +handle_info({'EXIT', _Pid, config_change}, Server) -> + {noreply, shutdown, Server}; +handle_info(Error, _Server) -> + ?LOG_ERROR("Unexpected message, restarting couch_server: ~p", [Error]), + exit(kill). 
+ +open_db(DbName, Server, Options, From) -> + DbNameList = binary_to_list(DbName), + case check_dbname(Server, DbNameList) of + ok -> + Filepath = get_full_filename(Server, DbNameList), + case lists:member(sys_db, Options) of + true -> + {noreply, open_async(Server, From, DbName, Filepath, Options)}; + false -> + case maybe_close_lru_db(Server) of + {ok, Server2} -> + {noreply, open_async(Server2, From, DbName, Filepath, Options)}; + CloseError -> + {reply, CloseError, Server} + end + end; + Error -> + {reply, Error, Server} + end. diff --git a/src/couchdb/couch_server_sup.erl b/src/couchdb/couch_server_sup.erl index d89e987d..4f0445da 100644 --- a/src/couchdb/couch_server_sup.erl +++ b/src/couchdb/couch_server_sup.erl @@ -32,12 +32,7 @@ start_link(IniFiles) -> end. restart_core_server() -> - supervisor:terminate_child(couch_primary_services, couch_server), - supervisor:terminate_child(couch_secondary_services, stats_aggregator), - supervisor:terminate_child(couch_secondary_services, stats_collector), - supervisor:restart_child(couch_primary_services, couch_server), - supervisor:restart_child(couch_secondary_services, stats_collector), - supervisor:restart_child(couch_secondary_services, stats_aggregator). + init:restart(). couch_config_start_link_wrapper(IniFiles, FirstConfigPid) -> case is_process_alive(FirstConfigPid) of @@ -127,6 +122,13 @@ start_server(IniFiles) -> Port = mochiweb_socket_server:get(couch_httpd, port), io:format("Apache CouchDB has started. Time to relax.~n"), ?LOG_INFO("Apache CouchDB has started on http://~s:~w/", [Ip, Port]), + + case couch_config:get("couchdb", "uri_file", null) of + null -> ok; + UriFile -> + Line = io_lib:format("http://~s:~w/~n", [Ip, Port]), + file:write_file(UriFile, Line) + end, {ok, Pid}. @@ -174,7 +176,7 @@ start_secondary_services() -> {list_to_atom(Name), {Module, Fun, Args}, permanent, - brutal_kill, + 1000, worker, [Module]} end diff --git a/src/couchdb/couch_stats_collector.erl b/src/couchdb/couch_stats_collector.erl index 59d62a6e..f7b9bb48 100644 --- a/src/couchdb/couch_stats_collector.erl +++ b/src/couchdb/couch_stats_collector.erl @@ -60,7 +60,7 @@ increment(Key) -> Key2 = make_key(Key), case catch ets:update_counter(?HIT_TABLE, Key2, 1) of {'EXIT', {badarg, _}} -> - true = ets:insert(?HIT_TABLE, {Key2, 1}), + catch ets:insert(?HIT_TABLE, {Key2, 1}), ok; _ -> ok @@ -70,16 +70,16 @@ decrement(Key) -> Key2 = make_key(Key), case catch ets:update_counter(?HIT_TABLE, Key2, -1) of {'EXIT', {badarg, _}} -> - true = ets:insert(?HIT_TABLE, {Key2, -1}), + catch ets:insert(?HIT_TABLE, {Key2, -1}), ok; _ -> ok end. record(Key, Value) -> - true = ets:insert(?ABS_TABLE, {make_key(Key), Value}). + catch ets:insert(?ABS_TABLE, {make_key(Key), Value}). clear(Key) -> - true = ets:delete(?ABS_TABLE, make_key(Key)). + catch ets:delete(?ABS_TABLE, make_key(Key)). track_process_count(Stat) -> track_process_count(self(), Stat). diff --git a/src/couchdb/couch_stream.erl b/src/couchdb/couch_stream.erl index 2a873e4c..60af1c2b 100644 --- a/src/couchdb/couch_stream.erl +++ b/src/couchdb/couch_stream.erl @@ -24,7 +24,7 @@ -define(DEFAULT_STREAM_CHUNK, 16#00100000). % 1 meg chunks when streaming data --export([open/1, close/1, write/2, foldl/4, foldl/5, +-export([open/1, open/3, close/1, write/2, foldl/4, foldl/5, range_foldl/6, foldl_decode/6, old_foldl/5,old_copy_to_new_stream/4]). -export([copy_to_new_stream/3,old_read_term/2]). -export([init/1, terminate/2, handle_call/3]). 
@@ -39,14 +39,23 @@ buffer_len = 0, max_buffer = 4096, written_len = 0, - md5 + md5, + % md5 of the content without any transformation applied (e.g. compression) + % needed for the attachment upload integrity check (ticket 558) + identity_md5, + identity_len = 0, + encoding_fun, + end_encoding_fun }). %%% Interface functions %%% open(Fd) -> - gen_server:start_link(couch_stream, Fd, []). + open(Fd, identity, []). + +open(Fd, Encoding, Options) -> + gen_server:start_link(couch_stream, {Fd, Encoding, Options}, []). close(Pid) -> gen_server:call(Pid, close, infinity). @@ -85,19 +94,124 @@ foldl(Fd, [Pos|Rest], Fun, Acc) -> foldl(Fd, PosList, <<>>, Fun, Acc) -> foldl(Fd, PosList, Fun, Acc); foldl(Fd, PosList, Md5, Fun, Acc) -> - foldl(Fd, PosList, Md5, erlang:md5_init(), Fun, Acc). - + foldl(Fd, PosList, Md5, couch_util:md5_init(), Fun, Acc). + +foldl_decode(Fd, PosList, Md5, Enc, Fun, Acc) -> + {DecDataFun, DecEndFun} = case Enc of + gzip -> + ungzip_init(); + identity -> + identity_enc_dec_funs() + end, + Result = foldl_decode( + DecDataFun, Fd, PosList, Md5, couch_util:md5_init(), Fun, Acc + ), + DecEndFun(), + Result. foldl(_Fd, [], Md5, Md5Acc, _Fun, Acc) -> - Md5 = erlang:md5_final(Md5Acc), + Md5 = couch_util:md5_final(Md5Acc), Acc; +foldl(Fd, [{Pos, _Size}], Md5, Md5Acc, Fun, Acc) -> % 0110 UPGRADE CODE + foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc); foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc) -> {ok, Bin} = couch_file:pread_iolist(Fd, Pos), - Md5 = erlang:md5_final(erlang:md5_update(Md5Acc, Bin)), + Md5 = couch_util:md5_final(couch_util:md5_update(Md5Acc, Bin)), Fun(Bin, Acc); +foldl(Fd, [{Pos, _Size}|Rest], Md5, Md5Acc, Fun, Acc) -> + foldl(Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc); foldl(Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc) -> {ok, Bin} = couch_file:pread_iolist(Fd, Pos), - foldl(Fd, Rest, Md5, erlang:md5_update(Md5Acc, Bin), Fun, Fun(Bin, Acc)). + foldl(Fd, Rest, Md5, couch_util:md5_update(Md5Acc, Bin), Fun, Fun(Bin, Acc)). + +range_foldl(Fd, PosList, From, To, Fun, Acc) -> + range_foldl(Fd, PosList, From, To, 0, Fun, Acc). + +range_foldl(_Fd, _PosList, _From, To, Off, _Fun, Acc) when Off >= To -> + Acc; +range_foldl(Fd, [Pos|Rest], From, To, Off, Fun, Acc) when is_integer(Pos) -> % old-style attachment + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + range_foldl(Fd, [{Pos, iolist_size(Bin)}] ++ Rest, From, To, Off, Fun, Acc); +range_foldl(Fd, [{_Pos, Size}|Rest], From, To, Off, Fun, Acc) when From > Off + Size -> + range_foldl(Fd, Rest, From, To, Off + Size, Fun, Acc); +range_foldl(Fd, [{Pos, Size}|Rest], From, To, Off, Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + Bin1 = if + From =< Off andalso To >= Off + Size -> Bin; %% the whole block is covered + true -> + PrefixLen = clip(From - Off, 0, Size), + PostfixLen = clip(Off + Size - To, 0, Size), + MatchLen = Size - PrefixLen - PostfixLen, + <<_Prefix:PrefixLen/binary,Match:MatchLen/binary,_Postfix:PostfixLen/binary>> = iolist_to_binary(Bin), + Match + end, + range_foldl(Fd, Rest, From, To, Off + Size, Fun, Fun(Bin1, Acc)). + +clip(Value, Lo, Hi) -> + if + Value < Lo -> Lo; + Value > Hi -> Hi; + true -> Value + end. 
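range_foldl/7 above clips each stored block to the requested byte range. Worked through on concrete numbers, assuming a 10-byte block at stream offset 20 and a request for bytes 23..27:

% Off = 20, Size = 10, From = 23, To = 27
PrefixLen  = clip(From - Off, 0, Size),        % clip(3, 0, 10)  -> skip 3 bytes
PostfixLen = clip(Off + Size - To, 0, Size),   % clip(3, 0, 10)  -> drop 3 bytes
MatchLen   = Size - PrefixLen - PostfixLen,    % 10 - 3 - 3      -> keep 4 bytes
% so <<_:3/binary, Match:4/binary, _:3/binary>> is carved out of this block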
+ +foldl_decode(_DecFun, _Fd, [], Md5, Md5Acc, _Fun, Acc) -> + Md5 = couch_util:md5_final(Md5Acc), + Acc; +foldl_decode(DecFun, Fd, [{Pos, _Size}], Md5, Md5Acc, Fun, Acc) -> + foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc); +foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc) -> + {ok, EncBin} = couch_file:pread_iolist(Fd, Pos), + Md5 = couch_util:md5_final(couch_util:md5_update(Md5Acc, EncBin)), + Bin = DecFun(EncBin), + Fun(Bin, Acc); +foldl_decode(DecFun, Fd, [{Pos, _Size}|Rest], Md5, Md5Acc, Fun, Acc) -> + foldl_decode(DecFun, Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc); +foldl_decode(DecFun, Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc) -> + {ok, EncBin} = couch_file:pread_iolist(Fd, Pos), + Bin = DecFun(EncBin), + Md5Acc2 = couch_util:md5_update(Md5Acc, EncBin), + foldl_decode(DecFun, Fd, Rest, Md5, Md5Acc2, Fun, Fun(Bin, Acc)). + +gzip_init(Options) -> + case couch_util:get_value(compression_level, Options, 0) of + Lvl when Lvl >= 1 andalso Lvl =< 9 -> + Z = zlib:open(), + % 15 = ?MAX_WBITS (defined in the zlib module) + % the 16 + ?MAX_WBITS formula was obtained by inspecting zlib:gzip/1 + ok = zlib:deflateInit(Z, Lvl, deflated, 16 + 15, 8, default), + { + fun(Data) -> + zlib:deflate(Z, Data) + end, + fun() -> + Last = zlib:deflate(Z, [], finish), + ok = zlib:deflateEnd(Z), + ok = zlib:close(Z), + Last + end + }; + _ -> + identity_enc_dec_funs() + end. + +ungzip_init() -> + Z = zlib:open(), + zlib:inflateInit(Z, 16 + 15), + { + fun(Data) -> + zlib:inflate(Z, Data) + end, + fun() -> + ok = zlib:inflateEnd(Z), + ok = zlib:close(Z) + end + }. + +identity_enc_dec_funs() -> + { + fun(Data) -> Data end, + fun() -> [] end + }. write(_Pid, <<>>) -> ok; @@ -105,8 +219,21 @@ write(Pid, Bin) -> gen_server:call(Pid, {write, Bin}, infinity). -init(Fd) -> - {ok, #stream{fd=Fd, md5=erlang:md5_init()}}. +init({Fd, Encoding, Options}) -> + {EncodingFun, EndEncodingFun} = case Encoding of + identity -> + identity_enc_dec_funs(); + gzip -> + gzip_init(Options) + end, + {ok, #stream{ + fd=Fd, + md5=couch_util:md5_init(), + identity_md5=couch_util:md5_init(), + encoding_fun=EncodingFun, + end_encoding_fun=EndEncodingFun + } + }. terminate(_Reason, _Stream) -> ok. 
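The 16 + 15 windowBits passed to deflateInit/inflateInit above is how zlib selects gzip framing (15 is ?MAX_WBITS; adding 16 asks for a gzip header and trailer). A quick shell check that the streaming form used here agrees with zlib:gunzip/1:

1> Z = zlib:open().
2> ok = zlib:deflateInit(Z, 6, deflated, 16 + 15, 8, default).
3> Gz = iolist_to_binary([zlib:deflate(Z, <<"hello">>), zlib:deflate(Z, [], finish)]).
4> ok = zlib:deflateEnd(Z), ok = zlib:close(Z).
5> zlib:gunzip(Gz).
<<"hello">>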
@@ -120,39 +247,65 @@ handle_call({write, Bin}, _From, Stream) -> buffer_len = BufferLen, buffer_list = Buffer, max_buffer = Max, - md5 = Md5} = Stream, + md5 = Md5, + identity_md5 = IdenMd5, + identity_len = IdenLen, + encoding_fun = EncodingFun} = Stream, if BinSize + BufferLen > Max -> WriteBin = lists:reverse(Buffer, [Bin]), - Md5_2 = erlang:md5_update(Md5, WriteBin), - {ok, Pos} = couch_file:append_binary(Fd, WriteBin), + IdenMd5_2 = couch_util:md5_update(IdenMd5, WriteBin), + case EncodingFun(WriteBin) of + [] -> + % case where the encoder did some internal buffering + % (zlib does it for example) + WrittenLen2 = WrittenLen, + Md5_2 = Md5, + Written2 = Written; + WriteBin2 -> + {ok, Pos} = couch_file:append_binary(Fd, WriteBin2), + WrittenLen2 = WrittenLen + iolist_size(WriteBin2), + Md5_2 = couch_util:md5_update(Md5, WriteBin2), + Written2 = [{Pos, iolist_size(WriteBin2)}|Written] + end, + {reply, ok, Stream#stream{ - written_len=WrittenLen + BufferLen + BinSize, - written_pointers=[Pos|Written], + written_len=WrittenLen2, + written_pointers=Written2, buffer_list=[], buffer_len=0, - md5=Md5_2}}; + md5=Md5_2, + identity_md5=IdenMd5_2, + identity_len=IdenLen + BinSize}}; true -> {reply, ok, Stream#stream{ buffer_list=[Bin|Buffer], - buffer_len=BufferLen + BinSize}} + buffer_len=BufferLen + BinSize, + identity_len=IdenLen + BinSize}} end; handle_call(close, _From, Stream) -> #stream{ fd = Fd, written_len = WrittenLen, written_pointers = Written, - buffer_len = BufferLen, buffer_list = Buffer, - md5 = Md5} = Stream, - - case Buffer of + md5 = Md5, + identity_md5 = IdenMd5, + identity_len = IdenLen, + encoding_fun = EncodingFun, + end_encoding_fun = EndEncodingFun} = Stream, + + WriteBin = lists:reverse(Buffer), + IdenMd5Final = couch_util:md5_final(couch_util:md5_update(IdenMd5, WriteBin)), + WriteBin2 = EncodingFun(WriteBin) ++ EndEncodingFun(), + Md5Final = couch_util:md5_final(couch_util:md5_update(Md5, WriteBin2)), + Result = case WriteBin2 of [] -> - Result = {lists:reverse(Written), WrittenLen, erlang:md5_final(Md5)}; + {lists:reverse(Written), WrittenLen, IdenLen, Md5Final, IdenMd5Final}; _ -> - WriteBin = lists:reverse(Buffer), - Md5Final = erlang:md5_final(erlang:md5_update(Md5, WriteBin)), - {ok, Pos} = couch_file:append_binary(Fd, WriteBin), - Result = {lists:reverse(Written, [Pos]), WrittenLen + BufferLen, Md5Final} + {ok, Pos} = couch_file:append_binary(Fd, WriteBin2), + StreamInfo = lists:reverse(Written, [{Pos, iolist_size(WriteBin2)}]), + StreamLen = WrittenLen + iolist_size(WriteBin2), + {StreamInfo, StreamLen, IdenLen, Md5Final, IdenMd5Final} end, {stop, normal, Result, Stream}. diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl index 6edfb781..c11ba994 100644 --- a/src/couchdb/couch_util.erl +++ b/src/couchdb/couch_util.erl @@ -12,18 +12,25 @@ -module(couch_util). --export([priv_dir/0, start_driver/1,terminate_linked/1]). +-export([priv_dir/0, start_driver/1, normpath/1]). -export([should_flush/0, should_flush/1, to_existing_atom/1]). -export([rand32/0, implode/2, collate/2, collate/3]). --export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1]). --export([encodeBase64/1, decodeBase64/1, encodeBase64Url/1, decodeBase64Url/1, - to_hex/1,parse_term/1, dict_find/3]). --export([file_read_size/1, get_nested_json_value/2, json_user_ctx/1]). +-export([abs_pathname/1,abs_pathname/2, trim/1]). +-export([encodeBase64Url/1, decodeBase64Url/1]). +-export([to_hex/1, parse_term/1, dict_find/3]). +-export([get_nested_json_value/2, json_user_ctx/1]). 
+-export([proplist_apply_field/2, json_apply_field/2]). -export([to_binary/1, to_integer/1, to_list/1, url_encode/1]). -export([json_encode/1, json_decode/1]). +-export([verify/2,simple_call/2,shutdown_sync/1]). +-export([compressible_att_type/1]). +-export([get_value/2, get_value/3]). +-export([md5/1, md5_init/0, md5_update/2, md5_final/1]). +-export([reorder_results/2]). +-export([url_strip_password/1]). +-export([encode_doc_id/1]). -include("couch_db.hrl"). --include_lib("kernel/include/file.hrl"). % arbitrarily chosen amount of memory to use before flushing to disk -define(FLUSH_MAX_MEM, 10000000). @@ -48,23 +55,57 @@ start_driver(LibDir) -> exit(erl_ddll:format_error(Error)) end. +% Normalize a pathname by removing .. and . components. +normpath(Path) -> + normparts(filename:split(Path), []). + +normparts([], Acc) -> + filename:join(lists:reverse(Acc)); +normparts([".." | RestParts], [_Drop | RestAcc]) -> + normparts(RestParts, RestAcc); +normparts(["." | RestParts], Acc) -> + normparts(RestParts, Acc); +normparts([Part | RestParts], Acc) -> + normparts(RestParts, [Part | Acc]). + % works like list_to_existing_atom, except can be list or binary and it % gives you the original value instead of an error if no existing atom. to_existing_atom(V) when is_list(V) -> - try list_to_existing_atom(V) catch _ -> V end; + try list_to_existing_atom(V) catch _:_ -> V end; to_existing_atom(V) when is_binary(V) -> - try list_to_existing_atom(?b2l(V)) catch _ -> V end; + try list_to_existing_atom(?b2l(V)) catch _:_ -> V end; to_existing_atom(V) when is_atom(V) -> V. +shutdown_sync(Pid) when not is_pid(Pid)-> + ok; +shutdown_sync(Pid) -> + MRef = erlang:monitor(process, Pid), + try + catch unlink(Pid), + catch exit(Pid, shutdown), + receive + {'DOWN', MRef, _, _, _} -> + ok + end + after + erlang:demonitor(MRef, [flush]) + end. + -terminate_linked(normal) -> - terminate_linked(shutdown); -terminate_linked(Reason) -> - {links, Links} = process_info(self(), links), - [catch exit(Pid, Reason) || Pid <- Links], - ok. - +simple_call(Pid, Message) -> + MRef = erlang:monitor(process, Pid), + try + Pid ! {self(), Message}, + receive + {Pid, Result} -> + Result; + {'DOWN', MRef, _, _, Reason} -> + exit(Reason) + end + after + erlang:demonitor(MRef, [flush]) + end. to_hex([]) -> []; @@ -83,9 +124,19 @@ parse_term(List) -> {ok, Tokens, _} = erl_scan:string(List ++ "."), erl_parse:parse_term(Tokens). +get_value(Key, List) -> + get_value(Key, List, undefined). + +get_value(Key, List, Default) -> + case lists:keysearch(Key, 1, List) of + {value, {Key,Value}} -> + Value; + false -> + Default + end. get_nested_json_value({Props}, [Key|Keys]) -> - case proplists:get_value(Key, Props, nil) of + case couch_util:get_value(Key, Props, nil) of nil -> throw({not_found, <<"missing json key: ", Key/binary>>}); Value -> get_nested_json_value(Value, Keys) end; @@ -94,6 +145,19 @@ get_nested_json_value(Value, []) -> get_nested_json_value(_NotJSONObj, _) -> throw({not_found, json_mismatch}). +proplist_apply_field(H, L) -> + {R} = json_apply_field(H, {L}), + R. + +json_apply_field(H, {L}) -> + json_apply_field(H, L, []). +json_apply_field({Key, NewValue}, [{Key, _OldVal} | Headers], Acc) -> + json_apply_field({Key, NewValue}, Headers, Acc); +json_apply_field({Key, NewValue}, [{OtherKey, OtherVal} | Headers], Acc) -> + json_apply_field({Key, NewValue}, Headers, [{OtherKey, OtherVal} | Acc]); +json_apply_field({Key, NewValue}, [], Acc) -> + {[{Key, NewValue}|Acc]}. 
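shutdown_sync/1 and simple_call/2 above both use the monitor-then-demonitor([flush]) pattern so no stray 'DOWN' message is left behind in the caller's mailbox. The peer side of simple_call/2 only has to honor the {From, Message} / {From, Result} convention; a one-shot sketch with a made-up add request:

Worker = spawn(fun() ->
    receive
        {From, {add, A, B}} -> From ! {self(), A + B}  % reply tagged with own pid
    end
end),
7 = couch_util:simple_call(Worker, {add, 3, 4}).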
+ json_user_ctx(#db{name=DbName, user_ctx=Ctx}) -> {[{<<"db">>, DbName}, {<<"name">>,Ctx#user_ctx.name}, @@ -140,18 +204,6 @@ separate_cmd_args(" " ++ Rest, CmdAcc) -> separate_cmd_args([Char|Rest], CmdAcc) -> separate_cmd_args(Rest, [Char | CmdAcc]). -% lowercases string bytes that are the ascii characters A-Z. -% All other characters/bytes are ignored. -ascii_lower(String) -> - ascii_lower(String, []). - -ascii_lower([], Acc) -> - lists:reverse(Acc); -ascii_lower([Char | RestString], Acc) when Char >= $A, Char =< $B -> - ascii_lower(RestString, [Char + ($a-$A) | Acc]); -ascii_lower([Char | RestString], Acc) -> - ascii_lower(RestString, [Char | Acc]). - % Is a character whitespace? is_whitespace($\s) -> true; is_whitespace($\t) -> true; @@ -230,114 +282,18 @@ should_flush(MemThreshHold) -> ProcMem2+BinMem2 > MemThreshHold; true -> false end. +encodeBase64Url(Url) -> + Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")), + Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])), + iolist_to_binary(re:replace(Url2, "\\+", "-", [global])). -%%% Purpose : Base 64 encoding and decoding. -%%% Copied from ssl_base_64 to avoid using the -%%% erlang ssl library - --define(st(X,A), ((X-A+256) div 256)). - -%% A PEM encoding consists of characters A-Z, a-z, 0-9, +, / and -%% =. Each character encodes a 6 bits value from 0 to 63 (A = 0, / = -%% 63); = is a padding character. -%% - -%% -%% encode64(Bytes|Binary) -> binary -%% -%% Take 3 bytes a time (3 x 8 = 24 bits), and make 4 characters out of -%% them (4 x 6 = 24 bits). -%% -encodeBase64(Bs) when is_list(Bs) -> - encodeBase64(iolist_to_binary(Bs), <<>>); -encodeBase64(Bs) -> - encodeBase64(Bs, <<>>). - -encodeBase64(<<B:3/binary, Bs/binary>>, Acc) -> - <<C1:6, C2:6, C3:6, C4:6>> = B, - encodeBase64(Bs, <<Acc/binary, (enc(C1)), (enc(C2)), (enc(C3)), (enc(C4))>>); -encodeBase64(<<B:2/binary>>, Acc) -> - <<C1:6, C2:6, C3:6, _:6>> = <<B/binary, 0>>, - <<Acc/binary, (enc(C1)), (enc(C2)), (enc(C3)), $=>>; -encodeBase64(<<B:1/binary>>, Acc) -> - <<C1:6, C2:6, _:12>> = <<B/binary, 0, 0>>, - <<Acc/binary, (enc(C1)), (enc(C2)), $=, $=>>; -encodeBase64(<<>>, Acc) -> - Acc. - -encodeBase64Url(Bs) when is_list(Bs) -> - encodeBase64Url(list_to_binary(Bs), <<>>); -encodeBase64Url(Bs) -> - encodeBase64Url(Bs, <<>>). - -encodeBase64Url(<<B:3/binary, Bs/binary>>, Acc) -> - <<C1:6, C2:6, C3:6, C4:6>> = B, - encodeBase64Url(Bs, <<Acc/binary, (encUrl(C1)), (encUrl(C2)), (encUrl(C3)), (encUrl(C4))>>); -encodeBase64Url(<<B:2/binary>>, Acc) -> - <<C1:6, C2:6, C3:6, _:6>> = <<B/binary, 0>>, - <<Acc/binary, (encUrl(C1)), (encUrl(C2)), (encUrl(C3))>>; -encodeBase64Url(<<B:1/binary>>, Acc) -> - <<C1:6, C2:6, _:12>> = <<B/binary, 0, 0>>, - <<Acc/binary, (encUrl(C1)), (encUrl(C2))>>; -encodeBase64Url(<<>>, Acc) -> - Acc. - -%% -%% decodeBase64(BinaryChars) -> Binary -%% -decodeBase64(Cs) when is_list(Cs) -> - decodeBase64(list_to_binary(Cs)); -decodeBase64(Cs) -> - decode1(Cs, <<>>). - -decode1(<<C1, C2, $=, $=>>, Acc) -> - <<B1, _:16>> = <<(dec(C1)):6, (dec(C2)):6, 0:12>>, - <<Acc/binary, B1>>; -decode1(<<C1, C2, C3, $=>>, Acc) -> - <<B1, B2, _:8>> = <<(dec(C1)):6, (dec(C2)):6, (dec(C3)):6, (dec(0)):6>>, - <<Acc/binary, B1, B2>>; -decode1(<<C1, C2, C3, C4, Cs/binary>>, Acc) -> - Bin = <<Acc/binary, (dec(C1)):6, (dec(C2)):6, (dec(C3)):6, (dec(C4)):6>>, - decode1(Cs, Bin); -decode1(<<>>, Acc) -> - Acc. - -decodeBase64Url(Cs) when is_list(Cs) -> - decodeBase64Url(list_to_binary(Cs)); -decodeBase64Url(Cs) -> - decode1Url(Cs, <<>>). 
- -decode1Url(<<C1, C2>>, Acc) -> - <<B1, _:16>> = <<(decUrl(C1)):6, (decUrl(C2)):6, 0:12>>, - <<Acc/binary, B1>>; -decode1Url(<<C1, C2, C3>>, Acc) -> - <<B1, B2, _:8>> = <<(decUrl(C1)):6, (decUrl(C2)):6, (decUrl(C3)):6, (decUrl(0)):6>>, - <<Acc/binary, B1, B2>>; -decode1Url(<<C1, C2, C3, C4, Cs/binary>>, Acc) -> - Bin = <<Acc/binary, (decUrl(C1)):6, (decUrl(C2)):6, (decUrl(C3)):6, (decUrl(C4)):6>>, - decode1Url(Cs, Bin); -decode1Url(<<>>, Acc) -> - Acc. - -%% enc/1 and dec/1 -%% -%% Mapping: 0-25 -> A-Z, 26-51 -> a-z, 52-61 -> 0-9, 62 -> +, 63 -> / -%% -enc(C) -> - 65 + C + 6*?st(C,26) - 75*?st(C,52) -15*?st(C,62) + 3*?st(C,63). - -dec(C) -> - 62*?st(C,43) + ?st(C,47) + (C-59)*?st(C,48) - 69*?st(C,65) - 6*?st(C,97). - -%% encUrl/1 and decUrl/1 -%% -%% Mapping: 0-25 -> A-Z, 26-51 -> a-z, 52-61 -> 0-9, 62 -> -, 63 -> _ -%% -encUrl(C) -> - 65 + C + 6*?st(C,26) - 75*?st(C,52) -13*?st(C,62) + 49*?st(C,63). - -decUrl(C) -> - 62*?st(C,45) + (C-58)*?st(C,48) - 69*?st(C,65) + 33*?st(C,95) - 39*?st(C,97). +decodeBase64Url(Url64) -> + Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]), + Url2 = iolist_to_binary( + re:replace(iolist_to_binary(Url1), "_", "/", [global]) + ), + Padding = ?l2b(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)), + base64:decode(<<Url2/binary, Padding/binary>>). dict_find(Key, Dict, DefaultValue) -> case dict:find(Key, Dict) of @@ -347,14 +303,6 @@ dict_find(Key, Dict, DefaultValue) -> DefaultValue end. - -file_read_size(FileName) -> - case file:read_file_info(FileName) of - {ok, FileInfo} -> - FileInfo#file_info.size; - Error -> Error - end. - to_binary(V) when is_binary(V) -> V; to_binary(V) when is_list(V) -> @@ -423,3 +371,76 @@ json_decode(V) -> _Type:_Error -> throw({invalid_json,V}) end. + +verify([X|RestX], [Y|RestY], Result) -> + verify(RestX, RestY, (X bxor Y) bor Result); +verify([], [], Result) -> + Result == 0. + +verify(<<X/binary>>, <<Y/binary>>) -> + verify(?b2l(X), ?b2l(Y)); +verify(X, Y) when is_list(X) and is_list(Y) -> + case length(X) == length(Y) of + true -> + verify(X, Y, 0); + false -> + false + end; +verify(_X, _Y) -> false. + +compressible_att_type(MimeType) when is_binary(MimeType) -> + compressible_att_type(?b2l(MimeType)); +compressible_att_type(MimeType) -> + TypeExpList = re:split( + couch_config:get("attachments", "compressible_types", ""), + ", ?", + [{return, list}] + ), + lists:any( + fun(TypeExp) -> + Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"), "\\s*$"], + re:run(MimeType, Regexp, [caseless]) =/= nomatch + end, + [T || T <- TypeExpList, T /= []] + ). + +-spec md5(Data::(iolist() | binary())) -> Digest::binary(). +md5(Data) -> + try crypto:md5(Data) catch error:_ -> erlang:md5(Data) end. + +-spec md5_init() -> Context::binary(). +md5_init() -> + try crypto:md5_init() catch error:_ -> erlang:md5_init() end. + +-spec md5_update(Context::binary(), Data::(iolist() | binary())) -> + NewContext::binary(). +md5_update(Ctx, D) -> + try crypto:md5_update(Ctx,D) catch error:_ -> erlang:md5_update(Ctx,D) end. + +-spec md5_final(Context::binary()) -> Digest::binary(). +md5_final(Ctx) -> + try crypto:md5_final(Ctx) catch error:_ -> erlang:md5_final(Ctx) end. + +% linear search is faster for small lists, length() is 0.5 ms for 100k list +reorder_results(Keys, SortedResults) when length(Keys) < 100 -> + [couch_util:get_value(Key, SortedResults) || Key <- Keys]; +reorder_results(Keys, SortedResults) -> + KeyDict = dict:from_list(SortedResults), + [dict:fetch(Key, KeyDict) || Key <- Keys]. 
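The new decodeBase64Url/1 restores the stripped '=' padding with (4 - size rem 4) rem 4 before handing off to the stdlib base64 module. A round trip on a value that exercises both substituted characters ('+' -> '-' and '/' -> '_'); outputs worked out by hand, so treat them as illustrative:

1> couch_util:encodeBase64Url(<<255,239>>).
<<"_-8">>
2> couch_util:decodeBase64Url(<<"_-8">>).
<<255,239>>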
+ +url_strip_password(Url) -> + re:replace(Url, + "http(s)?://([^:]+):[^@]+@(.*)$", + "http\\1://\\2:*****@\\3", + [{return, list}]). + +encode_doc_id(#doc{id = Id}) -> + encode_doc_id(Id); +encode_doc_id(Id) when is_list(Id) -> + encode_doc_id(?l2b(Id)); +encode_doc_id(<<"_design/", Rest/binary>>) -> + "_design/" ++ url_encode(Rest); +encode_doc_id(<<"_local/", Rest/binary>>) -> + "_local/" ++ url_encode(Rest); +encode_doc_id(Id) -> + url_encode(Id). diff --git a/src/couchdb/couch_view.erl b/src/couchdb/couch_view.erl index f80ce434..911f1aa6 100644 --- a/src/couchdb/couch_view.erl +++ b/src/couchdb/couch_view.erl @@ -29,11 +29,9 @@ start_link() -> gen_server:start_link({local, couch_view}, couch_view, [], []). get_temp_updater(DbName, Language, DesignOptions, MapSrc, RedSrc) -> - % make temp group - % do we need to close this db? - {ok, _Db, Group} = + {ok, Group} = couch_view_group:open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc), - case gen_server:call(couch_view, {get_group_server, DbName, Group}) of + case gen_server:call(couch_view, {get_group_server, DbName, Group}, infinity) of {ok, Pid} -> Pid; Error -> @@ -41,11 +39,9 @@ get_temp_updater(DbName, Language, DesignOptions, MapSrc, RedSrc) -> end. get_group_server(DbName, GroupId) -> - % get signature for group case couch_view_group:open_db_group(DbName, GroupId) of - % do we need to close this db? - {ok, _Db, Group} -> - case gen_server:call(couch_view, {get_group_server, DbName, Group}) of + {ok, Group} -> + case gen_server:call(couch_view, {get_group_server, DbName, Group}, infinity) of {ok, Pid} -> Pid; Error -> @@ -58,11 +54,22 @@ get_group_server(DbName, GroupId) -> get_group(Db, GroupId, Stale) -> MinUpdateSeq = case Stale of ok -> 0; + update_after -> 0; _Else -> couch_db:get_update_seq(Db) end, - couch_view_group:request_group( - get_group_server(couch_db:name(Db), GroupId), - MinUpdateSeq). + GroupPid = get_group_server(couch_db:name(Db), GroupId), + Result = couch_view_group:request_group(GroupPid, MinUpdateSeq), + case Stale of + update_after -> + % best effort, process might die + spawn(fun() -> + LastSeq = couch_db:get_update_seq(Db), + couch_view_group:request_group(GroupPid, LastSeq) + end); + _ -> + ok + end, + Result. get_temp_group(Db, Language, DesignOptions, MapSrc, RedSrc) -> couch_view_group:request_group( @@ -80,7 +87,7 @@ cleanup_index_files(Db) -> % make unique list of group sigs Sigs = lists:map(fun(#doc{id = GroupId}) -> {ok, Info} = get_group_info(Db, GroupId), - ?b2l(proplists:get_value(signature, Info)) + ?b2l(couch_util:get_value(signature, Info)) end, [DD||DD <- DesignDocs, DD#doc.deleted == false]), FileList = list_index_files(Db), @@ -90,11 +97,12 @@ cleanup_index_files(Db) -> % filter out the ones in use DeleteFiles = [FilePath - || FilePath <- FileList, - re:run(FilePath, RegExp, [{capture, none}]) =:= nomatch], + || FilePath <- FileList, + re:run(FilePath, RegExp, [{capture, none}]) =:= nomatch], % delete unused files ?LOG_DEBUG("deleting unused view index files: ~p",[DeleteFiles]), - [file:delete(File)||File <- DeleteFiles], + RootDir = couch_config:get("couchdb", "view_index_dir"), + [couch_file:delete(RootDir,File,false)||File <- DeleteFiles], ok. list_index_files(Db) -> @@ -266,46 +274,65 @@ init([]) -> ets:new(group_servers_by_sig, [set, protected, named_table]), ets:new(couch_groups_by_updater, [set, private, named_table]), process_flag(trap_exit, true), + ok = couch_file:init_delete_dir(RootDir), {ok, #server{root_dir=RootDir}}. 
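encode_doc_id/1 above deliberately leaves the _design/ and _local/ prefixes unescaped, so the resulting URL still routes to the right handler, and percent-encodes only the remainder. Assuming couch_util:url_encode/1 escapes '/' and spaces in the usual percent form, roughly:

1> couch_util:encode_doc_id(<<"_design/foo/bar">>).
"_design/foo%2Fbar"
2> couch_util:encode_doc_id(<<"some doc">>).
"some%20doc"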
-terminate(Reason, _Srv) -> - couch_util:terminate_linked(Reason), +terminate(_Reason, _Srv) -> + [couch_util:shutdown_sync(Pid) || {Pid, _} <- + ets:tab2list(couch_groups_by_updater)], ok. -handle_call({get_group_server, DbName, - #group{name=GroupId,sig=Sig}=Group}, _From, #server{root_dir=Root}=Server) -> +handle_call({get_group_server, DbName, #group{sig=Sig}=Group}, From, + #server{root_dir=Root}=Server) -> case ets:lookup(group_servers_by_sig, {DbName, Sig}) of [] -> - ?LOG_DEBUG("Spawning new group server for view group ~s in database ~s.", - [GroupId, DbName]), - case (catch couch_view_group:start_link({Root, DbName, Group})) of - {ok, NewPid} -> - add_to_ets(NewPid, DbName, Sig), - {reply, {ok, NewPid}, Server}; - Error -> - {reply, Error, Server} - end; + spawn_monitor(fun() -> new_group(Root, DbName, Group) end), + ets:insert(group_servers_by_sig, {{DbName, Sig}, [From]}), + {noreply, Server}; + [{_, WaitList}] when is_list(WaitList) -> + ets:insert(group_servers_by_sig, {{DbName, Sig}, [From | WaitList]}), + {noreply, Server}; [{_, ExistingPid}] -> {reply, {ok, ExistingPid}, Server} - end. + end; + +handle_call({reset_indexes, DbName}, _From, #server{root_dir=Root}=Server) -> + do_reset_indexes(DbName, Root), + {reply, ok, Server}. handle_cast({reset_indexes, DbName}, #server{root_dir=Root}=Server) -> + do_reset_indexes(DbName, Root), + {noreply, Server}. + +new_group(Root, DbName, #group{name=GroupId, sig=Sig} = Group) -> + ?LOG_DEBUG("Spawning new group server for view group ~s in database ~s.", + [GroupId, DbName]), + case (catch couch_view_group:start_link({Root, DbName, Group})) of + {ok, NewPid} -> + unlink(NewPid), + exit({DbName, Sig, {ok, NewPid}}); + {error, invalid_view_seq} -> + ok = gen_server:call(couch_view, {reset_indexes, DbName}), + new_group(Root, DbName, Group); + Error -> + exit({DbName, Sig, Error}) + end. + +do_reset_indexes(DbName, Root) -> % shutdown all the updaters and clear the files, the db got changed Names = ets:lookup(couch_groups_by_db, DbName), lists:foreach( fun({_DbName, Sig}) -> ?LOG_DEBUG("Killing update process for view group ~s. in database ~s.", [Sig, DbName]), [{_, Pid}] = ets:lookup(group_servers_by_sig, {DbName, Sig}), - exit(Pid, kill), - receive {'EXIT', Pid, _} -> - delete_from_ets(Pid, DbName, Sig) - end + couch_util:shutdown_sync(Pid), + delete_from_ets(Pid, DbName, Sig) end, Names), delete_index_dir(Root, DbName), - file:delete(Root ++ "/." ++ binary_to_list(DbName) ++ "_temp"), - {noreply, Server}. + RootDelDir = couch_config:get("couchdb", "view_index_dir"), + couch_file:delete(RootDelDir, Root ++ "/." ++ ?b2l(DbName) ++ "_temp"). handle_info({'EXIT', FromPid, Reason}, Server) -> case ets:lookup(couch_groups_by_updater, FromPid) of @@ -319,6 +346,15 @@ handle_info({'EXIT', FromPid, Reason}, Server) -> [{_, {DbName, GroupId}}] -> delete_from_ets(FromPid, DbName, GroupId) end, + {noreply, Server}; + +handle_info({'DOWN', _, _, _, {DbName, Sig, Reply}}, Server) -> + [{_, WaitList}] = ets:lookup(group_servers_by_sig, {DbName, Sig}), + [gen_server:reply(From, Reply) || From <- WaitList], + case Reply of {ok, NewPid} -> + link(NewPid), + add_to_ets(NewPid, DbName, Sig); + _ -> ok end, {noreply, Server}. add_to_ets(Pid, DbName, Sig) -> @@ -336,19 +372,19 @@ code_change(_OldVsn, State, _Extra) -> delete_index_dir(RootDir, DbName) -> - nuke_dir(RootDir ++ "/." ++ ?b2l(DbName) ++ "_design"). + nuke_dir(RootDir, RootDir ++ "/." ++ ?b2l(DbName) ++ "_design"). 
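new_group/3 above is started with spawn_monitor and hands its result back through its exit reason; the 'DOWN' clause added to handle_info then replies to every waiter queued under {DbName, Sig}. The idiom in isolation, outside a gen_server (do_work/0 stands in for couch_view_group:start_link/1):

{_Pid, Ref} = spawn_monitor(fun() -> exit({result, do_work()}) end),
receive
    {'DOWN', Ref, process, _, {result, R}} -> {ok, R};
    {'DOWN', Ref, process, _, Reason}      -> {error, Reason}  % crashed before exit/1
end.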
-nuke_dir(Dir) -> +nuke_dir(RootDelDir, Dir) -> case file:list_dir(Dir) of {error, enoent} -> ok; % doesn't exist {ok, Files} -> lists:foreach( fun(File)-> Full = Dir ++ "/" ++ File, - case file:delete(Full) of + case couch_file:delete(RootDelDir, Full, false) of ok -> ok; {error, eperm} -> - ok = nuke_dir(Full) + ok = nuke_dir(RootDelDir, Full) end end, Files), @@ -358,81 +394,67 @@ nuke_dir(Dir) -> % keys come back in the language of btree - tuples. less_json_ids({JsonA, IdA}, {JsonB, IdB}) -> - case JsonA == JsonB of - false -> - less_json(JsonA, JsonB); - true -> - IdA < IdB - end. - - -less_json(A, B) -> - TypeA = type_sort(A), - TypeB = type_sort(B), - if TypeA == TypeB -> - less_same_type(A, B); - true -> - TypeA < TypeB + case less_json0(JsonA, JsonB) of + 0 -> + IdA < IdB; + Result -> + Result < 0 end. -type_sort(V) when is_atom(V) -> 0; -type_sort(V) when is_integer(V) -> 1; -type_sort(V) when is_float(V) -> 1; -type_sort(V) when is_binary(V) -> 2; -type_sort(V) when is_list(V) -> 3; -type_sort({V}) when is_list(V) -> 4; -type_sort(V) when is_tuple(V) -> 5. +less_json(A,B) -> + less_json0(A,B) < 0. + +less_json0(A,A) -> 0; + +less_json0(A,B) when is_atom(A), is_atom(B) -> atom_sort(A) - atom_sort(B); +less_json0(A,_) when is_atom(A) -> -1; +less_json0(_,B) when is_atom(B) -> 1; + +less_json0(A,B) when is_number(A), is_number(B) -> A - B; +less_json0(A,_) when is_number(A) -> -1; +less_json0(_,B) when is_number(B) -> 1; + +less_json0(A,B) when is_binary(A), is_binary(B) -> couch_util:collate(A,B); +less_json0(A,_) when is_binary(A) -> -1; +less_json0(_,B) when is_binary(B) -> 1; + +less_json0(A,B) when is_list(A), is_list(B) -> less_list(A,B); +less_json0(A,_) when is_list(A) -> -1; +less_json0(_,B) when is_list(B) -> 1; +less_json0({A},{B}) when is_list(A), is_list(B) -> less_props(A,B); +less_json0({A},_) when is_list(A) -> -1; +less_json0(_,{B}) when is_list(B) -> 1. atom_sort(null) -> 1; atom_sort(false) -> 2; atom_sort(true) -> 3. - -less_same_type(A,B) when is_atom(A) -> - atom_sort(A) < atom_sort(B); -less_same_type(A,B) when is_binary(A) -> - couch_util:collate(A, B) < 0; -less_same_type({AProps}, {BProps}) -> - less_props(AProps, BProps); -less_same_type(A, B) when is_list(A) -> - less_list(A, B); -less_same_type(A, B) -> - A < B. - less_props([], [_|_]) -> - true; + -1; less_props(_, []) -> - false; + 1; less_props([{AKey, AValue}|RestA], [{BKey, BValue}|RestB]) -> case couch_util:collate(AKey, BKey) of - -1 -> true; - 1 -> false; 0 -> - case less_json(AValue, BValue) of - true -> true; - false -> - case less_json(BValue, AValue) of - true -> false; - false -> - less_props(RestA, RestB) - end - end + case less_json0(AValue, BValue) of + 0 -> + less_props(RestA, RestB); + Result -> + Result + end; + Result -> + Result end. less_list([], [_|_]) -> - true; + -1; less_list(_, []) -> - false; + 1; less_list([A|RestA], [B|RestB]) -> - case less_json(A,B) of - true -> true; - false -> - case less_json(B,A) of - true -> false; - false -> - less_list(RestA, RestB) - end + case less_json0(A,B) of + 0 -> + less_list(RestA, RestB); + Result -> + Result end. - - diff --git a/src/couchdb/couch_view_group.erl b/src/couchdb/couch_view_group.erl index 6589bc6a..3cf829b6 100644 --- a/src/couchdb/couch_view_group.erl +++ b/src/couchdb/couch_view_group.erl @@ -32,7 +32,8 @@ compactor_pid=nil, waiting_commit=false, waiting_list=[], - ref_counter=nil + ref_counter=nil, + db_update_notifier=nil }). % api methods @@ -75,23 +76,37 @@ start_link(InitArgs) -> end. 
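less_json0/2 above switches the comparator to a signed result so that equal keys can fall through to the doc-id tie break in less_json_ids/2; the total order it encodes is null < false < true < numbers < strings < arrays < objects. A few spot checks through the boolean wrapper (assuming the existing less_json/2 export):

1> couch_view:less_json(null, false).
true
2> couch_view:less_json(<<"a">>, 42).
false
3> couch_view:less_json([1,2], {[{<<"a">>, 1}]}).
true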
% init creates a closure which spawns the appropriate view_updater. -init({InitArgs, ReturnPid, Ref}) -> +init({{_, DbName, _} = InitArgs, ReturnPid, Ref}) -> process_flag(trap_exit, true), - case prepare_group(InitArgs, false) of - {ok, #group{db=Db, fd=Fd}=Group} -> - couch_db:monitor(Db), - Owner = self(), - Pid = spawn_link(fun()-> couch_view_updater:update(Owner, Group) end), - {ok, RefCounter} = couch_ref_counter:start([Fd]), - {ok, #group_state{ - db_name=couch_db:name(Db), - init_args=InitArgs, - updater_pid = Pid, - group=Group, - ref_counter=RefCounter}}; + try prepare_group(InitArgs, false) of + {ok, #group{db=Db, fd=Fd, current_seq=Seq}=Group} -> + case Seq > couch_db:get_update_seq(Db) of + true -> + ReturnPid ! {Ref, self(), {error, invalid_view_seq}}, + ignore; + _ -> + couch_db:monitor(Db), + {ok, RefCounter} = couch_ref_counter:start([Fd]), + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({compacted, DbName1}) when DbName1 =:= DbName -> + ok = gen_server:cast(Server, reopen_db); + (_) -> + ok + end), + {ok, #group_state{ + db_update_notifier=Notifier, + db_name=couch_db:name(Db), + init_args=InitArgs, + group=Group, + ref_counter=RefCounter}} + end; Error -> ReturnPid ! {Ref, self(), Error}, ignore + catch exit:no_db_file -> + ReturnPid ! {Ref, self(), {error, no_db_file}}, + ignore end. @@ -113,11 +128,11 @@ init({InitArgs, ReturnPid, Ref}) -> handle_call({request_group, RequestSeq}, From, #group_state{ db_name=DbName, - group=#group{current_seq=Seq}=Group, + group=#group{current_seq=Seq, db=OldDb}=Group, updater_pid=nil, waiting_list=WaitList }=State) when RequestSeq > Seq -> - {ok, Db} = couch_db:open(DbName, []), + {ok, Db} = reopen_db(DbName, OldDb), Group2 = Group#group{db=Db}, Owner = self(), Pid = spawn_link(fun()-> couch_view_updater:update(Owner, Group2) end), @@ -152,11 +167,11 @@ handle_call(request_group_info, _From, State) -> handle_cast({start_compact, CompactFun}, #group_state{compactor_pid=nil} = State) -> #group_state{ - group = #group{name = GroupId, sig = GroupSig} = Group, + group = #group{name = GroupId, sig = GroupSig, db = OldDb} = Group, init_args = {RootDir, DbName, _} } = State, ?LOG_INFO("View index compaction starting for ~s ~s", [DbName, GroupId]), - {ok, Db} = couch_db:open(DbName, []), + {ok, Db} = reopen_db(DbName, OldDb), {ok, Fd} = open_index_file(compact, RootDir, DbName, GroupSig), NewGroup = reset_file(Db, Fd, DbName, Group), Pid = spawn_link(fun() -> CompactFun(Group, NewGroup) end), @@ -170,15 +185,16 @@ handle_cast({compact_done, #group{current_seq=NewSeq} = NewGroup}, when NewSeq >= OldSeq -> #group_state{ group = #group{name=GroupId, fd=OldFd, sig=GroupSig} = Group, - init_args = {RootDir, DbName, _}, + init_args = {RootDir, DbName, _}, updater_pid = UpdaterPid, + compactor_pid = CompactorPid, ref_counter = RefCounter } = State, ?LOG_INFO("View index compaction complete for ~s ~s", [DbName, GroupId]), FileName = index_file_name(RootDir, DbName, GroupSig), CompactName = index_file_name(compact, RootDir, DbName, GroupSig), - file:delete(FileName), + ok = couch_file:delete(RootDir, FileName), ok = file:rename(CompactName, FileName), %% if an updater is running, kill it and start a new one @@ -193,6 +209,8 @@ handle_cast({compact_done, #group{current_seq=NewSeq} = NewGroup}, end, %% cleanup old group + unlink(CompactorPid), + receive {'EXIT', CompactorPid, normal} -> ok after 0 -> ok end, unlink(OldFd), couch_ref_counter:drop(RefCounter), {ok, NewRefCounter} = 
couch_ref_counter:start([NewGroup#group.fd]), @@ -216,13 +234,14 @@ handle_cast({compact_done, NewGroup}, State) -> ?LOG_INFO("View index compaction still behind for ~s ~s -- current: ~p " ++ "compact: ~p", [DbName, GroupId, CurrentSeq, NewGroup#group.current_seq]), couch_db:close(NewGroup#group.db), - {ok, Db} = couch_db:open(DbName, []), Pid = spawn_link(fun() -> + {ok, Db} = couch_db:open_int(DbName, []), {_,Ref} = erlang:spawn_monitor(fun() -> couch_view_updater:update(nil, NewGroup#group{db = Db}) end), receive {'DOWN', Ref, _, _, {new_group, NewGroup2}} -> + couch_db:close(Db), #group{name=GroupId} = NewGroup2, Pid2 = couch_view:get_group_server(DbName, GroupId), gen_server:cast(Pid2, {compact_done, NewGroup2}) @@ -246,10 +265,14 @@ handle_cast({partial_update, Pid, NewGroup}, #group_state{updater_pid=Pid} {noreply, State#group_state{group=NewGroup, waiting_commit=true}}; handle_cast({partial_update, _, _}, State) -> %% message from an old (probably pre-compaction) updater; ignore - {noreply, State}. + {noreply, State}; + +handle_cast(reopen_db, #group_state{group = Group, db_name = DbName} = State) -> + {ok, Db} = reopen_db(DbName, Group#group.db), + {noreply, State#group_state{group = Group#group{db = Db}}}. handle_info(delayed_commit, #group_state{db_name=DbName,group=Group}=State) -> - {ok, Db} = couch_db:open(DbName, []), + {ok, Db} = couch_db:open_int(DbName, []), CommittedSeq = couch_db:get_committed_update_seq(Db), couch_db:close(Db), if CommittedSeq >= Group#group.current_seq -> @@ -284,7 +307,7 @@ handle_info({'EXIT', FromPid, {new_group, #group{db=Db}=Group}}, group=Group#group{db=nil}, updater_pid=nil}}; StillWaiting -> % we still have some waiters, reopen the database and reupdate the index - {ok, Db2} = couch_db:open(DbName, []), + {ok, Db2} = couch_db:open_int(DbName, []), Group2 = Group#group{db=Db2}, Owner = self(), Pid = spawn_link(fun() -> couch_view_updater:update(Owner, Group2) end), @@ -332,9 +355,10 @@ handle_info({'DOWN',_,_,_,_}, State) -> terminate(Reason, #group_state{updater_pid=Update, compactor_pid=Compact}=S) -> + couch_db_update_notifier:stop(S#group_state.db_update_notifier), reply_all(S, Reason), - catch exit(Update, Reason), - catch exit(Compact, Reason), + couch_util:shutdown_sync(Update), + couch_util:shutdown_sync(Compact), ok. code_change(_OldVsn, State, _Extra) -> @@ -363,8 +387,8 @@ reply_all(#group_state{waiting_list=WaitList}=State, Reply) -> [catch gen_server:reply(Pid, Reply) || {Pid, _} <- WaitList], State#group_state{waiting_list=[]}. -prepare_group({RootDir, DbName, #group{sig=Sig}=Group}, ForceReset)-> - case couch_db:open(DbName, []) of +prepare_group({RootDir, DbName, #group{sig=Sig, db=OldDb}=Group}, ForceReset)-> + case reopen_db(DbName, OldDb) of {ok, Db} -> case open_index_file(RootDir, DbName, Sig) of {ok, Fd} -> @@ -393,11 +417,15 @@ prepare_group({RootDir, DbName, #group{sig=Sig}=Group}, ForceReset)-> get_index_header_data(#group{current_seq=Seq, purge_seq=PurgeSeq, id_btree=IdBtree,views=Views}) -> - ViewStates = [couch_btree:get_state(Btree) || #view{btree=Btree} <- Views], - #index_header{seq=Seq, - purge_seq=PurgeSeq, - id_btree_state=couch_btree:get_state(IdBtree), - view_states=ViewStates}. + ViewStates = [ + {couch_btree:get_state(V#view.btree), V#view.update_seq, V#view.purge_seq} || V <- Views + ], + #index_header{ + seq=Seq, + purge_seq=PurgeSeq, + id_btree_state=couch_btree:get_state(IdBtree), + view_states=ViewStates + }. hex_sig(GroupSig) -> couch_util:to_hex(?b2l(GroupSig)). 
@@ -429,7 +457,7 @@ open_index_file(compact, RootDir, DbName, GroupSig) -> end. open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc) -> - case couch_db:open(DbName, []) of + case couch_db:open_int(DbName, []) of {ok, Db} -> View = #view{map_names=[<<"_temp">>], id_num=0, @@ -437,8 +465,8 @@ open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc) -> def=MapSrc, reduce_funs= if RedSrc==[] -> []; true -> [{<<"_temp">>, RedSrc}] end, options=DesignOptions}, - - {ok, Db, set_view_sig(#group{name = <<"_temp">>, db=Db, views=[View], + couch_db:close(Db), + {ok, set_view_sig(#group{name = <<"_temp">>,lib={[]}, views=[View], def_lang=Language, design_options=DesignOptions})}; Error -> Error @@ -446,16 +474,48 @@ open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc) -> set_view_sig(#group{ views=Views, + lib={[]}, + def_lang=Language, + design_options=DesignOptions}=G) -> + ViewInfo = [old_view_format(V) || V <- Views], + G#group{sig=couch_util:md5(term_to_binary({ViewInfo, Language, DesignOptions}))}; +set_view_sig(#group{ + views=Views, + lib=Lib, def_lang=Language, design_options=DesignOptions}=G) -> - G#group{sig=erlang:md5(term_to_binary({Views, Language, DesignOptions}))}. + ViewInfo = [old_view_format(V) || V <- Views], + G#group{sig=couch_util:md5(term_to_binary({ViewInfo, Language, DesignOptions, sort_lib(Lib)}))}. + +% Use the old view record format so group sig's don't change +old_view_format(View) -> + { + view, + View#view.id_num, + View#view.map_names, + View#view.def, + View#view.btree, + View#view.reduce_funs, + View#view.options + }. + +sort_lib({Lib}) -> + sort_lib(Lib, []). +sort_lib([], LAcc) -> + lists:keysort(1, LAcc); +sort_lib([{LName, {LObj}}|Rest], LAcc) -> + LSorted = sort_lib(LObj, []), % descend into nested object + sort_lib(Rest, [{LName, LSorted}|LAcc]); +sort_lib([{LName, LCode}|Rest], LAcc) -> + sort_lib(Rest, [{LName, LCode}|LAcc]). 
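sort_lib/1 above exists so the group signature is insensitive to the key order of the lib object: every nesting level is keysorted before set_view_sig/1 feeds the term to md5. An illustration (sort_lib/1 is internal to couch_view_group; it is called directly here only for the sake of the example):

LibA = {[{<<"b">>, <<"code">>}, {<<"a">>, {[{<<"y">>, <<"1">>}, {<<"x">>, <<"2">>}]}}]},
LibB = {[{<<"a">>, {[{<<"x">>, <<"2">>}, {<<"y">>, <<"1">>}]}}, {<<"b">>, <<"code">>}]},
% same object, different key order => identical sorted form, identical signature
true = (sort_lib(LibA) =:= sort_lib(LibB)).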
open_db_group(DbName, GroupId) -> - case couch_db:open(DbName, []) of + case couch_db:open_int(DbName, []) of {ok, Db} -> case couch_db:open_doc(Db, GroupId) of {ok, Doc} -> - {ok, Db, design_doc_to_view_group(Doc)}; + couch_db:close(Db), + {ok, design_doc_to_view_group(Doc)}; Else -> couch_db:close(Db), Else @@ -494,36 +554,39 @@ get_group_info(State) -> % maybe move to another module design_doc_to_view_group(#doc{id=Id,body={Fields}}) -> - Language = proplists:get_value(<<"language">>, Fields, <<"javascript">>), - {DesignOptions} = proplists:get_value(<<"options">>, Fields, {[]}), - {RawViews} = proplists:get_value(<<"views">>, Fields, {[]}), + Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>), + {DesignOptions} = couch_util:get_value(<<"options">>, Fields, {[]}), + {RawViews} = couch_util:get_value(<<"views">>, Fields, {[]}), + Lib = couch_util:get_value(<<"lib">>, RawViews, {[]}), % add the views to a dictionary object, with the map source as the key DictBySrc = lists:foldl( fun({Name, {MRFuns}}, DictBySrcAcc) -> - MapSrc = proplists:get_value(<<"map">>, MRFuns), - RedSrc = proplists:get_value(<<"reduce">>, MRFuns, null), - {ViewOptions} = proplists:get_value(<<"options">>, MRFuns, {[]}), - View = - case dict:find({MapSrc, ViewOptions}, DictBySrcAcc) of - {ok, View0} -> View0; - error -> #view{def=MapSrc, options=ViewOptions} % create new view object - end, - View2 = - if RedSrc == null -> - View#view{map_names=[Name|View#view.map_names]}; - true -> - View#view{reduce_funs=[{Name,RedSrc}|View#view.reduce_funs]} - end, - dict:store({MapSrc, ViewOptions}, View2, DictBySrcAcc) + case couch_util:get_value(<<"map">>, MRFuns) of + undefined -> DictBySrcAcc; + MapSrc -> + RedSrc = couch_util:get_value(<<"reduce">>, MRFuns, null), + {ViewOptions} = couch_util:get_value(<<"options">>, MRFuns, {[]}), + View = + case dict:find({MapSrc, ViewOptions}, DictBySrcAcc) of + {ok, View0} -> View0; + error -> #view{def=MapSrc, options=ViewOptions} % create new view object + end, + View2 = + if RedSrc == null -> + View#view{map_names=[Name|View#view.map_names]}; + true -> + View#view{reduce_funs=[{Name,RedSrc}|View#view.reduce_funs]} + end, + dict:store({MapSrc, ViewOptions}, View2, DictBySrcAcc) + end end, dict:new(), RawViews), % number the views {Views, _N} = lists:mapfoldl( fun({_Src, View}, N) -> {View#view{id_num=N},N+1} end, 0, lists:sort(dict:to_list(DictBySrc))), - - set_view_sig(#group{name=Id, views=Views, def_lang=Language, design_options=DesignOptions}). + set_view_sig(#group{name=Id, lib=Lib, views=Views, def_lang=Language, design_options=DesignOptions}). reset_group(#group{views=Views}=Group) -> Views2 = [View#view{btree=nil} || View <- Views], @@ -537,19 +600,24 @@ reset_file(Db, Fd, DbName, #group{sig=Sig,name=Name} = Group) -> init_group(Db, Fd, reset_group(Group), nil). delete_index_file(RootDir, DbName, GroupSig) -> - file:delete(index_file_name(RootDir, DbName, GroupSig)). + couch_file:delete(RootDir, index_file_name(RootDir, DbName, GroupSig)). 
 init_group(Db, Fd, #group{views=Views}=Group, nil) ->
     init_group(Db, Fd, Group,
         #index_header{seq=0, purge_seq=couch_db:get_purge_seq(Db),
-            id_btree_state=nil, view_states=[nil || _ <- Views]});
+            id_btree_state=nil, view_states=[{nil, 0, 0} || _ <- Views]});
 init_group(Db, Fd, #group{def_lang=Lang,views=Views}=
             Group, IndexHeader) ->
     #index_header{seq=Seq, purge_seq=PurgeSeq,
             id_btree_state=IdBtreeState, view_states=ViewStates} = IndexHeader,
+    StateUpdate = fun
+        ({_, _, _}=State) -> State;
+        (State) -> {State, 0, 0}
+    end,
+    ViewStates2 = lists:map(StateUpdate, ViewStates),
     {ok, IdBtree} = couch_btree:open(IdBtreeState, Fd),
     Views2 = lists:zipwith(
-        fun(BtreeState, #view{reduce_funs=RedFuns,options=Options}=View) ->
+        fun({BTState, USeq, PSeq}, #view{reduce_funs=RedFuns,options=Options}=View) ->
             FunSrcs = [FunSrc || {_Name, FunSrc} <- RedFuns],
             ReduceFun =
                 fun(reduce, KVs) ->
@@ -566,19 +634,22 @@ init_group(Db, Fd, #group{def_lang=Lang,views=Views}=
                     {Count, Reduced}
                 end,
-            case proplists:get_value(<<"collation">>, Options, <<"default">>) of
+            case couch_util:get_value(<<"collation">>, Options, <<"default">>) of
             <<"default">> ->
                 Less = fun couch_view:less_json_ids/2;
             <<"raw">> ->
                 Less = fun(A,B) -> A < B end
             end,
-            {ok, Btree} = couch_btree:open(BtreeState, Fd,
-                    [{less, Less},
-                    {reduce, ReduceFun}]),
-            View#view{btree=Btree}
+            {ok, Btree} = couch_btree:open(BTState, Fd,
+                [{less, Less}, {reduce, ReduceFun}]
+            ),
+            View#view{btree=Btree, update_seq=USeq, purge_seq=PSeq}
         end,
-        ViewStates, Views),
+        ViewStates2, Views),
     Group#group{db=Db, fd=Fd, current_seq=Seq, purge_seq=PurgeSeq,
         id_btree=IdBtree, views=Views2}.
-
+reopen_db(DbName, nil) ->
+    couch_db:open_int(DbName, []);
+reopen_db(_DbName, Db) ->
+    couch_db:reopen(Db).
diff --git a/src/couchdb/couch_view_updater.erl b/src/couchdb/couch_view_updater.erl
index aa7d98fc..8e089fa9 100644
--- a/src/couchdb/couch_view_updater.erl
+++ b/src/couchdb/couch_view_updater.erl
@@ -38,8 +38,10 @@ update(Owner, Group) ->
         couch_task_status:update(<<"Resetting view index due to lost purge entries.">>),
         exit(reset)
     end,
-    {ok, MapQueue} = couch_work_queue:new(100000, 500),
-    {ok, WriteQueue} = couch_work_queue:new(100000, 500),
+    {ok, MapQueue} = couch_work_queue:new(
+        [{max_size, 100000}, {max_items, 500}]),
+    {ok, WriteQueue} = couch_work_queue:new(
+        [{max_size, 100000}, {max_items, 500}]),
     Self = self(),
     ViewEmptyKVs = [{View, []} || View <- Group2#group.views],
     spawn_link(fun() -> do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) end),
@@ -49,9 +51,9 @@ update(Owner, Group) ->
     % update status every half second
     couch_task_status:set_update_frequency(500),
     #group{ design_options = DesignOptions } = Group,
-    IncludeDesign = proplists:get_value(<<"include_design">>,
+    IncludeDesign = couch_util:get_value(<<"include_design">>,
         DesignOptions, false),
-    LocalSeq = proplists:get_value(<<"local_seq">>, DesignOptions, false),
+    LocalSeq = couch_util:get_value(<<"local_seq">>, DesignOptions, false),
     DocOpts =
     case LocalSeq of
     true -> [conflicts, deleted_conflicts, local_seq];
@@ -94,19 +96,25 @@ purge_index(#group{db=Db, views=Views, id_btree=IdBtree}=Group) ->
         end, dict:new(), Lookups),
     % Now remove the values from the btrees
+    PurgeSeq = couch_db:get_purge_seq(Db),
     Views2 = lists:map(
         fun(#view{id_num=Num,btree=Btree}=View) ->
             case dict:find(Num, ViewKeysToRemoveDict) of
             {ok, RemoveKeys} ->
-                {ok, Btree2} = couch_btree:add_remove(Btree, [], RemoveKeys),
-                View#view{btree=Btree2};
+                {ok, ViewBtree2} = couch_btree:add_remove(Btree, [], RemoveKeys),
+                case ViewBtree2 =/= Btree of
+                true ->
+                    View#view{btree=ViewBtree2, purge_seq=PurgeSeq};
+                _ ->
+                    View#view{btree=ViewBtree2}
+                end;
             error -> % no keys to remove in this view
                 View
             end
         end, Views),
     Group#group{id_btree=IdBtree2, views=Views2,
-            purge_seq=couch_db:get_purge_seq(Db)}.
+            purge_seq=PurgeSeq}.
 
 load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign) ->
@@ -201,12 +209,12 @@ view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs|RestResults], [{Vie
 view_compute(Group, []) ->
     {Group, []};
-view_compute(#group{def_lang=DefLang, query_server=QueryServerIn}=Group, Docs) ->
+view_compute(#group{def_lang=DefLang, lib=Lib, query_server=QueryServerIn}=Group, Docs) ->
     {ok, QueryServer} =
     case QueryServerIn of
     nil -> % doc map not started
         Definitions = [View#view.def || View <- Group#group.views],
-        couch_query_servers:start_doc_map(DefLang, Definitions);
+        couch_query_servers:start_doc_map(DefLang, Definitions, Lib);
     _ ->
         {ok, QueryServerIn}
     end,
@@ -245,7 +253,12 @@ write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild)
     Views2 = lists:zipwith(fun(View, {_View, AddKeyValues}) ->
         KeysToRemove = couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []),
         {ok, ViewBtree2} = couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove),
-        View#view{btree = ViewBtree2}
+        case ViewBtree2 =/= View#view.btree of
+        true ->
+            View#view{btree=ViewBtree2, update_seq=NewSeq};
+        _ ->
+            View#view{btree=ViewBtree2}
+        end
     end, Group#group.views, ViewKeyValuesToAdd),
     Group#group{views=Views2, current_seq=NewSeq, id_btree=IdBtree2}.
diff --git a/src/couchdb/couch_work_queue.erl b/src/couchdb/couch_work_queue.erl
index ca9445d3..13ec7335 100644
--- a/src/couchdb/couch_work_queue.erl
+++ b/src/couchdb/couch_work_queue.erl
@@ -13,76 +13,139 @@
 -module(couch_work_queue).
 -behaviour(gen_server).
 
--export([new/2,queue/2,dequeue/1,close/1]).
--export([init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]).
+% public API
+-export([new/1, queue/2, dequeue/1, dequeue/2, close/1]).
+
+% gen_server callbacks
+-export([init/1, terminate/2]).
+-export([handle_call/3, handle_cast/2, code_change/3, handle_info/2]).
 
 -record(q, {
-    buffer=[],
-    blocked=[],
+    queue = queue:new(),
+    blocked = [],
     max_size,
     max_items,
-    items=0,
-    size=0,
-    work_waiter=nil,
-    close_on_dequeue=false
+    items = 0,
+    size = 0,
+    work_waiters = [],
+    close_on_dequeue = false,
+    multi_workers = false
 }).
 
-new(MaxSize, MaxItems) ->
-    gen_server:start_link(couch_work_queue, {MaxSize, MaxItems}, []).
+
+new(Options) ->
+    gen_server:start_link(couch_work_queue, Options, []).
+
 
 queue(Wq, Item) ->
     gen_server:call(Wq, {queue, Item}, infinity).
 
+
 dequeue(Wq) ->
-    try gen_server:call(Wq, dequeue, infinity)
+    dequeue(Wq, all).
+
+
+dequeue(Wq, MaxItems) ->
+    try
+        gen_server:call(Wq, {dequeue, MaxItems}, infinity)
     catch
         _:_ -> closed
     end.
 
+
 close(Wq) ->
     gen_server:cast(Wq, close).
 
-init({MaxSize,MaxItems}) ->
-    {ok, #q{max_size=MaxSize, max_items=MaxItems}}.
+
+init(Options) ->
+    Q = #q{
+        max_size = couch_util:get_value(max_size, Options),
+        max_items = couch_util:get_value(max_items, Options),
+        multi_workers = couch_util:get_value(multi_workers, Options, false)
+    },
+    {ok, Q}.
+
+
+terminate(_Reason, #q{work_waiters=Workers}) ->
+    lists:foreach(fun({W, _}) -> gen_server:reply(W, closed) end, Workers).
 
-terminate(_Reason, #q{work_waiter=nil}) ->
-    ok;
-terminate(_Reason, #q{work_waiter=WW}) ->
-    gen_server:reply(WW, closed).
 
-handle_call({queue, Item}, From, #q{work_waiter=nil}=Q0) ->
-    Q = Q0#q{size=Q0#q.size + byte_size(term_to_binary(Item)),
-            items=Q0#q.items + 1,
-            buffer=[Item | Q0#q.buffer]},
+handle_call({queue, Item}, From, #q{work_waiters = []} = Q0) ->
+    Q = Q0#q{size = Q0#q.size + byte_size(term_to_binary(Item)),
+            items = Q0#q.items + 1,
+            queue = queue:in(Item, Q0#q.queue)},
     case (Q#q.size >= Q#q.max_size) orelse
         (Q#q.items >= Q#q.max_items) of
     true ->
-        {noreply, Q#q{blocked=[From | Q#q.blocked]}};
+        {noreply, Q#q{blocked = [From | Q#q.blocked]}};
     false ->
         {reply, ok, Q}
     end;
-handle_call({queue, Item}, _From, #q{work_waiter=WW}=Q) ->
-    gen_server:reply(WW, {ok, [Item]}),
-    {reply, ok, Q#q{work_waiter=nil}};
-handle_call(dequeue, _From, #q{work_waiter=WW}) when WW /= nil ->
-    exit("Only one caller allowed to wait for work at a time");
-handle_call(dequeue, From, #q{items=0}=Q) ->
-    {noreply, Q#q{work_waiter=From}};
-handle_call(dequeue, _From, #q{buffer=Buff, max_size=MaxSize,
-        max_items=MaxItems, close_on_dequeue=Close}=Q) ->
-    [gen_server:reply(From, ok) || From <- Q#q.blocked],
-    Q2 = #q{max_size=MaxSize, max_items=MaxItems},
-    if Close ->
-        {stop, normal, {ok, Buff}, Q2};
+
+handle_call({queue, Item}, _From, #q{work_waiters = [{W, _Max} | Rest]} = Q) ->
+    gen_server:reply(W, {ok, [Item]}),
+    {reply, ok, Q#q{work_waiters = Rest}};
+
+handle_call({dequeue, Max}, From, Q) ->
+    #q{work_waiters = Workers, multi_workers = Multi, items = Count} = Q,
+    case {Workers, Multi} of
+    {[_ | _], false} ->
+        exit("Only one caller allowed to wait for this work at a time");
+    {[_ | _], true} ->
+        {noreply, Q#q{work_waiters=Workers ++ [{From, Max}]}};
+    _ ->
+        case Count of
+        0 ->
+            {noreply, Q#q{work_waiters=Workers ++ [{From, Max}]}};
+        C when C > 0 ->
+            deliver_queue_items(Max, Q)
+        end
+    end.
+
+
+deliver_queue_items(Max, Q) ->
+    #q{
+        queue = Queue,
+        items = Count,
+        close_on_dequeue = Close,
+        blocked = Blocked
+    } = Q,
+    case (Max =:= all) orelse (Max >= Count) of
+    false ->
+        {Items, Queue2, Blocked2} = dequeue_items(Max, Queue, Blocked, []),
+        Q2 = Q#q{items = Count - Max, blocked = Blocked2, queue = Queue2},
+        {reply, {ok, Items}, Q2};
     true ->
-        {reply, {ok, Buff}, #q{max_size=MaxSize, max_items=MaxItems}}
+        lists:foreach(fun(F) -> gen_server:reply(F, ok) end, Blocked),
+        Q2 = Q#q{items = 0, size = 0, blocked = [], queue = queue:new()},
+        case Close of
+        false ->
+            {reply, {ok, queue:to_list(Queue)}, Q2};
+        true ->
+            {stop, normal, {ok, queue:to_list(Queue)}, Q2}
+        end
     end.
 
-handle_cast(close, #q{buffer=[]}=Q) ->
+
+dequeue_items(0, Queue, Blocked, DequeuedAcc) ->
+    {lists:reverse(DequeuedAcc), Queue, Blocked};
+
+dequeue_items(NumItems, Queue, Blocked, DequeuedAcc) ->
+    {{value, Item}, Queue2} = queue:out(Queue),
+    case Blocked of
+    [] ->
+        Blocked2 = Blocked;
+    [From | Blocked2] ->
+        gen_server:reply(From, ok)
+    end,
+    dequeue_items(NumItems - 1, Queue2, Blocked2, [Item | DequeuedAcc]).
+
+
+handle_cast(close, #q{items = 0} = Q) ->
     {stop, normal, Q};
+
 handle_cast(close, Q) ->
-    {noreply, Q#q{close_on_dequeue=true}}.
+    {noreply, Q#q{close_on_dequeue = true}}.
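The rework above replaces the old single work_waiter with a work_waiters list: with {multi_workers, true} several consumers may block in {dequeue, Max} at once, and when only part of the queue is drained, dequeue_items/4 wakes one blocked producer per item removed. A minimal usage sketch of the new API (the item shape and the process_batch/1 helper are hypothetical):

    {ok, Q} = couch_work_queue:new([{max_size, 100000}, {max_items, 500}]),
    ok = couch_work_queue:queue(Q, {<<"doc-1">>, []}),
    couch_work_queue:close(Q),
    case couch_work_queue:dequeue(Q, 100) of    % at most 100 items per batch
    closed ->
        ok;
    {ok, Items} ->
        process_batch(Items)
    end.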
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
diff --git a/src/couchdb/priv/Makefile.am b/src/couchdb/priv/Makefile.am
index 5b76f8cd..b36d828d 100644
--- a/src/couchdb/priv/Makefile.am
+++ b/src/couchdb/priv/Makefile.am
@@ -47,7 +47,7 @@ COUCHJS_SRCS = \
 locallibbin_PROGRAMS = couchjs
 couchjs_SOURCES = $(COUCHJS_SRCS)
 couchjs_LDFLAGS = $(CURL_LDFLAGS)
-couchjs_CFLAGS = $(CURL_CFLAGS)
+couchjs_CFLAGS = -D_BSD_SOURCE $(CURL_CFLAGS)
 couchjs_LDADD = $(CURL_LDFLAGS) @JSLIB@
 
 couchpriv_DATA = stat_descriptions.cfg
diff --git a/src/couchdb/priv/couch_js/http.c b/src/couchdb/priv/couch_js/http.c
index 998c2439..6c2a8a82 100644
--- a/src/couchdb/priv/couch_js/http.c
+++ b/src/couchdb/priv/couch_js/http.c
@@ -10,6 +10,7 @@
 // License for the specific language governing permissions and limitations under
 // the License.
 
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <jsapi.h>
@@ -403,7 +404,8 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) {
     HTTP_HANDLE = curl_easy_init();
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_READFUNCTION, send_body);
-    curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKFUNCTION, seek_body);
+    curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKFUNCTION,
+                        (curl_seek_callback) seek_body);
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_HEADERFUNCTION, recv_header);
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEFUNCTION, recv_body);
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOPROGRESS, 1);
diff --git a/src/couchdb/priv/stat_descriptions.cfg.in b/src/couchdb/priv/stat_descriptions.cfg.in
index 554124cf..b80d7684 100644
--- a/src/couchdb/priv/stat_descriptions.cfg.in
+++ b/src/couchdb/priv/stat_descriptions.cfg.in
@@ -1,3 +1,15 @@
+%% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+%% use this file except in compliance with the License. You may obtain a copy of
+%% the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+%% License for the specific language governing permissions and limitations under
+%% the License.
+
 % Style guide for descriptions: Start with a lowercase letter & do not add
 % a trailing full-stop / period
 % Please keep this in alphabetical order
@@ -7,6 +19,8 @@
 {couchdb, open_databases, "number of open databases"}.
 {couchdb, open_os_files, "number of file descriptors CouchDB has open"}.
 {couchdb, request_time, "length of a request inside CouchDB without MochiWeb"}.
+{couchdb, auth_cache_hits, "number of authentication cache hits"}.
+{couchdb, auth_cache_misses, "number of authentication cache misses"}.
 
 {httpd, bulk_requests, "number of bulk requests"}.
 {httpd, requests, "number of HTTP requests"}.
@@ -18,7 +32,6 @@
 {httpd_request_methods, 'DELETE', "number of HTTP DELETE requests"}.
 {httpd_request_methods, 'GET', "number of HTTP GET requests"}.
 {httpd_request_methods, 'HEAD', "number of HTTP HEAD requests"}.
-{httpd_request_methods, 'MOVE', "number of HTTP MOVE requests"}.
 {httpd_request_methods, 'POST', "number of HTTP POST requests"}.
 {httpd_request_methods, 'PUT', "number of HTTP PUT requests"}.
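Each entry in stat_descriptions.cfg.in above is a plain Erlang term of the shape {Group, Key, Description} terminated by a full stop, so the installed file can be read back with file:consult/1. A hedged sketch (the path is a placeholder):

    {ok, Stats} = file:consult("priv/stat_descriptions.cfg"),
    lists:foreach(fun({Group, Key, Desc}) ->
        io:format("~p/~p: ~s~n", [Group, Key, Desc])
    end, Stats).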
diff --git a/src/erlang-oauth/Makefile.am b/src/erlang-oauth/Makefile.am index 1d123396..48b76482 100644 --- a/src/erlang-oauth/Makefile.am +++ b/src/erlang-oauth/Makefile.am @@ -12,18 +12,19 @@ oauthebindir = $(localerlanglibdir)/erlang-oauth/ebin -# Removed oauth_rsa_sha1.erl until we require R12B5 or -# we add a ./configure option to enable it. - oauth_file_collection = \ oauth.app.in \ oauth.erl \ oauth_hmac_sha1.erl \ oauth_http.erl \ oauth_plaintext.erl \ + oauth_rsa_sha1.erl \ oauth_unix.erl \ oauth_uri.erl +# Removed oauth_rsa_sha1.beam until we require R12B5 or +# we add a ./configure option to enable it. + oauthebin_make_generated_file_list = \ oauth.app \ oauth.beam \ diff --git a/src/erlang-oauth/oauth_hmac_sha1.erl b/src/erlang-oauth/oauth_hmac_sha1.erl index 69064edd..79d59f37 100644 --- a/src/erlang-oauth/oauth_hmac_sha1.erl +++ b/src/erlang-oauth/oauth_hmac_sha1.erl @@ -8,4 +8,4 @@ signature(BaseString, CS, TS) -> base64:encode_to_string(crypto:sha_mac(Key, BaseString)). verify(Signature, BaseString, CS, TS) -> - Signature =:= signature(BaseString, CS, TS). + couch_util:verify(signature(BaseString, CS, TS), Signature). diff --git a/src/erlang-oauth/oauth_plaintext.erl b/src/erlang-oauth/oauth_plaintext.erl index d8085e02..41a1e9b2 100644 --- a/src/erlang-oauth/oauth_plaintext.erl +++ b/src/erlang-oauth/oauth_plaintext.erl @@ -7,4 +7,4 @@ signature(CS, TS) -> oauth_uri:calate("&", [CS, TS]). verify(Signature, CS, TS) -> - Signature =:= signature(CS, TS). + couch_util:verify(signature(CS, TS), Signature). diff --git a/src/ibrowse/Makefile.am b/src/ibrowse/Makefile.am index 510f36a9..8c5d3f8e 100644 --- a/src/ibrowse/Makefile.am +++ b/src/ibrowse/Makefile.am @@ -10,7 +10,7 @@ ## License for the specific language governing permissions and limitations under ## the License. -ibrowseebindir = $(localerlanglibdir)/ibrowse-1.5.2/ebin +ibrowseebindir = $(localerlanglibdir)/ibrowse-2.1.0/ebin ibrowse_file_collection = \ ibrowse.app.in \ diff --git a/src/ibrowse/ibrowse.app.in b/src/ibrowse/ibrowse.app.in index 4f43dd92..e8580d10 100644 --- a/src/ibrowse/ibrowse.app.in +++ b/src/ibrowse/ibrowse.app.in @@ -1,10 +1,10 @@ {application, ibrowse, [{description, "HTTP client application"}, - {vsn, "1.5.1"}, - {modules, [ ibrowse, - ibrowse_http_client, - ibrowse_app, - ibrowse_sup, + {vsn, "2.1.0"}, + {modules, [ ibrowse, + ibrowse_http_client, + ibrowse_app, + ibrowse_sup, ibrowse_lib, ibrowse_lb ]}, {registered, []}, diff --git a/src/ibrowse/ibrowse.erl b/src/ibrowse/ibrowse.erl index 1913ef59..1a42f4bc 100644 --- a/src/ibrowse/ibrowse.erl +++ b/src/ibrowse/ibrowse.erl @@ -6,9 +6,9 @@ %%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> %%%------------------------------------------------------------------- %% @author Chandrashekhar Mullaparthi <chandrashekhar dot mullaparthi at gmail dot com> -%% @copyright 2005-2009 Chandrashekhar Mullaparthi -%% @version 1.5.2 -%% @doc The ibrowse application implements an HTTP 1.1 client. This +%% @copyright 2005-2010 Chandrashekhar Mullaparthi +%% @version 2.1.0 +%% @doc The ibrowse application implements an HTTP 1.1 client in erlang. This %% module implements the API of the HTTP client. There is one named %% process called 'ibrowse' which assists in load balancing and maintaining configuration. There is one load balancing process per unique webserver. 
There is %% one process to handle one TCP connection to a webserver @@ -21,22 +21,22 @@ %% <p>Here are a few sample invocations.</p> %% %% <code> -%% ibrowse:send_req("http://intranet/messenger/", [], get). +%% ibrowse:send_req("http://intranet/messenger/", [], get). %% <br/><br/> -%% -%% ibrowse:send_req("http://www.google.com/", [], get, [], -%% [{proxy_user, "XXXXX"}, -%% {proxy_password, "XXXXX"}, -%% {proxy_host, "proxy"}, -%% {proxy_port, 8080}], 1000). +%% +%% ibrowse:send_req("http://www.google.com/", [], get, [], +%% [{proxy_user, "XXXXX"}, +%% {proxy_password, "XXXXX"}, +%% {proxy_host, "proxy"}, +%% {proxy_port, 8080}], 1000). %% <br/><br/> %% %%ibrowse:send_req("http://www.erlang.org/download/otp_src_R10B-3.tar.gz", [], get, [], -%% [{proxy_user, "XXXXX"}, -%% {proxy_password, "XXXXX"}, -%% {proxy_host, "proxy"}, -%% {proxy_port, 8080}, -%% {save_response_to_file, true}], 1000). +%% [{proxy_user, "XXXXX"}, +%% {proxy_password, "XXXXX"}, +%% {proxy_host, "proxy"}, +%% {proxy_port, 8080}, +%% {save_response_to_file, true}], 1000). %% <br/><br/> %% %% ibrowse:send_req("http://www.erlang.org", [], head). @@ -48,17 +48,12 @@ %% ibrowse:send_req("http://www.bbc.co.uk", [], trace). %% %% <br/><br/> -%% ibrowse:send_req("http://www.google.com", [], get, [], +%% ibrowse:send_req("http://www.google.com", [], get, [], %% [{stream_to, self()}]). %% </code> %% -%% <p>A driver exists which implements URL encoding in C, but the -%% speed achieved using only erlang has been good enough, so the -%% driver isn't actually used.</p> -module(ibrowse). --vsn('$Id: ibrowse.erl,v 1.8 2009/07/01 22:43:19 chandrusf Exp $ '). - -behaviour(gen_server). %%-------------------------------------------------------------------- %% Include files @@ -70,48 +65,51 @@ %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). + terminate/2, code_change/3]). %% API interface -export([ - rescan_config/0, - rescan_config/1, - get_config_value/1, - get_config_value/2, - spawn_worker_process/2, - spawn_link_worker_process/2, - stop_worker_process/1, - send_req/3, - send_req/4, - send_req/5, - send_req/6, - send_req_direct/4, - send_req_direct/5, - send_req_direct/6, - send_req_direct/7, - stream_next/1, - set_max_sessions/3, - set_max_pipeline_size/3, - set_dest/3, - trace_on/0, - trace_off/0, - trace_on/2, - trace_off/2, - all_trace_off/0, - show_dest_status/0, - show_dest_status/2 - ]). + rescan_config/0, + rescan_config/1, + get_config_value/1, + get_config_value/2, + spawn_worker_process/1, + spawn_worker_process/2, + spawn_link_worker_process/1, + spawn_link_worker_process/2, + stop_worker_process/1, + send_req/3, + send_req/4, + send_req/5, + send_req/6, + send_req_direct/4, + send_req_direct/5, + send_req_direct/6, + send_req_direct/7, + stream_next/1, + stream_close/1, + set_max_sessions/3, + set_max_pipeline_size/3, + set_dest/3, + trace_on/0, + trace_off/0, + trace_on/2, + trace_off/2, + all_trace_off/0, + show_dest_status/0, + show_dest_status/2 + ]). -ifdef(debug). -compile(export_all). -endif. -import(ibrowse_lib, [ - parse_url/1, - get_value/3, - do_trace/2 - ]). - + parse_url/1, + get_value/3, + do_trace/2 + ]). + -record(state, {trace = false}). -include("ibrowse.hrl"). @@ -159,7 +157,7 @@ stop() -> send_req(Url, Headers, Method) -> send_req(Url, Headers, Method, [], []). -%% @doc Same as send_req/3. +%% @doc Same as send_req/3. %% If a list is specified for the body it has to be a flat list. The body can also be a fun/0 or a fun/1. 
<br/> %% If fun/0, the connection handling process will repeatdely call the fun until it returns an error or eof. <pre>Fun() = {ok, Data} | eof</pre><br/> %% If fun/1, the connection handling process will repeatedly call the fun with the supplied state until it returns an error or eof. <pre>Fun(State) = {ok, Data} | {ok, Data, NewState} | eof</pre> @@ -169,19 +167,19 @@ send_req(Url, Headers, Method) -> send_req(Url, Headers, Method, Body) -> send_req(Url, Headers, Method, Body, []). -%% @doc Same as send_req/4. +%% @doc Same as send_req/4. %% For a description of SSL Options, look in the <a href="http://www.erlang.org/doc/apps/ssl/index.html">ssl</a> manpage. If the %% HTTP Version to use is not specified, the default is 1.1. %% <br/> -%% <p>The <code>host_header</code> option is useful in the case where ibrowse is +%% <ul> +%% <li>The <code>host_header</code> option is useful in the case where ibrowse is %% connecting to a component such as <a %% href="http://www.stunnel.org">stunnel</a> which then sets up a %% secure connection to a webserver. In this case, the URL supplied to %% ibrowse must have the stunnel host/port details, but that won't %% make sense to the destination webserver. This option can then be %% used to specify what should go in the <code>Host</code> header in -%% the request.</p> -%% <ul> +%% the request.</li> %% <li>The <code>stream_to</code> option can be used to have the HTTP %% response streamed to a process as messages as data arrives on the %% socket. If the calling process wishes to control the rate at which @@ -204,7 +202,11 @@ send_req(Url, Headers, Method, Body) -> %% dealing with large response bodies and/or slow links. In these %% cases, it might be hard to estimate how long a request will take to %% complete. In such cases, the client might want to timeout if no -%% data has been received on the link for a certain time interval.</li> +%% data has been received on the link for a certain time interval. +%% +%% This value is also used to close connections which are not in use for +%% the specified timeout value. +%% </li> %% %% <li> %% The <code>connect_timeout</code> option is to specify how long the @@ -220,12 +222,30 @@ send_req(Url, Headers, Method, Body) -> %% ibrowse:send_req("http://www.example.com/cgi-bin/request", [], get, [], [{connect_timeout, 100}], 1000). %% </code> %% In the above invocation, if the connection isn't established within -%% 100 milliseconds, the request will fail with +%% 100 milliseconds, the request will fail with %% <code>{error, conn_failed}</code>.<br/> %% If connection setup succeeds, the total time allowed for the %% request to complete will be 1000 milliseconds minus the time taken %% for connection setup. %% </li> +%% +%% <li> The <code>socket_options</code> option can be used to set +%% specific options on the socket. The <code>{active, true | false | once}</code> +%% and <code>{packet_type, Packet_type}</code> will be filtered out by ibrowse. </li> +%% +%% <li> The <code>headers_as_is</code> option is to enable the caller +%% to send headers exactly as specified in the request without ibrowse +%% adding some of its own. Required for some picky servers apparently. </li> +%% +%% <li>The <code>give_raw_headers</code> option is to enable the +%% caller to get access to the raw status line and raw unparsed +%% headers. Not quite sure why someone would want this, but one of my +%% users asked for it, so here it is. 
</li> +%% +%% <li> The <code>preserve_chunked_encoding</code> option enables the caller +%% to receive the raw data stream when the Transfer-Encoding of the server +%% response is Chunked. +%% </li> %% </ul> %% %% @spec send_req(Url::string(), Headers::headerList(), Method::method(), Body::body(), Options::optionList()) -> response() @@ -234,7 +254,7 @@ send_req(Url, Headers, Method, Body) -> %% {response_format,response_format()}| %% {stream_chunk_size, integer()} | %% {max_pipeline_size, integer()} | -%% {trace, boolean()} | +%% {trace, boolean()} | %% {is_ssl, boolean()} | %% {ssl_options, [SSLOpt]} | %% {pool_name, atom()} | @@ -253,13 +273,19 @@ send_req(Url, Headers, Method, Body) -> %% {host_header, string()} | %% {inactivity_timeout, integer()} | %% {connect_timeout, integer()} | -%% {transfer_encoding, {chunked, ChunkSize}} +%% {socket_options, Sock_opts} | +%% {transfer_encoding, {chunked, ChunkSize}} | +%% {headers_as_is, boolean()} | +%% {give_raw_headers, boolean()} | +%% {preserve_chunked_encoding,boolean()} %% %% stream_to() = process() | {process(), once} %% process() = pid() | atom() %% username() = string() %% password() = string() %% SSLOpt = term() +%% Sock_opts = [Sock_opt] +%% Sock_opt = term() %% ChunkSize = integer() %% srtf() = boolean() | filename() %% filename() = string() @@ -267,54 +293,76 @@ send_req(Url, Headers, Method, Body) -> send_req(Url, Headers, Method, Body, Options) -> send_req(Url, Headers, Method, Body, Options, 30000). -%% @doc Same as send_req/5. +%% @doc Same as send_req/5. %% All timeout values are in milliseconds. %% @spec send_req(Url, Headers::headerList(), Method::method(), Body::body(), Options::optionList(), Timeout) -> response() %% Timeout = integer() | infinity send_req(Url, Headers, Method, Body, Options, Timeout) -> case catch parse_url(Url) of - #url{host = Host, - port = Port, - protocol = Protocol} = Parsed_url -> - Lb_pid = case ets:lookup(ibrowse_lb, {Host, Port}) of - [] -> - get_lb_pid(Parsed_url); - [#lb_pid{pid = Lb_pid_1}] -> - Lb_pid_1 - end, - Max_sessions = get_max_sessions(Host, Port, Options), - Max_pipeline_size = get_max_pipeline_size(Host, Port, Options), - Options_1 = merge_options(Host, Port, Options), - {SSLOptions, IsSSL} = - case (Protocol == https) orelse - get_value(is_ssl, Options_1, false) of - false -> {[], false}; - true -> {get_value(ssl_options, Options_1, []), true} - end, - case ibrowse_lb:spawn_connection(Lb_pid, Parsed_url, - Max_sessions, - Max_pipeline_size, - {SSLOptions, IsSSL}) of - {ok, Conn_Pid} -> - do_send_req(Conn_Pid, Parsed_url, Headers, - Method, Body, Options_1, Timeout); - Err -> - Err - end; - Err -> - {error, {url_parsing_failed, Err}} + #url{host = Host, + port = Port, + protocol = Protocol} = Parsed_url -> + Lb_pid = case ets:lookup(ibrowse_lb, {Host, Port}) of + [] -> + get_lb_pid(Parsed_url); + [#lb_pid{pid = Lb_pid_1}] -> + Lb_pid_1 + end, + Max_sessions = get_max_sessions(Host, Port, Options), + Max_pipeline_size = get_max_pipeline_size(Host, Port, Options), + Options_1 = merge_options(Host, Port, Options), + {SSLOptions, IsSSL} = + case (Protocol == https) orelse + get_value(is_ssl, Options_1, false) of + false -> {[], false}; + true -> {get_value(ssl_options, Options_1, []), true} + end, + try_routing_request(Lb_pid, Parsed_url, + Max_sessions, + Max_pipeline_size, + {SSLOptions, IsSSL}, + Headers, Method, Body, Options_1, Timeout, 0); + Err -> + {error, {url_parsing_failed, Err}} end. 
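The options documented above compose in a single call. A hedged example of send_req/6 (the URL and values are placeholders):

    case ibrowse:send_req("http://localhost:5984/_all_dbs", [], get, [],
                          [{response_format, binary},
                           {connect_timeout, 100},
                           {socket_options, [{keepalive, true}]}],
                          1000) of
    {ok, Status, Headers, Body} ->
        {Status, Headers, Body};
    {error, Reason} ->
        {error, Reason}
    end.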
+try_routing_request(Lb_pid, Parsed_url, + Max_sessions, + Max_pipeline_size, + {SSLOptions, IsSSL}, + Headers, Method, Body, Options_1, Timeout, Try_count) when Try_count < 3 -> + case ibrowse_lb:spawn_connection(Lb_pid, Parsed_url, + Max_sessions, + Max_pipeline_size, + {SSLOptions, IsSSL}) of + {ok, Conn_Pid} -> + case do_send_req(Conn_Pid, Parsed_url, Headers, + Method, Body, Options_1, Timeout) of + {error, sel_conn_closed} -> + try_routing_request(Lb_pid, Parsed_url, + Max_sessions, + Max_pipeline_size, + {SSLOptions, IsSSL}, + Headers, Method, Body, Options_1, Timeout, Try_count + 1); + Res -> + Res + end; + Err -> + Err + end; +try_routing_request(_, _, _, _, _, _, _, _, _, _, _) -> + {error, retry_later}. + merge_options(Host, Port, Options) -> Config_options = get_config_value({options, Host, Port}, []), lists:foldl( fun({Key, Val}, Acc) -> - case lists:keysearch(Key, 1, Options) of - false -> - [{Key, Val} | Acc]; - _ -> - Acc - end + case lists:keysearch(Key, 1, Options) of + false -> + [{Key, Val} | Acc]; + _ -> + Acc + end end, Options, Config_options). get_lb_pid(Url) -> @@ -322,11 +370,27 @@ get_lb_pid(Url) -> get_max_sessions(Host, Port, Options) -> get_value(max_sessions, Options, - get_config_value({max_sessions, Host, Port}, ?DEF_MAX_SESSIONS)). + get_config_value({max_sessions, Host, Port}, + default_max_sessions())). get_max_pipeline_size(Host, Port, Options) -> get_value(max_pipeline_size, Options, - get_config_value({max_pipeline_size, Host, Port}, ?DEF_MAX_PIPELINE_SIZE)). + get_config_value({max_pipeline_size, Host, Port}, + default_max_pipeline_size())). + +default_max_sessions() -> + safe_get_env(ibrowse, default_max_sessions, ?DEF_MAX_SESSIONS). + +default_max_pipeline_size() -> + safe_get_env(ibrowse, default_max_pipeline_size, ?DEF_MAX_PIPELINE_SIZE). + +safe_get_env(App, Key, Def_val) -> + case application:get_env(App, Key) of + undefined -> + Def_val; + {ok, Val} -> + Val + end. %% @doc Deprecated. Use set_max_sessions/3 and set_max_pipeline_size/3 %% for achieving the same effect. @@ -343,7 +407,7 @@ set_dest(_Host, _Port, [H | _]) -> exit({invalid_option, H}); set_dest(_, _, []) -> ok. - + %% @doc Set the maximum number of connections allowed to a specific Host:Port. %% @spec set_max_sessions(Host::string(), Port::integer(), Max::integer()) -> ok set_max_sessions(Host, Port, Max) when is_integer(Max), Max > 0 -> @@ -356,21 +420,25 @@ set_max_pipeline_size(Host, Port, Max) when is_integer(Max), Max > 0 -> do_send_req(Conn_Pid, Parsed_url, Headers, Method, Body, Options, Timeout) -> case catch ibrowse_http_client:send_req(Conn_Pid, Parsed_url, - Headers, Method, ensure_bin(Body), - Options, Timeout) of - {'EXIT', {timeout, _}} -> - {error, req_timedout}; - {'EXIT', Reason} -> - {error, {'EXIT', Reason}}; - {ok, St_code, Headers, Body} = Ret when is_binary(Body) -> - case get_value(response_format, Options, list) of - list -> - {ok, St_code, Headers, binary_to_list(Body)}; - binary -> - Ret - end; - Ret -> - Ret + Headers, Method, ensure_bin(Body), + Options, Timeout) of + {'EXIT', {timeout, _}} -> + {error, req_timedout}; + {'EXIT', {noproc, {gen_server, call, [Conn_Pid, _, _]}}} -> + {error, sel_conn_closed}; + {error, connection_closed} -> + {error, sel_conn_closed}; + {'EXIT', Reason} -> + {error, {'EXIT', Reason}}; + {ok, St_code, Headers, Body} = Ret when is_binary(Body) -> + case get_value(response_format, Options, list) of + list -> + {ok, St_code, Headers, binary_to_list(Body)}; + binary -> + Ret + end; + Ret -> + Ret end. 
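try_routing_request/11 above retries transparently when a pooled connection closes between selection and use ({error, sel_conn_closed} from do_send_req); after three attempts the caller gets {error, retry_later}. A hedged sketch of handling that at the call site (URL and back-off are placeholders):

    case ibrowse:send_req("http://localhost:5984/", [], get) of
    {ok, _Status, _Headers, _Body} = Ok ->
        Ok;
    {error, retry_later} ->
        timer:sleep(500),    % every routing attempt hit a closing connection
        {error, retry_later};
    {error, _} = Err ->
        Err
    end.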
ensure_bin(L) when is_list(L) -> list_to_binary(L); @@ -391,12 +459,25 @@ ensure_bin({Fun, _} = Body) when is_function(Fun) -> Body. %% <b>Note:</b> It is the responsibility of the calling process to control %% pipeline size on such connections. %% +%% @spec spawn_worker_process(Url::string()) -> {ok, pid()} +spawn_worker_process(Url) -> + ibrowse_http_client:start(Url). + +%% @doc Same as spawn_worker_process/1 but takes as input a Host and Port +%% instead of a URL. %% @spec spawn_worker_process(Host::string(), Port::integer()) -> {ok, pid()} spawn_worker_process(Host, Port) -> ibrowse_http_client:start({Host, Port}). +%% @doc Same as spawn_worker_process/1 except the the calling process +%% is linked to the worker process which is spawned. +%% @spec spawn_link_worker_process(Url::string()) -> {ok, pid()} +spawn_link_worker_process(Url) -> + ibrowse_http_client:start_link(Url). + %% @doc Same as spawn_worker_process/2 except the the calling process %% is linked to the worker process which is spawned. +%% @spec spawn_link_worker_process(Host::string(), Port::integer()) -> {ok, pid()} spawn_link_worker_process(Host, Port) -> ibrowse_http_client:start_link({Host, Port}). @@ -426,30 +507,45 @@ send_req_direct(Conn_pid, Url, Headers, Method, Body, Options) -> %% returned by spawn_worker_process/2 or spawn_link_worker_process/2 send_req_direct(Conn_pid, Url, Headers, Method, Body, Options, Timeout) -> case catch parse_url(Url) of - #url{host = Host, - port = Port} = Parsed_url -> - Options_1 = merge_options(Host, Port, Options), - case do_send_req(Conn_pid, Parsed_url, Headers, Method, Body, Options_1, Timeout) of - {error, {'EXIT', {noproc, _}}} -> - {error, worker_is_dead}; - Ret -> - Ret - end; - Err -> - {error, {url_parsing_failed, Err}} + #url{host = Host, + port = Port} = Parsed_url -> + Options_1 = merge_options(Host, Port, Options), + case do_send_req(Conn_pid, Parsed_url, Headers, Method, Body, Options_1, Timeout) of + {error, {'EXIT', {noproc, _}}} -> + {error, worker_is_dead}; + Ret -> + Ret + end; + Err -> + {error, {url_parsing_failed, Err}} end. %% @doc Tell ibrowse to stream the next chunk of data to the %% caller. Should be used in conjunction with the %% <code>stream_to</code> option %% @spec stream_next(Req_id :: req_id()) -> ok | {error, unknown_req_id} -stream_next(Req_id) -> +stream_next(Req_id) -> case ets:lookup(ibrowse_stream, {req_id_pid, Req_id}) of - [] -> - {error, unknown_req_id}; - [{_, Pid}] -> - catch Pid ! {stream_next, Req_id}, - ok + [] -> + {error, unknown_req_id}; + [{_, Pid}] -> + catch Pid ! {stream_next, Req_id}, + ok + end. + +%% @doc Tell ibrowse to close the connection associated with the +%% specified stream. Should be used in conjunction with the +%% <code>stream_to</code> option. Note that all requests in progress on +%% the connection which is serving this Req_id will be aborted, and an +%% error returned. +%% @spec stream_close(Req_id :: req_id()) -> ok | {error, unknown_req_id} +stream_close(Req_id) -> + case ets:lookup(ibrowse_stream, {req_id_pid, Req_id}) of + [] -> + {error, unknown_req_id}; + [{_, Pid}] -> + catch Pid ! {stream_close, Req_id}, + ok end. %% @doc Turn tracing on for the ibrowse process @@ -462,7 +558,7 @@ trace_off() -> %% @doc Turn tracing on for all connections to the specified HTTP %% server. Host is whatever is specified as the domain name in the URL %% @spec trace_on(Host, Port) -> ok -%% Host = string() +%% Host = string() %% Port = integer() trace_on(Host, Port) -> ibrowse ! 
{trace, true, Host, Port}, @@ -481,77 +577,80 @@ all_trace_off() -> ibrowse ! all_trace_off, ok. +%% @doc Shows some internal information about load balancing. Info +%% about workers spawned using spawn_worker_process/2 or +%% spawn_link_worker_process/2 is not included. show_dest_status() -> Dests = lists:filter(fun({lb_pid, {Host, Port}, _}) when is_list(Host), - is_integer(Port) -> - true; - (_) -> - false - end, ets:tab2list(ibrowse_lb)), + is_integer(Port) -> + true; + (_) -> + false + end, ets:tab2list(ibrowse_lb)), All_ets = ets:all(), io:format("~-40.40s | ~-5.5s | ~-10.10s | ~s~n", - ["Server:port", "ETS", "Num conns", "LB Pid"]), + ["Server:port", "ETS", "Num conns", "LB Pid"]), io:format("~80.80.=s~n", [""]), lists:foreach(fun({lb_pid, {Host, Port}, Lb_pid}) -> - case lists:dropwhile( - fun(Tid) -> - ets:info(Tid, owner) /= Lb_pid - end, All_ets) of - [] -> - io:format("~40.40s | ~-5.5s | ~-5.5s | ~s~n", - [Host ++ ":" ++ integer_to_list(Port), - "", - "", - io_lib:format("~p", [Lb_pid])] - ); - [Tid | _] -> - catch ( - begin - Size = ets:info(Tid, size), - io:format("~40.40s | ~-5.5s | ~-5.5s | ~s~n", - [Host ++ ":" ++ integer_to_list(Port), - integer_to_list(Tid), - integer_to_list(Size), - io_lib:format("~p", [Lb_pid])] - ) - end - ) - end - end, Dests). - + case lists:dropwhile( + fun(Tid) -> + ets:info(Tid, owner) /= Lb_pid + end, All_ets) of + [] -> + io:format("~40.40s | ~-5.5s | ~-5.5s | ~s~n", + [Host ++ ":" ++ integer_to_list(Port), + "", + "", + io_lib:format("~p", [Lb_pid])] + ); + [Tid | _] -> + catch ( + begin + Size = ets:info(Tid, size), + io:format("~40.40s | ~-5.5s | ~-5.5s | ~s~n", + [Host ++ ":" ++ integer_to_list(Port), + io_lib:format("~p", [Tid]), + integer_to_list(Size), + io_lib:format("~p", [Lb_pid])] + ) + end + ) + end + end, Dests). + %% @doc Shows some internal information about load balancing to a %% specified Host:Port. Info about workers spawned using %% spawn_worker_process/2 or spawn_link_worker_process/2 is not %% included. 
show_dest_status(Host, Port) -> case ets:lookup(ibrowse_lb, {Host, Port}) of - [] -> - no_active_processes; - [#lb_pid{pid = Lb_pid}] -> - io:format("Load Balancer Pid : ~p~n", [Lb_pid]), - io:format("LB process msg q size : ~p~n", [(catch process_info(Lb_pid, message_queue_len))]), - case lists:dropwhile( - fun(Tid) -> - ets:info(Tid, owner) /= Lb_pid - end, ets:all()) of - [] -> - io:format("Couldn't locate ETS table for ~p~n", [Lb_pid]); - [Tid | _] -> - First = ets:first(Tid), - Last = ets:last(Tid), - Size = ets:info(Tid, size), - io:format("LB ETS table id : ~p~n", [Tid]), - io:format("Num Connections : ~p~n", [Size]), - case Size of - 0 -> - ok; - _ -> - {First_p_sz, _} = First, - {Last_p_sz, _} = Last, - io:format("Smallest pipeline : ~1000.p~n", [First_p_sz]), - io:format("Largest pipeline : ~1000.p~n", [Last_p_sz]) - end - end + [] -> + no_active_processes; + [#lb_pid{pid = Lb_pid}] -> + io:format("Load Balancer Pid : ~p~n", [Lb_pid]), + io:format("LB process msg q size : ~p~n", [(catch process_info(Lb_pid, message_queue_len))]), + case lists:dropwhile( + fun(Tid) -> + ets:info(Tid, owner) /= Lb_pid + end, ets:all()) of + [] -> + io:format("Couldn't locate ETS table for ~p~n", [Lb_pid]); + [Tid | _] -> + First = ets:first(Tid), + Last = ets:last(Tid), + Size = ets:info(Tid, size), + io:format("LB ETS table id : ~p~n", [Tid]), + io:format("Num Connections : ~p~n", [Size]), + case Size of + 0 -> + ok; + _ -> + {First_p_sz, _} = First, + {Last_p_sz, _} = Last, + io:format("Smallest pipeline : ~1000.p~n", [First_p_sz]), + io:format("Largest pipeline : ~1000.p~n", [Last_p_sz]) + end + end end. %% @doc Clear current configuration for ibrowse and load from the file @@ -592,40 +691,40 @@ init(_) -> import_config() -> case code:priv_dir(ibrowse) of - {error, _} = Err -> - Err; - PrivDir -> - Filename = filename:join(PrivDir, "ibrowse.conf"), - import_config(Filename) + {error, _} = Err -> + Err; + PrivDir -> + Filename = filename:join(PrivDir, "ibrowse.conf"), + import_config(Filename) end. import_config(Filename) -> case file:consult(Filename) of - {ok, Terms} -> - ets:delete_all_objects(ibrowse_conf), - Fun = fun({dest, Host, Port, MaxSess, MaxPipe, Options}) - when is_list(Host), is_integer(Port), - is_integer(MaxSess), MaxSess > 0, - is_integer(MaxPipe), MaxPipe > 0, is_list(Options) -> - I = [{{max_sessions, Host, Port}, MaxSess}, - {{max_pipeline_size, Host, Port}, MaxPipe}, - {{options, Host, Port}, Options}], - lists:foreach( - fun({X, Y}) -> - ets:insert(ibrowse_conf, - #ibrowse_conf{key = X, - value = Y}) - end, I); - ({K, V}) -> - ets:insert(ibrowse_conf, - #ibrowse_conf{key = K, - value = V}); - (X) -> - io:format("Skipping unrecognised term: ~p~n", [X]) - end, - lists:foreach(Fun, Terms); - Err -> - Err + {ok, Terms} -> + ets:delete_all_objects(ibrowse_conf), + Fun = fun({dest, Host, Port, MaxSess, MaxPipe, Options}) + when is_list(Host), is_integer(Port), + is_integer(MaxSess), MaxSess > 0, + is_integer(MaxPipe), MaxPipe > 0, is_list(Options) -> + I = [{{max_sessions, Host, Port}, MaxSess}, + {{max_pipeline_size, Host, Port}, MaxPipe}, + {{options, Host, Port}, Options}], + lists:foreach( + fun({X, Y}) -> + ets:insert(ibrowse_conf, + #ibrowse_conf{key = X, + value = Y}) + end, I); + ({K, V}) -> + ets:insert(ibrowse_conf, + #ibrowse_conf{key = K, + value = V}); + (X) -> + io:format("Skipping unrecognised term: ~p~n", [X]) + end, + lists:foreach(Fun, Terms); + Err -> + Err end. 
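import_config/1 above accepts two kinds of terms from ibrowse.conf: per-destination tuples and bare {Key, Value} pairs. A hedged sample file (host, limits and options are placeholders):

    %% ibrowse.conf
    {dest, "localhost", 5984, 10, 10, [{is_ssl, false}]}.
    {download_dir, "/tmp"}.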
%% @doc Internal export @@ -636,10 +735,10 @@ get_config_value(Key) -> %% @doc Internal export get_config_value(Key, DefVal) -> case ets:lookup(ibrowse_conf, Key) of - [] -> - DefVal; - [#ibrowse_conf{value = V}] -> - V + [] -> + DefVal; + [#ibrowse_conf{value = V}] -> + V end. set_config_value(Key, Val) -> @@ -660,6 +759,10 @@ handle_call({get_lb_pid, #url{host = Host, port = Port} = Url}, _From, State) -> handle_call(stop, _From, State) -> do_trace("IBROWSE shutting down~n", []), + ets:foldl(fun(#lb_pid{pid = Pid}, Acc) -> + ibrowse_lb:stop(Pid), + Acc + end, [], ibrowse_lb), {stop, normal, ok, State}; handle_call({set_config_value, Key, Val}, _From, State) -> @@ -700,36 +803,36 @@ handle_info(all_trace_off, State) -> Mspec = [{{ibrowse_conf,{trace,'$1','$2'},true},[],[{{'$1','$2'}}]}], Trace_on_dests = ets:select(ibrowse_conf, Mspec), Fun = fun(#lb_pid{host_port = {H, P}, pid = Pid}, _) -> - case lists:member({H, P}, Trace_on_dests) of - false -> - ok; - true -> - catch Pid ! {trace, false} - end; - (_, Acc) -> - Acc - end, + case lists:member({H, P}, Trace_on_dests) of + false -> + ok; + true -> + catch Pid ! {trace, false} + end; + (_, Acc) -> + Acc + end, ets:foldl(Fun, undefined, ibrowse_lb), ets:select_delete(ibrowse_conf, [{{ibrowse_conf,{trace,'$1','$2'},true},[],['true']}]), {noreply, State}; - + handle_info({trace, Bool}, State) -> put(my_trace_flag, Bool), {noreply, State}; handle_info({trace, Bool, Host, Port}, State) -> Fun = fun(#lb_pid{host_port = {H, P}, pid = Pid}, _) - when H == Host, - P == Port -> - catch Pid ! {trace, Bool}; - (_, Acc) -> - Acc - end, + when H == Host, + P == Port -> + catch Pid ! {trace, Bool}; + (_, Acc) -> + Acc + end, ets:foldl(Fun, undefined, ibrowse_lb), ets:insert(ibrowse_conf, #ibrowse_conf{key = {trace, Host, Port}, - value = Bool}), + value = Bool}), {noreply, State}; - + handle_info(_Info, State) -> {noreply, State}. diff --git a/src/ibrowse/ibrowse_app.erl b/src/ibrowse/ibrowse_app.erl index 8c83e8f1..d3a0f7bb 100644 --- a/src/ibrowse/ibrowse_app.erl +++ b/src/ibrowse/ibrowse_app.erl @@ -1,12 +1,11 @@ %%%------------------------------------------------------------------- %%% File : ibrowse_app.erl %%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> -%%% Description : +%%% Description : %%% %%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> %%%------------------------------------------------------------------- -module(ibrowse_app). --vsn('$Id: ibrowse_app.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). -behaviour(application). %%-------------------------------------------------------------------- @@ -42,11 +41,11 @@ %% Func: start/2 %% Returns: {ok, Pid} | %% {ok, Pid, State} | -%% {error, Reason} +%% {error, Reason} %%-------------------------------------------------------------------- start(_Type, _StartArgs) -> case ibrowse_sup:start_link() of - {ok, Pid} -> + {ok, Pid} -> {ok, Pid}; Error -> Error @@ -54,7 +53,7 @@ start(_Type, _StartArgs) -> %%-------------------------------------------------------------------- %% Func: stop/1 -%% Returns: any +%% Returns: any %%-------------------------------------------------------------------- stop(_State) -> ok. 
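The arity-1 spawn functions added earlier in this file let a caller hold a private connection outside the load balancer; as documented above, such workers do not show up in show_dest_status/0 either. A hedged sketch (the URL is a placeholder):

    {ok, Worker} = ibrowse:spawn_link_worker_process("http://localhost:5984"),
    Res = ibrowse:send_req_direct(Worker, "http://localhost:5984/", [], get),
    ibrowse:stop_worker_process(Worker),
    Res.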
diff --git a/src/ibrowse/ibrowse_http_client.erl b/src/ibrowse/ibrowse_http_client.erl index 5f62f705..5ff323cd 100644 --- a/src/ibrowse/ibrowse_http_client.erl +++ b/src/ibrowse/ibrowse_http_client.erl @@ -6,8 +6,6 @@ %%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> %%%------------------------------------------------------------------- -module(ibrowse_http_client). --vsn('$Id: ibrowse_http_client.erl,v 1.19 2009/07/01 22:43:19 chandrusf Exp $ '). - -behaviour(gen_server). %%-------------------------------------------------------------------- %% Include files @@ -16,11 +14,11 @@ %%-------------------------------------------------------------------- %% External exports -export([ - start_link/1, - start/1, - stop/1, - send_req/7 - ]). + start_link/1, + start/1, + stop/1, + send_req/7 + ]). -ifdef(debug). -compile(export_all). @@ -28,41 +26,47 @@ %% gen_server callbacks -export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3 - ]). + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 + ]). -include("ibrowse.hrl"). --record(state, {host, port, - use_proxy = false, proxy_auth_digest, - ssl_options = [], is_ssl = false, socket, - reqs=queue:new(), cur_req, status=idle, http_status_code, - reply_buffer = <<>>, rep_buf_size=0, streamed_size = 0, - recvd_headers=[], - is_closing, send_timer, content_length, - deleted_crlf = false, transfer_encoding, - chunk_size, chunk_size_buffer = <<>>, recvd_chunk_size, - lb_ets_tid, cur_pipeline_size = 0, prev_req_id - }). +-record(state, {host, port, connect_timeout, + inactivity_timer_ref, + use_proxy = false, proxy_auth_digest, + ssl_options = [], is_ssl = false, socket, + proxy_tunnel_setup = false, + tunnel_setup_queue = [], + reqs=queue:new(), cur_req, status=idle, http_status_code, + reply_buffer = <<>>, rep_buf_size=0, streamed_size = 0, + recvd_headers=[], + status_line, raw_headers, + is_closing, send_timer, content_length, + deleted_crlf = false, transfer_encoding, + chunk_size, chunk_size_buffer = <<>>, + recvd_chunk_size, interim_reply_sent = false, + lb_ets_tid, cur_pipeline_size = 0, prev_req_id + }). -record(request, {url, method, options, from, - stream_to, caller_controls_socket = false, - req_id, - stream_chunk_size, - save_response_to_file = false, - tmp_file_name, tmp_file_fd, - response_format}). + stream_to, caller_controls_socket = false, + caller_socket_options = [], + req_id, + stream_chunk_size, + save_response_to_file = false, + tmp_file_name, tmp_file_fd, preserve_chunked_encoding, + response_format}). -import(ibrowse_lib, [ - get_value/2, - get_value/3, - do_trace/2 - ]). + get_value/2, + get_value/3, + do_trace/2 + ]). -define(DEFAULT_STREAM_CHUNK_SIZE, 1024*1024). @@ -80,7 +84,13 @@ start_link(Args) -> gen_server:start_link(?MODULE, Args, []). stop(Conn_pid) -> - gen_server:call(Conn_pid, stop). + case catch gen_server:call(Conn_pid, stop) of + {'EXIT', {timeout, _}} -> + exit(Conn_pid, kill), + ok; + _ -> + ok + end. 
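Note how stop/1 above now tolerates a wedged connection: if the synchronous stop times out, the worker is killed outright, so shutdown can be treated as fire-and-forget. Hedged sketch (Worker as returned by the spawn functions shown earlier):

    ok = ibrowse_http_client:stop(Worker).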
send_req(Conn_Pid, Url, Headers, Method, Body, Options, Timeout) -> gen_server:call( @@ -101,26 +111,23 @@ send_req(Conn_Pid, Url, Headers, Method, Body, Options, Timeout) -> %%-------------------------------------------------------------------- init({Lb_Tid, #url{host = Host, port = Port}, {SSLOptions, Is_ssl}}) -> State = #state{host = Host, - port = Port, - ssl_options = SSLOptions, - is_ssl = Is_ssl, - lb_ets_tid = Lb_Tid}, + port = Port, + ssl_options = SSLOptions, + is_ssl = Is_ssl, + lb_ets_tid = Lb_Tid}, put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), {ok, State}; +init(Url) when is_list(Url) -> + case catch ibrowse_lib:parse_url(Url) of + #url{protocol = Protocol} = Url_rec -> + init({undefined, Url_rec, {[], Protocol == https}}); + {'EXIT', _} -> + {error, invalid_url} + end; init({Host, Port}) -> State = #state{host = Host, - port = Port}, - put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), - put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), - {ok, State}; -init(#url{host=Host, port=Port, protocol=Protocol}) -> - State = #state{ - host = Host, - port = Port, - is_ssl = (Protocol == https), - ssl_options = [{ssl_imp, new}] - }, + port = Port}, put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), {ok, State}. @@ -141,13 +148,13 @@ handle_call({send_req, _}, _From, #state{is_closing = true} = State) -> {reply, {error, connection_closing}, State}; handle_call({send_req, {Url, Headers, Method, Body, Options, Timeout}}, - From, State) -> + From, State) -> send_req_1(From, Url, Headers, Method, Body, Options, Timeout, State); handle_call(stop, _From, State) -> do_close(State), do_error_reply(State, closing_on_request), - {stop, normal, ok, State#state{socket=undefined}}; + {stop, normal, ok, State}; handle_call(Request, _From, State) -> Reply = {unknown_request, Request}, @@ -171,21 +178,28 @@ handle_cast(_Msg, State) -> %% {stop, Reason, State} (terminate/2 is called) %%-------------------------------------------------------------------- handle_info({tcp, _Sock, Data}, #state{status = Status} = State) -> +%% io:format("Recvd data: ~p~n", [Data]), do_trace("Data recvd in state: ~p. Size: ~p. 
~p~n~n", [Status, size(Data), Data]), handle_sock_data(Data, State); handle_info({ssl, _Sock, Data}, State) -> handle_sock_data(Data, State); handle_info({stream_next, Req_id}, #state{socket = Socket, - is_ssl = Is_ssl, - cur_req = #request{req_id = Req_id}} = State) -> - do_setopts(Socket, [{active, once}], Is_ssl), + cur_req = #request{req_id = Req_id}} = State) -> + %% io:format("Client process set {active, once}~n", []), + do_setopts(Socket, [{active, once}], State), {noreply, State}; handle_info({stream_next, _Req_id}, State) -> {noreply, State}; -handle_info({tcp_closed, _Sock}, State) -> +handle_info({stream_close, _Req_id}, State) -> + shutting_down(State), + do_close(State), + do_error_reply(State, closing_on_request), + {stop, normal, ok, State}; + +handle_info({tcp_closed, _Sock}, State) -> do_trace("TCP connection closed by peer!~n", []), handle_sock_closed(State), {stop, normal, State}; @@ -195,25 +209,26 @@ handle_info({ssl_closed, _Sock}, State) -> {stop, normal, State}; handle_info({tcp_error, _Sock}, State) -> - io:format("Error on connection to ~1000.p:~1000.p~n", [State#state.host, State#state.port]), + do_trace("Error on connection to ~1000.p:~1000.p~n", [State#state.host, State#state.port]), handle_sock_closed(State), {stop, normal, State}; handle_info({ssl_error, _Sock}, State) -> - io:format("Error on SSL connection to ~1000.p:~1000.p~n", [State#state.host, State#state.port]), + do_trace("Error on SSL connection to ~1000.p:~1000.p~n", [State#state.host, State#state.port]), handle_sock_closed(State), {stop, normal, State}; handle_info({req_timedout, From}, State) -> - case lists:keysearch(From, #request.from, queue:to_list(State#state.reqs)) of - false -> - {noreply, State}; - {value, _} -> - shutting_down(State), - do_error_reply(State, req_timedout), - {stop, normal, State} + case lists:keymember(From, #request.from, queue:to_list(State#state.reqs)) of + false -> + {noreply, State}; + true -> + shutting_down(State), + do_error_reply(State, req_timedout), + {stop, normal, State} end; handle_info(timeout, State) -> + do_trace("Inactivity timeout triggered. Shutting down connection~n", []), shutting_down(State), do_error_reply(State, req_timedout), {stop, normal, State}; @@ -224,7 +239,7 @@ handle_info({trace, Bool}, State) -> handle_info(Info, State) -> io:format("Unknown message recvd for ~1000.p:~1000.p -> ~p~n", - [State#state.host, State#state.port, Info]), + [State#state.host, State#state.port, Info]), io:format("Recvd unknown message ~p when in state: ~p~n", [Info, State]), {noreply, State}. @@ -234,7 +249,8 @@ handle_info(Info, State) -> %% Returns: any (ignored by gen_server) %%-------------------------------------------------------------------- terminate(_Reason, State) -> - do_close(State). + do_close(State), + ok. 
%%-------------------------------------------------------------------- %% Func: code_change/3 @@ -260,133 +276,160 @@ handle_sock_data(Data, #state{status=idle}=State) -> handle_sock_data(Data, #state{status = get_header}=State) -> case parse_response(Data, State) of - {error, _Reason} -> - shutting_down(State), - {stop, normal, State}; - stop -> - shutting_down(State), - {stop, normal, State}; - State_1 -> - active_once(State_1), - {noreply, State_1, get_inac_timeout(State_1)} + {error, _Reason} -> + shutting_down(State), + {stop, normal, State}; + State_1 -> + active_once(State_1), + State_2 = set_inac_timer(State_1), + {noreply, State_2} end; handle_sock_data(Data, #state{status = get_body, - content_length = CL, - http_status_code = StatCode, - recvd_headers = Headers, - chunk_size = CSz} = State) -> + socket = Socket, + content_length = CL, + http_status_code = StatCode, + recvd_headers = Headers, + chunk_size = CSz} = State) -> case (CL == undefined) and (CSz == undefined) of - true -> - case accumulate_response(Data, State) of - {error, Reason} -> - shutting_down(State), - fail_pipelined_requests(State, - {error, {Reason, {stat_code, StatCode}, Headers}}), - {stop, normal, State}; - State_1 -> - active_once(State_1), - {noreply, State_1, get_inac_timeout(State_1)} - end; - _ -> - case parse_11_response(Data, State) of - {error, Reason} -> - shutting_down(State), - fail_pipelined_requests(State, - {error, {Reason, {stat_code, StatCode}, Headers}}), - {stop, normal, State}; - stop -> - shutting_down(State), - {stop, normal, State}; - State_1 -> - active_once(State_1), - {noreply, State_1, get_inac_timeout(State_1)} - end + true -> + case accumulate_response(Data, State) of + {error, Reason} -> + shutting_down(State), + fail_pipelined_requests(State, + {error, {Reason, {stat_code, StatCode}, Headers}}), + {stop, normal, State}; + State_1 -> + active_once(State_1), + State_2 = set_inac_timer(State_1), + {noreply, State_2} + end; + _ -> + case parse_11_response(Data, State) of + {error, Reason} -> + shutting_down(State), + fail_pipelined_requests(State, + {error, {Reason, {stat_code, StatCode}, Headers}}), + {stop, normal, State}; + #state{cur_req = #request{caller_controls_socket = Ccs}, + interim_reply_sent = Irs} = State_1 -> + case Irs of + true -> + active_once(State_1); + false when Ccs == true -> + do_setopts(Socket, [{active, once}], State); + false -> + active_once(State_1) + end, + State_2 = State_1#state{interim_reply_sent = false}, + State_3 = set_inac_timer(State_2), + {noreply, State_3}; + State_1 -> + active_once(State_1), + State_2 = set_inac_timer(State_1), + {noreply, State_2} + end end. 
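When the caller controls the socket, the clauses above re-arm {active, once} themselves only as long as no interim reply has gone out; once data has been delivered to the consumer, the socket stays passive until ibrowse:stream_next/1 arrives, which is how a slow consumer applies back-pressure to the TCP window. To abandon a response mid-stream, the new stream_close/1 (exported earlier in ibrowse.erl) aborts the whole connection; note that its handler, shown above, errors out everything pipelined on that connection with closing_on_request:

    %% ReqId as obtained in the previous streaming sketch
    ok = ibrowse:stream_close(ReqId).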
accumulate_response(Data, - #state{ - cur_req = #request{save_response_to_file = true, - tmp_file_fd = undefined} = CurReq, - http_status_code=[$2 | _]}=State) -> - TmpFilename = make_tmp_filename(), + #state{ + cur_req = #request{save_response_to_file = Srtf, + tmp_file_fd = undefined} = CurReq, + http_status_code=[$2 | _]}=State) when Srtf /= false -> + TmpFilename = make_tmp_filename(Srtf), case file:open(TmpFilename, [write, delayed_write, raw]) of - {ok, Fd} -> - accumulate_response(Data, State#state{ - cur_req = CurReq#request{ - tmp_file_fd = Fd, - tmp_file_name = TmpFilename}}); - {error, Reason} -> - {error, {file_open_error, Reason}} + {ok, Fd} -> + accumulate_response(Data, State#state{ + cur_req = CurReq#request{ + tmp_file_fd = Fd, + tmp_file_name = TmpFilename}}); + {error, Reason} -> + {error, {file_open_error, Reason}} end; -accumulate_response(Data, #state{cur_req = #request{save_response_to_file = true, - tmp_file_fd = Fd}, - transfer_encoding=chunked, - reply_buffer = Reply_buf, - http_status_code=[$2 | _] - } = State) -> +accumulate_response(Data, #state{cur_req = #request{save_response_to_file = Srtf, + tmp_file_fd = Fd}, + transfer_encoding=chunked, + reply_buffer = Reply_buf, + http_status_code=[$2 | _] + } = State) when Srtf /= false -> case file:write(Fd, [Reply_buf, Data]) of - ok -> - State#state{reply_buffer = <<>>}; - {error, Reason} -> - {error, {file_write_error, Reason}} + ok -> + State#state{reply_buffer = <<>>}; + {error, Reason} -> + {error, {file_write_error, Reason}} end; -accumulate_response(Data, #state{cur_req = #request{save_response_to_file = true, - tmp_file_fd = Fd}, - reply_buffer = RepBuf, - http_status_code=[$2 | _] - } = State) -> +accumulate_response(Data, #state{cur_req = #request{save_response_to_file = Srtf, + tmp_file_fd = Fd}, + reply_buffer = RepBuf, + http_status_code=[$2 | _] + } = State) when Srtf /= false -> case file:write(Fd, [RepBuf, Data]) of - ok -> - State#state{reply_buffer = <<>>}; - {error, Reason} -> - {error, {file_write_error, Reason}} + ok -> + State#state{reply_buffer = <<>>}; + {error, Reason} -> + {error, {file_write_error, Reason}} end; -accumulate_response(<<>>, State) -> - State; -accumulate_response(Data, #state{reply_buffer = RepBuf, - rep_buf_size = RepBufSize, - streamed_size = Streamed_size, - cur_req = CurReq}=State) -> - #request{stream_to=StreamTo, req_id=ReqId, - stream_chunk_size = Stream_chunk_size, - response_format = Response_format, - caller_controls_socket = Caller_controls_socket} = CurReq, - RepBuf_1 = concat_binary([RepBuf, Data]), +%% accumulate_response(<<>>, #state{cur_req = #request{caller_controls_socket = Ccs}, +%% socket = Socket} = State) -> +%% case Ccs of +%% true -> +%% do_setopts(Socket, [{active, once}], State); +%% false -> +%% ok +%% end, +%% State; +accumulate_response(Data, #state{reply_buffer = RepBuf, + rep_buf_size = RepBufSize, + streamed_size = Streamed_size, + cur_req = CurReq}=State) -> + #request{stream_to = StreamTo, + req_id = ReqId, + stream_chunk_size = Stream_chunk_size, + response_format = Response_format, + caller_controls_socket = Caller_controls_socket} = CurReq, + RepBuf_1 = <<RepBuf/binary, Data/binary>>, New_data_size = RepBufSize - Streamed_size, case StreamTo of - undefined -> - State#state{reply_buffer = RepBuf_1}; - _ when Caller_controls_socket == true -> - do_interim_reply(StreamTo, Response_format, ReqId, RepBuf_1), - State#state{reply_buffer = <<>>, - streamed_size = Streamed_size + size(RepBuf_1)}; - _ when New_data_size >= Stream_chunk_size -> - 
{Stream_chunk, Rem_data} = split_binary(RepBuf_1, Stream_chunk_size), - do_interim_reply(StreamTo, Response_format, ReqId, Stream_chunk), - accumulate_response( - Rem_data, - State#state{ - reply_buffer = <<>>, - streamed_size = Streamed_size + Stream_chunk_size}); - _ -> - State#state{reply_buffer = RepBuf_1} + undefined -> + State#state{reply_buffer = RepBuf_1}; + _ when Caller_controls_socket == true -> + do_interim_reply(StreamTo, Response_format, ReqId, RepBuf_1), + State#state{reply_buffer = <<>>, + interim_reply_sent = true, + streamed_size = Streamed_size + size(RepBuf_1)}; + _ when New_data_size >= Stream_chunk_size -> + {Stream_chunk, Rem_data} = split_binary(RepBuf_1, Stream_chunk_size), + do_interim_reply(StreamTo, Response_format, ReqId, Stream_chunk), + State_1 = State#state{ + reply_buffer = <<>>, + interim_reply_sent = true, + streamed_size = Streamed_size + Stream_chunk_size}, + case Rem_data of + <<>> -> + State_1; + _ -> + accumulate_response(Rem_data, State_1) + end; + _ -> + State#state{reply_buffer = RepBuf_1} end. -make_tmp_filename() -> +make_tmp_filename(true) -> DownloadDir = ibrowse:get_config_value(download_dir, filename:absname("./")), {A,B,C} = now(), filename:join([DownloadDir, - "ibrowse_tmp_file_"++ - integer_to_list(A) ++ - integer_to_list(B) ++ - integer_to_list(C)]). + "ibrowse_tmp_file_"++ + integer_to_list(A) ++ + integer_to_list(B) ++ + integer_to_list(C)]); +make_tmp_filename(File) when is_list(File) -> + File. %%-------------------------------------------------------------------- %% Handles the case when the server closes the socket %%-------------------------------------------------------------------- -handle_sock_closed(#state{status=get_header}=State) -> +handle_sock_closed(#state{status=get_header} = State) -> shutting_down(State), do_error_reply(State, connection_closed); @@ -397,40 +440,73 @@ handle_sock_closed(#state{cur_req=undefined} = State) -> %% Connection-Close header and has closed the socket to indicate end %% of response. There maybe requests pipelined which need a response. 
handle_sock_closed(#state{reply_buffer = Buf, reqs = Reqs, http_status_code = SC, - is_closing = IsClosing, - cur_req = #request{tmp_file_name=TmpFilename, - tmp_file_fd=Fd} = CurReq, - status = get_body, recvd_headers = Headers}=State) -> + is_closing = IsClosing, + cur_req = #request{tmp_file_name=TmpFilename, + tmp_file_fd=Fd} = CurReq, + status = get_body, + recvd_headers = Headers, + status_line = Status_line, + raw_headers = Raw_headers + }=State) -> #request{from=From, stream_to=StreamTo, req_id=ReqId, - response_format = Resp_format} = CurReq, + response_format = Resp_format, + options = Options} = CurReq, case IsClosing of - true -> - {_, Reqs_1} = queue:out(Reqs), - case TmpFilename of - undefined -> - do_reply(State, From, StreamTo, ReqId, Resp_format, - {ok, SC, Headers, Buf}); - _ -> - file:close(Fd), - do_reply(State, From, StreamTo, ReqId, Resp_format, - {ok, SC, Headers, {file, TmpFilename}}) - end, - do_error_reply(State#state{reqs = Reqs_1}, connection_closed), - State; - _ -> - do_error_reply(State, connection_closed), - State + true -> + {_, Reqs_1} = queue:out(Reqs), + Body = case TmpFilename of + undefined -> + Buf; + _ -> + file:close(Fd), + {file, TmpFilename} + end, + Reply = case get_value(give_raw_headers, Options, false) of + true -> + {ok, Status_line, Raw_headers, Body}; + false -> + {ok, SC, Headers, Buf} + end, + do_reply(State, From, StreamTo, ReqId, Resp_format, Reply), + do_error_reply(State#state{reqs = Reqs_1}, connection_closed), + State; + _ -> + do_error_reply(State, connection_closed), + State end. -do_connect(Host, Port, _Options, #state{is_ssl=true, ssl_options=SSLOptions}, Timeout) -> +do_connect(Host, Port, Options, #state{is_ssl = true, + use_proxy = false, + ssl_options = SSLOptions}, + Timeout) -> + Caller_socket_options = get_value(socket_options, Options, []), + Other_sock_options = filter_sock_options(SSLOptions ++ Caller_socket_options), ssl:connect(Host, Port, - [binary, {nodelay, true}, {active, false} | SSLOptions], - Timeout); -do_connect(Host, Port, _Options, _State, Timeout) -> - gen_tcp:connect(Host, Port, - [binary, {nodelay, true}, {active, false}], - Timeout). - + [binary, {nodelay, true}, {active, false} | Other_sock_options], + Timeout); +do_connect(Host, Port, Options, _State, Timeout) -> + Caller_socket_options = get_value(socket_options, Options, []), + Other_sock_options = filter_sock_options(Caller_socket_options), + gen_tcp:connect(Host, to_integer(Port), + [binary, {nodelay, true}, {active, false} | Other_sock_options], + Timeout). + +%% We don't want the caller to specify certain options +filter_sock_options(Opts) -> + lists:filter(fun({active, _}) -> + false; + ({packet, _}) -> + false; + (list) -> + false; + (_) -> + true + end, Opts). + +do_send(Req, #state{socket = Sock, + is_ssl = true, + use_proxy = true, + proxy_tunnel_setup = Pts}) when Pts /= done -> gen_tcp:send(Sock, Req); do_send(Req, #state{socket = Sock, is_ssl = true}) -> ssl:send(Sock, Req); do_send(Req, #state{socket = Sock, is_ssl = false}) -> gen_tcp:send(Sock, Req). @@ -439,274 +515,375 @@ do_send(Req, #state{socket = Sock, is_ssl = false}) -> gen_tcp:send(Sock, Req). 
%% {fun_arity_0} | %% {fun_arity_1, term()} %% error() = term() -do_send_body(Source, State) when is_function(Source) -> - do_send_body({Source}, State); -do_send_body({Source}, State) when is_function(Source) -> - do_send_body1(Source, Source(), State); -do_send_body({Source, Source_state}, State) when is_function(Source) -> - do_send_body1(Source, Source(Source_state), State); -do_send_body(Body, State) -> +do_send_body(Source, State, TE) when is_function(Source) -> + do_send_body({Source}, State, TE); +do_send_body({Source}, State, TE) when is_function(Source) -> + do_send_body1(Source, Source(), State, TE); +do_send_body({Source, Source_state}, State, TE) when is_function(Source) -> + do_send_body1(Source, Source(Source_state), State, TE); +do_send_body(Body, State, _TE) -> do_send(Body, State). -do_send_body1(Source, Resp, State) -> +do_send_body1(Source, Resp, State, TE) -> case Resp of - {ok, Data} -> - do_send(Data, State), - do_send_body({Source}, State); - {ok, Data, New_source_state} -> - do_send(Data, State), - do_send_body({Source, New_source_state}, State); - eof -> - ok; - Err -> - Err + {ok, Data} -> + do_send(maybe_chunked_encode(Data, TE), State), + do_send_body({Source}, State, TE); + {ok, Data, New_source_state} -> + do_send(maybe_chunked_encode(Data, TE), State), + do_send_body({Source, New_source_state}, State, TE); + eof when TE == true -> + do_send(<<"0\r\n\r\n">>, State), + ok; + eof -> + ok; + Err -> + Err end. +maybe_chunked_encode(Data, false) -> + Data; +maybe_chunked_encode(Data, true) -> + [ibrowse_lib:dec2hex(byte_size(to_binary(Data))), "\r\n", Data, "\r\n"]. + do_close(#state{socket = undefined}) -> ok; -do_close(#state{socket = Sock, is_ssl = true}) -> ssl:close(Sock); -do_close(#state{socket = Sock, is_ssl = false}) -> gen_tcp:close(Sock). +do_close(#state{socket = Sock, + is_ssl = true, + use_proxy = true, + proxy_tunnel_setup = Pts + }) when Pts /= done -> catch gen_tcp:close(Sock); +do_close(#state{socket = Sock, is_ssl = true}) -> catch ssl:close(Sock); +do_close(#state{socket = Sock, is_ssl = false}) -> catch gen_tcp:close(Sock). active_once(#state{cur_req = #request{caller_controls_socket = true}}) -> ok; -active_once(#state{socket = Socket, is_ssl = Is_ssl}) -> - do_setopts(Socket, [{active, once}], Is_ssl). +active_once(#state{socket = Socket} = State) -> + do_setopts(Socket, [{active, once}], State). -do_setopts(Sock, Opts, true) -> ssl:setopts(Sock, Opts); -do_setopts(Sock, Opts, false) -> inet:setopts(Sock, Opts). +do_setopts(_Sock, [], _) -> ok; +do_setopts(Sock, Opts, #state{is_ssl = true, + use_proxy = true, + proxy_tunnel_setup = Pts} + ) when Pts /= done -> inet:setopts(Sock, Opts); +do_setopts(Sock, Opts, #state{is_ssl = true}) -> ssl:setopts(Sock, Opts); +do_setopts(Sock, Opts, _) -> inet:setopts(Sock, Opts). check_ssl_options(Options, State) -> case get_value(is_ssl, Options, false) of - false -> - State; - true -> - State#state{is_ssl=true, ssl_options=get_value(ssl_options, Options)} + false -> + State; + true -> + State#state{is_ssl=true, ssl_options=get_value(ssl_options, Options)} end. 
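For illustration (outside the patch), the wire framing that maybe_chunked_encode/2 above applies when the caller asked for a chunked request body: each chunk is its length in hex, CRLF, the data, CRLF, and eof is terminated by the <<"0\r\n\r\n">> seen in do_send_body/3. In the shell, with a hypothetical Frame fun of the same shape:

1> Frame = fun(Data) -> [erlang:integer_to_list(byte_size(Data), 16), "\r\n", Data, "\r\n"] end.
2> iolist_to_binary([Frame(<<"hello">>), Frame(<<"world">>), <<"0\r\n\r\n">>]).
<<"5\r\nhello\r\n5\r\nworld\r\n0\r\n\r\n">>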
send_req_1(From, - #url{host = Host, - port = Port} = Url, - Headers, Method, Body, Options, Timeout, - #state{socket = undefined} = State) -> + #url{host = Host, + port = Port} = Url, + Headers, Method, Body, Options, Timeout, + #state{socket = undefined} = State) -> {Host_1, Port_1, State_1} = - case get_value(proxy_host, Options, false) of - false -> - {Host, Port, State}; - PHost -> - ProxyUser = get_value(proxy_user, Options, []), - ProxyPassword = get_value(proxy_password, Options, []), - Digest = http_auth_digest(ProxyUser, ProxyPassword), - {PHost, get_value(proxy_port, Options, 80), - State#state{use_proxy = true, - proxy_auth_digest = Digest}} - end, + case get_value(proxy_host, Options, false) of + false -> + {Host, Port, State}; + PHost -> + ProxyUser = get_value(proxy_user, Options, []), + ProxyPassword = get_value(proxy_password, Options, []), + Digest = http_auth_digest(ProxyUser, ProxyPassword), + {PHost, get_value(proxy_port, Options, 80), + State#state{use_proxy = true, + proxy_auth_digest = Digest}} + end, State_2 = check_ssl_options(Options, State_1), do_trace("Connecting...~n", []), - Start_ts = now(), Conn_timeout = get_value(connect_timeout, Options, Timeout), case do_connect(Host_1, Port_1, Options, State_2, Conn_timeout) of - {ok, Sock} -> - do_trace("Connected!~n", []), - End_ts = now(), - Timeout_1 = case Timeout of - infinity -> - infinity; - _ -> - Timeout - trunc(round(timer:now_diff(End_ts, Start_ts) / 1000)) - end, - State_3 = State_2#state{socket = Sock}, - send_req_1(From, Url, Headers, Method, Body, Options, Timeout_1, State_3); - Err -> - shutting_down(State_2), - do_trace("Error connecting. Reason: ~1000.p~n", [Err]), - gen_server:reply(From, {error, conn_failed}), - {stop, normal, State_2} + {ok, Sock} -> + do_trace("Connected! Socket: ~1000.p~n", [Sock]), + State_3 = State_2#state{socket = Sock, + connect_timeout = Conn_timeout}, + send_req_1(From, Url, Headers, Method, Body, Options, Timeout, State_3); + Err -> + shutting_down(State_2), + do_trace("Error connecting. Reason: ~1000.p~n", [Err]), + gen_server:reply(From, {error, {conn_failed, Err}}), + {stop, normal, State_2} end; + +%% Send a CONNECT request. 
+%% Wait for 200 OK +%% Upgrade to SSL connection +%% Then send request + +send_req_1(From, + #url{ + host = Server_host, + port = Server_port + } = Url, + Headers, Method, Body, Options, Timeout, + #state{ + proxy_tunnel_setup = false, + use_proxy = true, + is_ssl = true} = State) -> + NewReq = #request{ + method = connect, + preserve_chunked_encoding = get_value(preserve_chunked_encoding, Options, false), + options = Options + }, + State_1 = State#state{reqs=queue:in(NewReq, State#state.reqs)}, + Pxy_auth_headers = maybe_modify_headers(Url, Method, Options, [], State_1), + Path = [Server_host, $:, integer_to_list(Server_port)], + {Req, Body_1} = make_request(connect, Pxy_auth_headers, + Path, Path, + [], Options, State_1), + TE = is_chunked_encoding_specified(Options), + trace_request(Req), + case do_send(Req, State) of + ok -> + case do_send_body(Body_1, State_1, TE) of + ok -> + trace_request_body(Body_1), + active_once(State_1), + Ref = case Timeout of + infinity -> + undefined; + _ -> + erlang:send_after(Timeout, self(), {req_timedout, From}) + end, + State_2 = State_1#state{status = get_header, + cur_req = NewReq, + send_timer = Ref, + proxy_tunnel_setup = in_progress, + tunnel_setup_queue = [{From, Url, Headers, Method, Body, Options, Timeout}]}, + State_3 = set_inac_timer(State_2), + {noreply, State_3}; + Err -> + shutting_down(State_1), + do_trace("Send failed... Reason: ~p~n", [Err]), + gen_server:reply(From, {error, {send_failed, Err}}), + {stop, normal, State_1} + end; + Err -> + shutting_down(State_1), + do_trace("Send failed... Reason: ~p~n", [Err]), + gen_server:reply(From, {error, {send_failed, Err}}), + {stop, normal, State_1} + end; + +send_req_1(From, Url, Headers, Method, Body, Options, Timeout, + #state{proxy_tunnel_setup = in_progress, + tunnel_setup_queue = Q} = State) -> + do_trace("Queued SSL request awaiting tunnel setup: ~n" + "URL : ~s~n" + "Method : ~p~n" + "Headers : ~p~n", [Url, Method, Headers]), + {noreply, State#state{tunnel_setup_queue = [{From, Url, Headers, Method, Body, Options, Timeout} | Q]}}; + send_req_1(From, - #url{abspath = AbsPath, - host = Host, - port = Port, - path = RelPath} = Url, - Headers, Method, Body, Options, Timeout, - #state{status = Status} = State) -> + #url{abspath = AbsPath, + path = RelPath} = Url, + Headers, Method, Body, Options, Timeout, + #state{status = Status, + socket = Socket, + is_ssl = Is_ssl} = State) -> ReqId = make_req_id(), Resp_format = get_value(response_format, Options, list), + Caller_socket_options = get_value(socket_options, Options, []), {StreamTo, Caller_controls_socket} = - case get_value(stream_to, Options, undefined) of - {Caller, once} when is_pid(Caller) or - is_atom(Caller) -> - Async_pid_rec = {{req_id_pid, ReqId}, self()}, - true = ets:insert(ibrowse_stream, Async_pid_rec), - {Caller, true}; - undefined -> - {undefined, false}; - Caller when is_pid(Caller) or - is_atom(Caller) -> - {Caller, false}; - Stream_to_inv -> - exit({invalid_option, {stream_to, Stream_to_inv}}) - end, + case get_value(stream_to, Options, undefined) of + {Caller, once} when is_pid(Caller) or + is_atom(Caller) -> + Async_pid_rec = {{req_id_pid, ReqId}, self()}, + true = ets:insert(ibrowse_stream, Async_pid_rec), + {Caller, true}; + undefined -> + {undefined, false}; + Caller when is_pid(Caller) or + is_atom(Caller) -> + {Caller, false}; + Stream_to_inv -> + exit({invalid_option, {stream_to, Stream_to_inv}}) + end, SaveResponseToFile = get_value(save_response_to_file, Options, false), NewReq = #request{url = Url, - method = 
Method,
-                      stream_to = StreamTo,
-                      caller_controls_socket = Caller_controls_socket,
-                      options = Options,
-                      req_id = ReqId,
-                      save_response_to_file = SaveResponseToFile,
-                      stream_chunk_size = get_stream_chunk_size(Options),
-                      response_format = Resp_format,
-                      from = From},
+                      method = Method,
+                      stream_to = StreamTo,
+                      caller_controls_socket = Caller_controls_socket,
+                      caller_socket_options = Caller_socket_options,
+                      options = Options,
+                      req_id = ReqId,
+                      save_response_to_file = SaveResponseToFile,
+                      stream_chunk_size = get_stream_chunk_size(Options),
+                      response_format = Resp_format,
+                      from = From,
+                      preserve_chunked_encoding = get_value(preserve_chunked_encoding, Options, false)
+                     },
     State_1 = State#state{reqs=queue:in(NewReq, State#state.reqs)},
-    Headers_1 = add_auth_headers(Url, Options, Headers, State),
-    HostHeaderValue = case lists:keysearch(host_header, 1, Options) of
-                          false ->
-                              case Port of
-                                  80 -> Host;
-                                  _ -> [Host, ":", integer_to_list(Port)]
-                              end;
-                          {value, {_, Host_h_val}} ->
-                              Host_h_val
-                      end,
+    Headers_1 = maybe_modify_headers(Url, Method, Options, Headers, State_1),
     {Req, Body_1} = make_request(Method,
-                                 [{"Host", HostHeaderValue} | Headers_1],
-                                 AbsPath, RelPath, Body, Options, State#state.use_proxy),
-    case get(my_trace_flag) of
-        true ->
-            %%Avoid the binary operations if trace is not on...
-            NReq = binary_to_list(list_to_binary(Req)),
-            do_trace("Sending request: ~n"
-                     "--- Request Begin ---~n~s~n"
-                     "--- Request End ---~n", [NReq]);
-        _ -> ok
-    end,
-    case do_send(Req, State) of
-        ok ->
-            case do_send_body(Body_1, State) of
-                ok ->
-                    State_2 = inc_pipeline_counter(State_1),
-                    active_once(State_1),
-                    Ref = case Timeout of
-                              infinity ->
-                                  undefined;
-                              _ ->
-                                  erlang:send_after(Timeout, self(), {req_timedout, From})
-                          end,
-                    State_3 = case Status of
-                                  idle ->
-                                      State_2#state{status = get_header,
-                                                    cur_req = NewReq,
-                                                    send_timer = Ref};
-                                  _ ->
-                                      State_2#state{send_timer = Ref}
-                              end,
-                    case StreamTo of
-                        undefined ->
-                            ok;
-                        _ ->
-                            gen_server:reply(From, {ibrowse_req_id, ReqId})
-                    end,
-                    {noreply, State_3, get_inac_timeout(State_3)};
-                Err ->
-                    shutting_down(State_1),
-                    do_trace("Send failed... Reason: ~p~n", [Err]),
-                    gen_server:reply(From, {error, send_failed}),
-                    {stop, normal, State_1}
-            end;
-        Err ->
-            shutting_down(State_1),
-            do_trace("Send failed... Reason: ~p~n", [Err]),
-            gen_server:reply(From, {error, send_failed}),
-            {stop, normal, State_1}
+                                 Headers_1,
+                                 AbsPath, RelPath, Body, Options, State_1),
+    trace_request(Req),
+    do_setopts(Socket, Caller_socket_options, State_1),
+    TE = is_chunked_encoding_specified(Options),
+    case do_send(Req, State_1) of
+        ok ->
+            case do_send_body(Body_1, State_1, TE) of
+                ok ->
+                    trace_request_body(Body_1),
+                    State_2 = inc_pipeline_counter(State_1),
+                    active_once(State_2),
+                    Ref = case Timeout of
+                              infinity ->
+                                  undefined;
+                              _ ->
+                                  erlang:send_after(Timeout, self(), {req_timedout, From})
+                          end,
+                    State_3 = case Status of
+                                  idle ->
+                                      State_2#state{status = get_header,
+                                                    cur_req = NewReq,
+                                                    send_timer = Ref};
+                                  _ ->
+                                      State_2#state{send_timer = Ref}
+                              end,
+                    case StreamTo of
+                        undefined ->
+                            ok;
+                        _ ->
+                            gen_server:reply(From, {ibrowse_req_id, ReqId})
+                    end,
+                    State_4 = set_inac_timer(State_3),
+                    {noreply, State_4};
+                Err ->
+                    shutting_down(State_1),
+                    do_trace("Send failed... Reason: ~p~n", [Err]),
+                    gen_server:reply(From, {error, {send_failed, Err}}),
+                    {stop, normal, State_1}
+            end;
+        Err ->
+            shutting_down(State_1),
+            do_trace("Send failed... 
Reason: ~p~n", [Err]), + gen_server:reply(From, {error, {send_failed, Err}}), + {stop, normal, State_1} + end. + +maybe_modify_headers(#url{}, connect, _, Headers, State) -> + add_proxy_auth_headers(State, Headers); +maybe_modify_headers(#url{host = Host, port = Port} = Url, + _Method, + Options, Headers, State) -> + case get_value(headers_as_is, Options, false) of + false -> + Headers_1 = add_auth_headers(Url, Options, Headers, State), + HostHeaderValue = case lists:keysearch(host_header, 1, Options) of + false -> + case Port of + 80 -> Host; + 443 -> Host; + _ -> [Host, ":", integer_to_list(Port)] + end; + {value, {_, Host_h_val}} -> + Host_h_val + end, + [{"Host", HostHeaderValue} | Headers_1]; + true -> + Headers end. add_auth_headers(#url{username = User, - password = UPw}, - Options, - Headers, - #state{use_proxy = UseProxy, - proxy_auth_digest = ProxyAuthDigest}) -> + password = UPw}, + Options, + Headers, + State) -> Headers_1 = case User of - undefined -> - case get_value(basic_auth, Options, undefined) of - undefined -> - Headers; - {U,P} -> - [{"Authorization", ["Basic ", http_auth_digest(U, P)]} | Headers] - end; - _ -> - [{"Authorization", ["Basic ", http_auth_digest(User, UPw)]} | Headers] - end, - case UseProxy of - false -> - Headers_1; - true when ProxyAuthDigest == [] -> - Headers_1; - true -> - [{"Proxy-Authorization", ["Basic ", ProxyAuthDigest]} | Headers_1] - end. + undefined -> + case get_value(basic_auth, Options, undefined) of + undefined -> + Headers; + {U,P} -> + [{"Authorization", ["Basic ", http_auth_digest(U, P)]} | Headers] + end; + _ -> + [{"Authorization", ["Basic ", http_auth_digest(User, UPw)]} | Headers] + end, + add_proxy_auth_headers(State, Headers_1). + +add_proxy_auth_headers(#state{use_proxy = false}, Headers) -> + Headers; +add_proxy_auth_headers(#state{proxy_auth_digest = []}, Headers) -> + Headers; +add_proxy_auth_headers(#state{proxy_auth_digest = Auth_digest}, Headers) -> + [{"Proxy-Authorization", ["Basic ", Auth_digest]} | Headers]. http_auth_digest([], []) -> []; http_auth_digest(Username, Password) -> - encode_base64(Username ++ [$: | Password]). + ibrowse_lib:encode_base64(Username ++ [$: | Password]). -encode_base64([]) -> - []; -encode_base64([A]) -> - [e(A bsr 2), e((A band 3) bsl 4), $=, $=]; -encode_base64([A,B]) -> - [e(A bsr 2), e(((A band 3) bsl 4) bor (B bsr 4)), e((B band 15) bsl 2), $=]; -encode_base64([A,B,C|Ls]) -> - encode_base64_do(A,B,C, Ls). -encode_base64_do(A,B,C, Rest) -> - BB = (A bsl 16) bor (B bsl 8) bor C, - [e(BB bsr 18), e((BB bsr 12) band 63), - e((BB bsr 6) band 63), e(BB band 63)|encode_base64(Rest)]. - -e(X) when X >= 0, X < 26 -> X+65; -e(X) when X>25, X<52 -> X+71; -e(X) when X>51, X<62 -> X-4; -e(62) -> $+; -e(63) -> $/; -e(X) -> exit({bad_encode_base64_token, X}). 
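The hand-rolled base64 encoder deleted above is superseded by ibrowse_lib:encode_base64/1, which (per the ibrowse_lib hunk further down) now delegates to the OTP base64 module. For illustration, the value that http_auth_digest/2 contributes to an Authorization header, using made-up credentials:

1> ["Basic ", base64:encode_to_string("jan:apple")].
["Basic ","amFuOmFwcGxl"]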
- -make_request(Method, Headers, AbsPath, RelPath, Body, Options, UseProxy) -> +make_request(Method, Headers, AbsPath, RelPath, Body, Options, + #state{use_proxy = UseProxy, is_ssl = Is_ssl}) -> HttpVsn = http_vsn_string(get_value(http_vsn, Options, {1,1})), + Fun1 = fun({X, Y}) when is_atom(X) -> + {to_lower(atom_to_list(X)), X, Y}; + ({X, Y}) when is_list(X) -> + {to_lower(X), X, Y} + end, + Headers_0 = [Fun1(X) || X <- Headers], Headers_1 = - case get_value(content_length, Headers, false) of - false when (Body == []) or - (Body == <<>>) or - is_tuple(Body) or - is_function(Body) -> - Headers; - false when is_binary(Body) -> - [{"content-length", integer_to_list(size(Body))} | Headers]; - false -> - [{"content-length", integer_to_list(length(Body))} | Headers]; - _ -> - Headers - end, + case lists:keysearch("content-length", 1, Headers_0) of + false when (Body == []) orelse + (Body == <<>>) orelse + is_tuple(Body) orelse + is_function(Body) -> + Headers_0; + false when is_binary(Body) -> + [{"content-length", "content-length", integer_to_list(size(Body))} | Headers_0]; + false when is_list(Body) -> + [{"content-length", "content-length", integer_to_list(length(Body))} | Headers_0]; + _ -> + %% Content-Length is already specified + Headers_0 + end, {Headers_2, Body_1} = - case get_value(transfer_encoding, Options, false) of - false -> - {Headers_1, Body}; - {chunked, ChunkSize} -> - {[{X, Y} || {X, Y} <- Headers_1, - X /= "Content-Length", - X /= "content-length", - X /= content_length] ++ - [{"Transfer-Encoding", "chunked"}], - chunk_request_body(Body, ChunkSize)} - end, + case is_chunked_encoding_specified(Options) of + false -> + {[{Y, Z} || {_, Y, Z} <- Headers_1], Body}; + true -> + Chunk_size_1 = case get_value(transfer_encoding, Options) of + chunked -> + 5120; + {chunked, Chunk_size} -> + Chunk_size + end, + {[{Y, Z} || {X, Y, Z} <- Headers_1, + X /= "content-length"] ++ + [{"Transfer-Encoding", "chunked"}], + chunk_request_body(Body, Chunk_size_1)} + end, Headers_3 = cons_headers(Headers_2), Uri = case get_value(use_absolute_uri, Options, false) or UseProxy of - true -> - AbsPath; - false -> - RelPath - end, + true -> + case Is_ssl of + true -> + RelPath; + false -> + AbsPath + end; + false -> + RelPath + end, {[method(Method), " ", Uri, " ", HttpVsn, crnl(), Headers_3, crnl()], Body_1}. +is_chunked_encoding_specified(Options) -> + case get_value(transfer_encoding, Options, false) of + false -> + false; + {chunked, _} -> + true; + chunked -> + true + end. + http_vsn_string({0,9}) -> "HTTP/0.9"; http_vsn_string({1,0}) -> "HTTP/1.0"; http_vsn_string({1,1}) -> "HTTP/1.1". @@ -717,7 +894,7 @@ cons_headers([], Acc) -> encode_headers(Acc); cons_headers([{basic_auth, {U,P}} | T], Acc) -> cons_headers(T, [{"Authorization", - ["Basic ", ibrowse_lib:encode_base64(U++":"++P)]} | Acc]); + ["Basic ", ibrowse_lib:encode_base64(U++":"++P)]} | Acc]); cons_headers([{cookie, Cookie} | T], Acc) -> cons_headers(T, [{"Cookie", Cookie} | Acc]); cons_headers([{content_length, L} | T], Acc) -> @@ -738,6 +915,9 @@ encode_headers([{Name,Val} | T], Acc) when is_atom(Name) -> encode_headers([], Acc) -> lists:reverse(Acc). +chunk_request_body(Body, _ChunkSize) when is_tuple(Body) orelse + is_function(Body) -> + Body; chunk_request_body(Body, ChunkSize) -> chunk_request_body(Body, ChunkSize, []). 
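is_chunked_encoding_specified/1 above accepts the transfer_encoding option in two shapes; make_request/7 maps a bare chunked to a 5120-byte default chunk size. A sketch of both forms, with an invented local URL and body:

1> ibrowse:send_req("http://127.0.0.1:5984/db", [], post, <<"data">>,
                    [{transfer_encoding, chunked}]).
2> ibrowse:send_req("http://127.0.0.1:5984/db", [], post, <<"data">>,
                    [{transfer_encoding, {chunked, 1024}}]).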
@@ -747,25 +927,24 @@ chunk_request_body(Body, _ChunkSize, Acc) when Body == <<>>; Body == [] -> chunk_request_body(Body, ChunkSize, Acc) when is_binary(Body), size(Body) >= ChunkSize -> <<ChunkBody:ChunkSize/binary, Rest/binary>> = Body, - Chunk = [ibrowse_lib:dec2hex(4, ChunkSize),"\r\n", - ChunkBody, "\r\n"], + Chunk = [ibrowse_lib:dec2hex(ChunkSize),"\r\n", + ChunkBody, "\r\n"], chunk_request_body(Rest, ChunkSize, [Chunk | Acc]); chunk_request_body(Body, _ChunkSize, Acc) when is_binary(Body) -> BodySize = size(Body), - Chunk = [ibrowse_lib:dec2hex(4, BodySize),"\r\n", - Body, "\r\n"], + Chunk = [ibrowse_lib:dec2hex(BodySize),"\r\n", + Body, "\r\n"], LastChunk = "0\r\n", lists:reverse(["\r\n", LastChunk, Chunk | Acc]); -chunk_request_body(Body, ChunkSize, Acc) when is_list(Body), - length(Body) >= ChunkSize -> +chunk_request_body(Body, ChunkSize, Acc) when length(Body) >= ChunkSize -> {ChunkBody, Rest} = split_list_at(Body, ChunkSize), - Chunk = [ibrowse_lib:dec2hex(4, ChunkSize),"\r\n", - ChunkBody, "\r\n"], + Chunk = [ibrowse_lib:dec2hex(ChunkSize),"\r\n", + ChunkBody, "\r\n"], chunk_request_body(Rest, ChunkSize, [Chunk | Acc]); chunk_request_body(Body, _ChunkSize, Acc) when is_list(Body) -> BodySize = length(Body), - Chunk = [ibrowse_lib:dec2hex(4, BodySize),"\r\n", - Body, "\r\n"], + Chunk = [ibrowse_lib:dec2hex(BodySize),"\r\n", + Body, "\r\n"], LastChunk = "0\r\n", lists:reverse(["\r\n", LastChunk, Chunk | Acc]). @@ -773,114 +952,172 @@ chunk_request_body(Body, _ChunkSize, Acc) when is_list(Body) -> parse_response(_Data, #state{cur_req = undefined}=State) -> State#state{status = idle}; parse_response(Data, #state{reply_buffer = Acc, reqs = Reqs, - cur_req = CurReq} = State) -> + cur_req = CurReq} = State) -> #request{from=From, stream_to=StreamTo, req_id=ReqId, - method=Method, response_format = Resp_format} = CurReq, + method=Method, response_format = Resp_format, + options = Options + } = CurReq, MaxHeaderSize = ibrowse:get_config_value(max_headers_size, infinity), case scan_header(Acc, Data) of - {yes, Headers, Data_1} -> - do_trace("Recvd Header Data -> ~s~n----~n", [Headers]), - do_trace("Recvd headers~n--- Headers Begin ---~n~s~n--- Headers End ---~n~n", [Headers]), - {HttpVsn, StatCode, Headers_1} = parse_headers(Headers), - do_trace("HttpVsn: ~p StatusCode: ~p Headers_1 -> ~1000.p~n", [HttpVsn, StatCode, Headers_1]), - LCHeaders = [{to_lower(X), Y} || {X,Y} <- Headers_1], - ConnClose = to_lower(get_value("connection", LCHeaders, "false")), - IsClosing = is_connection_closing(HttpVsn, ConnClose), - case IsClosing of - true -> + {yes, Headers, Data_1} -> + do_trace("Recvd Header Data -> ~s~n----~n", [Headers]), + do_trace("Recvd headers~n--- Headers Begin ---~n~s~n--- Headers End ---~n~n", [Headers]), + {HttpVsn, StatCode, Headers_1, Status_line, Raw_headers} = parse_headers(Headers), + do_trace("HttpVsn: ~p StatusCode: ~p Headers_1 -> ~1000.p~n", [HttpVsn, StatCode, Headers_1]), + LCHeaders = [{to_lower(X), Y} || {X,Y} <- Headers_1], + ConnClose = to_lower(get_value("connection", LCHeaders, "false")), + IsClosing = is_connection_closing(HttpVsn, ConnClose), + case IsClosing of + true -> shutting_down(State); - false -> - ok - end, - State_1 = State#state{recvd_headers=Headers_1, status=get_body, - reply_buffer = <<>>, - http_status_code=StatCode, is_closing=IsClosing}, - put(conn_close, ConnClose), - TransferEncoding = to_lower(get_value("transfer-encoding", LCHeaders, "false")), - case get_value("content-length", LCHeaders, undefined) of - _ when Method == head -> - {_, 
Reqs_1} = queue:out(Reqs), - send_async_headers(ReqId, StreamTo, StatCode, Headers_1), - State_1_1 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, - {ok, StatCode, Headers_1, []}), - cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), - State_2 = reset_state(State_1_1), - State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), - parse_response(Data_1, State_3); - _ when hd(StatCode) == $1 -> - %% No message body is expected. Server may send - %% one or more 1XX responses before a proper - %% response. - send_async_headers(ReqId, StreamTo, StatCode, Headers_1), - do_trace("Recvd a status code of ~p. Ignoring and waiting for a proper response~n", [StatCode]), - parse_response(Data_1, State_1#state{recvd_headers = [], - status = get_header}); - _ when StatCode == "204"; - StatCode == "304" -> - %% No message body is expected for these Status Codes. - %% RFC2616 - Sec 4.4 - {_, Reqs_1} = queue:out(Reqs), - send_async_headers(ReqId, StreamTo, StatCode, Headers_1), - State_1_1 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, - {ok, StatCode, Headers_1, []}), - cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), - State_2 = reset_state(State_1_1), - State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), - parse_response(Data_1, State_3); - _ when TransferEncoding == "chunked" -> - do_trace("Chunked encoding detected...~n",[]), - send_async_headers(ReqId, StreamTo, StatCode, Headers_1), - case parse_11_response(Data_1, State_1#state{transfer_encoding=chunked, - chunk_size=chunk_start, - reply_buffer = <<>>}) of - {error, Reason} -> - fail_pipelined_requests(State_1, - {error, {Reason, - {stat_code, StatCode}, Headers_1}}), - {error, Reason}; - State_2 -> - State_2 - end; - undefined when HttpVsn == "HTTP/1.0"; - ConnClose == "close" -> - send_async_headers(ReqId, StreamTo, StatCode, Headers_1), - State_1#state{reply_buffer = Data_1}; - undefined -> - fail_pipelined_requests(State_1, - {error, {content_length_undefined, - {stat_code, StatCode}, Headers}}), - {error, content_length_undefined}; - V -> - case catch list_to_integer(V) of - V_1 when is_integer(V_1), V_1 >= 0 -> - send_async_headers(ReqId, StreamTo, StatCode, Headers_1), - do_trace("Recvd Content-Length of ~p~n", [V_1]), - State_2 = State_1#state{rep_buf_size=0, - reply_buffer = <<>>, - content_length=V_1}, - case parse_11_response(Data_1, State_2) of - {error, Reason} -> - fail_pipelined_requests(State_1, - {error, {Reason, - {stat_code, StatCode}, Headers_1}}), - {error, Reason}; - State_3 -> - State_3 - end; - _ -> - fail_pipelined_requests(State_1, - {error, {content_length_undefined, - {stat_code, StatCode}, Headers}}), - {error, content_length_undefined} - end - end; - {no, Acc_1} when MaxHeaderSize == infinity -> - State#state{reply_buffer = Acc_1}; - {no, Acc_1} when size(Acc_1) < MaxHeaderSize -> - State#state{reply_buffer = Acc_1}; - {no, _Acc_1} -> - fail_pipelined_requests(State, {error, max_headers_size_exceeded}), - {error, max_headers_size_exceeded} + false -> + ok + end, + Give_raw_headers = get_value(give_raw_headers, Options, false), + State_1 = case Give_raw_headers of + true -> + State#state{recvd_headers=Headers_1, status=get_body, + reply_buffer = <<>>, + status_line = Status_line, + raw_headers = Raw_headers, + http_status_code=StatCode, is_closing=IsClosing}; + false -> + State#state{recvd_headers=Headers_1, status=get_body, + reply_buffer = <<>>, + http_status_code=StatCode, is_closing=IsClosing} + end, + put(conn_close, ConnClose), + 
TransferEncoding = to_lower(get_value("transfer-encoding", LCHeaders, "false")), + case get_value("content-length", LCHeaders, undefined) of + _ when Method == connect, + hd(StatCode) == $2 -> + cancel_timer(State#state.send_timer), + {_, Reqs_1} = queue:out(Reqs), + upgrade_to_ssl(set_cur_request(State#state{reqs = Reqs_1, + recvd_headers = [], + status = idle + })); + _ when Method == connect -> + {_, Reqs_1} = queue:out(Reqs), + do_error_reply(State#state{reqs = Reqs_1}, + {error, proxy_tunnel_failed}), + {error, proxy_tunnel_failed}; + _ when Method == head -> + {_, Reqs_1} = queue:out(Reqs), + send_async_headers(ReqId, StreamTo, Give_raw_headers, State_1), + State_1_1 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, + {ok, StatCode, Headers_1, []}), + cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), + State_2 = reset_state(State_1_1), + State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), + parse_response(Data_1, State_3); + _ when hd(StatCode) =:= $1 -> + %% No message body is expected. Server may send + %% one or more 1XX responses before a proper + %% response. + send_async_headers(ReqId, StreamTo, Give_raw_headers, State_1), + do_trace("Recvd a status code of ~p. Ignoring and waiting for a proper response~n", [StatCode]), + parse_response(Data_1, State_1#state{recvd_headers = [], + status = get_header}); + _ when StatCode =:= "204"; + StatCode =:= "304" -> + %% No message body is expected for these Status Codes. + %% RFC2616 - Sec 4.4 + {_, Reqs_1} = queue:out(Reqs), + send_async_headers(ReqId, StreamTo, Give_raw_headers, State_1), + State_1_1 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, + {ok, StatCode, Headers_1, []}), + cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), + State_2 = reset_state(State_1_1), + State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), + parse_response(Data_1, State_3); + _ when TransferEncoding =:= "chunked" -> + do_trace("Chunked encoding detected...~n",[]), + send_async_headers(ReqId, StreamTo, Give_raw_headers, State_1), + case parse_11_response(Data_1, State_1#state{transfer_encoding=chunked, + chunk_size=chunk_start, + reply_buffer = <<>>}) of + {error, Reason} -> + fail_pipelined_requests(State_1, + {error, {Reason, + {stat_code, StatCode}, Headers_1}}), + {error, Reason}; + State_2 -> + State_2 + end; + undefined when HttpVsn =:= "HTTP/1.0"; + ConnClose =:= "close" -> + send_async_headers(ReqId, StreamTo, Give_raw_headers, State_1), + State_1#state{reply_buffer = Data_1}; + undefined -> + fail_pipelined_requests(State_1, + {error, {content_length_undefined, + {stat_code, StatCode}, Headers}}), + {error, content_length_undefined}; + V -> + case catch list_to_integer(V) of + V_1 when is_integer(V_1), V_1 >= 0 -> + send_async_headers(ReqId, StreamTo, Give_raw_headers, State_1), + do_trace("Recvd Content-Length of ~p~n", [V_1]), + State_2 = State_1#state{rep_buf_size=0, + reply_buffer = <<>>, + content_length=V_1}, + case parse_11_response(Data_1, State_2) of + {error, Reason} -> + fail_pipelined_requests(State_1, + {error, {Reason, + {stat_code, StatCode}, Headers_1}}), + {error, Reason}; + State_3 -> + State_3 + end; + _ -> + fail_pipelined_requests(State_1, + {error, {content_length_undefined, + {stat_code, StatCode}, Headers}}), + {error, content_length_undefined} + end + end; + {no, Acc_1} when MaxHeaderSize == infinity -> + State#state{reply_buffer = Acc_1}; + {no, Acc_1} when size(Acc_1) < MaxHeaderSize -> + State#state{reply_buffer = Acc_1}; + {no, _Acc_1} -> 
+            fail_pipelined_requests(State, {error, max_headers_size_exceeded}),
+            {error, max_headers_size_exceeded}
+    end.
+
+upgrade_to_ssl(#state{socket = Socket,
+                      connect_timeout = Conn_timeout,
+                      ssl_options = Ssl_options,
+                      tunnel_setup_queue = Q} = State) ->
+    case ssl:connect(Socket, Ssl_options, Conn_timeout) of
+        {ok, Ssl_socket} ->
+            do_trace("Upgraded to SSL socket!!~n", []),
+            State_1 = State#state{socket = Ssl_socket,
+                                  proxy_tunnel_setup = done},
+            send_queued_requests(lists:reverse(Q), State_1);
+        Err ->
+            do_trace("Upgrade to SSL socket failed. Reason: ~p~n", [Err]),
+            do_error_reply(State, {error, {send_failed, Err}}),
+            {error, send_failed}
+    end.
+
+send_queued_requests([], State) ->
+    do_trace("Sent all queued requests via SSL connection~n", []),
+    State#state{tunnel_setup_queue = []};
+send_queued_requests([{From, Url, Headers, Method, Body, Options, Timeout} | Q],
+                     State) ->
+    case send_req_1(From, Url, Headers, Method, Body, Options, Timeout, State) of
+        {noreply, State_1} ->
+            send_queued_requests(Q, State_1);
+        Err ->
+            do_trace("Error sending queued SSL request: ~n"
+                     "URL     : ~s~n"
+                     "Method  : ~p~n"
+                     "Headers : ~p~n", [Url, Method, Headers]),
+            do_error_reply(State, {error, {send_failed, Err}}),
+            {error, send_failed}
     end.

 is_connection_closing("HTTP/0.9", _)                -> true;
@@ -890,200 +1127,220 @@ is_connection_closing(_, _)                 -> false.

 %% This clause determines the chunk size when given data from the beginning of the chunk
 parse_11_response(DataRecvd,
-                 #state{transfer_encoding = chunked,
-                        chunk_size = chunk_start,
-                        chunk_size_buffer = Chunk_sz_buf
-                       } = State) ->
+                  #state{transfer_encoding = chunked,
+                         chunk_size = chunk_start,
+                         chunk_size_buffer = Chunk_sz_buf
+                        } = State) ->
     case scan_crlf(Chunk_sz_buf, DataRecvd) of
-        {yes, ChunkHeader, Data_1} ->
-            case parse_chunk_header(ChunkHeader) of
-                {error, Reason} ->
-                    {error, Reason};
-                ChunkSize ->
-                    %%
-                    %% Do we have to preserve the chunk encoding when
-                    %% streaming? NO. This should be transparent to the client
-                    %% process. Chunked encoding was only introduced to make
-                    %% it efficient for the server.
-                    %%
-                    RemLen = size(Data_1),
-                    do_trace("Determined chunk size: ~p. Already recvd: ~p~n", [ChunkSize, RemLen]),
-                    parse_11_response(Data_1, State#state{chunk_size_buffer = <<>>,
-                                                          deleted_crlf = true,
-                                                          recvd_chunk_size = 0,
-                                                          chunk_size = ChunkSize})
-            end;
-        {no, Data_1} ->
-            State#state{chunk_size_buffer = Data_1}
+        {yes, ChunkHeader, Data_1} ->
+            State_1 = maybe_accumulate_ce_data(State, <<ChunkHeader/binary, $\r, $\n>>),
+            ChunkSize = parse_chunk_header(ChunkHeader),
+            %%
+            %% Do we have to preserve the chunk encoding when
+            %% streaming? NO. This should be transparent to the client
+            %% process. Chunked encoding was only introduced to make
+            %% it efficient for the server.
+            %%
+            RemLen = size(Data_1),
+            do_trace("Determined chunk size: ~p. 
Already recvd: ~p~n", + [ChunkSize, RemLen]), + parse_11_response(Data_1, State_1#state{chunk_size_buffer = <<>>, + deleted_crlf = true, + recvd_chunk_size = 0, + chunk_size = ChunkSize}); + {no, Data_1} -> + State#state{chunk_size_buffer = Data_1} end; %% This clause is to remove the CRLF between two chunks %% parse_11_response(DataRecvd, - #state{transfer_encoding = chunked, - chunk_size = tbd, - chunk_size_buffer = Buf}=State) -> + #state{transfer_encoding = chunked, + chunk_size = tbd, + chunk_size_buffer = Buf + } = State) -> case scan_crlf(Buf, DataRecvd) of - {yes, _, NextChunk} -> - State_1 = State#state{chunk_size = chunk_start, - chunk_size_buffer = <<>>, - deleted_crlf = true}, - parse_11_response(NextChunk, State_1); - {no, Data_1} -> - State#state{chunk_size_buffer = Data_1} + {yes, _, NextChunk} -> + State_1 = maybe_accumulate_ce_data(State, <<$\r, $\n>>), + State_2 = State_1#state{chunk_size = chunk_start, + chunk_size_buffer = <<>>, + deleted_crlf = true}, + parse_11_response(NextChunk, State_2); + {no, Data_1} -> + State#state{chunk_size_buffer = Data_1} end; %% This clause deals with the end of a chunked transfer. ibrowse does %% not support Trailers in the Chunked Transfer encoding. Any trailer %% received is silently discarded. parse_11_response(DataRecvd, - #state{transfer_encoding = chunked, chunk_size = 0, - cur_req = CurReq, - deleted_crlf = DelCrlf, - chunk_size_buffer = Trailer, reqs = Reqs}=State) -> + #state{transfer_encoding = chunked, chunk_size = 0, + cur_req = CurReq, + deleted_crlf = DelCrlf, + chunk_size_buffer = Trailer, + reqs = Reqs} = State) -> do_trace("Detected end of chunked transfer...~n", []), DataRecvd_1 = case DelCrlf of - false -> - DataRecvd; - true -> - <<$\r, $\n, DataRecvd/binary>> + false -> + DataRecvd; + true -> + <<$\r, $\n, DataRecvd/binary>> end, case scan_header(Trailer, DataRecvd_1) of - {yes, _TEHeaders, Rem} -> - {_, Reqs_1} = queue:out(Reqs), - State_1 = handle_response(CurReq, State#state{reqs = Reqs_1}), - parse_response(Rem, reset_state(State_1)); - {no, Rem} -> - State#state{chunk_size_buffer = Rem, deleted_crlf = false} + {yes, TEHeaders, Rem} -> + {_, Reqs_1} = queue:out(Reqs), + State_1 = maybe_accumulate_ce_data(State, <<TEHeaders/binary, $\r, $\n>>), + State_2 = handle_response(CurReq, + State_1#state{reqs = Reqs_1}), + parse_response(Rem, reset_state(State_2)); + {no, Rem} -> + accumulate_response(<<>>, State#state{chunk_size_buffer = Rem, deleted_crlf = false}) end; %% This clause extracts a chunk, given the size. parse_11_response(DataRecvd, - #state{transfer_encoding = chunked, - chunk_size = CSz, - recvd_chunk_size = Recvd_csz, - rep_buf_size = RepBufSz} = State) -> + #state{transfer_encoding = chunked, + chunk_size = CSz, + recvd_chunk_size = Recvd_csz, + rep_buf_size = RepBufSz} = State) -> NeedBytes = CSz - Recvd_csz, DataLen = size(DataRecvd), do_trace("Recvd more data: size: ~p. 
NeedBytes: ~p~n", [DataLen, NeedBytes]), case DataLen >= NeedBytes of - true -> - {RemChunk, RemData} = split_binary(DataRecvd, NeedBytes), - do_trace("Recvd another chunk...~n", []), - do_trace("RemData -> ~p~n", [RemData]), - case accumulate_response(RemChunk, State) of - {error, Reason} -> - do_trace("Error accumulating response --> ~p~n", [Reason]), - {error, Reason}; - #state{} = State_1 -> - State_2 = State_1#state{chunk_size=tbd}, - parse_11_response(RemData, State_2) - end; - false -> - accumulate_response(DataRecvd, - State#state{rep_buf_size = RepBufSz + DataLen, - recvd_chunk_size = Recvd_csz + DataLen}) + true -> + {RemChunk, RemData} = split_binary(DataRecvd, NeedBytes), + do_trace("Recvd another chunk...~p~n", [RemChunk]), + do_trace("RemData -> ~p~n", [RemData]), + case accumulate_response(RemChunk, State) of + {error, Reason} -> + do_trace("Error accumulating response --> ~p~n", [Reason]), + {error, Reason}; + #state{} = State_1 -> + State_2 = State_1#state{chunk_size=tbd}, + parse_11_response(RemData, State_2) + end; + false -> + accumulate_response(DataRecvd, + State#state{rep_buf_size = RepBufSz + DataLen, + recvd_chunk_size = Recvd_csz + DataLen}) end; %% This clause to extract the body when Content-Length is specified parse_11_response(DataRecvd, - #state{content_length=CL, rep_buf_size=RepBufSz, - reqs=Reqs}=State) -> + #state{content_length=CL, rep_buf_size=RepBufSz, + reqs=Reqs}=State) -> NeedBytes = CL - RepBufSz, DataLen = size(DataRecvd), case DataLen >= NeedBytes of - true -> - {RemBody, Rem} = split_binary(DataRecvd, NeedBytes), - {_, Reqs_1} = queue:out(Reqs), - State_1 = accumulate_response(RemBody, State), - State_2 = handle_response(State_1#state.cur_req, State_1#state{reqs=Reqs_1}), - State_3 = reset_state(State_2), - parse_response(Rem, State_3); - false -> - accumulate_response(DataRecvd, State#state{rep_buf_size = (RepBufSz+DataLen)}) + true -> + {RemBody, Rem} = split_binary(DataRecvd, NeedBytes), + {_, Reqs_1} = queue:out(Reqs), + State_1 = accumulate_response(RemBody, State), + State_2 = handle_response(State_1#state.cur_req, State_1#state{reqs=Reqs_1}), + State_3 = reset_state(State_2), + parse_response(Rem, State_3); + false -> + accumulate_response(DataRecvd, State#state{rep_buf_size = (RepBufSz+DataLen)}) end. +maybe_accumulate_ce_data(#state{cur_req = #request{preserve_chunked_encoding = false}} = State, _) -> + State; +maybe_accumulate_ce_data(State, Data) -> + accumulate_response(Data, State). 
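The chunk-size line consumed by the chunk_start clause above may carry ;extensions; parse_chunk_header/1 further down discards them and hexlist_to_integer/1 converts what remains. For illustration, the stdlib equivalent of that conversion:

1> erlang:list_to_integer("1a", 16).   %% a header of <<"1a;name=val">> announces a 26-byte chunk
26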
+ handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId, - response_format = Resp_format, - save_response_to_file = SaveResponseToFile, - tmp_file_name = TmpFilename, - tmp_file_fd = Fd - }, - #state{http_status_code = SCode, - send_timer = ReqTimer, - reply_buffer = RepBuf, - recvd_headers = RespHeaders}=State) when SaveResponseToFile /= false -> + response_format = Resp_format, + save_response_to_file = SaveResponseToFile, + tmp_file_name = TmpFilename, + tmp_file_fd = Fd, + options = Options + }, + #state{http_status_code = SCode, + status_line = Status_line, + raw_headers = Raw_headers, + send_timer = ReqTimer, + reply_buffer = RepBuf, + recvd_headers = RespHeaders}=State) when SaveResponseToFile /= false -> Body = RepBuf, - State_1 = set_cur_request(State), file:close(Fd), ResponseBody = case TmpFilename of - undefined -> - Body; - _ -> - {file, TmpFilename} - end, - State_2 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, - {ok, SCode, RespHeaders, ResponseBody}), + undefined -> + Body; + _ -> + {file, TmpFilename} + end, + {Resp_headers_1, Raw_headers_1} = maybe_add_custom_headers(RespHeaders, Raw_headers, Options), + Reply = case get_value(give_raw_headers, Options, false) of + true -> + {ok, Status_line, Raw_headers_1, ResponseBody}; + false -> + {ok, SCode, Resp_headers_1, ResponseBody} + end, + State_1 = do_reply(State, From, StreamTo, ReqId, Resp_format, Reply), cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), - State_2; + set_cur_request(State_1); handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId, - response_format = Resp_format}, - #state{http_status_code=SCode, recvd_headers=RespHeaders, - reply_buffer = RepBuf, - send_timer=ReqTimer}=State) -> + response_format = Resp_format, + options = Options}, + #state{http_status_code = SCode, + status_line = Status_line, + raw_headers = Raw_headers, + recvd_headers = Resp_headers, + reply_buffer = RepBuf, + send_timer = ReqTimer} = State) -> Body = RepBuf, -%% State_1 = set_cur_request(State), - State_1 = case get(conn_close) of - "close" -> - do_reply(State, From, StreamTo, ReqId, Resp_format, - {ok, SCode, RespHeaders, Body}), - exit(normal); - _ -> - State_1_1 = do_reply(State, From, StreamTo, ReqId, Resp_format, - {ok, SCode, RespHeaders, Body}), - cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), - State_1_1 - end, + {Resp_headers_1, Raw_headers_1} = maybe_add_custom_headers(Resp_headers, Raw_headers, Options), + Reply = case get_value(give_raw_headers, Options, false) of + true -> + {ok, Status_line, Raw_headers_1, Body}; + false -> + {ok, SCode, Resp_headers_1, Body} + end, + State_1 = do_reply(State, From, StreamTo, ReqId, Resp_format, Reply), + cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), set_cur_request(State_1). reset_state(State) -> State#state{status = get_header, - rep_buf_size = 0, - streamed_size = 0, - content_length = undefined, - reply_buffer = <<>>, - chunk_size_buffer = <<>>, - recvd_headers = [], - deleted_crlf = false, - http_status_code = undefined, - chunk_size = undefined, - transfer_encoding = undefined}. + rep_buf_size = 0, + streamed_size = 0, + content_length = undefined, + reply_buffer = <<>>, + chunk_size_buffer = <<>>, + recvd_headers = [], + status_line = undefined, + raw_headers = undefined, + deleted_crlf = false, + http_status_code = undefined, + chunk_size = undefined, + transfer_encoding = undefined + }. 
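With the give_raw_headers option now threaded through #state{}, replies carry the stored status_line and raw_headers instead of the parsed forms. Two independent sketches, assuming an invented URL that answers 200 and the binary forms produced by parse_headers/1 further down:

%% default: parsed status code (a string) and parsed header list
{ok, "200", Parsed_headers, Body} =
    ibrowse:send_req("http://127.0.0.1:5984/", [], get, [], []).

%% give_raw_headers: the status line and header block as received
{ok, <<"HTTP/1.1 200 OK">>, Raw_headers, Body} =
    ibrowse:send_req("http://127.0.0.1:5984/", [], get, [],
                     [{give_raw_headers, true}]).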
set_cur_request(#state{reqs = Reqs} = State) -> case queue:to_list(Reqs) of - [] -> - State#state{cur_req = undefined}; - [NextReq | _] -> - State#state{cur_req = NextReq} + [] -> + State#state{cur_req = undefined}; + [NextReq | _] -> + State#state{cur_req = NextReq} end. parse_headers(Headers) -> case scan_crlf(Headers) of - {yes, StatusLine, T} -> - parse_headers(StatusLine, T); - {no, StatusLine} -> - parse_headers(StatusLine, <<>>) + {yes, StatusLine, T} -> + parse_headers(StatusLine, T); + {no, StatusLine} -> + parse_headers(StatusLine, <<>>) end. parse_headers(StatusLine, Headers) -> Headers_1 = parse_headers_1(Headers), case parse_status_line(StatusLine) of - {ok, HttpVsn, StatCode, _Msg} -> - put(http_prot_vsn, HttpVsn), - {HttpVsn, StatCode, Headers_1}; - _ -> %% A HTTP 0.9 response? - put(http_prot_vsn, "HTTP/0.9"), - {"HTTP/0.9", undefined, Headers} + {ok, HttpVsn, StatCode, _Msg} -> + put(http_prot_vsn, HttpVsn), + {HttpVsn, StatCode, Headers_1, StatusLine, Headers}; + _ -> %% A HTTP 0.9 response? + put(http_prot_vsn, "HTTP/0.9"), + {"HTTP/0.9", undefined, Headers, StatusLine, Headers} end. % From RFC 2616 @@ -1094,22 +1351,22 @@ parse_headers(StatusLine, Headers) -> % SP. A recipient MAY replace any linear white space with a single % SP before interpreting the field value or forwarding the message % downstream. - parse_headers_1(B) when is_binary(B) -> - parse_headers_1(binary_to_list(B)); - parse_headers_1(String) -> - parse_headers_1(String, [], []). +parse_headers_1(B) when is_binary(B) -> + parse_headers_1(binary_to_list(B)); +parse_headers_1(String) -> + parse_headers_1(String, [], []). -parse_headers_1([$\n, H |T], [$\r | L], Acc) when H == 32; - H == $\t -> +parse_headers_1([$\n, H |T], [$\r | L], Acc) when H =:= 32; + H =:= $\t -> parse_headers_1(lists:dropwhile(fun(X) -> - is_whitespace(X) - end, T), [32 | L], Acc); + is_whitespace(X) + end, T), [32 | L], Acc); parse_headers_1([$\n|T], [$\r | L], Acc) -> case parse_header(lists:reverse(L)) of - invalid -> - parse_headers_1(T, [], Acc); - NewHeader -> - parse_headers_1(T, [], [NewHeader | Acc]) + invalid -> + parse_headers_1(T, [], Acc); + NewHeader -> + parse_headers_1(T, [], [NewHeader | Acc]) end; parse_headers_1([H|T], L, Acc) -> parse_headers_1(T, [H|L], Acc); @@ -1117,11 +1374,11 @@ parse_headers_1([], [], Acc) -> lists:reverse(Acc); parse_headers_1([], L, Acc) -> Acc_1 = case parse_header(lists:reverse(L)) of - invalid -> - Acc; - NewHeader -> - [NewHeader | Acc] - end, + invalid -> + Acc; + NewHeader -> + [NewHeader | Acc] + end, lists:reverse(Acc_1). parse_status_line(Line) when is_binary(Line) -> @@ -1132,6 +1389,8 @@ parse_status_line([32 | T], get_prot_vsn, ProtVsn, StatCode) -> parse_status_line(T, get_status_code, ProtVsn, StatCode); parse_status_line([32 | T], get_status_code, ProtVsn, StatCode) -> {ok, lists:reverse(ProtVsn), lists:reverse(StatCode), T}; +parse_status_line([], get_status_code, ProtVsn, StatCode) -> + {ok, lists:reverse(ProtVsn), lists:reverse(StatCode), []}; parse_status_line([H | T], get_prot_vsn, ProtVsn, StatCode) -> parse_status_line(T, get_prot_vsn, [H|ProtVsn], StatCode); parse_status_line([H | T], get_status_code, ProtVsn, StatCode) -> @@ -1139,10 +1398,9 @@ parse_status_line([H | T], get_status_code, ProtVsn, StatCode) -> parse_status_line([], _, _, _) -> http_09. -parse_header(B) when is_binary(B) -> - parse_header(binary_to_list(B)); parse_header(L) -> parse_header(L, []). 
+ parse_header([$: | V], Acc) -> {lists:reverse(Acc), string:strip(V)}; parse_header([H | T], Acc) -> @@ -1152,11 +1410,11 @@ parse_header([], _) -> scan_header(Bin) -> case get_crlf_crlf_pos(Bin, 0) of - {yes, Pos} -> - {Headers, <<_:4/binary, Body/binary>>} = split_binary(Bin, Pos), - {yes, Headers, Body}; - no -> - {no, Bin} + {yes, Pos} -> + {Headers, <<_:4/binary, Body/binary>>} = split_binary(Bin, Pos), + {yes, Headers, Body}; + no -> + {no, Bin} end. scan_header(Bin1, Bin2) when size(Bin1) < 4 -> @@ -1168,11 +1426,11 @@ scan_header(Bin1, Bin2) -> <<Headers_prefix:Bin1_already_scanned_size/binary, Rest/binary>> = Bin1, Bin_to_scan = <<Rest/binary, Bin2/binary>>, case get_crlf_crlf_pos(Bin_to_scan, 0) of - {yes, Pos} -> - {Headers_suffix, <<_:4/binary, Body/binary>>} = split_binary(Bin_to_scan, Pos), - {yes, <<Headers_prefix/binary, Headers_suffix/binary>>, Body}; - no -> - {no, <<Bin1/binary, Bin2/binary>>} + {yes, Pos} -> + {Headers_suffix, <<_:4/binary, Body/binary>>} = split_binary(Bin_to_scan, Pos), + {yes, <<Headers_prefix/binary, Headers_suffix/binary>>, Body}; + no -> + {no, <<Bin1/binary, Bin2/binary>>} end. get_crlf_crlf_pos(<<$\r, $\n, $\r, $\n, _/binary>>, Pos) -> {yes, Pos}; @@ -1181,11 +1439,11 @@ get_crlf_crlf_pos(<<>>, _) -> no. scan_crlf(Bin) -> case get_crlf_pos(Bin) of - {yes, Pos} -> - {Prefix, <<_, _, Suffix/binary>>} = split_binary(Bin, Pos), - {yes, Prefix, Suffix}; - no -> - {no, Bin} + {yes, Pos} -> + {Prefix, <<_, _, Suffix/binary>>} = split_binary(Bin, Pos), + {yes, Prefix, Suffix}; + no -> + {no, Bin} end. scan_crlf(<<>>, Bin2) -> @@ -1199,11 +1457,11 @@ scan_crlf_1(Bin1_head_size, Bin1, Bin2) -> <<Bin1_head:Bin1_head_size/binary, Bin1_tail/binary>> = Bin1, Bin3 = <<Bin1_tail/binary, Bin2/binary>>, case get_crlf_pos(Bin3) of - {yes, Pos} -> - {Prefix, <<_, _, Suffix/binary>>} = split_binary(Bin3, Pos), - {yes, concat_binary([Bin1_head, Prefix]), Suffix}; - no -> - {no, concat_binary([Bin1, Bin2])} + {yes, Pos} -> + {Prefix, <<_, _, Suffix/binary>>} = split_binary(Bin3, Pos), + {yes, list_to_binary([Bin1_head, Prefix]), Suffix}; + no -> + {no, list_to_binary([Bin1, Bin2])} end. get_crlf_pos(Bin) -> @@ -1213,13 +1471,6 @@ get_crlf_pos(<<$\r, $\n, _/binary>>, Pos) -> {yes, Pos}; get_crlf_pos(<<_, Rest/binary>>, Pos) -> get_crlf_pos(Rest, Pos + 1); get_crlf_pos(<<>>, _) -> no. -%% scan_crlf(<<$\n, T/binary>>, [$\r | L]) -> {yes, lists:reverse(L), T}; -%% scan_crlf(<<H, T/binary>>, L) -> scan_crlf(T, [H|L]); -%% scan_crlf(<<>>, L) -> {no, L}; -%% scan_crlf([$\n|T], [$\r | L]) -> {yes, lists:reverse(L), T}; -%% scan_crlf([H|T], L) -> scan_crlf(T, [H|L]); -%% scan_crlf([], L) -> {no, L}. - fmt_val(L) when is_list(L) -> L; fmt_val(I) when is_integer(I) -> integer_to_list(I); fmt_val(A) when is_atom(A) -> atom_to_list(A); @@ -1240,7 +1491,8 @@ method(proppatch) -> "PROPPATCH"; method(lock) -> "LOCK"; method(unlock) -> "UNLOCK"; method(move) -> "MOVE"; -method(copy) -> "COPY". +method(copy) -> "COPY"; +method(connect) -> "CONNECT". %% From RFC 2616 %% @@ -1250,19 +1502,19 @@ method(copy) -> "COPY". % fields. This allows dynamically produced content to be transferred % along with the information necessary for the recipient to verify % that it has received the full message. 
-% Chunked-Body = *chunk -% last-chunk -% trailer -% CRLF -% chunk = chunk-size [ chunk-extension ] CRLF -% chunk-data CRLF -% chunk-size = 1*HEX -% last-chunk = 1*("0") [ chunk-extension ] CRLF -% chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) -% chunk-ext-name = token -% chunk-ext-val = token | quoted-string -% chunk-data = chunk-size(OCTET) -% trailer = *(entity-header CRLF) +% Chunked-Body = *chunk +% last-chunk +% trailer +% CRLF +% chunk = chunk-size [ chunk-extension ] CRLF +% chunk-data CRLF +% chunk-size = 1*HEX +% last-chunk = 1*("0") [ chunk-extension ] CRLF +% chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +% chunk-ext-name = token +% chunk-ext-val = token | quoted-string +% chunk-data = chunk-size(OCTET) +% trailer = *(entity-header CRLF) % The chunk-size field is a string of hex digits indicating the size % of the chunk. The chunked encoding is ended by any chunk whose size % is zero, followed by the trailer, which is terminated by an empty @@ -1271,8 +1523,6 @@ method(copy) -> "COPY". %% The parsing implemented here discards all chunk extensions. It also %% strips trailing spaces from the chunk size fields as Apache 1.3.27 was %% sending them. -parse_chunk_header([]) -> - throw({error, invalid_chunk_size}); parse_chunk_header(ChunkHeader) -> parse_chunk_header(ChunkHeader, []). @@ -1280,10 +1530,10 @@ parse_chunk_header(<<$;, _/binary>>, Acc) -> hexlist_to_integer(lists:reverse(Acc)); parse_chunk_header(<<H, T/binary>>, Acc) -> case is_whitespace(H) of - true -> - parse_chunk_header(T, Acc); - false -> - parse_chunk_header(T, [H | Acc]) + true -> + parse_chunk_header(T, Acc); + false -> + parse_chunk_header(T, [H | Acc]) end; parse_chunk_header(<<>>, Acc) -> hexlist_to_integer(lists:reverse(Acc)). @@ -1294,24 +1544,45 @@ is_whitespace($\n) -> true; is_whitespace($\t) -> true; is_whitespace(_) -> false. - -send_async_headers(_ReqId, undefined, _StatCode, _Headers) -> +send_async_headers(_ReqId, undefined, _, _State) -> ok; -send_async_headers(ReqId, StreamTo, StatCode, Headers) -> - catch StreamTo ! {ibrowse_async_headers, ReqId, StatCode, Headers}. +send_async_headers(ReqId, StreamTo, Give_raw_headers, + #state{status_line = Status_line, raw_headers = Raw_headers, + recvd_headers = Headers, http_status_code = StatCode, + cur_req = #request{options = Opts} + }) -> + {Headers_1, Raw_headers_1} = maybe_add_custom_headers(Headers, Raw_headers, Opts), + case Give_raw_headers of + false -> + catch StreamTo ! {ibrowse_async_headers, ReqId, StatCode, Headers_1}; + true -> + catch StreamTo ! {ibrowse_async_headers, ReqId, Status_line, Raw_headers_1} + end. + +maybe_add_custom_headers(Headers, Raw_headers, Opts) -> + Custom_headers = get_value(add_custom_headers, Opts, []), + Headers_1 = Headers ++ Custom_headers, + Raw_headers_1 = case Custom_headers of + [_ | _] when is_binary(Raw_headers) -> + Custom_headers_bin = list_to_binary(string:join([[X, $:, Y] || {X, Y} <- Custom_headers], "\r\n")), + <<Raw_headers/binary, "\r\n", Custom_headers_bin/binary>>; + _ -> + Raw_headers + end, + {Headers_1, Raw_headers_1}. 
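maybe_add_custom_headers/3 above splices any add_custom_headers option into the response headers handed back to the caller, in both the parsed list and, when give_raw_headers is set, the raw binary. A sketch with an invented header:

1> ibrowse:send_req("http://127.0.0.1:5984/", [], get, [],
                    [{add_custom_headers, [{"X-Via", "proxy-1"}]}]).
%% the returned header list now also contains {"X-Via", "proxy-1"}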
format_response_data(Resp_format, Body) -> case Resp_format of - list when is_list(Body) -> - flatten(Body); - list when is_binary(Body) -> - binary_to_list(Body); - binary when is_list(Body) -> - list_to_binary(Body); - _ -> - %% This is to cater for sending messages such as - %% {chunk_start, _}, chunk_end etc - Body + list when is_list(Body) -> + flatten(Body); + list when is_binary(Body) -> + binary_to_list(Body); + binary when is_list(Body) -> + list_to_binary(Body); + _ -> + %% This is to cater for sending messages such as + %% {chunk_start, _}, chunk_end etc + Body end. do_reply(State, From, undefined, _, Resp_format, {ok, St_code, Headers, Body}) -> @@ -1322,14 +1593,14 @@ do_reply(State, From, undefined, _, _, Msg) -> gen_server:reply(From, Msg), dec_pipeline_counter(State); do_reply(#state{prev_req_id = Prev_req_id} = State, - _From, StreamTo, ReqId, Resp_format, {ok, _, _, Body}) -> + _From, StreamTo, ReqId, Resp_format, {ok, _, _, Body}) -> State_1 = dec_pipeline_counter(State), case Body of - [] -> - ok; - _ -> - Body_1 = format_response_data(Resp_format, Body), - catch StreamTo ! {ibrowse_async_response, ReqId, Body_1} + [] -> + ok; + _ -> + Body_1 = format_response_data(Resp_format, Body), + catch StreamTo ! {ibrowse_async_response, ReqId, Body_1} end, catch StreamTo ! {ibrowse_async_response_end, ReqId}, %% We don't want to delete the Req-id to Pid mapping straightaway @@ -1356,23 +1627,28 @@ do_interim_reply(StreamTo, Response_format, ReqId, Msg) -> Msg_1 = format_response_data(Response_format, Msg), catch StreamTo ! {ibrowse_async_response, ReqId, Msg_1}. -do_error_reply(#state{reqs = Reqs} = State, Err) -> +do_error_reply(#state{reqs = Reqs, tunnel_setup_queue = Tun_q} = State, Err) -> ReqList = queue:to_list(Reqs), lists:foreach(fun(#request{from=From, stream_to=StreamTo, req_id=ReqId, - response_format = Resp_format}) -> - ets:delete(ibrowse_stream, {req_id_pid, ReqId}), + response_format = Resp_format}) -> + ets:delete(ibrowse_stream, {req_id_pid, ReqId}), do_reply(State, From, StreamTo, ReqId, Resp_format, {error, Err}) - end, ReqList). + end, ReqList), + lists:foreach( + fun({From, _Url, _Headers, _Method, _Body, _Options, _Timeout}) -> + do_reply(State, From, undefined, undefined, undefined, Err) + end, Tun_q). fail_pipelined_requests(#state{reqs = Reqs, cur_req = CurReq} = State, Reply) -> {_, Reqs_1} = queue:out(Reqs), #request{from=From, stream_to=StreamTo, req_id=ReqId, - response_format = Resp_format} = CurReq, + response_format = Resp_format} = CurReq, do_reply(State, From, StreamTo, ReqId, Resp_format, Reply), do_error_reply(State#state{reqs = Reqs_1}, previous_request_failed). split_list_at(List, N) -> split_list_at(List, N, []). + split_list_at([], _, Acc) -> {lists:reverse(Acc), []}; split_list_at(List2, 0, List1) -> @@ -1382,6 +1658,7 @@ split_list_at([H | List2], N, List1) -> hexlist_to_integer(List) -> hexlist_to_integer(lists:reverse(List), 1, 0). + hexlist_to_integer([H | T], Multiplier, Acc) -> hexlist_to_integer(T, Multiplier*16, Multiplier*to_ascii(H) + Acc); hexlist_to_integer([], _, Acc) -> @@ -1416,10 +1693,10 @@ cancel_timer(Ref) -> erlang:cancel_timer(Ref). cancel_timer(Ref, {eat_message, Msg}) -> cancel_timer(Ref), receive - Msg -> - ok + Msg -> + ok after 0 -> - ok + ok end. make_req_id() -> @@ -1437,7 +1714,7 @@ to_lower([], Acc) -> shutting_down(#state{lb_ets_tid = undefined}) -> ok; shutting_down(#state{lb_ets_tid = Tid, - cur_pipeline_size = Sz}) -> + cur_pipeline_size = Sz}) -> catch ets:delete(Tid, {Sz, self()}). 
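cancel_timer/2 above guards against the race where the timeout message was already delivered by the time the timer is cancelled; the receive ... after 0 flush is the standard idiom, and cancel_timer returning false signals that the timer had already fired. A shell demonstration of the race it soaks up:

1> Ref = erlang:send_after(0, self(), req_timedout), timer:sleep(10), erlang:cancel_timer(Ref).
false
2> receive req_timedout -> flushed after 0 -> nothing_queued end.
flushed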
inc_pipeline_counter(#state{is_closing = true} = State) -> @@ -1450,7 +1727,7 @@ dec_pipeline_counter(#state{is_closing = true} = State) -> dec_pipeline_counter(#state{lb_ets_tid = undefined} = State) -> State; dec_pipeline_counter(#state{cur_pipeline_size = Pipe_sz, - lb_ets_tid = Tid} = State) -> + lb_ets_tid = Tid} = State) -> ets:delete(Tid, {Pipe_sz, self()}), ets:insert(Tid, {{Pipe_sz - 1, self()}, []}), State#state{cur_pipeline_size = Pipe_sz - 1}. @@ -1464,13 +1741,68 @@ flatten([]) -> get_stream_chunk_size(Options) -> case lists:keysearch(stream_chunk_size, 1, Options) of - {value, {_, V}} when V > 0 -> - V; - _ -> - ?DEFAULT_STREAM_CHUNK_SIZE + {value, {_, V}} when V > 0 -> + V; + _ -> + ?DEFAULT_STREAM_CHUNK_SIZE end. -get_inac_timeout(#state{cur_req = #request{options = Opts}}) -> +set_inac_timer(State) -> + cancel_timer(State#state.inactivity_timer_ref), + set_inac_timer(State#state{inactivity_timer_ref = undefined}, + get_inac_timeout(State)). + +set_inac_timer(State, Timeout) when is_integer(Timeout) -> + Ref = erlang:send_after(Timeout, self(), timeout), + State#state{inactivity_timer_ref = Ref}; +set_inac_timer(State, _) -> + State. + +get_inac_timeout(#state{cur_req = #request{options = Opts}}) -> get_value(inactivity_timeout, Opts, infinity); get_inac_timeout(#state{cur_req = undefined}) -> - infinity. + case ibrowse:get_config_value(inactivity_timeout, undefined) of + Val when is_integer(Val) -> + Val; + _ -> + case application:get_env(ibrowse, inactivity_timeout) of + {ok, Val} when is_integer(Val), Val > 0 -> + Val; + _ -> + 10000 + end + end. + +trace_request(Req) -> + case get(my_trace_flag) of + true -> + %%Avoid the binary operations if trace is not on... + NReq = to_binary(Req), + do_trace("Sending request: ~n" + "--- Request Begin ---~n~s~n" + "--- Request End ---~n", [NReq]); + _ -> ok + end. + +trace_request_body(Body) -> + case get(my_trace_flag) of + true -> + %%Avoid the binary operations if trace is not on... + NBody = to_binary(Body), + case size(NBody) > 1024 of + true -> + ok; + false -> + do_trace("Sending request body: ~n" + "--- Request Body Begin ---~n~s~n" + "--- Request Body End ---~n", [NBody]) + end; + false -> + ok + end. + +to_integer(X) when is_list(X) -> list_to_integer(X); +to_integer(X) when is_integer(X) -> X. + +to_binary(X) when is_list(X) -> list_to_binary(X); +to_binary(X) when is_binary(X) -> X. diff --git a/src/ibrowse/ibrowse_lb.erl b/src/ibrowse/ibrowse_lb.erl index 834054a7..0e001d48 100644 --- a/src/ibrowse/ibrowse_lb.erl +++ b/src/ibrowse/ibrowse_lb.erl @@ -1,13 +1,11 @@ %%%------------------------------------------------------------------- %%% File : ibrowse_lb.erl %%% Author : chandru <chandrashekhar.mullaparthi@t-mobile.co.uk> -%%% Description : +%%% Description : %%% %%% Created : 6 Mar 2008 by chandru <chandrashekhar.mullaparthi@t-mobile.co.uk> %%%------------------------------------------------------------------- -module(ibrowse_lb). - --vsn('$Id: ibrowse_lb.erl,v 1.2 2009/07/01 22:43:19 chandrusf Exp $ '). -author(chandru). -behaviour(gen_server). %%-------------------------------------------------------------------- @@ -18,7 +16,8 @@ %% External exports -export([ start_link/1, - spawn_connection/5 + spawn_connection/5, + stop/1 ]). %% gen_server callbacks @@ -87,6 +86,14 @@ spawn_connection(Lb_pid, Url, is_integer(Max_sessions) -> gen_server:call(Lb_pid, {spawn_connection, Url, Max_sessions, Max_pipeline_size, SSL_options}). 
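Callers size each host's connection pool per request; the max_sessions and max_pipeline_size options arrive here as the gen_server:call arguments of spawn_connection/5 above. A sketch with an invented local URL:

1> ibrowse:send_req("http://127.0.0.1:5984/", [], get, [],
                    [{max_sessions, 10}, {max_pipeline_size, 1}]).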
+ +stop(Lb_pid) -> + case catch gen_server:call(Lb_pid, stop) of + {'EXIT', {timeout, _}} -> + exit(Lb_pid, kill); + ok -> + ok + end. %%-------------------------------------------------------------------- %% Function: handle_call/3 %% Description: Handling call messages @@ -101,14 +108,14 @@ spawn_connection(Lb_pid, Url, % #state{max_sessions = Max_sess, % ets_tid = Tid, % max_pipeline_size = Max_pipe_sz, -% num_cur_sessions = Num} = State) +% num_cur_sessions = Num} = State) % when Num >= Max -> % Reply = find_best_connection(Tid), % {reply, sorry_dude_reuse, State}; %% Update max_sessions in #state with supplied value handle_call({spawn_connection, _Url, Max_sess, Max_pipe, _}, _From, - #state{num_cur_sessions = Num} = State) + #state{num_cur_sessions = Num} = State) when Num >= Max_sess -> State_1 = maybe_create_ets(State), Reply = find_best_connection(State_1#state.ets_tid, Max_pipe), @@ -122,6 +129,18 @@ handle_call({spawn_connection, Url, _Max_sess, _Max_pipe, SSL_options}, _From, ets:insert(Tid, {{1, Pid}, []}), {reply, {ok, Pid}, State_1#state{num_cur_sessions = Cur + 1}}; +handle_call(stop, _From, #state{ets_tid = undefined} = State) -> + gen_server:reply(_From, ok), + {stop, normal, State}; + +handle_call(stop, _From, #state{ets_tid = Tid} = State) -> + ets:foldl(fun({{_, Pid}, _}, Acc) -> + ibrowse_http_client:stop(Pid), + Acc + end, [], Tid), + gen_server:reply(_From, ok), + {stop, normal, State}; + handle_call(Request, _From, State) -> Reply = {unknown_request, Request}, {reply, Reply, State}. diff --git a/src/ibrowse/ibrowse_lib.erl b/src/ibrowse/ibrowse_lib.erl index 6c7b1546..e913adbe 100644 --- a/src/ibrowse/ibrowse_lib.erl +++ b/src/ibrowse/ibrowse_lib.erl @@ -1,11 +1,10 @@ %%% File : ibrowse_lib.erl %%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> -%%% Description : +%%% Description : %%% Created : 27 Feb 2004 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> %% @doc Module with a few useful functions -module(ibrowse_lib). --vsn('$Id: ibrowse_lib.erl,v 1.6 2008/03/27 01:35:50 chandrusf Exp $ '). -author('chandru'). -ifdef(debug). -compile(export_all). @@ -14,22 +13,22 @@ -include("ibrowse.hrl"). -export([ - get_trace_status/2, - do_trace/2, - do_trace/3, - url_encode/1, - decode_rfc822_date/1, - status_code/1, - dec2hex/2, - drv_ue/1, - drv_ue/2, - encode_base64/1, - decode_base64/1, - get_value/2, - get_value/3, - parse_url/1, - printable_date/0 - ]). + get_trace_status/2, + do_trace/2, + do_trace/3, + url_encode/1, + decode_rfc822_date/1, + status_code/1, + dec2hex/1, + drv_ue/1, + drv_ue/2, + encode_base64/1, + decode_base64/1, + get_value/2, + get_value/3, + parse_url/1, + printable_date/0 + ]). get_trace_status(Host, Port) -> ibrowse:get_config_value({trace, Host, Port}, false). @@ -39,10 +38,10 @@ drv_ue(Str) -> drv_ue(Str, Port). drv_ue(Str, Port) -> case erlang:port_control(Port, 1, Str) of - [] -> - Str; - Res -> - Res + [] -> + Str; + Res -> + Res end. %% @doc URL-encodes a string based on RFC 1738. Returns a flat list. @@ -72,10 +71,10 @@ d2h(N) -> N+$a-10. decode_rfc822_date(String) when is_list(String) -> case catch decode_rfc822_date_1(string:tokens(String, ", \t\r\n")) of - {'EXIT', _} -> - {error, invalid_date}; - Res -> - Res + {'EXIT', _} -> + {error, invalid_date}; + Res -> + Res end. 
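%% A small usage sketch for decode_rfc822_date/1 as defined above; the
%% date string is illustrative. Per the TODO below, the zone token is
%% consumed but not applied, so the wall-clock time comes back as-is.
-module(rfc822_demo).
-export([run/0]).

run() ->
    {{1994,11,15}, {8,12,31}} =
        ibrowse_lib:decode_rfc822_date("Tue, 15 Nov 1994 08:12:31 GMT"),
    {error, invalid_date} = ibrowse_lib:decode_rfc822_date("not a date"),
    ok.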
% TODO: Have to handle the Zone @@ -86,15 +85,15 @@ decode_rfc822_date_1([Day,Month,Year, Time,_Zone]) -> MonthI = month_int(Month), YearI = list_to_integer(Year), TimeTup = case string:tokens(Time, ":") of - [H,M] -> - {list_to_integer(H), - list_to_integer(M), - 0}; - [H,M,S] -> - {list_to_integer(H), - list_to_integer(M), - list_to_integer(S)} - end, + [H,M] -> + {list_to_integer(H), + list_to_integer(M), + 0}; + [H,M,S] -> + {list_to_integer(H), + list_to_integer(M), + list_to_integer(S)} + end, {{YearI,MonthI,DayI}, TimeTup}. month_int("Jan") -> 1; @@ -110,7 +109,7 @@ month_int("Oct") -> 10; month_int("Nov") -> 11; month_int("Dec") -> 12. -%% @doc Given a status code, returns an atom describing the status code. +%% @doc Given a status code, returns an atom describing the status code. %% @spec status_code(StatusCode::status_code()) -> StatusDescription %% status_code() = string() | integer() %% StatusDescription = atom() @@ -164,100 +163,35 @@ status_code(507) -> insufficient_storage; status_code(X) when is_list(X) -> status_code(list_to_integer(X)); status_code(_) -> unknown_status_code. -%% @doc dec2hex taken from gtk.erl in std dist -%% M = integer() -- number of hex digits required +%% @doc Returns a string with the hexadecimal representation of a given decimal. %% N = integer() -- the number to represent as hex -%% @spec dec2hex(M::integer(), N::integer()) -> string() -dec2hex(M,N) -> dec2hex(M,N,[]). - -dec2hex(0,_N,Ack) -> Ack; -dec2hex(M,N,Ack) -> dec2hex(M-1,N bsr 4,[d2h(N band 15)|Ack]). +%% @spec dec2hex(N::integer()) -> string() +dec2hex(N) -> lists:flatten(io_lib:format("~.16B", [N])). %% @doc Implements the base64 encoding algorithm. The output data type matches in the input data type. %% @spec encode_base64(In) -> Out %% In = string() | binary() %% Out = string() | binary() encode_base64(List) when is_list(List) -> - encode_base64_1(list_to_binary(List)); + binary_to_list(base64:encode(List)); encode_base64(Bin) when is_binary(Bin) -> - List = encode_base64_1(Bin), - list_to_binary(List). - -encode_base64_1(<<A:6, B:6, C:6, D:6, Rest/binary>>) -> - [int_to_b64(A), int_to_b64(B), - int_to_b64(C), int_to_b64(D) | encode_base64_1(Rest)]; -encode_base64_1(<<A:6, B:6, C:4>>) -> - [int_to_b64(A), int_to_b64(B), int_to_b64(C bsl 2), $=]; -encode_base64_1(<<A:6, B:2>>) -> - [int_to_b64(A), int_to_b64(B bsl 4), $=, $=]; -encode_base64_1(<<>>) -> - []. + base64:encode(Bin). %% @doc Implements the base64 decoding algorithm. The output data type matches in the input data type. %% @spec decode_base64(In) -> Out | exit({error, invalid_input}) %% In = string() | binary() %% Out = string() | binary() decode_base64(List) when is_list(List) -> - decode_base64_1(List, []); + binary_to_list(base64:decode(List)); decode_base64(Bin) when is_binary(Bin) -> - List = decode_base64_1(binary_to_list(Bin), []), - list_to_binary(List). 
- -decode_base64_1([H | T], Acc) when ((H == $\t) or - (H == 32) or - (H == $\r) or - (H == $\n)) -> - decode_base64_1(T, Acc); - -decode_base64_1([$=, $=], Acc) -> - lists:reverse(Acc); -decode_base64_1([$=, _ | _], _Acc) -> - exit({error, invalid_input}); - -decode_base64_1([A1, B1, $=, $=], Acc) -> - A = b64_to_int(A1), - B = b64_to_int(B1), - Oct1 = (A bsl 2) bor (B bsr 4), - decode_base64_1([], [Oct1 | Acc]); -decode_base64_1([A1, B1, C1, $=], Acc) -> - A = b64_to_int(A1), - B = b64_to_int(B1), - C = b64_to_int(C1), - Oct1 = (A bsl 2) bor (B bsr 4), - Oct2 = ((B band 16#f) bsl 6) bor (C bsr 2), - decode_base64_1([], [Oct2, Oct1 | Acc]); -decode_base64_1([A1, B1, C1, D1 | T], Acc) -> - A = b64_to_int(A1), - B = b64_to_int(B1), - C = b64_to_int(C1), - D = b64_to_int(D1), - Oct1 = (A bsl 2) bor (B bsr 4), - Oct2 = ((B band 16#f) bsl 4) bor (C bsr 2), - Oct3 = ((C band 2#11) bsl 6) bor D, - decode_base64_1(T, [Oct3, Oct2, Oct1 | Acc]); -decode_base64_1([], Acc) -> - lists:reverse(Acc). - -%% Taken from httpd_util.erl -int_to_b64(X) when X >= 0, X =< 25 -> X + $A; -int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a; -int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0; -int_to_b64(62) -> $+; -int_to_b64(63) -> $/. - -%% Taken from httpd_util.erl -b64_to_int(X) when X >= $A, X =< $Z -> X - $A; -b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26; -b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52; -b64_to_int($+) -> 62; -b64_to_int($/) -> 63. + base64:decode(Bin). get_value(Tag, TVL, DefVal) -> case lists:keysearch(Tag, 1, TVL) of - false -> - DefVal; - {value, {_, Val}} -> - Val + false -> + DefVal; + {value, {_, Val}} -> + Val end. get_value(Tag, TVL) -> @@ -270,93 +204,121 @@ parse_url(Url) -> parse_url([$:, $/, $/ | _], get_protocol, Url, []) -> {invalid_uri_1, Url}; parse_url([$:, $/, $/ | T], get_protocol, Url, TmpAcc) -> - Prot = list_to_atom(lists:reverse(TmpAcc)), - parse_url(T, get_username, - Url#url{protocol = Prot}, - []); -parse_url([$/ | T], get_username, Url, TmpAcc) -> + Prot = list_to_existing_atom(lists:reverse(TmpAcc)), + parse_url(T, get_username, + Url#url{protocol = Prot}, + []); +parse_url([H | T], get_username, Url, TmpAcc) when H == $/; + H == $? -> + Path = case H of + $/ -> + [$/ | T]; + $? -> + [$/, $? | T] + end, %% No username/password. No port number Url#url{host = lists:reverse(TmpAcc), - port = default_port(Url#url.protocol), - path = [$/ | T]}; + port = default_port(Url#url.protocol), + path = Path}; parse_url([$: | T], get_username, Url, TmpAcc) -> %% It is possible that no username/password has been %% specified. But we'll continue with the assumption that there is %% a username/password. If we encounter a '@' later on, there is a %% username/password indeed. If we encounter a '/', it was %% actually the hostname - parse_url(T, get_password, - Url#url{username = lists:reverse(TmpAcc)}, - []); + parse_url(T, get_password, + Url#url{username = lists:reverse(TmpAcc)}, + []); parse_url([$@ | T], get_username, Url, TmpAcc) -> - parse_url(T, get_host, - Url#url{username = lists:reverse(TmpAcc), - password = ""}, - []); + parse_url(T, get_host, + Url#url{username = lists:reverse(TmpAcc), + password = ""}, + []); parse_url([$@ | T], get_password, Url, TmpAcc) -> - parse_url(T, get_host, - Url#url{password = lists:reverse(TmpAcc)}, - []); -parse_url([$/ | T], get_password, Url, TmpAcc) -> + parse_url(T, get_host, + Url#url{password = lists:reverse(TmpAcc)}, + []); +parse_url([H | T], get_password, Url, TmpAcc) when H == $/; + H == $? 
-> %% Ok, what we thought was the username/password was the hostname %% and portnumber #url{username=User} = Url, Port = list_to_integer(lists:reverse(TmpAcc)), + Path = case H of + $/ -> + [$/ | T]; + $? -> + [$/, $? | T] + end, Url#url{host = User, - port = Port, - username = undefined, - password = undefined, - path = [$/ | T]}; + port = Port, + username = undefined, + password = undefined, + path = Path}; parse_url([$: | T], get_host, #url{} = Url, TmpAcc) -> - parse_url(T, get_port, - Url#url{host = lists:reverse(TmpAcc)}, - []); -parse_url([$/ | T], get_host, #url{protocol=Prot} = Url, TmpAcc) -> + parse_url(T, get_port, + Url#url{host = lists:reverse(TmpAcc)}, + []); +parse_url([H | T], get_host, #url{protocol=Prot} = Url, TmpAcc) when H == $/; + H == $? -> + Path = case H of + $/ -> + [$/ | T]; + $? -> + [$/, $? | T] + end, Url#url{host = lists:reverse(TmpAcc), - port = default_port(Prot), - path = [$/ | T]}; -parse_url([$/ | T], get_port, #url{protocol=Prot} = Url, TmpAcc) -> + port = default_port(Prot), + path = Path}; +parse_url([H | T], get_port, #url{protocol=Prot} = Url, TmpAcc) when H == $/; + H == $? -> + Path = case H of + $/ -> + [$/ | T]; + $? -> + [$/, $? | T] + end, Port = case TmpAcc of - [] -> - default_port(Prot); - _ -> - list_to_integer(lists:reverse(TmpAcc)) - end, - Url#url{port = Port, path = [$/ | T]}; + [] -> + default_port(Prot); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{port = Port, path = Path}; parse_url([H | T], State, Url, TmpAcc) -> parse_url(T, State, Url, [H | TmpAcc]); parse_url([], get_host, Url, TmpAcc) when TmpAcc /= [] -> Url#url{host = lists:reverse(TmpAcc), - port = default_port(Url#url.protocol), - path = "/"}; + port = default_port(Url#url.protocol), + path = "/"}; parse_url([], get_username, Url, TmpAcc) when TmpAcc /= [] -> Url#url{host = lists:reverse(TmpAcc), - port = default_port(Url#url.protocol), - path = "/"}; + port = default_port(Url#url.protocol), + path = "/"}; parse_url([], get_port, #url{protocol=Prot} = Url, TmpAcc) -> Port = case TmpAcc of - [] -> - default_port(Prot); - _ -> - list_to_integer(lists:reverse(TmpAcc)) - end, - Url#url{port = Port, - path = "/"}; + [] -> + default_port(Prot); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{port = Port, + path = "/"}; parse_url([], get_password, Url, TmpAcc) -> %% Ok, what we thought was the username/password was the hostname %% and portnumber #url{username=User} = Url, Port = case TmpAcc of - [] -> - default_port(Url#url.protocol); - _ -> - list_to_integer(lists:reverse(TmpAcc)) - end, + [] -> + default_port(Url#url.protocol); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, Url#url{host = User, - port = Port, - username = undefined, - password = undefined, - path = "/"}; + port = Port, + username = undefined, + password = undefined, + path = "/"}; parse_url([], State, Url, TmpAcc) -> {invalid_uri_2, State, Url, TmpAcc}. @@ -387,13 +349,13 @@ do_trace(Fmt, Args) -> -ifdef(DEBUG). do_trace(_, Fmt, Args) -> io:format("~s -- (~s) - "++Fmt, - [printable_date(), - get(ibrowse_trace_token) | Args]). + [printable_date(), + get(ibrowse_trace_token) | Args]). -else. do_trace(true, Fmt, Args) -> io:format("~s -- (~s) - "++Fmt, - [printable_date(), - get(ibrowse_trace_token) | Args]); + [printable_date(), + get(ibrowse_trace_token) | Args]); do_trace(_, _, _) -> ok. -endif. 
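%% Sketch of the behaviour this revision of ibrowse_lib adds or
%% simplifies (assumes ibrowse.hrl, which defines #url{}, is on the
%% include path; all literal values are illustrative): URLs whose path
%% starts at '?' now parse instead of failing, base64 delegates to the
%% stdlib, and dec2hex/1 formats without the old accumulator loop.
-module(ibrowse_lib_demo).
-include("ibrowse.hrl").
-export([run/0]).

run() ->
    %% a bare query string now yields the path "/?q=1"
    #url{host = "example.com", port = 8080, path = "/?q=1"} =
        ibrowse_lib:parse_url("http://example.com:8080?q=1"),
    %% base64 round trip; the output type matches the input type
    "aGVsbG8=" = ibrowse_lib:encode_base64("hello"),
    <<"hello">> = ibrowse_lib:decode_base64(<<"aGVsbG8=">>),
    %% dec2hex/1 no longer takes a digit-count argument
    "FF" = ibrowse_lib:dec2hex(255),
    ok.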
diff --git a/src/ibrowse/ibrowse_sup.erl b/src/ibrowse/ibrowse_sup.erl index 1b9b863a..ace33d16 100644 --- a/src/ibrowse/ibrowse_sup.erl +++ b/src/ibrowse/ibrowse_sup.erl @@ -1,13 +1,11 @@ %%%------------------------------------------------------------------- %%% File : ibrowse_sup.erl %%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> -%%% Description : +%%% Description : %%% %%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> %%%------------------------------------------------------------------- -module(ibrowse_sup). --vsn('$Id: ibrowse_sup.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). - -behaviour(supervisor). %%-------------------------------------------------------------------- %% Include files @@ -53,7 +51,7 @@ start_link() -> %% Func: init/1 %% Returns: {ok, {SupFlags, [ChildSpec]}} | %% ignore | -%% {error, Reason} +%% {error, Reason} %%-------------------------------------------------------------------- init([]) -> AChild = {ibrowse,{ibrowse,start_link,[]}, diff --git a/src/ibrowse/ibrowse_test.erl b/src/ibrowse/ibrowse_test.erl index 3dc66ecf..3ad76603 100644 --- a/src/ibrowse/ibrowse_test.erl +++ b/src/ibrowse/ibrowse_test.erl @@ -4,7 +4,6 @@ %%% Created : 14 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> -module(ibrowse_test). --vsn('$Id: ibrowse_test.erl,v 1.4 2009/07/01 22:43:19 chandrusf Exp $ '). -export([ load_test/3, send_reqs_1/3, @@ -18,6 +17,7 @@ ue_test/1, verify_chunked_streaming/0, verify_chunked_streaming/1, + test_chunked_streaming_once/0, i_do_async_req_list/4, test_stream_once/3, test_stream_once/4 @@ -193,6 +193,7 @@ dump_errors(Key, Iod) -> {"http://www.google.co.uk", get}, {"http://www.google.com", get}, {"http://www.google.com", options}, + {"https://mail.google.com", get}, {"http://www.sun.com", get}, {"http://www.oracle.com", get}, {"http://www.bbc.co.uk", get}, @@ -216,16 +217,21 @@ dump_errors(Key, Iod) -> {"http://jigsaw.w3.org/HTTP/300/", get}, {"http://jigsaw.w3.org/HTTP/Basic/", get, [{basic_auth, {"guest", "guest"}}]}, {"http://jigsaw.w3.org/HTTP/CL/", get}, - {"http://www.httpwatch.com/httpgallery/chunked/", get} + {"http://www.httpwatch.com/httpgallery/chunked/", get}, + {"https://github.com", get, [{ssl_options, [{depth, 2}]}]} ]). unit_tests() -> unit_tests([]). unit_tests(Options) -> + application:start(crypto), + application:start(public_key), + application:start(ssl), + ibrowse:start(), Options_1 = Options ++ [{connect_timeout, 5000}], {Pid, Ref} = erlang:spawn_monitor(?MODULE, unit_tests_1, [self(), Options_1]), - receive + receive {done, Pid} -> ok; {'DOWN', Ref, _, _, Info} -> @@ -247,19 +253,45 @@ verify_chunked_streaming() -> verify_chunked_streaming([]). 
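%% Sketch of the SSL usage the new https test URLs above rely on: the
%% applications started in unit_tests/1 must be running before an https
%% request, and certificate-chain depth is passed via ssl_options. The
%% URL mirrors the test table; reachability is assumed.
-module(https_demo).
-export([run/0]).

run() ->
    application:start(crypto),
    application:start(public_key),
    application:start(ssl),
    ibrowse:start(),
    {ok, Status, _Headers, _Body} =
        ibrowse:send_req("https://github.com", [], get, [],
                         [{ssl_options, [{depth, 2}]}]),
    Status.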
verify_chunked_streaming(Options) -> + io:format("~nVerifying that chunked streaming is working...~n", []), Url = "http://www.httpwatch.com/httpgallery/chunked/", - io:format("URL: ~s~n", [Url]), - io:format("Fetching data without streaming...~n", []), + io:format(" URL: ~s~n", [Url]), + io:format(" Fetching data without streaming...~n", []), Result_without_streaming = ibrowse:send_req( Url, [], get, [], [{response_format, binary} | Options]), - io:format("Fetching data with streaming as list...~n", []), + io:format(" Fetching data with streaming as list...~n", []), Async_response_list = do_async_req_list( Url, get, [{response_format, list} | Options]), - io:format("Fetching data with streaming as binary...~n", []), + io:format(" Fetching data with streaming as binary...~n", []), Async_response_bin = do_async_req_list( Url, get, [{response_format, binary} | Options]), - compare_responses(Result_without_streaming, Async_response_list, Async_response_bin). + io:format(" Fetching data with streaming as binary, {active, once}...~n", []), + Async_response_bin_once = do_async_req_list( + Url, get, [once, {response_format, binary} | Options]), + Res1 = compare_responses(Result_without_streaming, Async_response_list, Async_response_bin), + Res2 = compare_responses(Result_without_streaming, Async_response_list, Async_response_bin_once), + case {Res1, Res2} of + {success, success} -> + io:format(" Chunked streaming working~n", []); + _ -> + ok + end. + +test_chunked_streaming_once() -> + test_chunked_streaming_once([]). + +test_chunked_streaming_once(Options) -> + io:format("~nTesting chunked streaming with the {stream_to, {Pid, once}} option...~n", []), + Url = "http://www.httpwatch.com/httpgallery/chunked/", + io:format(" URL: ~s~n", [Url]), + io:format(" Fetching data with streaming as binary, {active, once}...~n", []), + case do_async_req_list(Url, get, [once, {response_format, binary} | Options]) of + {ok, _, _, _} -> + io:format(" Success!~n", []); + Err -> + io:format(" Fail: ~p~n", [Err]) + end. compare_responses({ok, St_code, _, Body}, {ok, St_code, _, Body}, {ok, St_code, _, Body}) -> success; @@ -293,9 +325,9 @@ compare_responses(R1, R2, R3) -> do_async_req_list(Url, Method, Options) -> {Pid,_} = erlang:spawn_monitor(?MODULE, i_do_async_req_list, - [self(), Url, Method, + [self(), Url, Method, Options ++ [{stream_chunk_size, 1000}]]), - io:format("Spawned process ~p~n", [Pid]), +%% io:format("Spawned process ~p~n", [Pid]), wait_for_resp(Pid). wait_for_resp(Pid) -> @@ -312,31 +344,54 @@ wait_for_resp(Pid) -> Msg -> io:format("Recvd unknown message: ~p~n", [Msg]), wait_for_resp(Pid) - after 10000 -> + after 100000 -> {error, timeout} end. i_do_async_req_list(Parent, Url, Method, Options) -> - Res = ibrowse:send_req(Url, [], Method, [], [{stream_to, self()} | Options]), + Options_1 = case lists:member(once, Options) of + true -> + [{stream_to, {self(), once}} | (Options -- [once])]; + false -> + [{stream_to, self()} | Options] + end, + Res = ibrowse:send_req(Url, [], Method, [], Options_1), case Res of {ibrowse_req_id, Req_id} -> - Result = wait_for_async_resp(Req_id, undefined, undefined, []), + Result = wait_for_async_resp(Req_id, Options, undefined, undefined, []), Parent ! {async_result, self(), Result}; Err -> Parent ! {async_result, self(), Err} end. 
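%% Minimal consumer sketch for the {stream_to, {Pid, once}} mode the
%% test above drives: after the headers and after every chunk, the
%% caller must explicitly request the next message with
%% ibrowse:stream_next/1 (cf. maybe_stream_next below). URL illustrative.
-module(stream_once_demo).
-export([run/0]).

run() ->
    ibrowse:start(),
    {ibrowse_req_id, Id} =
        ibrowse:send_req("http://example.com/", [], get, [],
                         [{stream_to, {self(), once}}]),
    loop(Id, []).

loop(Id, Acc) ->
    receive
        {ibrowse_async_headers, Id, _Status, _Headers} ->
            ibrowse:stream_next(Id),
            loop(Id, Acc);
        {ibrowse_async_response, Id, Data} ->
            ibrowse:stream_next(Id),
            loop(Id, [Data | Acc]);
        {ibrowse_async_response_end, Id} ->
            {ok, lists:reverse(Acc)}
    after 10000 ->
        {error, timeout}
    end.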
-wait_for_async_resp(Req_id, Acc_Stat_code, Acc_Headers, Body) -> +wait_for_async_resp(Req_id, Options, Acc_Stat_code, Acc_Headers, Body) -> receive {ibrowse_async_headers, Req_id, StatCode, Headers} -> - wait_for_async_resp(Req_id, StatCode, Headers, Body); + %% io:format("Recvd headers...~n", []), + maybe_stream_next(Req_id, Options), + wait_for_async_resp(Req_id, Options, StatCode, Headers, Body); {ibrowse_async_response_end, Req_id} -> + %% io:format("Recvd end of response.~n", []), Body_1 = list_to_binary(lists:reverse(Body)), {ok, Acc_Stat_code, Acc_Headers, Body_1}; {ibrowse_async_response, Req_id, Data} -> - wait_for_async_resp(Req_id, Acc_Stat_code, Acc_Headers, [Data | Body]); + maybe_stream_next(Req_id, Options), + %% io:format("Recvd data...~n", []), + wait_for_async_resp(Req_id, Options, Acc_Stat_code, Acc_Headers, [Data | Body]); + {ibrowse_async_response, Req_id, {error, _} = Err} -> + {ok, Acc_Stat_code, Acc_Headers, Err}; Err -> {ok, Acc_Stat_code, Acc_Headers, Err} + after 10000 -> + {timeout, Acc_Stat_code, Acc_Headers, Body} + end. + +maybe_stream_next(Req_id, Options) -> + case lists:member(once, Options) of + true -> + ibrowse:stream_next(Req_id); + false -> + ok end. execute_req(Url, Method, Options) -> @@ -346,7 +401,7 @@ execute_req(Url, Method, Options) -> {ok, SCode, _H, _B} -> io:format("Status code: ~p~n", [SCode]); Err -> - io:format("Err -> ~p~n", [Err]) + io:format("~p~n", [Err]) end. drv_ue_test() -> diff --git a/src/mochiweb/Makefile.am b/src/mochiweb/Makefile.am index c191abfa..752118df 100644 --- a/src/mochiweb/Makefile.am +++ b/src/mochiweb/Makefile.am @@ -10,65 +10,86 @@ ## License for the specific language governing permissions and limitations under ## the License. -mochiwebebindir = $(localerlanglibdir)/mochiweb-r113/ebin +mochiwebebindir = $(localerlanglibdir)/mochiweb-7c2bc2/ebin mochiweb_file_collection = \ - mochifmt.erl \ - mochifmt_records.erl \ - mochifmt_std.erl \ - mochihex.erl \ - mochijson.erl \ - mochijson2.erl \ - mochinum.erl \ + mochifmt.erl \ + mochifmt_records.erl \ + mochifmt_std.erl \ + mochiglobal.erl \ + mochihex.erl \ + mochijson.erl \ + mochijson2.erl \ + mochilists.erl \ + mochilogfile2.erl \ + mochinum.erl \ + mochitemp.erl \ + mochiutf8.erl \ mochiweb.app.in \ - mochiweb.erl \ - mochiweb_app.erl \ - mochiweb_charref.erl \ - mochiweb_cookies.erl \ - mochiweb_echo.erl \ - mochiweb_headers.erl \ - mochiweb_html.erl \ - mochiweb_http.erl \ - mochiweb_multipart.erl \ - mochiweb_request.erl \ - mochiweb_response.erl \ - mochiweb_skel.erl \ - mochiweb_socket_server.erl \ - mochiweb_sup.erl \ - mochiweb_util.erl \ - reloader.erl + mochiweb.erl \ + mochiweb_acceptor.erl \ + mochiweb_app.erl \ + mochiweb_charref.erl \ + mochiweb_cookies.erl \ + mochiweb_cover.erl \ + mochiweb_echo.erl \ + mochiweb_headers.erl \ + mochiweb_html.erl \ + mochiweb_http.erl \ + mochiweb_io.erl \ + mochiweb_mime.erl \ + mochiweb_multipart.erl \ + mochiweb_request.erl \ + mochiweb_response.erl \ + mochiweb_skel.erl \ + mochiweb_socket.erl \ + mochiweb_socket_server.erl \ + mochiweb_sup.erl \ + mochiweb_util.erl \ + reloader.erl mochiwebebin_make_generated_file_list = \ - mochifmt.beam \ - mochifmt_records.beam \ - mochifmt_std.beam \ - mochihex.beam \ - mochijson.beam \ - mochijson2.beam \ - mochinum.beam \ + mochifmt.beam \ + mochifmt_records.beam \ + mochifmt_std.beam \ + mochiglobal.beam \ + mochihex.beam \ + mochijson.beam \ + mochijson2.beam \ + mochilists.beam \ + mochilogfile2.beam \ + mochinum.beam \ + mochitemp.beam \ + mochiutf8.beam \ mochiweb.app 
\ - mochiweb.beam \ - mochiweb_app.beam \ - mochiweb_charref.beam \ - mochiweb_cookies.beam \ - mochiweb_echo.beam \ - mochiweb_headers.beam \ - mochiweb_html.beam \ - mochiweb_http.beam \ - mochiweb_multipart.beam \ - mochiweb_request.beam \ - mochiweb_response.beam \ - mochiweb_skel.beam \ - mochiweb_socket_server.beam \ - mochiweb_sup.beam \ - mochiweb_util.beam \ - reloader.beam + mochiweb.beam \ + mochiweb_acceptor.beam \ + mochiweb_app.beam \ + mochiweb_charref.beam \ + mochiweb_cookies.beam \ + mochiweb_cover.beam \ + mochiweb_echo.beam \ + mochiweb_headers.beam \ + mochiweb_html.beam \ + mochiweb_http.beam \ + mochiweb_io.beam \ + mochiweb_mime.beam \ + mochiweb_multipart.beam \ + mochiweb_request.beam \ + mochiweb_response.beam \ + mochiweb_skel.beam \ + mochiweb_socket.beam \ + mochiweb_socket_server.beam \ + mochiweb_sup.beam \ + mochiweb_util.beam \ + reloader.beam mochiwebebin_DATA = \ $(mochiwebebin_make_generated_file_list) EXTRA_DIST = \ - $(mochiweb_file_collection) + $(mochiweb_file_collection) \ + internal.hrl CLEANFILES = \ $(mochiwebebin_make_generated_file_list) @@ -77,4 +98,5 @@ CLEANFILES = \ cp $< $@ %.beam: %.erl + $(ERLC) $(ERLC_FLAGS) $< diff --git a/src/mochiweb/internal.hrl b/src/mochiweb/internal.hrl new file mode 100644 index 00000000..6db899a0 --- /dev/null +++ b/src/mochiweb/internal.hrl @@ -0,0 +1,3 @@ + +-define(RECBUF_SIZE, 8192). + diff --git a/src/mochiweb/mochifmt.erl b/src/mochiweb/mochifmt.erl index da0a133a..5bc6b9c4 100644 --- a/src/mochiweb/mochifmt.erl +++ b/src/mochiweb/mochifmt.erl @@ -10,7 +10,6 @@ -export([tokenize/1, format/3, get_field/3, format_field/3]). -export([bformat/2, bformat/3]). -export([f/2, f/3]). --export([test/0]). -record(conversion, {length, precision, ctype, align, fill_char, sign}). @@ -113,15 +112,6 @@ bformat(Format, Args) -> bformat(Format, Args, Module) -> iolist_to_binary(format(Format, Args, Module)). -%% @spec test() -> ok -%% @doc Run tests. -test() -> - ok = test_tokenize(), - ok = test_format(), - ok = test_std(), - ok = test_records(), - ok. - %% Internal API add_raw("", Acc) -> @@ -375,14 +365,21 @@ parse_std_conversion([$. | Spec], Acc) -> parse_std_conversion([Type], Acc) -> parse_std_conversion("", Acc#conversion{ctype=ctype(Type)}). -test_tokenize() -> + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +tokenize_test() -> {?MODULE, [{raw, "ABC"}]} = tokenize("ABC"), {?MODULE, [{format, {"0", "", ""}}]} = tokenize("{0}"), {?MODULE, [{raw, "ABC"}, {format, {"1", "", ""}}, {raw, "DEF"}]} = tokenize("ABC{1}DEF"), ok. -test_format() -> +format_test() -> <<" -4">> = bformat("{0:4}", [-4]), <<" 4">> = bformat("{0:4}", [4]), <<" 4">> = bformat("{0:{0}}", [4]), @@ -410,12 +407,12 @@ test_format() -> {{2008,5,4}, {4, 2, 2}}), ok. -test_std() -> +std_test() -> M = mochifmt_std:new(), <<"01">> = bformat("{0}{1}", [0, 1], M), ok. -test_records() -> +records_test() -> M = mochifmt_records:new([{conversion, record_info(fields, conversion)}]), R = #conversion{length=long, precision=hard, sign=peace}, long = M:get_value("length", R), @@ -424,3 +421,5 @@ test_records() -> <<"long hard">> = bformat("{length} {precision}", R, M), <<"long hard">> = bformat("{0.length} {0.precision}", [R], M), ok. + +-endif. 
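%% Quick sketch of the formatting API whose tests were just converted to
%% eunit above: Python-style "{index:spec}" format strings. Expected
%% values follow the width-4 cases in format_test.
-module(mochifmt_demo).
-export([run/0]).

run() ->
    <<"1 + 2">> = mochifmt:bformat("{0} + {1}", [1, 2]),
    <<"   4">>  = mochifmt:bformat("{0:4}", [4]),
    ok.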
diff --git a/src/mochiweb/mochifmt_records.erl b/src/mochiweb/mochifmt_records.erl index 94c77978..2326d1dd 100644 --- a/src/mochiweb/mochifmt_records.erl +++ b/src/mochiweb/mochifmt_records.erl @@ -28,3 +28,11 @@ get_rec_index(Atom, [Atom | _], Index) -> Index; get_rec_index(Atom, [_ | Rest], Index) -> get_rec_index(Atom, Rest, 1 + Index). + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochifmt_std.erl b/src/mochiweb/mochifmt_std.erl index 9442016a..d4d74f6f 100644 --- a/src/mochiweb/mochifmt_std.erl +++ b/src/mochiweb/mochifmt_std.erl @@ -21,3 +21,10 @@ get_value(Key, Args) -> format_field(Arg, Format) -> mochifmt:format_field(Arg, Format, THIS). + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiglobal.erl b/src/mochiweb/mochiglobal.erl new file mode 100644 index 00000000..c740b878 --- /dev/null +++ b/src/mochiweb/mochiglobal.erl @@ -0,0 +1,107 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2010 Mochi Media, Inc. +%% @doc Abuse module constant pools as a "read-only shared heap" (since erts 5.6) +%% <a href="http://www.erlang.org/pipermail/erlang-questions/2009-March/042503.html">[1]</a>. +-module(mochiglobal). +-author("Bob Ippolito <bob@mochimedia.com>"). +-export([get/1, get/2, put/2, delete/1]). + +-spec get(atom()) -> any() | undefined. +%% @equiv get(K, undefined) +get(K) -> + get(K, undefined). + +-spec get(atom(), T) -> any() | T. +%% @doc Get the term for K or return Default. +get(K, Default) -> + get(K, Default, key_to_module(K)). + +get(_K, Default, Mod) -> + try Mod:term() + catch error:undef -> + Default + end. + +-spec put(atom(), any()) -> ok. +%% @doc Store term V at K, replaces an existing term if present. +put(K, V) -> + put(K, V, key_to_module(K)). + +put(_K, V, Mod) -> + Bin = compile(Mod, V), + code:purge(Mod), + code:load_binary(Mod, atom_to_list(Mod) ++ ".erl", Bin), + ok. + +-spec delete(atom()) -> boolean(). +%% @doc Delete term stored at K, no-op if non-existent. +delete(K) -> + delete(K, key_to_module(K)). + +delete(_K, Mod) -> + code:purge(Mod), + code:delete(Mod). + +-spec key_to_module(atom()) -> atom(). +key_to_module(K) -> + list_to_atom("mochiglobal:" ++ atom_to_list(K)). + +-spec compile(atom(), any()) -> binary(). +compile(Module, T) -> + {ok, Module, Bin} = compile:forms(forms(Module, T), + [verbose, report_errors]), + Bin. + +-spec forms(atom(), any()) -> [erl_syntax:syntaxTree()]. +forms(Module, T) -> + [erl_syntax:revert(X) || X <- term_to_abstract(Module, term, T)]. + +-spec term_to_abstract(atom(), atom(), any()) -> [erl_syntax:syntaxTree()]. +term_to_abstract(Module, Getter, T) -> + [%% -module(Module). + erl_syntax:attribute( + erl_syntax:atom(module), + [erl_syntax:atom(Module)]), + %% -export([Getter/0]). + erl_syntax:attribute( + erl_syntax:atom(export), + [erl_syntax:list( + [erl_syntax:arity_qualifier( + erl_syntax:atom(Getter), + erl_syntax:integer(0))])]), + %% Getter() -> T. + erl_syntax:function( + erl_syntax:atom(Getter), + [erl_syntax:clause([], none, [erl_syntax:abstract(T)])])]. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). 
+get_put_delete_test() -> + K = '$$test$$mochiglobal', + delete(K), + ?assertEqual( + bar, + get(K, bar)), + try + ?MODULE:put(K, baz), + ?assertEqual( + baz, + get(K, bar)), + ?MODULE:put(K, wibble), + ?assertEqual( + wibble, + ?MODULE:get(K)) + after + delete(K) + end, + ?assertEqual( + bar, + get(K, bar)), + ?assertEqual( + undefined, + ?MODULE:get(K)), + ok. +-endif. diff --git a/src/mochiweb/mochihex.erl b/src/mochiweb/mochihex.erl index 7fe6899e..44a2aa7f 100644 --- a/src/mochiweb/mochihex.erl +++ b/src/mochiweb/mochihex.erl @@ -6,7 +6,7 @@ -module(mochihex). -author('bob@mochimedia.com'). --export([test/0, to_hex/1, to_bin/1, to_int/1, dehex/1, hexdigit/1]). +-export([to_hex/1, to_bin/1, to_int/1, dehex/1, hexdigit/1]). %% @type iolist() = [char() | binary() | iolist()] %% @type iodata() = iolist() | binary() @@ -46,16 +46,6 @@ hexdigit(C) when C >= 0, C =< 9 -> hexdigit(C) when C =< 15 -> C + $a - 10. -%% @spec test() -> ok -%% @doc Test this module. -test() -> - "ff000ff1" = to_hex([255, 0, 15, 241]), - <<255, 0, 15, 241>> = to_bin("ff000ff1"), - 16#ff000ff1 = to_int("ff000ff1"), - "ff000ff1" = to_hex(16#ff000ff1), - ok. - - %% Internal API to_hex(<<>>, Acc) -> @@ -73,3 +63,29 @@ to_bin([], Acc) -> to_bin([C1, C2 | Rest], Acc) -> to_bin(Rest, [(dehex(C1) bsl 4) bor dehex(C2) | Acc]). + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +to_hex_test() -> + "ff000ff1" = to_hex([255, 0, 15, 241]), + "ff000ff1" = to_hex(16#ff000ff1), + "0" = to_hex(16#0), + ok. + +to_bin_test() -> + <<255, 0, 15, 241>> = to_bin("ff000ff1"), + <<255, 0, 10, 161>> = to_bin("Ff000aA1"), + ok. + +to_int_test() -> + 16#ff000ff1 = to_int("ff000ff1"), + 16#ff000aa1 = to_int("FF000Aa1"), + 16#0 = to_int("0"), + ok. + +-endif. diff --git a/src/mochiweb/mochijson.erl b/src/mochiweb/mochijson.erl index 74695a75..2e3d1452 100644 --- a/src/mochiweb/mochijson.erl +++ b/src/mochiweb/mochijson.erl @@ -8,7 +8,6 @@ -export([decoder/1, decode/1]). -export([binary_encoder/1, binary_encode/1]). -export([binary_decoder/1, binary_decode/1]). --export([test/0]). % This is a macro to placate syntax highlighters.. -define(Q, $\"). @@ -91,10 +90,6 @@ binary_encode(Any) -> binary_decode(S) -> mochijson2:decode(S). -test() -> - test_all(), - mochijson2:test(). - %% Internal API parse_encoder_options([], State) -> @@ -145,7 +140,7 @@ json_encode_proplist([], _State) -> "{}"; json_encode_proplist(Props, State) -> F = fun ({K, V}, Acc) -> - KS = case K of + KS = case K of K when is_atom(K) -> json_encode_string_utf8(atom_to_list(K)); K when is_integer(K) -> @@ -321,12 +316,12 @@ tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) -> % coalesce UTF-16 surrogate pair? C = dehex(C0) bor (dehex(C1) bsl 4) bor - (dehex(C2) bsl 8) bor + (dehex(C2) bsl 8) bor (dehex(C3) bsl 12), tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]); tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF -> tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]). - + tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc) when is_list(C); is_binary(C); C >= 16#7f -> List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)), @@ -407,6 +402,13 @@ tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- -> {{const, list_to_float(Float)}, Rest, S1} end. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + %% testing constructs borrowed from the Yaws JSON implementation. %% Create an object from a list of Key/Value pairs. 
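%% Sketch contrasting the two JSON decoders this tree ships: mochijson
%% returns string keys and values, while mochijson2 (next file) returns
%% binaries. Values are illustrative.
-module(mochijson_demo).
-export([run/0]).

run() ->
    {struct, [{"key", "value"}]} =
        mochijson:decode("{\"key\":\"value\"}"),
    {struct, [{<<"key">>, <<"value">>}]} =
        mochijson2:decode("{\"key\":\"value\"}"),
    ok.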
@@ -419,7 +421,7 @@ is_obj({struct, Props}) -> true; (_) -> false - end, + end, lists:all(F, Props). obj_from_list(Props) -> @@ -462,11 +464,10 @@ equiv_list([], []) -> equiv_list([V1 | L1], [V2 | L2]) -> equiv(V1, V2) andalso equiv_list(L1, L2). -test_all() -> - test_issue33(), +e2j_vec_test() -> test_one(e2j_test_vec(utf8), 1). -test_issue33() -> +issue33_test() -> %% http://code.google.com/p/mochiweb/issues/detail?id=33 Js = {struct, [{"key", [194, 163]}]}, Encoder = encoder([{input_encoding, utf8}]), @@ -526,3 +527,5 @@ e2j_test_vec(utf8) -> {{array, [-123, "foo", obj_from_list([{"bar", {array, []}}]), null]}, "[-123,\"foo\",{\"bar\":[]},null]"} ]. + +-endif. diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl index 66f68bf0..64cabc86 100644 --- a/src/mochiweb/mochijson2.erl +++ b/src/mochiweb/mochijson2.erl @@ -9,7 +9,6 @@ -author('bob@mochimedia.com'). -export([encoder/1, encode/1]). -export([decoder/1, decode/1]). --export([test/0]). % This is a macro to placate syntax highlighters.. -define(Q, $\"). @@ -39,8 +38,9 @@ %% @type json_number() = integer() | float() %% @type json_array() = [json_term()] %% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} %% @type json_term() = json_string() | json_number() | json_array() | -%% json_object() +%% json_object() | json_iolist() -record(encoder, {handler=null, utf8=false}). @@ -75,9 +75,6 @@ decoder(Options) -> decode(S) -> json_decode(S, #decoder{}). -test() -> - test_all(). - %% Internal API parse_encoder_options([], State) -> @@ -98,11 +95,8 @@ json_encode(false, _State) -> <<"false">>; json_encode(null, _State) -> <<"null">>; -json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 -> - %% Anything outside of 32-bit integers should be encoded as a float - integer_to_list(I); json_encode(I, _State) when is_integer(I) -> - mochinum:digits(float(I)); + integer_to_list(I); json_encode(F, _State) when is_float(F) -> mochinum:digits(F); json_encode(S, State) when is_binary(S); is_atom(S) -> @@ -111,6 +105,8 @@ json_encode(Array, State) when is_list(Array) -> json_encode_array(Array, State); json_encode({struct, Props}, State) when is_list(Props) -> json_encode_proplist(Props, State); +json_encode({json, IoList}, _State) -> + IoList; json_encode(Bad, #encoder{handler=null}) -> exit({json_encode, {bad_term, Bad}}); json_encode(Bad, State=#encoder{handler=Handler}) -> @@ -205,12 +201,10 @@ json_bin_is_safe(<<C, Rest/binary>>) -> false; $\t -> false; - C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + C when C >= 0, C < $\s; C >= 16#7f -> false; C when C < 16#7f -> - json_bin_is_safe(Rest); - _ -> - false + json_bin_is_safe(Rest) end. 
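%% Sketch of two encoder behaviours changed above: integers outside the
%% 32-bit range are now emitted as integers rather than coerced to
%% floats, and a {json, IoList} term splices pre-encoded JSON straight
%% into the output. Values are illustrative.
-module(mochijson2_demo).
-export([run/0]).

run() ->
    <<"9999999999">> =
        iolist_to_binary(mochijson2:encode(9999999999)),
    <<"{\"k\":[1,2]}">> =
        iolist_to_binary(
          mochijson2:encode({struct, [{k, {json, "[1,2]"}}]})),
    ok.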
json_encode_string_unicode([], _State, Acc) -> @@ -408,8 +402,22 @@ tokenize_string(B, S=#decoder{offset=O}, Acc) -> Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), tokenize_string(B, ?ADV_COL(S, 6), Acc1) end; - <<_:O/binary, C, _/binary>> -> - tokenize_string(B, ?INC_CHAR(S, C), [C | Acc]) + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) end. tokenize_number(B, S) -> @@ -510,6 +518,12 @@ tokenize(B, S=#decoder{offset=O}) -> trim = S#decoder.state, {eof, S} end. +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + %% testing constructs borrowed from the Yaws JSON implementation. @@ -519,19 +533,13 @@ obj_new() -> {struct, []}. is_obj({struct, Props}) -> - F = fun ({K, _}) when is_binary(K) -> - true; - (_) -> - false - end, + F = fun ({K, _}) when is_binary(K) -> true end, lists:all(F, Props). obj_from_list(Props) -> Obj = {struct, Props}, - case is_obj(Obj) of - true -> Obj; - false -> exit({json_bad_object, Obj}) - end. + ?assert(is_obj(Obj)), + Obj. %% Test for equivalence of Erlang terms. %% Due to arbitrary order of construction, equivalent objects might @@ -544,9 +552,7 @@ equiv(L1, L2) when is_list(L1), is_list(L2) -> equiv_list(L1, L2); equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; -equiv(true, true) -> true; -equiv(false, false) -> true; -equiv(null, null) -> true. +equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. %% Object representation and traversal order is unknown. %% Use the sledgehammer and sort property lists. @@ -566,11 +572,11 @@ equiv_list([], []) -> equiv_list([V1 | L1], [V2 | L2]) -> equiv(V1, V2) andalso equiv_list(L1, L2). -test_all() -> +decode_test() -> [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), - <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]), - test_encoder_utf8(), - test_input_validation(), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). + +e2j_vec_test() -> test_one(e2j_test_vec(utf8), 1). test_one([], _N) -> @@ -627,7 +633,7 @@ e2j_test_vec(utf8) -> ]. %% test utf8 encoding -test_encoder_utf8() -> +encoder_utf8_test() -> %% safe conversion case (default) [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = encode(<<1,"\321\202\320\265\321\201\321\202">>), @@ -637,11 +643,11 @@ test_encoder_utf8() -> [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = Enc(<<1,"\321\202\320\265\321\201\321\202">>). 
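%% Sketch of the strict byte-wise UTF-8 validation added to
%% tokenize_string above: well-formed multi-byte sequences decode, while
%% a stray continuation byte now throws invalid_utf8 instead of being
%% copied through.
-module(utf8_decode_demo).
-export([run/0]).

run() ->
    %% U+00A3 (pound sign) as a valid two-byte sequence
    <<194, 163>> = mochijson2:decode(<<$", 194, 163, $">>),
    %% a lone continuation byte is rejected
    invalid_utf8 = (catch mochijson2:decode(<<$", 16#80, $">>)),
    ok.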
-test_input_validation() -> +input_validation_test() -> Good = [ - {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, % pound - {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, % euro - {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} % denarius + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius ], lists:foreach(fun({CodePoint, UTF8}) -> Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), @@ -649,15 +655,148 @@ test_input_validation() -> end, Good), Bad = [ - % 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte <<?Q, 16#80, ?Q>>, - % missing continuations, last byte in each should be 80-BF + %% missing continuations, last byte in each should be 80-BF <<?Q, 16#C2, 16#7F, ?Q>>, <<?Q, 16#E0, 16#80,16#7F, ?Q>>, <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, - % we don't support code points > 10FFFF per RFC 3629 - <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>> + %% we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>, + %% escape characters trigger a different code path + <<?Q, $\\, $\n, 16#80, ?Q>> ], - lists:foreach(fun(X) -> - ok = try decode(X) catch invalid_utf8 -> ok end - end, Bad). + lists:foreach( + fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end, + %% could be {ucs,{bad_utf8_character_code}} or + %% {json_encode,{bad_char,_}} + {'EXIT', _} = (catch encode(X)) + end, Bad). + +inline_json_test() -> + ?assertEqual(<<"\"iodata iodata\"">>, + iolist_to_binary( + encode({json, [<<"\"iodata">>, " iodata\""]}))), + ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, + decode( + encode({struct, + [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), + ok. + +big_unicode_test() -> + UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(UTF8Seq))), + ?assertEqual( + UTF8Seq, + decode(iolist_to_binary(encode(UTF8Seq)))), + ok. + +custom_decoder_test() -> + ?assertEqual( + {struct, [{<<"key">>, <<"value">>}]}, + (decoder([]))("{\"key\": \"value\"}")), + F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, + ?assertEqual( + win, + (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), + ok. + +atom_test() -> + %% JSON native atoms + [begin + ?assertEqual(A, decode(atom_to_list(A))), + ?assertEqual(iolist_to_binary(atom_to_list(A)), + iolist_to_binary(encode(A))) + end || A <- [true, false, null]], + %% Atom to string + ?assertEqual( + <<"\"foo\"">>, + iolist_to_binary(encode(foo))), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))), + ok. + +key_encode_test() -> + %% Some forms are accepted as keys that would not be strings in other + %% cases + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{foo, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{"foo", 1}]}))), + ?assertEqual( + <<"{\"\\ud834\\udd20\":1}">>, + iolist_to_binary( + encode({struct, [{[16#0001d120], 1}]}))), + ?assertEqual( + <<"{\"1\":1}">>, + iolist_to_binary(encode({struct, [{1, 1}]}))), + ok. 
+ +unsafe_chars_test() -> + Chars = "\"\\\b\f\n\r\t", + [begin + ?assertEqual(false, json_string_is_safe([C])), + ?assertEqual(false, json_bin_is_safe(<<C>>)), + ?assertEqual(<<C>>, decode(encode(<<C>>))) + end || C <- Chars], + ?assertEqual( + false, + json_string_is_safe([16#0001d120])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))), + ?assertEqual( + [16#0001d120], + xmerl_ucs:from_utf8( + binary_to_list( + decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))), + ?assertEqual( + false, + json_string_is_safe([16#110000])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))), + %% solidus can be escaped but isn't unsafe by default + ?assertEqual( + <<"/">>, + decode(<<"\"\\/\"">>)), + ok. + +int_test() -> + ?assertEqual(0, decode("0")), + ?assertEqual(1, decode("1")), + ?assertEqual(11, decode("11")), + ok. + +large_int_test() -> + ?assertEqual(<<"-2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(-2147483649214748364921474836492147483649))), + ?assertEqual(<<"2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(2147483649214748364921474836492147483649))), + ok. + +float_test() -> + ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), + ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), + ok. + +handler_test() -> + ?assertEqual( + {'EXIT',{json_encode,{bad_term,{}}}}, + catch encode({})), + F = fun ({}) -> [] end, + ?assertEqual( + <<"[]">>, + iolist_to_binary((encoder([{handler, F}]))({}))), + ok. + +-endif. diff --git a/src/mochiweb/mochilists.erl b/src/mochiweb/mochilists.erl new file mode 100644 index 00000000..8981e7b6 --- /dev/null +++ b/src/mochiweb/mochilists.erl @@ -0,0 +1,104 @@ +%% @copyright Copyright (c) 2010 Mochi Media, Inc. +%% @author David Reid <dreid@mochimedia.com> + +%% @doc Utility functions for dealing with proplists. + +-module(mochilists). +-author("David Reid <dreid@mochimedia.com>"). +-export([get_value/2, get_value/3, is_defined/2, set_default/2, set_defaults/2]). + +%% @spec set_default({Key::term(), Value::term()}, Proplist::list()) -> list() +%% +%% @doc Return new Proplist with {Key, Value} set if not is_defined(Key, Proplist). +set_default({Key, Value}, Proplist) -> + case is_defined(Key, Proplist) of + true -> + Proplist; + false -> + [{Key, Value} | Proplist] + end. + +%% @spec set_defaults([{Key::term(), Value::term()}], Proplist::list()) -> list() +%% +%% @doc Return new Proplist with {Key, Value} set if not is_defined(Key, Proplist). +set_defaults(DefaultProps, Proplist) -> + lists:foldl(fun set_default/2, Proplist, DefaultProps). + + +%% @spec is_defined(Key::term(), Proplist::list()) -> bool() +%% +%% @doc Returns true if Propist contains at least one entry associated +%% with Key, otherwise false is returned. +is_defined(Key, Proplist) -> + lists:keyfind(Key, 1, Proplist) =/= false. + + +%% @spec get_value(Key::term(), Proplist::list()) -> term() | undefined +%% +%% @doc Return the value of <code>Key</code> or undefined +get_value(Key, Proplist) -> + get_value(Key, Proplist, undefined). + +%% @spec get_value(Key::term(), Proplist::list(), Default::term()) -> term() +%% +%% @doc Return the value of <code>Key</code> or <code>Default</code> +get_value(_Key, [], Default) -> + Default; +get_value(Key, Proplist, Default) -> + case lists:keyfind(Key, 1, Proplist) of + false -> + Default; + {Key, Value} -> + Value + end. 
+ +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +set_defaults_test() -> + ?assertEqual( + [{k, v}], + set_defaults([{k, v}], [])), + ?assertEqual( + [{k, v}], + set_defaults([{k, vee}], [{k, v}])), + ?assertEqual( + lists:sort([{kay, vee}, {k, v}]), + lists:sort(set_defaults([{k, vee}, {kay, vee}], [{k, v}]))), + ok. + +set_default_test() -> + ?assertEqual( + [{k, v}], + set_default({k, v}, [])), + ?assertEqual( + [{k, v}], + set_default({k, vee}, [{k, v}])), + ok. + +get_value_test() -> + ?assertEqual( + undefined, + get_value(foo, [])), + ?assertEqual( + undefined, + get_value(foo, [{bar, baz}])), + ?assertEqual( + bar, + get_value(foo, [{foo, bar}])), + ?assertEqual( + default, + get_value(foo, [], default)), + ?assertEqual( + default, + get_value(foo, [{bar, baz}], default)), + ?assertEqual( + bar, + get_value(foo, [{foo, bar}], default)), + ok. + +-endif. + diff --git a/src/mochiweb/mochilogfile2.erl b/src/mochiweb/mochilogfile2.erl new file mode 100644 index 00000000..c34ee73a --- /dev/null +++ b/src/mochiweb/mochilogfile2.erl @@ -0,0 +1,140 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2010 Mochi Media, Inc. + +%% @doc Write newline delimited log files, ensuring that if a truncated +%% entry is found on log open then it is fixed before writing. Uses +%% delayed writes and raw files for performance. +-module(mochilogfile2). +-author('bob@mochimedia.com'). + +-export([open/1, write/2, close/1, name/1]). + +%% @spec open(Name) -> Handle +%% @doc Open the log file Name, creating or appending as necessary. All data +%% at the end of the file will be truncated until a newline is found, to +%% ensure that all records are complete. +open(Name) -> + {ok, FD} = file:open(Name, [raw, read, write, delayed_write, binary]), + fix_log(FD), + {?MODULE, Name, FD}. + +%% @spec name(Handle) -> string() +%% @doc Return the path of the log file. +name({?MODULE, Name, _FD}) -> + Name. + +%% @spec write(Handle, IoData) -> ok +%% @doc Write IoData to the log file referenced by Handle. +write({?MODULE, _Name, FD}, IoData) -> + ok = file:write(FD, [IoData, $\n]), + ok. + +%% @spec close(Handle) -> ok +%% @doc Close the log file referenced by Handle. +close({?MODULE, _Name, FD}) -> + ok = file:sync(FD), + ok = file:close(FD), + ok. + +fix_log(FD) -> + {ok, Location} = file:position(FD, eof), + Seek = find_last_newline(FD, Location), + {ok, Seek} = file:position(FD, Seek), + ok = file:truncate(FD), + ok. + +%% Seek backwards to the last valid log entry +find_last_newline(_FD, N) when N =< 1 -> + 0; +find_last_newline(FD, Location) -> + case file:pread(FD, Location - 1, 1) of + {ok, <<$\n>>} -> + Location; + {ok, _} -> + find_last_newline(FD, Location - 1) + end. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +name_test() -> + D = mochitemp:mkdtemp(), + FileName = filename:join(D, "open_close_test.log"), + H = open(FileName), + ?assertEqual( + FileName, + name(H)), + close(H), + file:delete(FileName), + file:del_dir(D), + ok. + +open_close_test() -> + D = mochitemp:mkdtemp(), + FileName = filename:join(D, "open_close_test.log"), + OpenClose = fun () -> + H = open(FileName), + ?assertEqual( + true, + filelib:is_file(FileName)), + ok = close(H), + ?assertEqual( + {ok, <<>>}, + file:read_file(FileName)), + ok + end, + OpenClose(), + OpenClose(), + file:delete(FileName), + file:del_dir(D), + ok. 
+ +write_test() -> + D = mochitemp:mkdtemp(), + FileName = filename:join(D, "write_test.log"), + F = fun () -> + H = open(FileName), + write(H, "test line"), + close(H), + ok + end, + F(), + ?assertEqual( + {ok, <<"test line\n">>}, + file:read_file(FileName)), + F(), + ?assertEqual( + {ok, <<"test line\ntest line\n">>}, + file:read_file(FileName)), + file:delete(FileName), + file:del_dir(D), + ok. + +fix_log_test() -> + D = mochitemp:mkdtemp(), + FileName = filename:join(D, "write_test.log"), + file:write_file(FileName, <<"first line good\nsecond line bad">>), + F = fun () -> + H = open(FileName), + write(H, "test line"), + close(H), + ok + end, + F(), + ?assertEqual( + {ok, <<"first line good\ntest line\n">>}, + file:read_file(FileName)), + file:write_file(FileName, <<"first line bad">>), + F(), + ?assertEqual( + {ok, <<"test line\n">>}, + file:read_file(FileName)), + F(), + ?assertEqual( + {ok, <<"test line\ntest line\n">>}, + file:read_file(FileName)), + ok. + +-endif. diff --git a/src/mochiweb/mochinum.erl b/src/mochiweb/mochinum.erl index 6a866042..a7e2bfbc 100644 --- a/src/mochiweb/mochinum.erl +++ b/src/mochiweb/mochinum.erl @@ -11,7 +11,7 @@ -module(mochinum). -author("Bob Ippolito <bob@mochimedia.com>"). --export([digits/1, frexp/1, int_pow/2, int_ceil/1, test/0]). +-export([digits/1, frexp/1, int_pow/2, int_ceil/1]). %% IEEE 754 Float exponent bias -define(FLOAT_BIAS, 1022). @@ -120,7 +120,7 @@ digits1(Float, Exp, Frac) -> case Exp >= 0 of true -> BExp = 1 bsl Exp, - case (Frac /= ?BIG_POW) of + case (Frac =/= ?BIG_POW) of true -> scale((Frac * BExp * 2), 2, BExp, BExp, Round, Round, Float); @@ -129,7 +129,7 @@ digits1(Float, Exp, Frac) -> Round, Round, Float) end; false -> - case (Exp == ?MIN_EXP) orelse (Frac /= ?BIG_POW) of + case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of true -> scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, Round, Round, Float); @@ -228,14 +228,13 @@ log2floor(Int, N) -> log2floor(Int bsr 1, 1 + N). -test() -> - ok = test_frexp(), - ok = test_int_ceil(), - ok = test_int_pow(), - ok = test_digits(), - ok. +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). -test_int_ceil() -> +int_ceil_test() -> 1 = int_ceil(0.0001), 0 = int_ceil(0.0), 1 = int_ceil(0.99), @@ -244,7 +243,7 @@ test_int_ceil() -> -2 = int_ceil(-2.0), ok. -test_int_pow() -> +int_pow_test() -> 1 = int_pow(1, 1), 1 = int_pow(1, 0), 1 = int_pow(10, 0), @@ -253,17 +252,58 @@ test_int_pow() -> 1000 = int_pow(10, 3), ok. 
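%% Sketch of the round-trip property the digits/1 tests below lean on:
%% the shortest decimal representation emitted still reads back to
%% exactly the same float.
-module(mochinum_demo).
-export([run/0]).

run() ->
    F = 0.1,
    "0.1" = mochinum:digits(F),
    F = list_to_float(mochinum:digits(F)),
    3 = mochinum:int_ceil(2.5),
    8 = mochinum:int_pow(2, 3),
    ok.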
-test_digits() -> - "0" = digits(0), - "0.0" = digits(0.0), - "1.0" = digits(1.0), - "-1.0" = digits(-1.0), - "0.1" = digits(0.1), - "0.01" = digits(0.01), - "0.001" = digits(0.001), +digits_test() -> + ?assertEqual("0", + digits(0)), + ?assertEqual("0.0", + digits(0.0)), + ?assertEqual("1.0", + digits(1.0)), + ?assertEqual("-1.0", + digits(-1.0)), + ?assertEqual("0.1", + digits(0.1)), + ?assertEqual("0.01", + digits(0.01)), + ?assertEqual("0.001", + digits(0.001)), + ?assertEqual("1.0e+6", + digits(1000000.0)), + ?assertEqual("0.5", + digits(0.5)), + ?assertEqual("4503599627370496.0", + digits(4503599627370496.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual("4.9406564584124654e-324", + digits(SmallDenorm)), + ?assertEqual(SmallDenorm, + list_to_float(digits(SmallDenorm))), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual("2.225073858507201e-308", + digits(BigDenorm)), + ?assertEqual(BigDenorm, + list_to_float(digits(BigDenorm))), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual("2.2250738585072014e-308", + digits(SmallNorm)), + ?assertEqual(SmallNorm, + list_to_float(digits(SmallNorm))), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual("1.7976931348623157e+308", + digits(LargeNorm)), + ?assertEqual(LargeNorm, + list_to_float(digits(LargeNorm))), ok. -test_frexp() -> +frexp_test() -> %% zero {0.0, 0} = frexp(0.0), %% one @@ -287,3 +327,5 @@ test_frexp() -> <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, {0.99999999999999989, 1024} = frexp(LargeNorm), ok. + +-endif. diff --git a/src/mochiweb/mochitemp.erl b/src/mochiweb/mochitemp.erl new file mode 100644 index 00000000..bb23d2a6 --- /dev/null +++ b/src/mochiweb/mochitemp.erl @@ -0,0 +1,310 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2010 Mochi Media, Inc. + +%% @doc Create temporary files and directories. Requires crypto to be started. + +-module(mochitemp). +-export([gettempdir/0]). +-export([mkdtemp/0, mkdtemp/3]). +-export([rmtempdir/1]). +%% -export([mkstemp/4]). +-define(SAFE_CHARS, {$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, + $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, + $A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, + $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z, + $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $_}). +-define(TMP_MAX, 10000). + +-include_lib("kernel/include/file.hrl"). + +%% TODO: An ugly wrapper over the mktemp tool with open_port and sadness? +%% We can't implement this race-free in Erlang without the ability +%% to issue O_CREAT|O_EXCL. I suppose we could hack something with +%% mkdtemp, del_dir, open. +%% mkstemp(Suffix, Prefix, Dir, Options) -> +%% ok. + +rmtempdir(Dir) -> + case file:del_dir(Dir) of + {error, eexist} -> + ok = rmtempdirfiles(Dir), + ok = file:del_dir(Dir); + ok -> + ok + end. + +rmtempdirfiles(Dir) -> + {ok, Files} = file:list_dir(Dir), + ok = rmtempdirfiles(Dir, Files). + +rmtempdirfiles(_Dir, []) -> + ok; +rmtempdirfiles(Dir, [Basename | Rest]) -> + Path = filename:join([Dir, Basename]), + case filelib:is_dir(Path) of + true -> + ok = rmtempdir(Path); + false -> + ok = file:delete(Path) + end, + rmtempdirfiles(Dir, Rest). + +mkdtemp() -> + mkdtemp("", "tmp", gettempdir()). 
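%% Usage sketch for the mochitemp API above (crypto must be running, as
%% the module doc notes): create a private 0700 temp directory, use it,
%% then remove it recursively with rmtempdir/1.
-module(mochitemp_demo).
-export([run/0]).

run() ->
    crypto:start(),
    Dir = mochitemp:mkdtemp(),
    ok = file:write_file(filename:join(Dir, "scratch"), <<"bytes">>),
    ok = mochitemp:rmtempdir(Dir),
    ok.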
+ +mkdtemp(Suffix, Prefix, Dir) -> + mkdtemp_n(rngpath_fun(Suffix, Prefix, Dir), ?TMP_MAX). + + + +mkdtemp_n(RngPath, 1) -> + make_dir(RngPath()); +mkdtemp_n(RngPath, N) -> + try make_dir(RngPath()) + catch throw:{error, eexist} -> + mkdtemp_n(RngPath, N - 1) + end. + +make_dir(Path) -> + case file:make_dir(Path) of + ok -> + ok; + E={error, eexist} -> + throw(E) + end, + %% Small window for a race condition here because dir is created 777 + ok = file:write_file_info(Path, #file_info{mode=8#0700}), + Path. + +rngpath_fun(Prefix, Suffix, Dir) -> + fun () -> + filename:join([Dir, Prefix ++ rngchars(6) ++ Suffix]) + end. + +rngchars(0) -> + ""; +rngchars(N) -> + [rngchar() | rngchars(N - 1)]. + +rngchar() -> + rngchar(crypto:rand_uniform(0, tuple_size(?SAFE_CHARS))). + +rngchar(C) -> + element(1 + C, ?SAFE_CHARS). + +%% @spec gettempdir() -> string() +%% @doc Get a usable temporary directory using the first of these that is a directory: +%% $TMPDIR, $TMP, $TEMP, "/tmp", "/var/tmp", "/usr/tmp", ".". +gettempdir() -> + gettempdir(gettempdir_checks(), fun normalize_dir/1). + +gettempdir_checks() -> + [{fun os:getenv/1, ["TMPDIR", "TMP", "TEMP"]}, + {fun gettempdir_identity/1, ["/tmp", "/var/tmp", "/usr/tmp"]}, + {fun gettempdir_cwd/1, [cwd]}]. + +gettempdir_identity(L) -> + L. + +gettempdir_cwd(cwd) -> + {ok, L} = file:get_cwd(), + L. + +gettempdir([{_F, []} | RestF], Normalize) -> + gettempdir(RestF, Normalize); +gettempdir([{F, [L | RestL]} | RestF], Normalize) -> + case Normalize(F(L)) of + false -> + gettempdir([{F, RestL} | RestF], Normalize); + Dir -> + Dir + end. + +normalize_dir(False) when False =:= false orelse False =:= "" -> + %% Erlang doesn't have an unsetenv, wtf. + false; +normalize_dir(L) -> + Dir = filename:absname(L), + case filelib:is_dir(Dir) of + false -> + false; + true -> + Dir + end. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +pushenv(L) -> + [{K, os:getenv(K)} || K <- L]. +popenv(L) -> + F = fun ({K, false}) -> + %% Erlang doesn't have an unsetenv, wtf. + os:putenv(K, ""); + ({K, V}) -> + os:putenv(K, V) + end, + lists:foreach(F, L). + +gettempdir_fallback_test() -> + ?assertEqual( + "/", + gettempdir([{fun gettempdir_identity/1, ["/--not-here--/"]}, + {fun gettempdir_identity/1, ["/"]}], + fun normalize_dir/1)), + ?assertEqual( + "/", + %% simulate a true os:getenv unset env + gettempdir([{fun gettempdir_identity/1, [false]}, + {fun gettempdir_identity/1, ["/"]}], + fun normalize_dir/1)), + ok. + +gettempdir_identity_test() -> + ?assertEqual( + "/", + gettempdir([{fun gettempdir_identity/1, ["/"]}], fun normalize_dir/1)), + ok. + +gettempdir_cwd_test() -> + {ok, Cwd} = file:get_cwd(), + ?assertEqual( + normalize_dir(Cwd), + gettempdir([{fun gettempdir_cwd/1, [cwd]}], fun normalize_dir/1)), + ok. + +rngchars_test() -> + crypto:start(), + ?assertEqual( + "", + rngchars(0)), + ?assertEqual( + 10, + length(rngchars(10))), + ok. + +rngchar_test() -> + ?assertEqual( + $a, + rngchar(0)), + ?assertEqual( + $A, + rngchar(26)), + ?assertEqual( + $_, + rngchar(62)), + ok. + +mkdtemp_n_failonce_test() -> + crypto:start(), + D = mkdtemp(), + Path = filename:join([D, "testdir"]), + %% Toggle the existence of a dir so that it fails + %% the first time and succeeds the second. 
+ F = fun () ->
+ case filelib:is_dir(Path) of
+ true ->
+ file:del_dir(Path);
+ false ->
+ file:make_dir(Path)
+ end,
+ Path
+ end,
+ try
+ %% Fails the first time
+ ?assertThrow(
+ {error, eexist},
+ mkdtemp_n(F, 1)),
+ %% Reset state
+ file:del_dir(Path),
+ %% Succeeds the second time
+ ?assertEqual(
+ Path,
+ mkdtemp_n(F, 2))
+ after rmtempdir(D)
+ end,
+ ok.
+
+mkdtemp_n_fail_test() ->
+ {ok, Cwd} = file:get_cwd(),
+ ?assertThrow(
+ {error, eexist},
+ mkdtemp_n(fun () -> Cwd end, 1)),
+ ?assertThrow(
+ {error, eexist},
+ mkdtemp_n(fun () -> Cwd end, 2)),
+ ok.
+
+make_dir_fail_test() ->
+ {ok, Cwd} = file:get_cwd(),
+ ?assertThrow(
+ {error, eexist},
+ make_dir(Cwd)),
+ ok.
+
+mkdtemp_test() ->
+ crypto:start(),
+ D = mkdtemp(),
+ ?assertEqual(
+ true,
+ filelib:is_dir(D)),
+ ?assertEqual(
+ ok,
+ file:del_dir(D)),
+ ok.
+
+rmtempdir_test() ->
+ crypto:start(),
+ D1 = mkdtemp(),
+ ?assertEqual(
+ true,
+ filelib:is_dir(D1)),
+ ?assertEqual(
+ ok,
+ rmtempdir(D1)),
+ D2 = mkdtemp(),
+ ?assertEqual(
+ true,
+ filelib:is_dir(D2)),
+ ok = file:write_file(filename:join([D2, "foo"]), <<"bytes">>),
+ D3 = mkdtemp("suffix", "prefix", D2),
+ ?assertEqual(
+ true,
+ filelib:is_dir(D3)),
+ ok = file:write_file(filename:join([D3, "foo"]), <<"bytes">>),
+ ?assertEqual(
+ ok,
+ rmtempdir(D2)),
+ ?assertEqual(
+ {error, enoent},
+ file:consult(D3)),
+ ?assertEqual(
+ {error, enoent},
+ file:consult(D2)),
+ ok.
+
+gettempdir_env_test() ->
+ Env = pushenv(["TMPDIR", "TEMP", "TMP"]),
+ FalseEnv = [{"TMPDIR", false}, {"TEMP", false}, {"TMP", false}],
+ try
+ popenv(FalseEnv),
+ popenv([{"TMPDIR", "/"}]),
+ ?assertEqual(
+ "/",
+ os:getenv("TMPDIR")),
+ ?assertEqual(
+ "/",
+ gettempdir()),
+ {ok, Cwd} = file:get_cwd(),
+ popenv(FalseEnv),
+ popenv([{"TMP", Cwd}]),
+ ?assertEqual(
+ normalize_dir(Cwd),
+ gettempdir())
+ after popenv(Env)
+ end,
+ ok.
+
+-endif.
diff --git a/src/mochiweb/mochiutf8.erl b/src/mochiweb/mochiutf8.erl
new file mode 100644
index 00000000..206e1186
--- /dev/null
+++ b/src/mochiweb/mochiutf8.erl
@@ -0,0 +1,316 @@
+%% @copyright 2010 Mochi Media, Inc.
+%% @author Bob Ippolito <bob@mochimedia.com>
+
+%% @doc Algorithm to convert any binary to a valid UTF-8 sequence by ignoring
+%% invalid bytes.
+
+-module(mochiutf8).
+-export([valid_utf8_bytes/1, codepoint_to_bytes/1, bytes_to_codepoints/1]).
+-export([bytes_foldl/3, codepoint_foldl/3, read_codepoint/1, len/1]).
+
+%% External API
+
+-type unichar_low() :: 0..16#d7ff.
+-type unichar_high() :: 16#e000..16#10ffff.
+-type unichar() :: unichar_low() | unichar_high().
+
+-spec codepoint_to_bytes(unichar()) -> binary().
+%% @doc Convert a unicode codepoint to UTF-8 bytes.
+codepoint_to_bytes(C) when (C >= 16#00 andalso C =< 16#7f) ->
+ %% U+0000 - U+007F - 7 bits
+ <<C>>;
+codepoint_to_bytes(C) when (C >= 16#080 andalso C =< 16#07FF) ->
+ %% U+0080 - U+07FF - 11 bits
+ <<0:5, B1:5, B0:6>> = <<C:16>>,
+ <<2#110:3, B1:5,
+ 2#10:2, B0:6>>;
+codepoint_to_bytes(C) when (C >= 16#0800 andalso C =< 16#FFFF) andalso
+ (C < 16#D800 orelse C > 16#DFFF) ->
+ %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
+ <<B2:4, B1:6, B0:6>> = <<C:16>>,
+ <<2#1110:4, B2:4,
+ 2#10:2, B1:6,
+ 2#10:2, B0:6>>;
+codepoint_to_bytes(C) when (C >= 16#010000 andalso C =< 16#10FFFF) ->
+ %% U+10000 - U+10FFFF - 21 bits
+ <<0:3, B3:3, B2:6, B1:6, B0:6>> = <<C:24>>,
+ <<2#11110:5, B3:3,
+ 2#10:2, B2:6,
+ 2#10:2, B1:6,
+ 2#10:2, B0:6>>.
+
+-spec codepoints_to_bytes([unichar()]) -> binary().
+%% @doc Convert a list of codepoints to a UTF-8 binary.
+codepoints_to_bytes(L) ->
+ <<<<(codepoint_to_bytes(C))/binary>> || C <- L>>.
+
+-spec read_codepoint(binary()) -> {unichar(), binary(), binary()}.
+read_codepoint(Bin = <<2#0:1, C:7, Rest/binary>>) ->
+ %% U+0000 - U+007F - 7 bits
+ <<B:1/binary, _/binary>> = Bin,
+ {C, B, Rest};
+read_codepoint(Bin = <<2#110:3, B1:5,
+ 2#10:2, B0:6,
+ Rest/binary>>) ->
+ %% U+0080 - U+07FF - 11 bits
+ case <<B1:5, B0:6>> of
+ <<C:11>> when C >= 16#80 ->
+ <<B:2/binary, _/binary>> = Bin,
+ {C, B, Rest}
+ end;
+read_codepoint(Bin = <<2#1110:4, B2:4,
+ 2#10:2, B1:6,
+ 2#10:2, B0:6,
+ Rest/binary>>) ->
+ %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
+ case <<B2:4, B1:6, B0:6>> of
+ <<C:16>> when (C >= 16#0800 andalso C =< 16#FFFF) andalso
+ (C < 16#D800 orelse C > 16#DFFF) ->
+ <<B:3/binary, _/binary>> = Bin,
+ {C, B, Rest}
+ end;
+read_codepoint(Bin = <<2#11110:5, B3:3,
+ 2#10:2, B2:6,
+ 2#10:2, B1:6,
+ 2#10:2, B0:6,
+ Rest/binary>>) ->
+ %% U+10000 - U+10FFFF - 21 bits
+ case <<B3:3, B2:6, B1:6, B0:6>> of
+ <<C:21>> when (C >= 16#010000 andalso C =< 16#10FFFF) ->
+ <<B:4/binary, _/binary>> = Bin,
+ {C, B, Rest}
+ end.
+
+-spec codepoint_foldl(fun((unichar(), _) -> _), _, binary()) -> _.
+codepoint_foldl(F, Acc, <<>>) when is_function(F, 2) ->
+ Acc;
+codepoint_foldl(F, Acc, Bin) ->
+ {C, _, Rest} = read_codepoint(Bin),
+ codepoint_foldl(F, F(C, Acc), Rest).
+
+-spec bytes_foldl(fun((binary(), _) -> _), _, binary()) -> _.
+bytes_foldl(F, Acc, <<>>) when is_function(F, 2) ->
+ Acc;
+bytes_foldl(F, Acc, Bin) ->
+ {_, B, Rest} = read_codepoint(Bin),
+ bytes_foldl(F, F(B, Acc), Rest).
+
+-spec bytes_to_codepoints(binary()) -> [unichar()].
+bytes_to_codepoints(B) ->
+ lists:reverse(codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], B)).
+
+-spec len(binary()) -> non_neg_integer().
+len(<<>>) ->
+ 0;
+len(B) ->
+ {_, _, Rest} = read_codepoint(B),
+ 1 + len(Rest).
+
+-spec valid_utf8_bytes(B::binary()) -> binary().
+%% @doc Return only the bytes in B that represent valid UTF-8. Uses
+%% the following recursive algorithm: skip one byte if B does not
+%% follow UTF-8 syntax (a 1-4 byte encoding of some number),
+%% skip a sequence of 2-4 bytes if it represents an overlong encoding
+%% or bad code point (surrogate U+D800 - U+DFFF or > U+10FFFF).
+valid_utf8_bytes(B) when is_binary(B) ->
+ binary_skip_bytes(B, invalid_utf8_indexes(B)).
+
+%% Internal API
+
+-spec binary_skip_bytes(binary(), [non_neg_integer()]) -> binary().
+%% @doc Return B, but skipping the 0-based indexes in L.
+binary_skip_bytes(B, []) ->
+ B;
+binary_skip_bytes(B, L) ->
+ binary_skip_bytes(B, L, 0, []).
+
+%% @private
+-spec binary_skip_bytes(binary(), [non_neg_integer()], non_neg_integer(), iolist()) -> binary().
+binary_skip_bytes(B, [], _N, Acc) ->
+ iolist_to_binary(lists:reverse([B | Acc]));
+binary_skip_bytes(<<_, RestB/binary>>, [N | RestL], N, Acc) ->
+ binary_skip_bytes(RestB, RestL, 1 + N, Acc);
+binary_skip_bytes(<<C, RestB/binary>>, L, N, Acc) ->
+ binary_skip_bytes(RestB, L, 1 + N, [C | Acc]).
+
+-spec invalid_utf8_indexes(binary()) -> [non_neg_integer()].
+%% @doc Return the 0-based indexes in B that are not valid UTF-8.
+invalid_utf8_indexes(B) ->
+ invalid_utf8_indexes(B, 0, []).
+
+%% @private
+-spec invalid_utf8_indexes(binary(), non_neg_integer(), [non_neg_integer()]) -> [non_neg_integer()].
+invalid_utf8_indexes(<<C, Rest/binary>>, N, Acc) when C < 16#80 ->
+ %% U+0000 - U+007F - 7 bits
+ invalid_utf8_indexes(Rest, 1 + N, Acc);
+invalid_utf8_indexes(<<C1, C2, Rest/binary>>, N, Acc)
+ when C1 band 16#E0 =:= 16#C0,
+ C2 band 16#C0 =:= 16#80 ->
+ %% U+0080 - U+07FF - 11 bits
+ case ((C1 band 16#1F) bsl 6) bor (C2 band 16#3F) of
+ C when C < 16#80 ->
+ %% Overlong encoding.
+ invalid_utf8_indexes(Rest, 2 + N, [1 + N, N | Acc]);
+ _ ->
+ %% Upper bound U+07FF does not need to be checked
+ invalid_utf8_indexes(Rest, 2 + N, Acc)
+ end;
+invalid_utf8_indexes(<<C1, C2, C3, Rest/binary>>, N, Acc)
+ when C1 band 16#F0 =:= 16#E0,
+ C2 band 16#C0 =:= 16#80,
+ C3 band 16#C0 =:= 16#80 ->
+ %% U+0800 - U+FFFF - 16 bits
+ case ((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
+ (C3 band 16#3F) of
+ C when (C < 16#800) orelse (C >= 16#D800 andalso C =< 16#DFFF) ->
+ %% Overlong encoding or surrogate.
+ invalid_utf8_indexes(Rest, 3 + N, [2 + N, 1 + N, N | Acc]);
+ _ ->
+ %% Upper bound U+FFFF does not need to be checked
+ invalid_utf8_indexes(Rest, 3 + N, Acc)
+ end;
+invalid_utf8_indexes(<<C1, C2, C3, C4, Rest/binary>>, N, Acc)
+ when C1 band 16#F8 =:= 16#F0,
+ C2 band 16#C0 =:= 16#80,
+ C3 band 16#C0 =:= 16#80,
+ C4 band 16#C0 =:= 16#80 ->
+ %% U+10000 - U+10FFFF - 21 bits
+ case ((((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
+ (C3 band 16#3F)) bsl 6) bor (C4 band 16#3F) of
+ C when (C < 16#10000) orelse (C > 16#10FFFF) ->
+ %% Overlong encoding or invalid code point.
+ invalid_utf8_indexes(Rest, 4 + N, [3 + N, 2 + N, 1 + N, N | Acc]);
+ _ ->
+ invalid_utf8_indexes(Rest, 4 + N, Acc)
+ end;
+invalid_utf8_indexes(<<_, Rest/binary>>, N, Acc) ->
+ %% Invalid char
+ invalid_utf8_indexes(Rest, 1 + N, [N | Acc]);
+invalid_utf8_indexes(<<>>, _N, Acc) ->
+ lists:reverse(Acc).
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+binary_skip_bytes_test() ->
+ ?assertEqual(<<"foo">>,
+ binary_skip_bytes(<<"foo">>, [])),
+ ?assertEqual(<<"foobar">>,
+ binary_skip_bytes(<<"foo bar">>, [3])),
+ ?assertEqual(<<"foo">>,
+ binary_skip_bytes(<<"foo bar">>, [3, 4, 5, 6])),
+ ?assertEqual(<<"oo bar">>,
+ binary_skip_bytes(<<"foo bar">>, [0])),
+ ok.
+
+invalid_utf8_indexes_test() ->
+ ?assertEqual(
+ [],
+ invalid_utf8_indexes(<<"unicode snowman for you: ", 226, 152, 131>>)),
+ ?assertEqual(
+ [0],
+ invalid_utf8_indexes(<<128>>)),
+ ?assertEqual(
+ [57,59,60,64,66,67],
+ invalid_utf8_indexes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
+ 167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)),
+ ok.
+
+codepoint_to_bytes_test() ->
+ %% U+0000 - U+007F - 7 bits
+ %% U+0080 - U+07FF - 11 bits
+ %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
+ %% U+10000 - U+10FFFF - 21 bits
+ ?assertEqual(
+ <<"a">>,
+ codepoint_to_bytes($a)),
+ ?assertEqual(
+ <<16#c2, 16#80>>,
+ codepoint_to_bytes(16#80)),
+ ?assertEqual(
+ <<16#df, 16#bf>>,
+ codepoint_to_bytes(16#07ff)),
+ ?assertEqual(
+ <<16#ef, 16#bf, 16#bf>>,
+ codepoint_to_bytes(16#ffff)),
+ ?assertEqual(
+ <<16#f4, 16#8f, 16#bf, 16#bf>>,
+ codepoint_to_bytes(16#10ffff)),
+ ok.
+
+bytes_foldl_test() ->
+ ?assertEqual(
+ <<"abc">>,
+ bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>, <<"abc">>)),
+ ?assertEqual(
+ <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>,
+ bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>,
+ <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+ ok.
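
Taken together, the mochiutf8 API round-trips like this (utf8_demo is an illustrative sketch, not part of the module):

utf8_demo() ->
    Snowman = mochiutf8:codepoint_to_bytes(16#2603),    % <<226,152,131>>
    [16#2603] = mochiutf8:bytes_to_codepoints(Snowman),
    1 = mochiutf8:len(Snowman),
    %% 16#FF can never appear in UTF-8, so valid_utf8_bytes/1 drops it
    <<"ok">> = mochiutf8:valid_utf8_bytes(<<"ok", 16#FF>>),
    ok.
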
+ +bytes_to_codepoints_test() -> + ?assertEqual( + "abc" ++ [16#2603, 16#4e2d, 16#85, 16#10ffff], + bytes_to_codepoints(<<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)), + ok. + +codepoint_foldl_test() -> + ?assertEqual( + "cba", + codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], <<"abc">>)), + ?assertEqual( + [16#10ffff, 16#85, 16#4e2d, 16#2603 | "cba"], + codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], + <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)), + ok. + +len_test() -> + ?assertEqual( + 29, + len(<<"unicode snowman for you: ", 226, 152, 131, 228, 184, 173, 194, 133, 244, 143, 191, 191>>)), + ok. + +codepoints_to_bytes_test() -> + ?assertEqual( + iolist_to_binary(lists:map(fun codepoint_to_bytes/1, lists:seq(1, 1000))), + codepoints_to_bytes(lists:seq(1, 1000))), + ok. + +valid_utf8_bytes_test() -> + ?assertEqual( + <<"invalid U+11ffff: ">>, + valid_utf8_bytes(<<"invalid U+11ffff: ", 244, 159, 191, 191>>)), + ?assertEqual( + <<"U+10ffff: ", 244, 143, 191, 191>>, + valid_utf8_bytes(<<"U+10ffff: ", 244, 143, 191, 191>>)), + ?assertEqual( + <<"overlong 2-byte encoding (a): ">>, + valid_utf8_bytes(<<"overlong 2-byte encoding (a): ", 2#11000001, 2#10100001>>)), + ?assertEqual( + <<"overlong 2-byte encoding (!): ">>, + valid_utf8_bytes(<<"overlong 2-byte encoding (!): ", 2#11000000, 2#10100001>>)), + ?assertEqual( + <<"mu: ", 194, 181>>, + valid_utf8_bytes(<<"mu: ", 194, 181>>)), + ?assertEqual( + <<"bad coding bytes: ">>, + valid_utf8_bytes(<<"bad coding bytes: ", 2#10011111, 2#10111111, 2#11111111>>)), + ?assertEqual( + <<"low surrogate (unpaired): ">>, + valid_utf8_bytes(<<"low surrogate (unpaired): ", 237, 176, 128>>)), + ?assertEqual( + <<"high surrogate (unpaired): ">>, + valid_utf8_bytes(<<"high surrogate (unpaired): ", 237, 191, 191>>)), + ?assertEqual( + <<"unicode snowman for you: ", 226, 152, 131>>, + valid_utf8_bytes(<<"unicode snowman for you: ", 226, 152, 131>>)), + ?assertEqual( + <<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (AISPW))">>, + valid_utf8_bytes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (", + 167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)), + ok. + +-endif. diff --git a/src/mochiweb/mochiweb.app.in b/src/mochiweb/mochiweb.app.in index b0f90144..c6a2630b 100644 --- a/src/mochiweb/mochiweb.app.in +++ b/src/mochiweb/mochiweb.app.in @@ -1,6 +1,6 @@ {application, mochiweb, [{description, "MochiMedia Web Server"}, - {vsn, "113"}, + {vsn, "7c2bc2"}, {modules, [ mochihex, mochijson, diff --git a/src/mochiweb/mochiweb.app.src b/src/mochiweb/mochiweb.app.src new file mode 100644 index 00000000..a1c95aae --- /dev/null +++ b/src/mochiweb/mochiweb.app.src @@ -0,0 +1,9 @@ +%% This is generated from src/mochiweb.app.src +{application, mochiweb, + [{description, "MochiMedia Web Server"}, + {vsn, "7c2bc2"}, + {modules, []}, + {registered, []}, + {mod, {mochiweb_app, []}}, + {env, []}, + {applications, [kernel, stdlib, crypto, inets]}]}. diff --git a/src/mochiweb/mochiweb.erl b/src/mochiweb/mochiweb.erl index 0f4d52a6..3118028b 100644 --- a/src/mochiweb/mochiweb.erl +++ b/src/mochiweb/mochiweb.erl @@ -9,7 +9,6 @@ -export([start/0, stop/0]). -export([new_request/1, new_response/1]). -export([all_loaded/0, all_loaded/1, reload/0]). --export([test/0]). %% @spec start() -> ok %% @doc Start the MochiWeb server. @@ -24,21 +23,6 @@ stop() -> application:stop(crypto), Res. -%% @spec test() -> ok -%% @doc Run all of the tests for MochiWeb. 
-test() -> - mochiweb_util:test(), - mochiweb_headers:test(), - mochiweb_cookies:test(), - mochihex:test(), - mochinum:test(), - mochijson:test(), - mochiweb_charref:test(), - mochiweb_html:test(), - mochifmt:test(), - test_request(), - ok. - reload() -> [c:l(Module) || Module <- all_loaded()]. @@ -96,11 +80,6 @@ new_response({Request, Code, Headers}) -> %% Internal API -test_request() -> - R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []), - "/foo/bar/baz wibble quux" = R:get(path), - ok. - ensure_started(App) -> case application:start(App) of ok -> @@ -108,3 +87,203 @@ ensure_started(App) -> {error, {already_started, App}} -> ok end. + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +-record(treq, {path, body= <<>>, xreply= <<>>}). + +ssl_cert_opts() -> + EbinDir = filename:dirname(code:which(?MODULE)), + CertDir = filename:join([EbinDir, "..", "support", "test-materials"]), + CertFile = filename:join(CertDir, "test_ssl_cert.pem"), + KeyFile = filename:join(CertDir, "test_ssl_key.pem"), + [{certfile, CertFile}, {keyfile, KeyFile}]. + +with_server(Transport, ServerFun, ClientFun) -> + ServerOpts0 = [{ip, "127.0.0.1"}, {port, 0}, {loop, ServerFun}], + ServerOpts = case Transport of + plain -> + ServerOpts0; + ssl -> + ServerOpts0 ++ [{ssl, true}, {ssl_opts, ssl_cert_opts()}] + end, + {ok, Server} = mochiweb_http:start(ServerOpts), + Port = mochiweb_socket_server:get(Server, port), + Res = (catch ClientFun(Transport, Port)), + mochiweb_http:stop(Server), + Res. + +request_test() -> + R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []), + "/foo/bar/baz wibble quux" = R:get(path), + ok. + +single_http_GET_test() -> + do_GET(plain, 1). + +single_https_GET_test() -> + do_GET(ssl, 1). + +multiple_http_GET_test() -> + do_GET(plain, 3). + +multiple_https_GET_test() -> + do_GET(ssl, 3). + +hundred_http_GET_test() -> + do_GET(plain, 100). + +hundred_https_GET_test() -> + do_GET(ssl, 100). + +single_128_http_POST_test() -> + do_POST(plain, 128, 1). + +single_128_https_POST_test() -> + do_POST(ssl, 128, 1). + +single_2k_http_POST_test() -> + do_POST(plain, 2048, 1). + +single_2k_https_POST_test() -> + do_POST(ssl, 2048, 1). + +single_100k_http_POST_test() -> + do_POST(plain, 102400, 1). + +single_100k_https_POST_test() -> + do_POST(ssl, 102400, 1). + +multiple_100k_http_POST_test() -> + do_POST(plain, 102400, 3). + +multiple_100K_https_POST_test() -> + do_POST(ssl, 102400, 3). + +hundred_128_http_POST_test() -> + do_POST(plain, 128, 100). + +hundred_128_https_POST_test() -> + do_POST(ssl, 128, 100). + +do_GET(Transport, Times) -> + PathPrefix = "/whatever/", + ReplyPrefix = "You requested: ", + ServerFun = fun (Req) -> + Reply = ReplyPrefix ++ Req:get(path), + Req:ok({"text/plain", Reply}) + end, + TestReqs = [begin + Path = PathPrefix ++ integer_to_list(N), + ExpectedReply = list_to_binary(ReplyPrefix ++ Path), + #treq{path=Path, xreply=ExpectedReply} + end || N <- lists:seq(1, Times)], + ClientFun = new_client_fun('GET', TestReqs), + ok = with_server(Transport, ServerFun, ClientFun), + ok. 
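
The with_server/3 helper above generalizes beyond the GET/POST suites; a hedged sketch of driving it with an ad-hoc handler (ping_demo and its raw-socket client are illustrative only):

ping_demo() ->
    ServerFun = fun (Req) -> Req:ok({"text/plain", "pong"}) end,
    ClientFun = fun (_Transport, Port) ->
                        {ok, Sock} = gen_tcp:connect("127.0.0.1", Port,
                                                     [binary, {active, false}]),
                        ok = gen_tcp:send(Sock, "GET /ping HTTP/1.0\r\n\r\n"),
                        {ok, _StatusAndHeaders} = gen_tcp:recv(Sock, 0),
                        gen_tcp:close(Sock)
                end,
    ok = with_server(plain, ServerFun, ClientFun).
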
+ +do_POST(Transport, Size, Times) -> + ServerFun = fun (Req) -> + Body = Req:recv_body(), + Headers = [{"Content-Type", "application/octet-stream"}], + Req:respond({201, Headers, Body}) + end, + TestReqs = [begin + Path = "/stuff/" ++ integer_to_list(N), + Body = crypto:rand_bytes(Size), + #treq{path=Path, body=Body, xreply=Body} + end || N <- lists:seq(1, Times)], + ClientFun = new_client_fun('POST', TestReqs), + ok = with_server(Transport, ServerFun, ClientFun), + ok. + +new_client_fun(Method, TestReqs) -> + fun (Transport, Port) -> + client_request(Transport, Port, Method, TestReqs) + end. + +client_request(Transport, Port, Method, TestReqs) -> + Opts = [binary, {active, false}, {packet, http}], + SockFun = case Transport of + plain -> + {ok, Socket} = gen_tcp:connect("127.0.0.1", Port, Opts), + fun (recv) -> + gen_tcp:recv(Socket, 0); + ({recv, Length}) -> + gen_tcp:recv(Socket, Length); + ({send, Data}) -> + gen_tcp:send(Socket, Data); + ({setopts, L}) -> + inet:setopts(Socket, L) + end; + ssl -> + {ok, Socket} = ssl:connect("127.0.0.1", Port, [{ssl_imp, new} | Opts]), + fun (recv) -> + ssl:recv(Socket, 0); + ({recv, Length}) -> + ssl:recv(Socket, Length); + ({send, Data}) -> + ssl:send(Socket, Data); + ({setopts, L}) -> + ssl:setopts(Socket, L) + end + end, + client_request(SockFun, Method, TestReqs). + +client_request(SockFun, _Method, []) -> + {the_end, {error, closed}} = {the_end, SockFun(recv)}, + ok; +client_request(SockFun, Method, + [#treq{path=Path, body=Body, xreply=ExReply} | Rest]) -> + Request = [atom_to_list(Method), " ", Path, " HTTP/1.1\r\n", + client_headers(Body, Rest =:= []), + "\r\n", + Body], + ok = SockFun({send, Request}), + case Method of + 'GET' -> + {ok, {http_response, {1,1}, 200, "OK"}} = SockFun(recv); + 'POST' -> + {ok, {http_response, {1,1}, 201, "Created"}} = SockFun(recv) + end, + ok = SockFun({setopts, [{packet, httph}]}), + {ok, {http_header, _, 'Server', _, "MochiWeb" ++ _}} = SockFun(recv), + {ok, {http_header, _, 'Date', _, _}} = SockFun(recv), + {ok, {http_header, _, 'Content-Type', _, _}} = SockFun(recv), + {ok, {http_header, _, 'Content-Length', _, ConLenStr}} = SockFun(recv), + ContentLength = list_to_integer(ConLenStr), + {ok, http_eoh} = SockFun(recv), + ok = SockFun({setopts, [{packet, raw}]}), + {payload, ExReply} = {payload, drain_reply(SockFun, ContentLength, <<>>)}, + ok = SockFun({setopts, [{packet, http}]}), + client_request(SockFun, Method, Rest). + +client_headers(Body, IsLastRequest) -> + ["Host: localhost\r\n", + case Body of + <<>> -> + ""; + _ -> + ["Content-Type: application/octet-stream\r\n", + "Content-Length: ", integer_to_list(byte_size(Body)), "\r\n"] + end, + case IsLastRequest of + true -> + "Connection: close\r\n"; + false -> + "" + end]. + +drain_reply(_SockFun, 0, Acc) -> + Acc; +drain_reply(SockFun, Length, Acc) -> + Sz = erlang:min(Length, 1024), + {ok, B} = SockFun({recv, Sz}), + drain_reply(SockFun, Length - Sz, <<Acc/bytes, B/bytes>>). + +-endif. diff --git a/src/mochiweb/mochiweb_acceptor.erl b/src/mochiweb/mochiweb_acceptor.erl new file mode 100644 index 00000000..79d172c3 --- /dev/null +++ b/src/mochiweb/mochiweb_acceptor.erl @@ -0,0 +1,48 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2010 Mochi Media, Inc. + +%% @doc MochiWeb acceptor. + +-module(mochiweb_acceptor). +-author('bob@mochimedia.com'). + +-include("internal.hrl"). + +-export([start_link/3, init/3]). + +start_link(Server, Listen, Loop) -> + proc_lib:spawn_link(?MODULE, init, [Server, Listen, Loop]). 
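
call_loop/2 below accepts the handler in any of three shapes; roughly, a hypothetical socket-level handler my_mod:sock_loop/1 could be wired up in any of these ways (names and options are assumptions for illustration):

{ok, _} = mochiweb_socket_server:start(
            [{name, demo1}, {port, 9001}, {loop, {my_mod, sock_loop}}]),
{ok, _} = mochiweb_socket_server:start(
            [{name, demo2}, {port, 9002}, {loop, {my_mod, sock_loop, [extra]}}]),
{ok, _} = mochiweb_socket_server:start(
            [{name, demo3}, {port, 9003}, {loop, fun my_mod:sock_loop/1}]).

In the {M, F, A} shape the accepted Socket is prepended to the extra argument list.
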
+ +init(Server, Listen, Loop) -> + T1 = now(), + case catch mochiweb_socket:accept(Listen) of + {ok, Socket} -> + gen_server:cast(Server, {accepted, self(), timer:now_diff(now(), T1)}), + call_loop(Loop, Socket); + {error, closed} -> + exit(normal); + {error, timeout} -> + exit(normal); + {error, esslaccept} -> + exit(normal); + Other -> + error_logger:error_report( + [{application, mochiweb}, + "Accept failed error", + lists:flatten(io_lib:format("~p", [Other]))]), + exit({error, accept_failed}) + end. + +call_loop({M, F}, Socket) -> + M:F(Socket); +call_loop({M, F, A}, Socket) -> + erlang:apply(M, F, [Socket | A]); +call_loop(Loop, Socket) -> + Loop(Socket). + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_app.erl b/src/mochiweb/mochiweb_app.erl index 2b437f6c..5d67787b 100644 --- a/src/mochiweb/mochiweb_app.erl +++ b/src/mochiweb/mochiweb_app.erl @@ -18,3 +18,10 @@ start(_Type, _StartArgs) -> %% @doc application stop callback for mochiweb. stop(_State) -> ok. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_charref.erl b/src/mochiweb/mochiweb_charref.erl index d037d2f8..99cd5502 100644 --- a/src/mochiweb/mochiweb_charref.erl +++ b/src/mochiweb/mochiweb_charref.erl @@ -3,7 +3,7 @@ %% @doc Converts HTML 4 charrefs and entities to codepoints. -module(mochiweb_charref). --export([charref/1, test/0]). +-export([charref/1]). %% External API. @@ -27,16 +27,6 @@ charref([$# | L]) -> charref(L) -> entity(L). -%% @spec test() -> ok -%% @doc Run tests for mochiweb_charref. -test() -> - 1234 = charref("#1234"), - 255 = charref("#xfF"), - 255 = charref("#XFf"), - 38 = charref("amp"), - undefined = charref("not_an_entity"), - ok. - %% Internal API. entity("nbsp") -> 160; @@ -293,3 +283,26 @@ entity("rsaquo") -> 8250; entity("euro") -> 8364; entity(_) -> undefined. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +exhaustive_entity_test() -> + T = mochiweb_cover:clause_lookup_table(?MODULE, entity), + [?assertEqual(V, entity(K)) || {K, V} <- T]. + +charref_test() -> + 1234 = charref("#1234"), + 255 = charref("#xfF"), + 255 = charref(<<"#XFf">>), + 38 = charref("amp"), + 38 = charref(<<"amp">>), + undefined = charref("not_an_entity"), + undefined = charref("#not_an_entity"), + undefined = charref("#xnot_an_entity"), + ok. + +-endif. diff --git a/src/mochiweb/mochiweb_cookies.erl b/src/mochiweb/mochiweb_cookies.erl index 61711ff0..c090b714 100644 --- a/src/mochiweb/mochiweb_cookies.erl +++ b/src/mochiweb/mochiweb_cookies.erl @@ -4,7 +4,7 @@ %% @doc HTTP Cookie parsing and generating (RFC 2109, RFC 2965). -module(mochiweb_cookies). --export([parse_cookie/1, cookie/3, cookie/2, test/0]). +-export([parse_cookie/1, cookie/3, cookie/2]). -define(QUOTE, $\"). @@ -130,13 +130,6 @@ parse_cookie("") -> parse_cookie(Cookie) -> parse_cookie(Cookie, []). -%% @spec test() -> ok -%% @doc Run tests for mochiweb_cookies. -test() -> - parse_cookie_test(), - cookie_test(), - ok. - %% Internal API parse_cookie([], Acc) -> @@ -198,24 +191,6 @@ skip_past_separator([$, | Rest]) -> skip_past_separator([_ | Rest]) -> skip_past_separator(Rest). 
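
As a quick reference, the generating and parsing halves of this module compose as follows (cookie_roundtrip_demo is an illustrative sketch; the exact attribute order is fixed by cookie/3 as exercised in the tests below):

cookie_roundtrip_demo() ->
    {"Set-Cookie", C} = mochiweb_cookies:cookie("sid", "abc123",
                                                [{path, "/"}, {http_only, true}]),
    %% C is roughly "sid=abc123; Version=1; Path=/; HttpOnly"
    [{"sid", "abc123"}] = mochiweb_cookies:parse_cookie("sid=abc123"),
    ok.
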
-parse_cookie_test() -> - %% RFC example - C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"; - Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\"; - Shipping=\"FedEx\"; $Path=\"/acme\"", - [ - {"Customer","WILE_E_COYOTE"}, - {"Part_Number","Rocket_Launcher_0001"}, - {"Shipping","FedEx"} - ] = parse_cookie(C1), - %% Potential edge cases - [{"foo", "x"}] = parse_cookie("foo=\"\\x\""), - [] = parse_cookie("="), - [{"foo", ""}, {"bar", ""}] = parse_cookie(" foo ; bar "), - [{"foo", ""}, {"bar", ""}] = parse_cookie("foo=;bar="), - [{"foo", "\";"}, {"bar", ""}] = parse_cookie("foo = \"\\\";\";bar "), - [{"foo", "\";bar"}] = parse_cookie("foo=\"\\\";bar"). - any_to_list(V) when is_list(V) -> V; any_to_list(V) when is_atom(V) -> @@ -225,6 +200,81 @@ any_to_list(V) when is_binary(V) -> any_to_list(V) when is_integer(V) -> integer_to_list(V). +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +quote_test() -> + %% ?assertError eunit macro is not compatible with coverage module + try quote(":wq") + catch error:{cookie_quoting_required, ":wq"} -> ok + end, + ?assertEqual( + "foo", + quote(foo)), + ok. + +parse_cookie_test() -> + %% RFC example + C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"; + Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\"; + Shipping=\"FedEx\"; $Path=\"/acme\"", + ?assertEqual( + [{"Customer","WILE_E_COYOTE"}, + {"Part_Number","Rocket_Launcher_0001"}, + {"Shipping","FedEx"}], + parse_cookie(C1)), + %% Potential edge cases + ?assertEqual( + [{"foo", "x"}], + parse_cookie("foo=\"\\x\"")), + ?assertEqual( + [], + parse_cookie("=")), + ?assertEqual( + [{"foo", ""}, {"bar", ""}], + parse_cookie(" foo ; bar ")), + ?assertEqual( + [{"foo", ""}, {"bar", ""}], + parse_cookie("foo=;bar=")), + ?assertEqual( + [{"foo", "\";"}, {"bar", ""}], + parse_cookie("foo = \"\\\";\";bar ")), + ?assertEqual( + [{"foo", "\";bar"}], + parse_cookie("foo=\"\\\";bar")), + ?assertEqual( + [], + parse_cookie([])), + ?assertEqual( + [{"foo", "bar"}, {"baz", "wibble"}], + parse_cookie("foo=bar , baz=wibble ")), + ok. + +domain_test() -> + ?assertEqual( + {"Set-Cookie", + "Customer=WILE_E_COYOTE; " + "Version=1; " + "Domain=acme.com; " + "HttpOnly"}, + cookie("Customer", "WILE_E_COYOTE", + [{http_only, true}, {domain, "acme.com"}])), + ok. + +local_time_test() -> + {"Set-Cookie", S} = cookie("Customer", "WILE_E_COYOTE", + [{max_age, 111}, {secure, true}]), + ?assertMatch( + ["Customer=WILE_E_COYOTE", + " Version=1", + " Expires=" ++ _, + " Max-Age=111", + " Secure"], + string:tokens(S, ";")), + ok. cookie_test() -> C1 = {"Set-Cookie", @@ -238,8 +288,8 @@ cookie_test() -> C1 = cookie(<<"Customer">>, <<"WILE_E_COYOTE">>, [{path, <<"/acme">>}]), {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey", []), - - LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}), + {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey"), + LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}), C2 = {"Set-Cookie", "Customer=WILE_E_COYOTE; " "Version=1; " @@ -255,3 +305,5 @@ cookie_test() -> C3 = cookie("Customer", "WILE_E_COYOTE", [{max_age, 86417}, {local_time, LocalTime}]), ok. + +-endif. diff --git a/src/mochiweb/mochiweb_cover.erl b/src/mochiweb/mochiweb_cover.erl new file mode 100644 index 00000000..6a14ef51 --- /dev/null +++ b/src/mochiweb/mochiweb_cover.erl @@ -0,0 +1,75 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2010 Mochi Media, Inc. 
+ +%% @doc Workarounds for various cover deficiencies. +-module(mochiweb_cover). +-export([get_beam/1, get_abstract_code/1, + get_clauses/2, clause_lookup_table/1]). +-export([clause_lookup_table/2]). + +%% Internal + +get_beam(Module) -> + {Module, Beam, _Path} = code:get_object_code(Module), + Beam. + +get_abstract_code(Beam) -> + {ok, {_Module, + [{abstract_code, + {raw_abstract_v1, L}}]}} = beam_lib:chunks(Beam, [abstract_code]), + L. + +get_clauses(Function, Code) -> + [L] = [Clauses || {function, _, FName, _, Clauses} + <- Code, FName =:= Function], + L. + +clause_lookup_table(Module, Function) -> + clause_lookup_table( + get_clauses(Function, + get_abstract_code(get_beam(Module)))). + +clause_lookup_table(Clauses) -> + lists:foldr(fun clause_fold/2, [], Clauses). + +clause_fold({clause, _, + [InTerm], + _Guards=[], + [OutTerm]}, + Acc) -> + try [{erl_parse:normalise(InTerm), erl_parse:normalise(OutTerm)} | Acc] + catch error:_ -> Acc + end; +clause_fold(_, Acc) -> + Acc. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +foo_table(a) -> b; +foo_table("a") -> <<"b">>; +foo_table(123) -> {4, 3, 2}; +foo_table([list]) -> []; +foo_table([list1, list2]) -> [list1, list2, list3]; +foo_table(ignored) -> some, code, ignored; +foo_table(Var) -> Var. + +foo_table_test() -> + T = clause_lookup_table(?MODULE, foo_table), + [?assertEqual(V, foo_table(K)) || {K, V} <- T]. + +clause_lookup_table_test() -> + ?assertEqual(b, foo_table(a)), + ?assertEqual(ignored, foo_table(ignored)), + ?assertEqual('Var', foo_table('Var')), + ?assertEqual( + [{a, b}, + {"a", <<"b">>}, + {123, {4, 3, 2}}, + {[list], []}, + {[list1, list2], [list1, list2, list3]}], + clause_lookup_table(?MODULE, foo_table)). + +-endif. diff --git a/src/mochiweb/mochiweb_echo.erl b/src/mochiweb/mochiweb_echo.erl index f32d6803..6f7872b9 100644 --- a/src/mochiweb/mochiweb_echo.erl +++ b/src/mochiweb/mochiweb_echo.erl @@ -18,9 +18,9 @@ start() -> {loop, {?MODULE, loop}}]). loop(Socket) -> - case gen_tcp:recv(Socket, 0, 30000) of + case mochiweb_socket:recv(Socket, 0, 30000) of {ok, Data} -> - case gen_tcp:send(Socket, Data) of + case mochiweb_socket:send(Socket, Data) of ok -> loop(Socket); _ -> @@ -29,3 +29,10 @@ loop(Socket) -> _Other -> exit(normal) end. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_headers.erl b/src/mochiweb/mochiweb_headers.erl index d90fd679..4fce9838 100644 --- a/src/mochiweb/mochiweb_headers.erl +++ b/src/mochiweb/mochiweb_headers.erl @@ -10,66 +10,11 @@ -export([default/3, enter_from_list/2, default_from_list/2]). -export([to_list/1, make/1]). -export([from_binary/1]). --export([test/0]). %% @type headers(). %% @type key() = atom() | binary() | string(). %% @type value() = atom() | binary() | string() | integer(). -%% @spec test() -> ok -%% @doc Run tests for this module. 
-test() -> - H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]), - [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H), - H1 = ?MODULE:insert(taco, grande, H), - [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1), - H2 = ?MODULE:make([{"Set-Cookie", "foo"}]), - [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2), - H3 = ?MODULE:insert("Set-Cookie", "bar", H2), - [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3), - "foo, bar" = ?MODULE:get_value("set-cookie", H3), - {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3), - undefined = ?MODULE:get_value("shibby", H3), - none = ?MODULE:lookup("shibby", H3), - H4 = ?MODULE:insert("content-type", - "application/x-www-form-urlencoded; charset=utf8", - H3), - "application/x-www-form-urlencoded" = ?MODULE:get_primary_value( - "content-type", H4), - H4 = ?MODULE:delete_any("nonexistent-header", H4), - H3 = ?MODULE:delete_any("content-type", H4), - HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>, - H_HB = ?MODULE:from_binary(HB), - H_HB = ?MODULE:from_binary(binary_to_list(HB)), - "47" = ?MODULE:get_value("Content-Length", H_HB), - "text/plain" = ?MODULE:get_value("Content-Type", H_HB), - L_H_HB = ?MODULE:to_list(H_HB), - 2 = length(L_H_HB), - true = lists:member({'Content-Length', "47"}, L_H_HB), - true = lists:member({'Content-Type', "text/plain"}, L_H_HB), - HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ], - HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ], - HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ], - H_HL = ?MODULE:from_binary(HL), - H_HL = ?MODULE:from_binary(HL2), - H_HL = ?MODULE:from_binary(HL3), - "47" = ?MODULE:get_value("Content-Length", H_HL), - "text/plain" = ?MODULE:get_value("Content-Type", H_HL), - L_H_HL = ?MODULE:to_list(H_HL), - 2 = length(L_H_HL), - true = lists:member({'Content-Length', "47"}, L_H_HL), - true = lists:member({'Content-Type', "text/plain"}, L_H_HL), - [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)), - [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)), - [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)), - [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)), - [] = ?MODULE:to_list(?MODULE:from_binary("")), - [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])), - [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])), - [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])), - [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])), - ok. - %% @spec empty() -> headers() %% @doc Create an empty headers structure. empty() -> @@ -83,35 +28,34 @@ make(L) when is_list(L) -> make(T) when is_tuple(T) -> T. -%% @spec from_binary(RawHttpHeader()) -> headers() -%% @type RawHttpHeader() -> string() | binary() | [ string() | binary() ] -%% +%% @spec from_binary(iolist()) -> headers() %% @doc Transforms a raw HTTP header into a mochiweb headers structure. %% %% The given raw HTTP header can be one of the following: %% -%% 1) A string or a binary representing a full HTTP header ending with +%% 1) A string or a binary representing a full HTTP header ending with %% double CRLF. %% Examples: +%% ``` %% "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n" -%% <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">> +%% <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>''' %% -%% 2) A list of binaries or strings where each element represents a raw +%% 2) A list of binaries or strings where each element represents a raw %% HTTP header line ending with a single CRLF. 
%% Examples: -%% [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ] -%% [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ] -%% [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ] +%% ``` +%% [<<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">>] +%% ["Content-Length: 47\r\n", "Content-Type: text/plain\r\n"] +%% ["Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">>]''' %% from_binary(RawHttpHeader) when is_binary(RawHttpHeader) -> from_binary(RawHttpHeader, []); - from_binary(RawHttpHeaderList) -> from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])). from_binary(RawHttpHeader, Acc) -> case erlang:decode_packet(httph, RawHttpHeader, []) of - { ok, {http_header, _, H, _, V}, Rest } -> + {ok, {http_header, _, H, _, V}, Rest} -> from_binary(Rest, [{H, V} | Acc]); _ -> make(Acc) @@ -248,4 +192,108 @@ any_to_list(V) when is_binary(V) -> any_to_list(V) when is_integer(V) -> integer_to_list(V). +%% +%% Tests. +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +make_test() -> + Identity = make([{hdr, foo}]), + ?assertEqual( + Identity, + make(Identity)). + +enter_from_list_test() -> + H = make([{hdr, foo}]), + ?assertEqual( + [{baz, "wibble"}, {hdr, "foo"}], + to_list(enter_from_list([{baz, wibble}], H))), + ?assertEqual( + [{hdr, "bar"}], + to_list(enter_from_list([{hdr, bar}], H))), + ok. + +default_from_list_test() -> + H = make([{hdr, foo}]), + ?assertEqual( + [{baz, "wibble"}, {hdr, "foo"}], + to_list(default_from_list([{baz, wibble}], H))), + ?assertEqual( + [{hdr, "foo"}], + to_list(default_from_list([{hdr, bar}], H))), + ok. + +get_primary_value_test() -> + H = make([{hdr, foo}, {baz, <<"wibble;taco">>}]), + ?assertEqual( + "foo", + get_primary_value(hdr, H)), + ?assertEqual( + undefined, + get_primary_value(bar, H)), + ?assertEqual( + "wibble", + get_primary_value(<<"baz">>, H)), + ok. + +set_cookie_test() -> + H = make([{"set-cookie", foo}, {"set-cookie", bar}, {"set-cookie", baz}]), + ?assertEqual( + [{"set-cookie", "foo"}, {"set-cookie", "bar"}, {"set-cookie", "baz"}], + to_list(H)), + ok. 
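
A compact sketch tying these operations together (headers_demo is illustrative only; note how insert/3 folds repeated keys into a comma-separated value, while key lookup stays case-insensitive):

headers_demo() ->
    H0 = mochiweb_headers:make([{"Content-Type", "text/plain; charset=utf-8"}]),
    H1 = mochiweb_headers:insert("Set-Cookie", "a=1", H0),
    H2 = mochiweb_headers:insert("Set-Cookie", "b=2", H1),
    "a=1, b=2" = mochiweb_headers:get_value("set-cookie", H2),
    "text/plain" = mochiweb_headers:get_primary_value("content-type", H2),
    ok.
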
+ +headers_test() -> + H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]), + [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H), + H1 = ?MODULE:insert(taco, grande, H), + [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1), + H2 = ?MODULE:make([{"Set-Cookie", "foo"}]), + [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2), + H3 = ?MODULE:insert("Set-Cookie", "bar", H2), + [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3), + "foo, bar" = ?MODULE:get_value("set-cookie", H3), + {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3), + undefined = ?MODULE:get_value("shibby", H3), + none = ?MODULE:lookup("shibby", H3), + H4 = ?MODULE:insert("content-type", + "application/x-www-form-urlencoded; charset=utf8", + H3), + "application/x-www-form-urlencoded" = ?MODULE:get_primary_value( + "content-type", H4), + H4 = ?MODULE:delete_any("nonexistent-header", H4), + H3 = ?MODULE:delete_any("content-type", H4), + HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>, + H_HB = ?MODULE:from_binary(HB), + H_HB = ?MODULE:from_binary(binary_to_list(HB)), + "47" = ?MODULE:get_value("Content-Length", H_HB), + "text/plain" = ?MODULE:get_value("Content-Type", H_HB), + L_H_HB = ?MODULE:to_list(H_HB), + 2 = length(L_H_HB), + true = lists:member({'Content-Length', "47"}, L_H_HB), + true = lists:member({'Content-Type', "text/plain"}, L_H_HB), + HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ], + HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ], + HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ], + H_HL = ?MODULE:from_binary(HL), + H_HL = ?MODULE:from_binary(HL2), + H_HL = ?MODULE:from_binary(HL3), + "47" = ?MODULE:get_value("Content-Length", H_HL), + "text/plain" = ?MODULE:get_value("Content-Type", H_HL), + L_H_HL = ?MODULE:to_list(H_HL), + 2 = length(L_H_HL), + true = lists:member({'Content-Length', "47"}, L_H_HL), + true = lists:member({'Content-Type', "text/plain"}, L_H_HL), + [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary("")), + [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])), + ok. +-endif. diff --git a/src/mochiweb/mochiweb_html.erl b/src/mochiweb/mochiweb_html.erl index 77100d50..a15c359c 100644 --- a/src/mochiweb/mochiweb_html.erl +++ b/src/mochiweb/mochiweb_html.erl @@ -4,9 +4,9 @@ %% @doc Loosely tokenizes and generates parse trees for HTML 4. -module(mochiweb_html). -export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1, - escape_attr/1, to_html/1, test/0]). + escape_attr/1, to_html/1]). -% This is a macro to placate syntax highlighters.. +%% This is a macro to placate syntax highlighters.. -define(QUOTE, $\"). -define(SQUOTE, $\'). -define(ADV_COL(S, N), @@ -35,6 +35,8 @@ -define(IS_LITERAL_SAFE(C), ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z) orelse (C >= $0 andalso C =< $9))). +-define(PROBABLE_CLOSE(C), + (C =:= $> orelse ?IS_WHITESPACE(C))). 
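
Before the tokenizer internals, a hedged sketch of the module's outer API (html_demo is illustrative only): parse/1 builds a {Tag, Attrs, Children} tree and to_html/1 serializes one back, normalizing singleton tags along the way.

html_demo() ->
    Tree = mochiweb_html:parse(<<"<p class=foo>hi<br>there</p>">>),
    {<<"p">>, [{<<"class">>, <<"foo">>}],
     [<<"hi">>, {<<"br">>, [], []}, <<"there">>]} = Tree,
    <<"<p class=\"foo\">hi<br />there</p>">> =
        iolist_to_binary(mochiweb_html:to_html(Tree)).
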
-record(decoder, {line=1,
 column=1,
@@ -89,6 +91,7 @@ to_tokens(T={doctype, _}) ->
 to_tokens(T={comment, _}) ->
 [T];
 to_tokens({Tag0, Acc}) ->
+ %% This is only allowed in sub-tags: {p, [{"class", "foo"}]}
 to_tokens({Tag0, [], Acc});
 to_tokens({Tag0, Attrs, Acc}) ->
 Tag = to_tag(Tag0),
@@ -124,40 +127,6 @@ escape_attr(I) when is_integer(I) ->
 escape_attr(F) when is_float(F) ->
 escape_attr(mochinum:digits(F), []).
 
-%% @spec test() -> ok
-%% @doc Run tests for mochiweb_html.
-test() ->
- test_destack(),
- test_tokens(),
- test_tokens2(),
- test_parse(),
- test_parse2(),
- test_parse_tokens(),
- test_escape(),
- test_escape_attr(),
- test_to_html(),
- ok.
-
-
-%% Internal API
-
-test_to_html() ->
- Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
- Expect = iolist_to_binary(
- to_html({html, [],
- [{<<"head">>, [],
- [{title, <<"hey!">>}]},
- {body, [],
- [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
- {'div', <<"sucka">>},
- {comment, <<" comment! ">>}]}]})),
- Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
- Expect1 = iolist_to_binary(
- to_html({doctype,
- [<<"html">>, <<"PUBLIC">>,
- <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
- <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
- ok.
 to_html([], Acc) ->
 lists:reverse(Acc);
 to_html([{'=', Content} | Rest], Acc) ->
@@ -205,16 +174,6 @@ attrs_to_html([{K, V} | Rest], Acc) ->
 [[<<" ">>, escape(K), <<"=\"">>, escape_attr(V), <<"\"">>] | Acc]).
 
-test_escape() ->
- <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
- escape(<<"&quot;\"word <<up!&quot;">>),
- ok.
-
-test_escape_attr() ->
- <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
- escape_attr(<<"&quot;\"word <<up!&quot;">>),
- ok.
-
 escape([], Acc) ->
 list_to_binary(lists:reverse(Acc));
 escape("<" ++ Rest, Acc) ->
@@ -257,6 +216,9 @@ to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) ->
 to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
 %% Allow {comment, iolist()}
 to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) ->
+ %% Allow {pi, binary(), list()}
+ to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
 to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
 %% Allow {p, [{"class", "foo"}]}
 to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
@@ -290,39 +252,6 @@ to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) ->
 Tag = to_tag(Tag0),
 to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
-test_tokens() -> - [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, - {<<"wibble">>, <<"wibble">>}, - {<<"alice">>, <<"bob">>}], true}] = - tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>), - [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, - {<<"wibble">>, <<"wibble">>}, - {<<"alice">>, <<"bob">>}], true}] = - tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>), - [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] = - tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>), - [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, - {data, <<" A= B <= C ">>, false}, - {end_tag, <<"script">>}] = - tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>), - [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, - {data, <<" A= B <= C ">>, false}, - {end_tag, <<"script">>}] = - tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>), - [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, - {data, <<" A= B <= C ">>, false}, - {end_tag, <<"script">>}] = - tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>), - [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, - {data, <<" A= B <= C ">>, false}, - {end_tag, <<"script">>}] = - tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>), - [{start_tag, <<"textarea">>, [], false}, - {data, <<"<html></body>">>, false}, - {end_tag, <<"textarea">>}] = - tokens(<<"<textarea><html></body></textarea>">>), - ok. - tokens(B, S=#decoder{offset=O}, Acc) -> case B of <<_:O/binary>> -> @@ -374,7 +303,8 @@ tokenize(B, S=#decoder{offset=O}) -> {{end_tag, Tag}, S2}; <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) -> %% This isn't really strict HTML - tokenize_data(B, ?INC_COL(S)); + {{data, Data, _Whitespace}, S1} = tokenize_data(B, ?INC_COL(S)), + {{data, <<$<, Data/binary>>, false}, S1}; <<_:O/binary, "<", _/binary>> -> {Tag, S1} = tokenize_literal(B, ?INC_COL(S)), {Attrs, S2} = tokenize_attributes(B, S1), @@ -385,149 +315,6 @@ tokenize(B, S=#decoder{offset=O}) -> tokenize_data(B, S) end. 
-test_parse() -> - D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"> -<html> - <head> - <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"> - <title>Foo</title> - <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\"> - <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\"> - <!--[if lt IE 7]> - <style type=\"text/css\"> - .no_ie { display: none; } - </style> - <![endif]--> - <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\"> - <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\"> - </head> - <body id=\"home\" class=\"tundra\"><![CDATA[<<this<!-- is -->CDATA>>]]></body> -</html>">>, - Expect = {<<"html">>, [], - [{<<"head">>, [], - [{<<"meta">>, - [{<<"http-equiv">>,<<"Content-Type">>}, - {<<"content">>,<<"text/html; charset=UTF-8">>}], - []}, - {<<"title">>,[],[<<"Foo">>]}, - {<<"link">>, - [{<<"rel">>,<<"stylesheet">>}, - {<<"type">>,<<"text/css">>}, - {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>}, - {<<"media">>,<<"screen">>}], - []}, - {<<"link">>, - [{<<"rel">>,<<"stylesheet">>}, - {<<"type">>,<<"text/css">>}, - {<<"href">>,<<"/static/foo.css">>}, - {<<"media">>,<<"screen">>}], - []}, - {comment,<<"[if lt IE 7]>\n <style type=\"text/css\">\n .no_ie { display: none; }\n </style>\n <![endif]">>}, - {<<"link">>, - [{<<"rel">>,<<"icon">>}, - {<<"href">>,<<"/static/images/favicon.ico">>}, - {<<"type">>,<<"image/x-icon">>}], - []}, - {<<"link">>, - [{<<"rel">>,<<"shortcut icon">>}, - {<<"href">>,<<"/static/images/favicon.ico">>}, - {<<"type">>,<<"image/x-icon">>}], - []}]}, - {<<"body">>, - [{<<"id">>,<<"home">>}, - {<<"class">>,<<"tundra">>}], - [<<"<<this<!-- is -->CDATA>>">>]}]}, - Expect = parse(D0), - ok. - -test_tokens2() -> - D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>, - Expect = [{start_tag,<<"channel">>,[],false}, - {start_tag,<<"title">>,[],false}, - {data,<<"from __future__ import *">>,false}, - {end_tag,<<"title">>}, - {start_tag,<<"link">>,[],true}, - {data,<<"http://bob.pythonmac.org">>,false}, - {end_tag,<<"link">>}, - {start_tag,<<"description">>,[],false}, - {data,<<"Bob's Rants">>,false}, - {end_tag,<<"description">>}, - {end_tag,<<"channel">>}], - Expect = tokens(D0), - ok. - -test_parse2() -> - D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>, - Expect = {<<"channel">>,[], - [{<<"title">>,[],[<<"from __future__ import *">>]}, - {<<"link">>,[],[ - <<"http://bob.pythonmac.org">>, - {<<"br">>,[],[]}, - <<"foo">>]}, - {<<"description">>,[],[<<"Bob's Rants">>]}]}, - Expect = parse(D0), - ok. 
- -test_parse_tokens() -> - D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]}, - {data,<<"\n">>,true}, - {start_tag,<<"html">>,[],false}], - {<<"html">>, [], []} = parse_tokens(D0), - D1 = D0 ++ [{end_tag, <<"html">>}], - {<<"html">>, [], []} = parse_tokens(D1), - D2 = D0 ++ [{start_tag, <<"body">>, [], false}], - {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2), - D3 = D0 ++ [{start_tag, <<"head">>, [], false}, - {end_tag, <<"head">>}, - {start_tag, <<"body">>, [], false}], - {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3), - D4 = D3 ++ [{data,<<"\n">>,true}, - {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false}, - {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false}, - {end_tag,<<"a">>}, - {end_tag,<<"div">>}, - {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false}, - {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false}, - {end_tag,<<"div">>}, - {end_tag,<<"div">>}], - {<<"html">>, [], - [{<<"head">>, [], []}, - {<<"body">>, [], - [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]}, - {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]} - ]}]} = parse_tokens(D4), - D5 = [{start_tag,<<"html">>,[],false}, - {data,<<"\n">>,true}, - {data,<<"boo">>,false}, - {data,<<"hoo">>,false}, - {data,<<"\n">>,true}, - {end_tag,<<"html">>}], - {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5), - D6 = [{start_tag,<<"html">>,[],false}, - {data,<<"\n">>,true}, - {data,<<"\n">>,true}, - {end_tag,<<"html">>}], - {<<"html">>, [], []} = parse_tokens(D6), - D7 = [{start_tag,<<"html">>,[],false}, - {start_tag,<<"ul">>,[],false}, - {start_tag,<<"li">>,[],false}, - {data,<<"word">>,false}, - {start_tag,<<"li">>,[],false}, - {data,<<"up">>,false}, - {end_tag,<<"li">>}, - {start_tag,<<"li">>,[],false}, - {data,<<"fdsa">>,false}, - {start_tag,<<"br">>,[],true}, - {data,<<"asdf">>,false}, - {end_tag,<<"ul">>}, - {end_tag,<<"html">>}], - {<<"html">>, [], - [{<<"ul">>, [], - [{<<"li">>, [], [<<"word">>]}, - {<<"li">>, [], [<<"up">>]}, - {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7), - ok. - tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) -> tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]); tree_data(Rest, AllWhitespace, Acc) -> @@ -556,7 +343,9 @@ tree(L=[{data, _Data, _Whitespace} | _], S) -> tree(Rest, S); {Data, false, Rest} -> tree(Rest, append_stack_child(Data, S)) - end. + end; +tree([{doctype, _} | Rest], Stack) -> + tree(Rest, Stack). norm({Tag, Attrs}) -> {norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []}; @@ -565,21 +354,6 @@ norm(Tag) when is_binary(Tag) -> norm(Tag) -> list_to_binary(string:to_lower(Tag)). -test_destack() -> - {<<"a">>, [], []} = - destack([{<<"a">>, [], []}]), - {<<"a">>, [], [{<<"b">>, [], []}]} = - destack([{<<"b">>, [], []}, {<<"a">>, [], []}]), - {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} = - destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]), - [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] = - destack(<<"b">>, - [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]), - [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] = - destack(<<"c">>, - [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]), - ok. 
- stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest]) when TN =:= <<"li">> orelse TN =:= <<"option">> -> [T1 | destack(TN, Stack)]; @@ -719,9 +493,10 @@ find_qgt(Bin, S=#decoder{offset=O}) -> case Bin of <<_:O/binary, "?>", _/binary>> -> ?ADV_COL(S, 2); - <<_:O/binary, C, _/binary>> -> - find_qgt(Bin, ?INC_CHAR(S, C)); - _ -> + %% tokenize_attributes takes care of this state: + %% <<_:O/binary, C, _/binary>> -> + %% find_qgt(Bin, ?INC_CHAR(S, C)); + <<_:O/binary>> -> S end. @@ -766,7 +541,7 @@ tokenize_charref(Bin, S=#decoder{offset=O}, Start) -> <<_:Start1/binary, R:Len1/binary, _/binary>> = Bin, R; Unichar -> - list_to_binary(xmerl_ucs:to_utf8(Unichar)) + mochiutf8:codepoint_to_bytes(Unichar) end, {{data, Data, false}, ?INC_COL(S)}; _ -> @@ -791,11 +566,10 @@ tokenize_doctype(Bin, S=#decoder{offset=O}, Acc) -> tokenize_word_or_literal(Bin, S=#decoder{offset=O}) -> case Bin of - <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> - {error, {whitespace, [C], S}}; <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE -> tokenize_word(Bin, ?INC_COL(S), C); - _ -> + <<_:O/binary, C, _/binary>> when not ?IS_WHITESPACE(C) -> + %% Sanity check for whitespace tokenize_literal(Bin, S, []) end. @@ -852,13 +626,14 @@ tokenize_script(Bin, S=#decoder{offset=O}) -> tokenize_script(Bin, S=#decoder{offset=O}, Start) -> case Bin of %% Just a look-ahead, we want the end_tag separately - <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>> + <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>> when (SS =:= $s orelse SS =:= $S) andalso (CC =:= $c orelse CC =:= $C) andalso (RR =:= $r orelse RR =:= $R) andalso (II =:= $i orelse II =:= $I) andalso (PP =:= $p orelse PP =:= $P) andalso - (TT=:= $t orelse TT =:= $T) -> + (TT=:= $t orelse TT =:= $T) andalso + ?PROBABLE_CLOSE(ZZ) -> Len = O - Start, <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, {{data, Raw, false}, S}; @@ -874,7 +649,7 @@ tokenize_textarea(Bin, S=#decoder{offset=O}) -> tokenize_textarea(Bin, S=#decoder{offset=O}, Start) -> case Bin of %% Just a look-ahead, we want the end_tag separately - <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>> + <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>> when (TT =:= $t orelse TT =:= $T) andalso (EE =:= $e orelse EE =:= $E) andalso (XX =:= $x orelse XX =:= $X) andalso @@ -882,7 +657,8 @@ tokenize_textarea(Bin, S=#decoder{offset=O}, Start) -> (AA =:= $a orelse AA =:= $A) andalso (RR =:= $r orelse RR =:= $R) andalso (EE2 =:= $e orelse EE2 =:= $E) andalso - (AA2 =:= $a orelse AA2 =:= $A) -> + (AA2 =:= $a orelse AA2 =:= $A) andalso + ?PROBABLE_CLOSE(ZZ) -> Len = O - Start, <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, {{data, Raw, false}, S}; @@ -891,3 +667,395 @@ tokenize_textarea(Bin, S=#decoder{offset=O}, Start) -> <<_:Start/binary, Raw/binary>> -> {{data, Raw, false}, S} end. + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +to_html_test() -> + ?assertEqual( + <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div>RAW!<!-- comment! --></body></html>">>, + iolist_to_binary( + to_html({html, [], + [{<<"head">>, [], + [{title, <<"hey!">>}]}, + {body, [], + [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]}, + {'div', <<"sucka">>}, + {'=', <<"RAW!">>}, + {comment, <<" comment! 
">>}]}]}))), + ?assertEqual( + <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>, + iolist_to_binary( + to_html({doctype, + [<<"html">>, <<"PUBLIC">>, + <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>, + <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))), + ?assertEqual( + <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>, + iolist_to_binary( + to_html({<<"html">>,[], + [{pi, <<"xml:namespace">>, + [{<<"prefix">>,<<"o">>}, + {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))), + ok. + +escape_test() -> + ?assertEqual( + <<"&quot;\"word ><<up!&quot;">>, + escape(<<""\"word ><<up!"">>)), + ?assertEqual( + <<"&quot;\"word ><<up!&quot;">>, + escape(""\"word ><<up!"")), + ?assertEqual( + <<"&quot;\"word ><<up!&quot;">>, + escape('"\"word ><<up!"')), + ok. + +escape_attr_test() -> + ?assertEqual( + <<"&quot;"word ><<up!&quot;">>, + escape_attr(<<""\"word ><<up!"">>)), + ?assertEqual( + <<"&quot;"word ><<up!&quot;">>, + escape_attr(""\"word ><<up!"")), + ?assertEqual( + <<"&quot;"word ><<up!&quot;">>, + escape_attr('"\"word ><<up!"')), + ?assertEqual( + <<"12345">>, + escape_attr(12345)), + ?assertEqual( + <<"1.5">>, + escape_attr(1.5)), + ok. + +tokens_test() -> + ?assertEqual( + [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, + {<<"wibble">>, <<"wibble">>}, + {<<"alice">>, <<"bob">>}], true}], + tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>)), + ?assertEqual( + [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, + {<<"wibble">>, <<"wibble">>}, + {<<"alice">>, <<"bob">>}], true}], + tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>)), + ?assertEqual( + [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}], + tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>)), + ?assertEqual( + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}], + tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>)), + ?assertEqual( + [{start_tag, <<"textarea">>, [], false}, + {data, <<"<html></body>">>, false}, + {end_tag, <<"textarea">>}], + tokens(<<"<textarea><html></body></textarea>">>)), + ?assertEqual( + [{start_tag, <<"textarea">>, [], false}, + {data, <<"<html></body></textareaz>">>, false}], + tokens(<<"<textarea ><html></body></textareaz>">>)), + ?assertEqual( + [{pi, <<"xml:namespace">>, + [{<<"prefix">>,<<"o">>}, + {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}], + tokens(<<"<?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?>">>)), + ?assertEqual( + [{pi, <<"xml:namespace">>, + [{<<"prefix">>,<<"o">>}, + 
{<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}], + tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office \n?>">>)), + ?assertEqual( + [{pi, <<"xml:namespace">>, + [{<<"prefix">>,<<"o">>}, + {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}], + tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office">>)), + ?assertEqual( + [{data, <<"<">>, false}], + tokens(<<"<">>)), + ?assertEqual( + [{data, <<"not html ">>, false}, + {data, <<"< at all">>, false}], + tokens(<<"not html < at all">>)), + ok. + +parse_test() -> + D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"> +<html> + <head> + <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"> + <title>Foo</title> + <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\"> + <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\"> + <!--[if lt IE 7]> + <style type=\"text/css\"> + .no_ie { display: none; } + </style> + <![endif]--> + <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\"> + <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\"> + </head> + <body id=\"home\" class=\"tundra\"><![CDATA[<<this<!-- is -->CDATA>>]]></body> +</html>">>, + ?assertEqual( + {<<"html">>, [], + [{<<"head">>, [], + [{<<"meta">>, + [{<<"http-equiv">>,<<"Content-Type">>}, + {<<"content">>,<<"text/html; charset=UTF-8">>}], + []}, + {<<"title">>,[],[<<"Foo">>]}, + {<<"link">>, + [{<<"rel">>,<<"stylesheet">>}, + {<<"type">>,<<"text/css">>}, + {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>}, + {<<"media">>,<<"screen">>}], + []}, + {<<"link">>, + [{<<"rel">>,<<"stylesheet">>}, + {<<"type">>,<<"text/css">>}, + {<<"href">>,<<"/static/foo.css">>}, + {<<"media">>,<<"screen">>}], + []}, + {comment,<<"[if lt IE 7]>\n <style type=\"text/css\">\n .no_ie { display: none; }\n </style>\n <![endif]">>}, + {<<"link">>, + [{<<"rel">>,<<"icon">>}, + {<<"href">>,<<"/static/images/favicon.ico">>}, + {<<"type">>,<<"image/x-icon">>}], + []}, + {<<"link">>, + [{<<"rel">>,<<"shortcut icon">>}, + {<<"href">>,<<"/static/images/favicon.ico">>}, + {<<"type">>,<<"image/x-icon">>}], + []}]}, + {<<"body">>, + [{<<"id">>,<<"home">>}, + {<<"class">>,<<"tundra">>}], + [<<"<<this<!-- is -->CDATA>>">>]}]}, + parse(D0)), + ?assertEqual( + {<<"html">>,[], + [{pi, <<"xml:namespace">>, + [{<<"prefix">>,<<"o">>}, + {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}, + parse( + <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>)), + ?assertEqual( + {<<"html">>, [], + [{<<"dd">>, [], [<<"foo">>]}, + {<<"dt">>, [], [<<"bar">>]}]}, + parse(<<"<html><dd>foo<dt>bar</html>">>)), + %% Singleton sadness + ?assertEqual( + {<<"html">>, [], + [{<<"link">>, [], []}, + <<"foo">>, + {<<"br">>, [], []}, + <<"bar">>]}, + parse(<<"<html><link>foo<br>bar</html>">>)), + ?assertEqual( + {<<"html">>, [], + [{<<"link">>, [], [<<"foo">>, + {<<"br">>, [], []}, + <<"bar">>]}]}, + parse(<<"<html><link>foo<br>bar</link></html>">>)), + ok. + +exhaustive_is_singleton_test() -> + T = mochiweb_cover:clause_lookup_table(?MODULE, is_singleton), + [?assertEqual(V, is_singleton(K)) || {K, V} <- T]. 
+
+tokenize_attributes_test() ->
+    ?assertEqual(
+        {<<"foo">>,
+         [{<<"bar">>, <<"b\"az">>},
+          {<<"wibble">>, <<"wibble">>},
+          {<<"taco", 16#c2, 16#a9>>, <<"bell">>},
+          {<<"quux">>, <<"quux">>}],
+         []},
+        parse(<<"<foo bar=\"b&quot;az\" wibble taco&copy;=bell quux">>)),
+    ok.
+
+tokens2_test() ->
+    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
+    ?assertEqual(
+        [{start_tag,<<"channel">>,[],false},
+         {start_tag,<<"title">>,[],false},
+         {data,<<"from __future__ import *">>,false},
+         {end_tag,<<"title">>},
+         {start_tag,<<"link">>,[],true},
+         {data,<<"http://bob.pythonmac.org">>,false},
+         {end_tag,<<"link">>},
+         {start_tag,<<"description">>,[],false},
+         {data,<<"Bob's Rants">>,false},
+         {end_tag,<<"description">>},
+         {end_tag,<<"channel">>}],
+        tokens(D0)),
+    ok.
+
+to_tokens_test() ->
+    ?assertEqual(
+        [{start_tag, <<"p">>, [{class, 1}], false},
+         {end_tag, <<"p">>}],
+        to_tokens({p, [{class, 1}], []})),
+    ?assertEqual(
+        [{start_tag, <<"p">>, [], false},
+         {end_tag, <<"p">>}],
+        to_tokens({p})),
+    ?assertEqual(
+        [{'=', <<"data">>}],
+        to_tokens({'=', <<"data">>})),
+    ?assertEqual(
+        [{comment, <<"comment">>}],
+        to_tokens({comment, <<"comment">>})),
+    %% This is only allowed in sub-tags:
+    %% {p, [{"class", "foo"}]} as {p, [{"class", "foo"}], []}
+    %% On the outside it's always treated as follows:
+    %% {p, [], [{"class", "foo"}]} as {p, [], [{"class", "foo"}]}
+    ?assertEqual(
+        [{start_tag, <<"html">>, [], false},
+         {start_tag, <<"p">>, [{class, 1}], false},
+         {end_tag, <<"p">>},
+         {end_tag, <<"html">>}],
+        to_tokens({html, [{p, [{class, 1}]}]})),
+    ok.
+
+parse2_test() ->
+    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
+    ?assertEqual(
+        {<<"channel">>,[],
+         [{<<"title">>,[],[<<"from __future__ import *">>]},
+          {<<"link">>,[],[
+              <<"http://bob.pythonmac.org">>,
+              {<<"br">>,[],[]},
+              <<"foo">>]},
+          {<<"description">>,[],[<<"Bob's Rants">>]}]},
+        parse(D0)),
+    ok.
+ +parse_tokens_test() -> + D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]}, + {data,<<"\n">>,true}, + {start_tag,<<"html">>,[],false}], + ?assertEqual( + {<<"html">>, [], []}, + parse_tokens(D0)), + D1 = D0 ++ [{end_tag, <<"html">>}], + ?assertEqual( + {<<"html">>, [], []}, + parse_tokens(D1)), + D2 = D0 ++ [{start_tag, <<"body">>, [], false}], + ?assertEqual( + {<<"html">>, [], [{<<"body">>, [], []}]}, + parse_tokens(D2)), + D3 = D0 ++ [{start_tag, <<"head">>, [], false}, + {end_tag, <<"head">>}, + {start_tag, <<"body">>, [], false}], + ?assertEqual( + {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]}, + parse_tokens(D3)), + D4 = D3 ++ [{data,<<"\n">>,true}, + {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false}, + {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false}, + {end_tag,<<"a">>}, + {end_tag,<<"div">>}, + {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false}, + {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false}, + {end_tag,<<"div">>}, + {end_tag,<<"div">>}], + ?assertEqual( + {<<"html">>, [], + [{<<"head">>, [], []}, + {<<"body">>, [], + [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]}, + {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]} + ]}]}, + parse_tokens(D4)), + D5 = [{start_tag,<<"html">>,[],false}, + {data,<<"\n">>,true}, + {data,<<"boo">>,false}, + {data,<<"hoo">>,false}, + {data,<<"\n">>,true}, + {end_tag,<<"html">>}], + ?assertEqual( + {<<"html">>, [], [<<"\nboohoo\n">>]}, + parse_tokens(D5)), + D6 = [{start_tag,<<"html">>,[],false}, + {data,<<"\n">>,true}, + {data,<<"\n">>,true}, + {end_tag,<<"html">>}], + ?assertEqual( + {<<"html">>, [], []}, + parse_tokens(D6)), + D7 = [{start_tag,<<"html">>,[],false}, + {start_tag,<<"ul">>,[],false}, + {start_tag,<<"li">>,[],false}, + {data,<<"word">>,false}, + {start_tag,<<"li">>,[],false}, + {data,<<"up">>,false}, + {end_tag,<<"li">>}, + {start_tag,<<"li">>,[],false}, + {data,<<"fdsa">>,false}, + {start_tag,<<"br">>,[],true}, + {data,<<"asdf">>,false}, + {end_tag,<<"ul">>}, + {end_tag,<<"html">>}], + ?assertEqual( + {<<"html">>, [], + [{<<"ul">>, [], + [{<<"li">>, [], [<<"word">>]}, + {<<"li">>, [], [<<"up">>]}, + {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]}, + parse_tokens(D7)), + ok. + +destack_test() -> + {<<"a">>, [], []} = + destack([{<<"a">>, [], []}]), + {<<"a">>, [], [{<<"b">>, [], []}]} = + destack([{<<"b">>, [], []}, {<<"a">>, [], []}]), + {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} = + destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]), + [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] = + destack(<<"b">>, + [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]), + [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] = + destack(<<"c">>, + [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]), + ok. + +doctype_test() -> + ?assertEqual( + {<<"html">>,[],[{<<"head">>,[],[]}]}, + mochiweb_html:parse("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">" + "<html><head></head></body></html>")), + %% http://code.google.com/p/mochiweb/issues/detail?id=52 + ?assertEqual( + {<<"html">>,[],[{<<"head">>,[],[]}]}, + mochiweb_html:parse("<html>" + "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">" + "<head></head></body></html>")), + ok. + +-endif. 
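The eunit suite above doubles as usage documentation for mochiweb_html: parse/1 produces a {Tag, Attributes, Children} tree of binaries, and to_html/1 renders such a tree back to an iolist. A minimal round trip, sketched along the lines of the tests above:

    %% Parse a document into the tuple tree, then render it back out.
    Tree = mochiweb_html:parse(<<"<html><p class=\"x\">hi</p></html>">>),
    %% Tree = {<<"html">>, [], [{<<"p">>, [{<<"class">>, <<"x">>}], [<<"hi">>]}]}
    Html = iolist_to_binary(mochiweb_html:to_html(Tree))
    %% Html = <<"<html><p class=\"x\">hi</p></html>">>
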
diff --git a/src/mochiweb/mochiweb_http.erl b/src/mochiweb/mochiweb_http.erl index f1821f40..ab0af7e8 100644 --- a/src/mochiweb/mochiweb_http.erl +++ b/src/mochiweb/mochiweb_http.erl @@ -8,31 +8,22 @@ -export([start/0, start/1, stop/0, stop/1]). -export([loop/2, default_body/1]). -export([after_response/2, reentry/1]). +-export([parse_range_request/1, range_skip_length/2]). --define(IDLE_TIMEOUT, 30000). +-define(REQUEST_RECV_TIMEOUT, 300000). % timeout waiting for request line +-define(HEADERS_RECV_TIMEOUT, 30000). % timeout waiting for headers -define(MAX_HEADERS, 1000). -define(DEFAULTS, [{name, ?MODULE}, {port, 8888}]). -set_default({Prop, Value}, PropList) -> - case proplists:is_defined(Prop, PropList) of - true -> - PropList; - false -> - [{Prop, Value} | PropList] - end. - -set_defaults(Defaults, PropList) -> - lists:foldl(fun set_default/2, PropList, Defaults). - parse_options(Options) -> {loop, HttpLoop} = proplists:lookup(loop, Options), Loop = fun (S) -> ?MODULE:loop(S, HttpLoop) end, Options1 = [{loop, Loop} | proplists:delete(loop, Options)], - set_defaults(?DEFAULTS, Options1). + mochilists:set_defaults(?DEFAULTS, Options1). stop() -> mochiweb_socket_server:stop(?MODULE). @@ -95,20 +86,26 @@ default_body(Req) -> default_body(Req, Req:get(method), Req:get(path)). loop(Socket, Body) -> - inet:setopts(Socket, [{packet, http}]), + mochiweb_socket:setopts(Socket, [{packet, http}]), request(Socket, Body). request(Socket, Body) -> - case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + case mochiweb_socket:recv(Socket, 0, ?REQUEST_RECV_TIMEOUT) of {ok, {http_request, Method, Path, Version}} -> + mochiweb_socket:setopts(Socket, [{packet, httph}]), headers(Socket, {Method, Path, Version}, [], Body, 0); {error, {http_error, "\r\n"}} -> request(Socket, Body); {error, {http_error, "\n"}} -> request(Socket, Body); + {error, closed} -> + mochiweb_socket:close(Socket), + exit(normal); + {error, timeout} -> + mochiweb_socket:close(Socket), + exit(normal); _Other -> - gen_tcp:close(Socket), - exit(normal) + handle_invalid_request(Socket) end. reentry(Body) -> @@ -118,35 +115,159 @@ reentry(Body) -> headers(Socket, Request, Headers, _Body, ?MAX_HEADERS) -> %% Too many headers sent, bad request. - inet:setopts(Socket, [{packet, raw}]), - Req = mochiweb:new_request({Socket, Request, - lists:reverse(Headers)}), - Req:respond({400, [], []}), - gen_tcp:close(Socket), - exit(normal); + mochiweb_socket:setopts(Socket, [{packet, raw}]), + handle_invalid_request(Socket, Request, Headers); headers(Socket, Request, Headers, Body, HeaderCount) -> - case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + case mochiweb_socket:recv(Socket, 0, ?HEADERS_RECV_TIMEOUT) of {ok, http_eoh} -> - inet:setopts(Socket, [{packet, raw}]), + mochiweb_socket:setopts(Socket, [{packet, raw}]), Req = mochiweb:new_request({Socket, Request, lists:reverse(Headers)}), - Body(Req), + call_body(Body, Req), ?MODULE:after_response(Body, Req); {ok, {http_header, _, Name, _, Value}} -> headers(Socket, Request, [{Name, Value} | Headers], Body, 1 + HeaderCount); + {error, closed} -> + mochiweb_socket:close(Socket), + exit(normal); _Other -> - gen_tcp:close(Socket), - exit(normal) + handle_invalid_request(Socket, Request, Headers) end. +call_body({M, F}, Req) -> + M:F(Req); +call_body(Body, Req) -> + Body(Req). + +handle_invalid_request(Socket) -> + handle_invalid_request(Socket, {'GET', {abs_path, "/"}, {0,9}}, []). 
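For orientation: loop/2 above is the per-connection driver installed by mochiweb_http:start/1, so the split request/header timeouts and the mochiweb_socket indirection apply to every handler. A minimal plain-HTTP server is still a few lines; a sketch, with an illustrative handler fun:

    Loop = fun (Req) -> Req:ok({"text/plain", [], <<"hello">>}) end,
    {ok, _Pid} = mochiweb_http:start([{port, 8888}, {loop, Loop}])
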
+
+handle_invalid_request(Socket, Request, RevHeaders) ->
+    mochiweb_socket:setopts(Socket, [{packet, raw}]),
+    Req = mochiweb:new_request({Socket, Request,
+                                lists:reverse(RevHeaders)}),
+    Req:respond({400, [], []}),
+    mochiweb_socket:close(Socket),
+    exit(normal).
+
 after_response(Body, Req) ->
     Socket = Req:get(socket),
     case Req:should_close() of
         true ->
-            gen_tcp:close(Socket),
+            mochiweb_socket:close(Socket),
             exit(normal);
         false ->
             Req:cleanup(),
             ?MODULE:loop(Socket, Body)
     end.
+
+parse_range_request("bytes=0-") ->
+    %% the trivial range covers the whole entity; treat it as no range at all
+    undefined;
+parse_range_request(RawRange) when is_list(RawRange) ->
+    try
+        "bytes=" ++ RangeString = RawRange,
+        Ranges = string:tokens(RangeString, ","),
+        lists:map(fun ("-" ++ V)  ->
+                          {none, list_to_integer(V)};
+                      (R) ->
+                          case string:tokens(R, "-") of
+                              [S1, S2] ->
+                                  {list_to_integer(S1), list_to_integer(S2)};
+                              [S] ->
+                                  {list_to_integer(S), none}
+                          end
+                  end,
+                  Ranges)
+    catch
+        _:_ ->
+            fail
+    end.
+
+range_skip_length(Spec, Size) ->
+    case Spec of
+        {none, R} when R =< Size, R >= 0 ->
+            {Size - R, R};
+        {none, _OutOfRange} ->
+            {0, Size};
+        {R, none} when R >= 0, R < Size ->
+            {R, Size - R};
+        {_OutOfRange, none} ->
+            invalid_range;
+        {Start, End} when 0 =< Start, Start =< End, End < Size ->
+            {Start, End - Start + 1};
+        {_OutOfRange, _End} ->
+            invalid_range
+    end.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+range_test() ->
+    %% valid, single ranges
+    ?assertEqual([{20, 30}], parse_range_request("bytes=20-30")),
+    ?assertEqual([{20, none}], parse_range_request("bytes=20-")),
+    ?assertEqual([{none, 20}], parse_range_request("bytes=-20")),
+
+    %% trivial single range
+    ?assertEqual(undefined, parse_range_request("bytes=0-")),
+
+    %% invalid, single ranges
+    ?assertEqual(fail, parse_range_request("")),
+    ?assertEqual(fail, parse_range_request("garbage")),
+    ?assertEqual(fail, parse_range_request("bytes=-20-30")),
+
+    %% valid, multiple ranges
+    ?assertEqual(
+        [{20, 30}, {50, 100}, {110, 200}],
+        parse_range_request("bytes=20-30,50-100,110-200")),
+    ?assertEqual(
+        [{20, none}, {50, 100}, {none, 200}],
+        parse_range_request("bytes=20-,50-100,-200")),
+
+    %% no ranges
+    ?assertEqual([], parse_range_request("bytes=")),
+    ok.
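With parse_range_request/1 and range_skip_length/2 exported, mochiweb_request can delegate to them (see the range_parts/2 hunk further down). Tracing one suffix range through both, with the values taken straight from the tests above:

    %% "Range: bytes=-20" against a 60-byte body means: serve the last 20 bytes.
    [{none, 20}] = mochiweb_http:parse_range_request("bytes=-20"),
    {40, 20} = mochiweb_http:range_skip_length({none, 20}, 60)
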
+ +range_skip_length_test() -> + Body = <<"012345678901234567890123456789012345678901234567890123456789">>, + BodySize = byte_size(Body), %% 60 + BodySize = 60, + + %% these values assume BodySize =:= 60 + ?assertEqual({1,9}, range_skip_length({1,9}, BodySize)), %% 1-9 + ?assertEqual({10,10}, range_skip_length({10,19}, BodySize)), %% 10-19 + ?assertEqual({40, 20}, range_skip_length({none, 20}, BodySize)), %% -20 + ?assertEqual({30, 30}, range_skip_length({30, none}, BodySize)), %% 30- + + %% valid edge cases for range_skip_length + ?assertEqual({BodySize, 0}, range_skip_length({none, 0}, BodySize)), + ?assertEqual({0, BodySize}, range_skip_length({none, BodySize}, BodySize)), + ?assertEqual({0, BodySize}, range_skip_length({0, none}, BodySize)), + BodySizeLess1 = BodySize - 1, + ?assertEqual({BodySizeLess1, 1}, + range_skip_length({BodySize - 1, none}, BodySize)), + + %% out of range, return whole thing + ?assertEqual({0, BodySize}, + range_skip_length({none, BodySize + 1}, BodySize)), + ?assertEqual({0, BodySize}, + range_skip_length({none, -1}, BodySize)), + + %% invalid ranges + ?assertEqual(invalid_range, + range_skip_length({-1, 30}, BodySize)), + ?assertEqual(invalid_range, + range_skip_length({0, BodySize + 1}, BodySize)), + ?assertEqual(invalid_range, + range_skip_length({-1, BodySize + 1}, BodySize)), + ?assertEqual(invalid_range, + range_skip_length({BodySize, 40}, BodySize)), + ?assertEqual(invalid_range, + range_skip_length({-1, none}, BodySize)), + ?assertEqual(invalid_range, + range_skip_length({BodySize, none}, BodySize)), + ok. + +-endif. diff --git a/src/mochiweb/mochiweb_io.erl b/src/mochiweb/mochiweb_io.erl new file mode 100644 index 00000000..6ce57ec8 --- /dev/null +++ b/src/mochiweb/mochiweb_io.erl @@ -0,0 +1,46 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Utilities for dealing with IO devices (open files). + +-module(mochiweb_io). +-author('bob@mochimedia.com'). + +-export([iodevice_stream/3, iodevice_stream/2]). +-export([iodevice_foldl/4, iodevice_foldl/3]). +-export([iodevice_size/1]). +-define(READ_SIZE, 8192). + +iodevice_foldl(F, Acc, IoDevice) -> + iodevice_foldl(F, Acc, IoDevice, ?READ_SIZE). + +iodevice_foldl(F, Acc, IoDevice, BufferSize) -> + case file:read(IoDevice, BufferSize) of + eof -> + Acc; + {ok, Data} -> + iodevice_foldl(F, F(Data, Acc), IoDevice, BufferSize) + end. + +iodevice_stream(Callback, IoDevice) -> + iodevice_stream(Callback, IoDevice, ?READ_SIZE). + +iodevice_stream(Callback, IoDevice, BufferSize) -> + F = fun (Data, ok) -> Callback(Data) end, + ok = iodevice_foldl(F, ok, IoDevice, BufferSize). + +iodevice_size(IoDevice) -> + {ok, Size} = file:position(IoDevice, eof), + {ok, 0} = file:position(IoDevice, bof), + Size. + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + + + +-endif. diff --git a/src/mochiweb/mochiweb_mime.erl b/src/mochiweb/mochiweb_mime.erl new file mode 100644 index 00000000..5344aee7 --- /dev/null +++ b/src/mochiweb/mochiweb_mime.erl @@ -0,0 +1,94 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Gives a good MIME type guess based on file extension. + +-module(mochiweb_mime). +-author('bob@mochimedia.com'). +-export([from_extension/1]). + +%% @spec from_extension(S::string()) -> string() | undefined +%% @doc Given a filename extension (e.g. ".html") return a guess for the MIME +%% type such as "text/html". Will return the atom undefined if no good +%% guess is available. 
+from_extension(".html") -> + "text/html"; +from_extension(".xhtml") -> + "application/xhtml+xml"; +from_extension(".xml") -> + "application/xml"; +from_extension(".css") -> + "text/css"; +from_extension(".js") -> + "application/x-javascript"; +from_extension(".jpg") -> + "image/jpeg"; +from_extension(".gif") -> + "image/gif"; +from_extension(".png") -> + "image/png"; +from_extension(".swf") -> + "application/x-shockwave-flash"; +from_extension(".zip") -> + "application/zip"; +from_extension(".bz2") -> + "application/x-bzip2"; +from_extension(".gz") -> + "application/x-gzip"; +from_extension(".tar") -> + "application/x-tar"; +from_extension(".tgz") -> + "application/x-gzip"; +from_extension(".txt") -> + "text/plain"; +from_extension(".doc") -> + "application/msword"; +from_extension(".pdf") -> + "application/pdf"; +from_extension(".xls") -> + "application/vnd.ms-excel"; +from_extension(".rtf") -> + "application/rtf"; +from_extension(".mov") -> + "video/quicktime"; +from_extension(".mp3") -> + "audio/mpeg"; +from_extension(".z") -> + "application/x-compress"; +from_extension(".wav") -> + "audio/x-wav"; +from_extension(".ico") -> + "image/x-icon"; +from_extension(".bmp") -> + "image/bmp"; +from_extension(".m4a") -> + "audio/mpeg"; +from_extension(".m3u") -> + "audio/x-mpegurl"; +from_extension(".exe") -> + "application/octet-stream"; +from_extension(".csv") -> + "text/csv"; +from_extension(_) -> + undefined. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +exhaustive_from_extension_test() -> + T = mochiweb_cover:clause_lookup_table(?MODULE, from_extension), + [?assertEqual(V, from_extension(K)) || {K, V} <- T]. + +from_extension_test() -> + ?assertEqual("text/html", + from_extension(".html")), + ?assertEqual(undefined, + from_extension("")), + ?assertEqual(undefined, + from_extension(".wtf")), + ok. + +-endif. diff --git a/src/mochiweb/mochiweb_multipart.erl b/src/mochiweb/mochiweb_multipart.erl index 0368a9a6..3069cf4d 100644 --- a/src/mochiweb/mochiweb_multipart.erl +++ b/src/mochiweb/mochiweb_multipart.erl @@ -8,17 +8,73 @@ -export([parse_form/1, parse_form/2]). -export([parse_multipart_request/2]). --export([test/0]). +-export([parts_to_body/3, parts_to_multipart_body/4]). +-export([default_file_handler/2]). -define(CHUNKSIZE, 4096). -record(mp, {state, boundary, length, buffer, callback, req}). %% TODO: DOCUMENT THIS MODULE. - +%% @type key() = atom() | string() | binary(). +%% @type value() = atom() | iolist() | integer(). +%% @type header() = {key(), value()}. +%% @type bodypart() = {Start::integer(), End::integer(), Body::iolist()}. +%% @type formfile() = {Name::string(), ContentType::string(), Content::binary()}. +%% @type request(). +%% @type file_handler() = (Filename::string(), ContentType::string()) -> file_handler_callback(). +%% @type file_handler_callback() = (binary() | eof) -> file_handler_callback() | term(). + +%% @spec parts_to_body([bodypart()], ContentType::string(), +%% Size::integer()) -> {[header()], iolist()} +%% @doc Return {[header()], iolist()} representing the body for the given +%% parts, may be a single part or multipart. 
+parts_to_body([{Start, End, Body}], ContentType, Size) -> + HeaderList = [{"Content-Type", ContentType}, + {"Content-Range", + ["bytes ", + mochiweb_util:make_io(Start), "-", mochiweb_util:make_io(End), + "/", mochiweb_util:make_io(Size)]}], + {HeaderList, Body}; +parts_to_body(BodyList, ContentType, Size) when is_list(BodyList) -> + parts_to_multipart_body(BodyList, ContentType, Size, + mochihex:to_hex(crypto:rand_bytes(8))). + +%% @spec parts_to_multipart_body([bodypart()], ContentType::string(), +%% Size::integer(), Boundary::string()) -> +%% {[header()], iolist()} +%% @doc Return {[header()], iolist()} representing the body for the given +%% parts, always a multipart response. +parts_to_multipart_body(BodyList, ContentType, Size, Boundary) -> + HeaderList = [{"Content-Type", + ["multipart/byteranges; ", + "boundary=", Boundary]}], + MultiPartBody = multipart_body(BodyList, ContentType, Boundary, Size), + + {HeaderList, MultiPartBody}. + +%% @spec multipart_body([bodypart()], ContentType::string(), +%% Boundary::string(), Size::integer()) -> iolist() +%% @doc Return the representation of a multipart body for the given [bodypart()]. +multipart_body([], _ContentType, Boundary, _Size) -> + ["--", Boundary, "--\r\n"]; +multipart_body([{Start, End, Body} | BodyList], ContentType, Boundary, Size) -> + ["--", Boundary, "\r\n", + "Content-Type: ", ContentType, "\r\n", + "Content-Range: ", + "bytes ", mochiweb_util:make_io(Start), "-", mochiweb_util:make_io(End), + "/", mochiweb_util:make_io(Size), "\r\n\r\n", + Body, "\r\n" + | multipart_body(BodyList, ContentType, Boundary, Size)]. + +%% @spec parse_form(request()) -> [{string(), string() | formfile()}] +%% @doc Parse a multipart form from the given request using the in-memory +%% default_file_handler/2. parse_form(Req) -> parse_form(Req, fun default_file_handler/2). +%% @spec parse_form(request(), F::file_handler()) -> [{string(), string() | term()}] +%% @doc Parse a multipart form from the given request using the given file_handler(). parse_form(Req, FileHandler) -> Callback = fun (Next) -> parse_form_outer(Next, FileHandler, []) end, {_, _, Res} = parse_multipart_request(Req, Callback), @@ -236,13 +292,38 @@ find_boundary(Prefix, Data) -> not_found end. -with_socket_server(ServerFun, ClientFun) -> - {ok, Server} = mochiweb_socket_server:start([{ip, "127.0.0.1"}, - {port, 0}, - {loop, ServerFun}]), +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). + +ssl_cert_opts() -> + EbinDir = filename:dirname(code:which(?MODULE)), + CertDir = filename:join([EbinDir, "..", "support", "test-materials"]), + CertFile = filename:join(CertDir, "test_ssl_cert.pem"), + KeyFile = filename:join(CertDir, "test_ssl_key.pem"), + [{certfile, CertFile}, {keyfile, KeyFile}]. 
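parts_to_multipart_body/4 is deterministic once the boundary is supplied, which is what the new tests at the end of this file rely on. A sketch of assembling a 206 Partial Content body for two ranges of a 10-byte entity:

    {Headers, Body} = mochiweb_multipart:parts_to_multipart_body(
                          [{0, 5, <<"01234">>}, {5, 10, <<"56789">>}],
                          "text/plain", 10, "BOUNDARY"),
    %% Headers carries Content-Type: multipart/byteranges; boundary=BOUNDARY
    %% (as an iolist); Body frames each part with --BOUNDARY plus its own
    %% Content-Type and Content-Range headers, closing with --BOUNDARY--.
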
+ +with_socket_server(Transport, ServerFun, ClientFun) -> + ServerOpts0 = [{ip, "127.0.0.1"}, {port, 0}, {loop, ServerFun}], + ServerOpts = case Transport of + plain -> + ServerOpts0; + ssl -> + ServerOpts0 ++ [{ssl, true}, {ssl_opts, ssl_cert_opts()}] + end, + {ok, Server} = mochiweb_socket_server:start(ServerOpts), Port = mochiweb_socket_server:get(Server, port), - {ok, Client} = gen_tcp:connect("127.0.0.1", Port, - [binary, {active, false}]), + ClientOpts = [binary, {active, false}], + {ok, Client} = case Transport of + plain -> + gen_tcp:connect("127.0.0.1", Port, ClientOpts); + ssl -> + ClientOpts1 = [{ssl_imp, new} | ClientOpts], + {ok, SslSocket} = ssl:connect("127.0.0.1", Port, ClientOpts1), + {ok, {ssl, SslSocket}} + end, Res = (catch ClientFun(Client)), mochiweb_socket_server:stop(Server), Res. @@ -256,19 +337,30 @@ fake_request(Socket, ContentType, Length) -> [{"content-type", ContentType}, {"content-length", Length}])). -test_callback(Expect, [Expect | Rest]) -> +test_callback({body, <<>>}, Rest=[body_end | _]) -> + %% When expecting the body_end we might get an empty binary + fun (Next) -> test_callback(Next, Rest) end; +test_callback({body, Got}, [{body, Expect} | Rest]) when Got =/= Expect -> + %% Partial response + GotSize = size(Got), + <<Got:GotSize/binary, Expect1/binary>> = Expect, + fun (Next) -> test_callback(Next, [{body, Expect1} | Rest]) end; +test_callback(Got, [Expect | Rest]) -> + ?assertEqual(Got, Expect), case Rest of [] -> ok; _ -> fun (Next) -> test_callback(Next, Rest) end - end; -test_callback({body, Got}, [{body, Expect} | Rest]) -> - GotSize = size(Got), - <<Got:GotSize/binary, Expect1/binary>> = Expect, - fun (Next) -> test_callback(Next, [{body, Expect1} | Rest]) end. + end. -test_parse3() -> +parse3_http_test() -> + parse3(plain). + +parse3_https_test() -> + parse3(ssl). + +parse3(Transport) -> ContentType = "multipart/form-data; boundary=---------------------------7386909285754635891697677882", BinContent = <<"-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test_file.txt\"\r\nContent-Type: text/plain\r\n\r\nWoo multiline text file\n\nLa la la\r\n-----------------------------7386909285754635891697677882--\r\n">>, Expect = [{headers, @@ -285,8 +377,8 @@ test_parse3() -> eof], TestCallback = fun (Next) -> test_callback(Next, Expect) end, ServerFun = fun (Socket) -> - ok = gen_tcp:send(Socket, BinContent), - exit(normal) + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) end, ClientFun = fun (Socket) -> Req = fake_request(Socket, ContentType, @@ -295,11 +387,16 @@ test_parse3() -> {0, <<>>, ok} = Res, ok end, - ok = with_socket_server(ServerFun, ClientFun), + ok = with_socket_server(Transport, ServerFun, ClientFun), ok. +parse2_http_test() -> + parse2(plain). + +parse2_https_test() -> + parse2(ssl). 
-test_parse2() -> +parse2(Transport) -> ContentType = "multipart/form-data; boundary=---------------------------6072231407570234361599764024", BinContent = <<"-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\"\r\nContent-Type: application/octet-stream\r\n\r\n\r\n-----------------------------6072231407570234361599764024--\r\n">>, Expect = [{headers, @@ -316,8 +413,8 @@ test_parse2() -> eof], TestCallback = fun (Next) -> test_callback(Next, Expect) end, ServerFun = fun (Socket) -> - ok = gen_tcp:send(Socket, BinContent), - exit(normal) + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) end, ClientFun = fun (Socket) -> Req = fake_request(Socket, ContentType, @@ -326,10 +423,16 @@ test_parse2() -> {0, <<>>, ok} = Res, ok end, - ok = with_socket_server(ServerFun, ClientFun), + ok = with_socket_server(Transport, ServerFun, ClientFun), ok. -test_parse_form() -> +parse_form_http_test() -> + do_parse_form(plain). + +parse_form_https_test() -> + do_parse_form(ssl). + +do_parse_form(Transport) -> ContentType = "multipart/form-data; boundary=AaB03x", "AaB03x" = get_boundary(ContentType), Content = mochiweb_util:join( @@ -347,8 +450,8 @@ test_parse_form() -> ""], "\r\n"), BinContent = iolist_to_binary(Content), ServerFun = fun (Socket) -> - ok = gen_tcp:send(Socket, BinContent), - exit(normal) + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) end, ClientFun = fun (Socket) -> Req = fake_request(Socket, ContentType, @@ -360,10 +463,16 @@ test_parse_form() -> }] = Res, ok end, - ok = with_socket_server(ServerFun, ClientFun), + ok = with_socket_server(Transport, ServerFun, ClientFun), ok. -test_parse() -> +parse_http_test() -> + do_parse(plain). + +parse_https_test() -> + do_parse(ssl). + +do_parse(Transport) -> ContentType = "multipart/form-data; boundary=AaB03x", "AaB03x" = get_boundary(ContentType), Content = mochiweb_util:join( @@ -394,8 +503,113 @@ test_parse() -> eof], TestCallback = fun (Next) -> test_callback(Next, Expect) end, ServerFun = fun (Socket) -> - ok = gen_tcp:send(Socket, BinContent), - exit(normal) + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(Transport, ServerFun, ClientFun), + ok. + +parse_partial_body_boundary_http_test() -> + parse_partial_body_boundary(plain). + +parse_partial_body_boundary_https_test() -> + parse_partial_body_boundary(ssl). + +parse_partial_body_boundary(Transport) -> + Boundary = string:copies("$", 2048), + ContentType = "multipart/form-data; boundary=" ++ Boundary, + ?assertEqual(Boundary, get_boundary(ContentType)), + Content = mochiweb_util:join( + ["--" ++ Boundary, + "Content-Disposition: form-data; name=\"submit-name\"", + "", + "Larry", + "--" ++ Boundary, + "Content-Disposition: form-data; name=\"files\";" + ++ "filename=\"file1.txt\"", + "Content-Type: text/plain", + "", + "... 
contents of file1.txt ...", + "--" ++ Boundary ++ "--", + ""], "\r\n"), + BinContent = iolist_to_binary(Content), + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "submit-name"}]}}]}, + {body, <<"Larry">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}}, + {"content-type", {"text/plain", []}} + ]}, + {body, <<"... contents of file1.txt ...">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(Transport, ServerFun, ClientFun), + ok. + +parse_large_header_http_test() -> + parse_large_header(plain). + +parse_large_header_https_test() -> + parse_large_header(ssl). + +parse_large_header(Transport) -> + ContentType = "multipart/form-data; boundary=AaB03x", + "AaB03x" = get_boundary(ContentType), + Content = mochiweb_util:join( + ["--AaB03x", + "Content-Disposition: form-data; name=\"submit-name\"", + "", + "Larry", + "--AaB03x", + "Content-Disposition: form-data; name=\"files\";" + ++ "filename=\"file1.txt\"", + "Content-Type: text/plain", + "x-large-header: " ++ string:copies("%", 4096), + "", + "... contents of file1.txt ...", + "--AaB03x--", + ""], "\r\n"), + BinContent = iolist_to_binary(Content), + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "submit-name"}]}}]}, + {body, <<"Larry">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}}, + {"content-type", {"text/plain", []}}, + {"x-large-header", {string:copies("%", 4096), []}} + ]}, + {body, <<"... contents of file1.txt ...">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) end, ClientFun = fun (Socket) -> Req = fake_request(Socket, ContentType, @@ -404,10 +618,10 @@ test_parse() -> {0, <<>>, ok} = Res, ok end, - ok = with_socket_server(ServerFun, ClientFun), + ok = with_socket_server(Transport, ServerFun, ClientFun), ok. -test_find_boundary() -> +find_boundary_test() -> B = <<"\r\n--X">>, {next_boundary, 0, 7} = find_boundary(B, <<"\r\n--X\r\nRest">>), {next_boundary, 1, 7} = find_boundary(B, <<"!\r\n--X\r\nRest">>), @@ -422,9 +636,10 @@ test_find_boundary() -> 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45, 49,54,48,51,55,52,53,52,51,53,49>>, {maybe, 30} = find_boundary(P, B0), + not_found = find_boundary(B, <<"\r\n--XJOPKE">>), ok. -test_find_in_binary() -> +find_in_binary_test() -> {exact, 0} = find_in_binary(<<"foo">>, <<"foobarbaz">>), {exact, 1} = find_in_binary(<<"oo">>, <<"foobarbaz">>), {exact, 8} = find_in_binary(<<"z">>, <<"foobarbaz">>), @@ -435,7 +650,13 @@ test_find_in_binary() -> {partial, 1, 3} = find_in_binary(<<"foobar">>, <<"afoo">>), ok. -test_flash_parse() -> +flash_parse_http_test() -> + flash_parse(plain). + +flash_parse_https_test() -> + flash_parse(ssl). 
+ +flash_parse(Transport) -> ContentType = "multipart/form-data; boundary=----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5", "----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5" = get_boundary(ContentType), BinContent = <<"------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Filename\"\r\n\r\nhello.txt\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"success_action_status\"\r\n\r\n201\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\nContent-Type: application/octet-stream\r\n\r\nhello\n\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Upload\"\r\n\r\nSubmit Query\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5--">>, @@ -463,8 +684,8 @@ test_flash_parse() -> eof], TestCallback = fun (Next) -> test_callback(Next, Expect) end, ServerFun = fun (Socket) -> - ok = gen_tcp:send(Socket, BinContent), - exit(normal) + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) end, ClientFun = fun (Socket) -> Req = fake_request(Socket, ContentType, @@ -473,10 +694,16 @@ test_flash_parse() -> {0, <<>>, ok} = Res, ok end, - ok = with_socket_server(ServerFun, ClientFun), + ok = with_socket_server(Transport, ServerFun, ClientFun), ok. -test_flash_parse2() -> +flash_parse2_http_test() -> + flash_parse2(plain). + +flash_parse2_https_test() -> + flash_parse2(ssl). + +flash_parse2(Transport) -> ContentType = "multipart/form-data; boundary=----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5", "----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5" = get_boundary(ContentType), Chunk = iolist_to_binary(string:copies("%", 4096)), @@ -505,8 +732,8 @@ test_flash_parse2() -> eof], TestCallback = fun (Next) -> test_callback(Next, Expect) end, ServerFun = fun (Socket) -> - ok = gen_tcp:send(Socket, BinContent), - exit(normal) + ok = mochiweb_socket:send(Socket, BinContent), + exit(normal) end, ClientFun = fun (Socket) -> Req = fake_request(Socket, ContentType, @@ -515,16 +742,83 @@ test_flash_parse2() -> {0, <<>>, ok} = Res, ok end, - ok = with_socket_server(ServerFun, ClientFun), + ok = with_socket_server(Transport, ServerFun, ClientFun), + ok. + +parse_headers_test() -> + ?assertEqual([], parse_headers(<<>>)). + +flash_multipart_hack_test() -> + Buffer = <<"prefix-">>, + Prefix = <<"prefix">>, + State = #mp{length=0, buffer=Buffer, boundary=Prefix}, + ?assertEqual(State, + flash_multipart_hack(State)). + +parts_to_body_single_test() -> + {HL, B} = parts_to_body([{0, 5, <<"01234">>}], + "text/plain", + 10), + [{"Content-Range", Range}, + {"Content-Type", Type}] = lists:sort(HL), + ?assertEqual( + <<"bytes 0-5/10">>, + iolist_to_binary(Range)), + ?assertEqual( + <<"text/plain">>, + iolist_to_binary(Type)), + ?assertEqual( + <<"01234">>, + iolist_to_binary(B)), + ok. + +parts_to_body_multi_test() -> + {[{"Content-Type", Type}], + _B} = parts_to_body([{0, 5, <<"01234">>}, {5, 10, <<"56789">>}], + "text/plain", + 10), + ?assertMatch( + <<"multipart/byteranges; boundary=", _/binary>>, + iolist_to_binary(Type)), ok. 
-test() -> - test_find_in_binary(), - test_find_boundary(), - test_parse(), - test_parse2(), - test_parse3(), - test_parse_form(), - test_flash_parse(), - test_flash_parse2(), +parts_to_multipart_body_test() -> + {[{"Content-Type", V}], B} = parts_to_multipart_body( + [{0, 5, <<"01234">>}, {5, 10, <<"56789">>}], + "text/plain", + 10, + "BOUNDARY"), + MB = multipart_body( + [{0, 5, <<"01234">>}, {5, 10, <<"56789">>}], + "text/plain", + "BOUNDARY", + 10), + ?assertEqual( + <<"multipart/byteranges; boundary=BOUNDARY">>, + iolist_to_binary(V)), + ?assertEqual( + iolist_to_binary(MB), + iolist_to_binary(B)), ok. + +multipart_body_test() -> + ?assertEqual( + <<"--BOUNDARY--\r\n">>, + iolist_to_binary(multipart_body([], "text/plain", "BOUNDARY", 0))), + ?assertEqual( + <<"--BOUNDARY\r\n" + "Content-Type: text/plain\r\n" + "Content-Range: bytes 0-5/10\r\n\r\n" + "01234\r\n" + "--BOUNDARY\r\n" + "Content-Type: text/plain\r\n" + "Content-Range: bytes 5-10/10\r\n\r\n" + "56789\r\n" + "--BOUNDARY--\r\n">>, + iolist_to_binary(multipart_body([{0, 5, <<"01234">>}, {5, 10, <<"56789">>}], + "text/plain", + "BOUNDARY", + 10))), + ok. + +-endif. diff --git a/src/mochiweb/mochiweb_request.erl b/src/mochiweb/mochiweb_request.erl index fc296f40..ffe4e9eb 100644 --- a/src/mochiweb/mochiweb_request.erl +++ b/src/mochiweb/mochiweb_request.erl @@ -7,9 +7,9 @@ -author('bob@mochimedia.com'). -include_lib("kernel/include/file.hrl"). +-include("internal.hrl"). -define(QUIP, "Any of you quaids got a smint?"). --define(READ_SIZE, 8192). -export([get_header_value/1, get_primary_header_value/1, get/1, dump/0]). -export([send/1, recv/1, recv/2, recv_body/0, recv_body/1, stream_body/3]). @@ -20,7 +20,8 @@ -export([should_close/0, cleanup/0]). -export([parse_cookie/0, get_cookie_value/1]). -export([serve_file/2, serve_file/3]). --export([test/0]). +-export([accepted_encodings/1]). +-export([accepts_content_type/1]). -define(SAVE_QS, mochiweb_request_qs). -define(SAVE_PATH, mochiweb_request_path). @@ -39,8 +40,8 @@ %% @type response(). A mochiweb_response parameterized module instance. %% @type ioheaders() = headers() | [{key(), value()}]. -% 5 minute default idle timeout --define(IDLE_TIMEOUT, 300000). +% 10 second default idle timeout +-define(IDLE_TIMEOUT, 10000). % Maximum recv_body() length of 1MB -define(MAX_RECV_BODY, (1024*1024)). @@ -53,12 +54,23 @@ get_header_value(K) -> get_primary_header_value(K) -> mochiweb_headers:get_primary_value(K, Headers). -%% @type field() = socket | method | raw_path | version | headers | peer | path | body_length | range +%% @type field() = socket | scheme | method | raw_path | version | headers | peer | path | body_length | range %% @spec get(field()) -> term() -%% @doc Return the internal representation of the given field. +%% @doc Return the internal representation of the given field. If +%% <code>socket</code> is requested on a HTTPS connection, then +%% an ssl socket will be returned as <code>{ssl, SslSocket}</code>. +%% You can use <code>SslSocket</code> with the <code>ssl</code> +%% application, eg: <code>ssl:peercert(SslSocket)</code>. 
get(socket) -> Socket; +get(scheme) -> + case mochiweb_socket:type(Socket) of + plain -> + http; + ssl -> + https + end; get(method) -> Method; get(raw_path) -> @@ -68,7 +80,7 @@ get(version) -> get(headers) -> Headers; get(peer) -> - case inet:peername(Socket) of + case mochiweb_socket:peername(Socket) of {ok, {Addr={10, _, _, _}, _Port}} -> case get_header_value("x-forwarded-for") of undefined -> @@ -84,7 +96,9 @@ get(peer) -> string:strip(lists:last(string:tokens(Hosts, ","))) end; {ok, {Addr, _Port}} -> - inet_parse:ntoa(Addr) + inet_parse:ntoa(Addr); + {error, enotconn} -> + exit(normal) end; get(path) -> case erlang:get(?SAVE_PATH) of @@ -97,13 +111,20 @@ get(path) -> Cached end; get(body_length) -> - erlang:get(?SAVE_BODY_LENGTH); + case erlang:get(?SAVE_BODY_LENGTH) of + undefined -> + BodyLength = body_length(), + put(?SAVE_BODY_LENGTH, {cached, BodyLength}), + BodyLength; + {cached, Cached} -> + Cached + end; get(range) -> case get_header_value(range) of undefined -> undefined; RawRange -> - parse_range_request(RawRange) + mochiweb_http:parse_range_request(RawRange) end. %% @spec dump() -> {mochiweb_request, [{atom(), term()}]} @@ -118,7 +139,7 @@ dump() -> %% @spec send(iodata()) -> ok %% @doc Send data over the socket. send(Data) -> - case gen_tcp:send(Socket, Data) of + case mochiweb_socket:send(Socket, Data) of ok -> ok; _ -> @@ -135,7 +156,7 @@ recv(Length) -> %% @doc Receive Length bytes from the client as a binary, with the given %% Timeout in msec. recv(Length, Timeout) -> - case gen_tcp:recv(Socket, Length, Timeout) of + case mochiweb_socket:recv(Socket, Length, Timeout) of {ok, Data} -> put(?SAVE_RECV, true), Data; @@ -171,20 +192,24 @@ recv_body() -> %% @doc Receive the body of the HTTP request (defined by Content-Length). %% Will receive up to MaxBody bytes. recv_body(MaxBody) -> - % we could use a sane constant for max chunk size - Body = stream_body(?MAX_RECV_BODY, fun - ({0, _ChunkedFooter}, {_LengthAcc, BinAcc}) -> - iolist_to_binary(lists:reverse(BinAcc)); - ({Length, Bin}, {LengthAcc, BinAcc}) -> - NewLength = Length + LengthAcc, - if NewLength > MaxBody -> - exit({body_too_large, chunked}); - true -> - {NewLength, [Bin | BinAcc]} - end - end, {0, []}, MaxBody), - put(?SAVE_BODY, Body), - Body. + case erlang:get(?SAVE_BODY) of + undefined -> + % we could use a sane constant for max chunk size + Body = stream_body(?MAX_RECV_BODY, fun + ({0, _ChunkedFooter}, {_LengthAcc, BinAcc}) -> + iolist_to_binary(lists:reverse(BinAcc)); + ({Length, Bin}, {LengthAcc, BinAcc}) -> + NewLength = Length + LengthAcc, + if NewLength > MaxBody -> + exit({body_too_large, chunked}); + true -> + {NewLength, [Bin | BinAcc]} + end + end, {0, []}, MaxBody), + put(?SAVE_BODY, Body), + Body; + Cached -> Cached + end. stream_body(MaxChunkSize, ChunkFun, FunState) -> stream_body(MaxChunkSize, ChunkFun, FunState, undefined). @@ -241,7 +266,7 @@ start_response({Code, ResponseHeaders}) -> %% ResponseHeaders. start_raw_response({Code, ResponseHeaders}) -> F = fun ({K, V}, Acc) -> - [make_io(K), <<": ">>, V, <<"\r\n">> | Acc] + [mochiweb_util:make_io(K), <<": ">>, V, <<"\r\n">> | Acc] end, End = lists:foldl(F, [<<"\r\n">>], mochiweb_headers:to_list(ResponseHeaders)), @@ -265,13 +290,13 @@ start_response_length({Code, ResponseHeaders, Length}) -> %% will be set by the Body length, and the server will insert header %% defaults. 
respond({Code, ResponseHeaders, {file, IoDevice}}) -> - Length = iodevice_size(IoDevice), + Length = mochiweb_io:iodevice_size(IoDevice), Response = start_response_length({Code, ResponseHeaders, Length}), case Method of 'HEAD' -> ok; _ -> - iodevice_stream(IoDevice) + mochiweb_io:iodevice_stream(fun send/1, IoDevice) end, Response; respond({Code, ResponseHeaders, chunked}) -> @@ -326,8 +351,12 @@ ok({ContentType, Body}) -> ok({ContentType, ResponseHeaders, Body}) -> HResponse = mochiweb_headers:make(ResponseHeaders), case THIS:get(range) of - X when X =:= undefined; X =:= fail -> - HResponse1 = mochiweb_headers:enter("Content-Type", ContentType, HResponse), + X when (X =:= undefined orelse X =:= fail) orelse Body =:= chunked -> + %% http://code.google.com/p/mochiweb/issues/detail?id=54 + %% Range header not supported when chunked, return 200 and provide + %% full response. + HResponse1 = mochiweb_headers:enter("Content-Type", ContentType, + HResponse), respond({200, HResponse1, Body}); Ranges -> {PartList, Size} = range_parts(Body, Ranges), @@ -340,7 +369,7 @@ ok({ContentType, ResponseHeaders, Body}) -> respond({200, HResponse1, Body}); PartList -> {RangeHeaders, RangeBody} = - parts_to_body(PartList, ContentType, Size), + mochiweb_multipart:parts_to_body(PartList, ContentType, Size), HResponse1 = mochiweb_headers:enter_from_list( [{"Accept-Ranges", "bytes"} | RangeHeaders], @@ -457,26 +486,23 @@ stream_chunked_body(MaxChunkSize, Fun, FunState) -> stream_unchunked_body(0, Fun, FunState) -> Fun({0, <<>>}, FunState); stream_unchunked_body(Length, Fun, FunState) when Length > 0 -> - Bin = recv(0), - BinSize = byte_size(Bin), - if BinSize > Length -> - <<OurBody:Length/binary, Extra/binary>> = Bin, - gen_tcp:unrecv(Socket, Extra), - NewState = Fun({Length, OurBody}, FunState), - stream_unchunked_body(0, Fun, NewState); - true -> - NewState = Fun({BinSize, Bin}, FunState), - stream_unchunked_body(Length - BinSize, Fun, NewState) - end. - + PktSize = case Length > ?RECBUF_SIZE of + true -> + ?RECBUF_SIZE; + false -> + Length + end, + Bin = recv(PktSize), + NewState = Fun({PktSize, Bin}, FunState), + stream_unchunked_body(Length - PktSize, Fun, NewState). %% @spec read_chunk_length() -> integer() %% @doc Read the length of the next HTTP chunk. read_chunk_length() -> - inet:setopts(Socket, [{packet, line}]), - case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + mochiweb_socket:setopts(Socket, [{packet, line}]), + case mochiweb_socket:recv(Socket, 0, ?IDLE_TIMEOUT) of {ok, Header} -> - inet:setopts(Socket, [{packet, raw}]), + mochiweb_socket:setopts(Socket, [{packet, raw}]), Splitter = fun (C) -> C =/= $\r andalso C =/= $\n andalso C =/= $ end, @@ -490,9 +516,9 @@ read_chunk_length() -> %% @doc Read in a HTTP chunk of the given length. If Length is 0, then read the %% HTTP footers (as a list of binaries, since they're nominal). 
read_chunk(0) -> - inet:setopts(Socket, [{packet, line}]), + mochiweb_socket:setopts(Socket, [{packet, line}]), F = fun (F1, Acc) -> - case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + case mochiweb_socket:recv(Socket, 0, ?IDLE_TIMEOUT) of {ok, <<"\r\n">>} -> Acc; {ok, Footer} -> @@ -502,10 +528,11 @@ read_chunk(0) -> end end, Footers = F(F, []), - inet:setopts(Socket, [{packet, raw}]), + mochiweb_socket:setopts(Socket, [{packet, raw}]), + put(?SAVE_RECV, true), Footers; read_chunk(Length) -> - case gen_tcp:recv(Socket, 2 + Length, ?IDLE_TIMEOUT) of + case mochiweb_socket:recv(Socket, 2 + Length, ?IDLE_TIMEOUT) of {ok, <<Chunk:Length/binary, "\r\n">>} -> Chunk; _ -> @@ -600,13 +627,6 @@ server_headers() -> [{"Server", "MochiWeb/1.0 (" ++ ?QUIP ++ ")"}, {"Date", httpd_util:rfc1123_date()}]. -make_io(Atom) when is_atom(Atom) -> - atom_to_list(Atom); -make_io(Integer) when is_integer(Integer) -> - integer_to_list(Integer); -make_io(Io) when is_list(Io); is_binary(Io) -> - Io. - make_code(X) when is_integer(X) -> [integer_to_list(X), [" " | httpd_util:reason_phrase(X)]]; make_code(Io) when is_list(Io); is_binary(Io) -> @@ -617,56 +637,10 @@ make_version({1, 0}) -> make_version(_) -> <<"HTTP/1.1 ">>. -iodevice_stream(IoDevice) -> - case file:read(IoDevice, ?READ_SIZE) of - eof -> - ok; - {ok, Data} -> - ok = send(Data), - iodevice_stream(IoDevice) - end. - - -parts_to_body([{Start, End, Body}], ContentType, Size) -> - %% return body for a range reponse with a single body - HeaderList = [{"Content-Type", ContentType}, - {"Content-Range", - ["bytes ", - make_io(Start), "-", make_io(End), - "/", make_io(Size)]}], - {HeaderList, Body}; -parts_to_body(BodyList, ContentType, Size) when is_list(BodyList) -> - %% return - %% header Content-Type: multipart/byteranges; boundary=441934886133bdee4 - %% and multipart body - Boundary = mochihex:to_hex(crypto:rand_bytes(8)), - HeaderList = [{"Content-Type", - ["multipart/byteranges; ", - "boundary=", Boundary]}], - MultiPartBody = multipart_body(BodyList, ContentType, Boundary, Size), - - {HeaderList, MultiPartBody}. - -multipart_body([], _ContentType, Boundary, _Size) -> - ["--", Boundary, "--\r\n"]; -multipart_body([{Start, End, Body} | BodyList], ContentType, Boundary, Size) -> - ["--", Boundary, "\r\n", - "Content-Type: ", ContentType, "\r\n", - "Content-Range: ", - "bytes ", make_io(Start), "-", make_io(End), - "/", make_io(Size), "\r\n\r\n", - Body, "\r\n" - | multipart_body(BodyList, ContentType, Boundary, Size)]. - -iodevice_size(IoDevice) -> - {ok, Size} = file:position(IoDevice, eof), - {ok, 0} = file:position(IoDevice, bof), - Size. - range_parts({file, IoDevice}, Ranges) -> - Size = iodevice_size(IoDevice), + Size = mochiweb_io:iodevice_size(IoDevice), F = fun (Spec, Acc) -> - case range_skip_length(Spec, Size) of + case mochiweb_http:range_skip_length(Spec, Size) of invalid_range -> Acc; V -> @@ -684,7 +658,7 @@ range_parts(Body0, Ranges) -> Body = iolist_to_binary(Body0), Size = size(Body), F = fun(Spec, Acc) -> - case range_skip_length(Spec, Size) of + case mochiweb_http:range_skip_length(Spec, Size) of invalid_range -> Acc; {Skip, Length} -> @@ -694,133 +668,101 @@ range_parts(Body0, Ranges) -> end, {lists:foldr(F, [], Ranges), Size}. 
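range_parts/2 above and the {file, IoDevice} clause of respond/1 now lean on mochiweb_io, whose helpers work on any io device, not just request bodies. A standalone sketch (the file name is a placeholder):

    {ok, Fd} = file:open("some_file.bin", [read, binary]),
    Size = mochiweb_io:iodevice_size(Fd),   %% measures, then rewinds to bof
    ok = mochiweb_io:iodevice_stream(
             fun (Chunk) -> io:format("read ~p bytes~n", [byte_size(Chunk)]) end,
             Fd),
    ok = file:close(Fd)
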
-range_skip_length(Spec, Size) -> - case Spec of - {none, R} when R =< Size, R >= 0 -> - {Size - R, R}; - {none, _OutOfRange} -> - {0, Size}; - {R, none} when R >= 0, R < Size -> - {R, Size - R}; - {_OutOfRange, none} -> - invalid_range; - {Start, End} when 0 =< Start, Start =< End, End < Size -> - {Start, End - Start + 1}; - {_OutOfRange, _End} -> - invalid_range +%% @spec accepted_encodings([encoding()]) -> [encoding()] | bad_accept_encoding_value +%% @type encoding() = string(). +%% +%% @doc Returns a list of encodings accepted by a request. Encodings that are +%% not supported by the server will not be included in the return list. +%% This list is computed from the "Accept-Encoding" header and +%% its elements are ordered, descendingly, according to their Q values. +%% +%% Section 14.3 of the RFC 2616 (HTTP 1.1) describes the "Accept-Encoding" +%% header and the process of determining which server supported encodings +%% can be used for encoding the body for the request's response. +%% +%% Examples +%% +%% 1) For a missing "Accept-Encoding" header: +%% accepted_encodings(["gzip", "identity"]) -> ["identity"] +%% +%% 2) For an "Accept-Encoding" header with value "gzip, deflate": +%% accepted_encodings(["gzip", "identity"]) -> ["gzip", "identity"] +%% +%% 3) For an "Accept-Encoding" header with value "gzip;q=0.5, deflate": +%% accepted_encodings(["gzip", "deflate", "identity"]) -> +%% ["deflate", "gzip", "identity"] +%% +accepted_encodings(SupportedEncodings) -> + AcceptEncodingHeader = case get_header_value("Accept-Encoding") of + undefined -> + ""; + Value -> + Value + end, + case mochiweb_util:parse_qvalues(AcceptEncodingHeader) of + invalid_qvalue_string -> + bad_accept_encoding_value; + QList -> + mochiweb_util:pick_accepted_encodings( + QList, SupportedEncodings, "identity" + ) end. -parse_range_request(RawRange) when is_list(RawRange) -> - try - "bytes=" ++ RangeString = RawRange, - Ranges = string:tokens(RangeString, ","), - lists:map(fun ("-" ++ V) -> - {none, list_to_integer(V)}; - (R) -> - case string:tokens(R, "-") of - [S1, S2] -> - {list_to_integer(S1), list_to_integer(S2)}; - [S] -> - {list_to_integer(S), none} - end - end, - Ranges) - catch - _:_ -> - fail +%% @spec accepts_content_type(string() | binary()) -> boolean() | bad_accept_header +%% +%% @doc Determines whether a request accepts a given media type by analyzing its +%% "Accept" header. 
+%% +%% Examples +%% +%% 1) For a missing "Accept" header: +%% accepts_content_type("application/json") -> true +%% +%% 2) For an "Accept" header with value "text/plain, application/*": +%% accepts_content_type("application/json") -> true +%% +%% 3) For an "Accept" header with value "text/plain, */*; q=0.0": +%% accepts_content_type("application/json") -> false +%% +%% 4) For an "Accept" header with value "text/plain; q=0.5, */*; q=0.1": +%% accepts_content_type("application/json") -> true +%% +%% 5) For an "Accept" header with value "text/*; q=0.0, */*": +%% accepts_content_type("text/plain") -> false +%% +accepts_content_type(ContentType) when is_binary(ContentType) -> + accepts_content_type(binary_to_list(ContentType)); +accepts_content_type(ContentType1) -> + ContentType = re:replace(ContentType1, "\\s", "", [global, {return, list}]), + AcceptHeader = case get_header_value("Accept") of + undefined -> + "*/*"; + Value -> + Value + end, + case mochiweb_util:parse_qvalues(AcceptHeader) of + invalid_qvalue_string -> + bad_accept_header; + QList -> + [MainType, _SubType] = string:tokens(ContentType, "/"), + SuperType = MainType ++ "/*", + lists:any( + fun({"*/*", Q}) when Q > 0.0 -> + true; + ({Type, Q}) when Q > 0.0 -> + Type =:= ContentType orelse Type =:= SuperType; + (_) -> + false + end, + QList + ) andalso + (not lists:member({ContentType, 0.0}, QList)) andalso + (not lists:member({SuperType, 0.0}, QList)) end. - -test() -> - ok = test_range(), - ok. - -test_range() -> - %% valid, single ranges - io:format("Testing parse_range_request with valid single ranges~n"), - io:format("1"), - [{20, 30}] = parse_range_request("bytes=20-30"), - io:format("2"), - [{20, none}] = parse_range_request("bytes=20-"), - io:format("3"), - [{none, 20}] = parse_range_request("bytes=-20"), - io:format(".. ok ~n"), - - %% invalid, single ranges - io:format("Testing parse_range_request with invalid ranges~n"), - io:format("1"), - fail = parse_range_request(""), - io:format("2"), - fail = parse_range_request("garbage"), - io:format("3"), - fail = parse_range_request("bytes=-20-30"), - io:format(".. ok ~n"), - - %% valid, multiple range - io:format("Testing parse_range_request with valid multiple ranges~n"), - io:format("1"), - [{20, 30}, {50, 100}, {110, 200}] = - parse_range_request("bytes=20-30,50-100,110-200"), - io:format("2"), - [{20, none}, {50, 100}, {none, 200}] = - parse_range_request("bytes=20-,50-100,-200"), - io:format(".. ok~n"), - - %% no ranges - io:format("Testing out parse_range_request with no ranges~n"), - io:format("1"), - [] = parse_range_request("bytes="), - io:format(".. ok~n"), - - Body = <<"012345678901234567890123456789012345678901234567890123456789">>, - BodySize = byte_size(Body), %% 60 - BodySize = 60, - - %% these values assume BodySize =:= 60 - io:format("Testing out range_skip_length on valid ranges~n"), - io:format("1"), - {1,9} = range_skip_length({1,9}, BodySize), %% 1-9 - io:format("2"), - {10,10} = range_skip_length({10,19}, BodySize), %% 10-19 - io:format("3"), - {40, 20} = range_skip_length({none, 20}, BodySize), %% -20 - io:format("4"), - {30, 30} = range_skip_length({30, none}, BodySize), %% 30- - io:format(".. 
ok ~n"), - - %% valid edge cases for range_skip_length - io:format("Testing out range_skip_length on valid edge case ranges~n"), - io:format("1"), - {BodySize, 0} = range_skip_length({none, 0}, BodySize), - io:format("2"), - {0, BodySize} = range_skip_length({none, BodySize}, BodySize), - io:format("3"), - {0, BodySize} = range_skip_length({0, none}, BodySize), - BodySizeLess1 = BodySize - 1, - io:format("4"), - {BodySizeLess1, 1} = range_skip_length({BodySize - 1, none}, BodySize), - - %% out of range, return whole thing - io:format("5"), - {0, BodySize} = range_skip_length({none, BodySize + 1}, BodySize), - io:format("6"), - {0, BodySize} = range_skip_length({none, -1}, BodySize), - io:format(".. ok ~n"), - - %% invalid ranges - io:format("Testing out range_skip_length on invalid ranges~n"), - io:format("1"), - invalid_range = range_skip_length({-1, 30}, BodySize), - io:format("2"), - invalid_range = range_skip_length({0, BodySize + 1}, BodySize), - io:format("3"), - invalid_range = range_skip_length({-1, BodySize + 1}, BodySize), - io:format("4"), - invalid_range = range_skip_length({BodySize, 40}, BodySize), - io:format("5"), - invalid_range = range_skip_length({-1, none}, BodySize), - io:format("6"), - invalid_range = range_skip_length({BodySize, none}, BodySize), - io:format(".. ok ~n"), - ok. - +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_response.erl b/src/mochiweb/mochiweb_response.erl index 6285c4c4..ab8ee61c 100644 --- a/src/mochiweb/mochiweb_response.erl +++ b/src/mochiweb/mochiweb_response.erl @@ -54,3 +54,11 @@ write_chunk(Data) -> _ -> send(Data) end. + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_skel.erl b/src/mochiweb/mochiweb_skel.erl index 36b48be5..76eefa60 100644 --- a/src/mochiweb/mochiweb_skel.erl +++ b/src/mochiweb/mochiweb_skel.erl @@ -14,10 +14,11 @@ skelcopy(DestDir, Name) -> N + 1 end, skelcopy(src(), DestDir, Name, LDst), + DestLink = filename:join([DestDir, Name, "deps", "mochiweb-src"]), + ok = filelib:ensure_dir(DestLink), ok = file:make_symlink( - filename:join(filename:dirname(code:which(?MODULE)), ".."), - filename:join([DestDir, Name, "deps", "mochiweb-src"])). - + filename:join(filename:dirname(code:which(?MODULE)), ".."), + DestLink). %% Internal API @@ -37,17 +38,22 @@ skelcopy(Src, DestDir, Name, LDst) -> EDst = lists:nthtail(LDst, Dir), ok = ensuredir(Dir), ok = file:write_file_info(Dir, #file_info{mode=Mode}), - {ok, Files} = file:list_dir(Src), - io:format("~s/~n", [EDst]), - lists:foreach(fun ("." ++ _) -> ok; - (F) -> - skelcopy(filename:join(Src, F), - Dir, - Name, - LDst) - end, - Files), - ok; + case filename:basename(Src) of + "ebin" -> + ok; + _ -> + {ok, Files} = file:list_dir(Src), + io:format("~s/~n", [EDst]), + lists:foreach(fun ("." ++ _) -> ok; + (F) -> + skelcopy(filename:join(Src, F), + Dir, + Name, + LDst) + end, + Files), + ok + end; {ok, #file_info{type=regular, mode=Mode}} -> OutFile = filename:join(DestDir, Dest), {ok, B} = file:read_file(Src), @@ -71,3 +77,10 @@ ensuredir(Dir) -> E -> E end. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_socket.erl b/src/mochiweb/mochiweb_socket.erl new file mode 100644 index 00000000..76b018c8 --- /dev/null +++ b/src/mochiweb/mochiweb_socket.erl @@ -0,0 +1,84 @@ +%% @copyright 2010 Mochi Media, Inc. + +%% @doc MochiWeb socket - wrapper for plain and ssl sockets. 
+ +-module(mochiweb_socket). + +-export([listen/4, accept/1, recv/3, send/2, close/1, port/1, peername/1, + setopts/2, type/1]). + +-define(ACCEPT_TIMEOUT, 2000). + +listen(Ssl, Port, Opts, SslOpts) -> + case Ssl of + true -> + case ssl:listen(Port, Opts ++ SslOpts) of + {ok, ListenSocket} -> + {ok, {ssl, ListenSocket}}; + {error, _} = Err -> + Err + end; + false -> + gen_tcp:listen(Port, Opts) + end. + +accept({ssl, ListenSocket}) -> + % There's a bug in ssl:transport_accept/2 at the moment, which is the + % reason for the try...catch block. Should be fixed in OTP R14. + try ssl:transport_accept(ListenSocket) of + {ok, Socket} -> + case ssl:ssl_accept(Socket) of + ok -> + {ok, {ssl, Socket}}; + {error, _} = Err -> + Err + end; + {error, _} = Err -> + Err + catch + error:{badmatch, {error, Reason}} -> + {error, Reason} + end; +accept(ListenSocket) -> + gen_tcp:accept(ListenSocket, ?ACCEPT_TIMEOUT). + +recv({ssl, Socket}, Length, Timeout) -> + ssl:recv(Socket, Length, Timeout); +recv(Socket, Length, Timeout) -> + gen_tcp:recv(Socket, Length, Timeout). + +send({ssl, Socket}, Data) -> + ssl:send(Socket, Data); +send(Socket, Data) -> + gen_tcp:send(Socket, Data). + +close({ssl, Socket}) -> + ssl:close(Socket); +close(Socket) -> + gen_tcp:close(Socket). + +port({ssl, Socket}) -> + case ssl:sockname(Socket) of + {ok, {_, Port}} -> + {ok, Port}; + {error, _} = Err -> + Err + end; +port(Socket) -> + inet:port(Socket). + +peername({ssl, Socket}) -> + ssl:peername(Socket); +peername(Socket) -> + inet:peername(Socket). + +setopts({ssl, Socket}, Opts) -> + ssl:setopts(Socket, Opts); +setopts(Socket, Opts) -> + inet:setopts(Socket, Opts). + +type({ssl, _}) -> + ssl; +type(_) -> + plain. + diff --git a/src/mochiweb/mochiweb_socket_server.erl b/src/mochiweb/mochiweb_socket_server.erl index 7aafe290..1aae09ac 100644 --- a/src/mochiweb/mochiweb_socket_server.erl +++ b/src/mochiweb/mochiweb_socket_server.erl @@ -7,22 +7,28 @@ -author('bob@mochimedia.com'). -behaviour(gen_server). +-include("internal.hrl"). + -export([start/1, stop/1]). -export([init/1, handle_call/3, handle_cast/2, terminate/2, code_change/3, handle_info/2]). -export([get/2]). --export([acceptor_loop/1]). - -record(mochiweb_socket_server, {port, loop, name=undefined, + %% NOTE: This is currently ignored. max=2048, ip=any, listen=null, - acceptor=null, - backlog=128}). + nodelay=false, + backlog=128, + active_sockets=0, + acceptor_pool_size=16, + ssl=false, + ssl_opts=[{ssl_imp, new}], + acceptor_pool=sets:new()}). 
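
With every transport call funneled through mochiweb_socket above, connection-handling code can stay agnostic about whether it was handed a plain or an {ssl, _} socket. A minimal sketch of such a caller (echo_loop is an illustrative name, not part of this change; it assumes a connected socket obtained via mochiweb_socket:listen/4 and accept/1):

    %% Sketch only: echo data back over either transport. Every call
    %% dispatches through the wrapper, so the same code serves plain
    %% and SSL connections.
    echo_loop(Socket) ->
        case mochiweb_socket:recv(Socket, 0, 60000) of
            {ok, Data} ->
                ok = mochiweb_socket:send(Socket, Data),
                echo_loop(Socket);
            {error, _} ->
                mochiweb_socket:close(Socket)
        end.
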
start(State=#mochiweb_socket_server{}) ->
    start_server(State);
@@ -54,6 +60,8 @@ parse_options([], State) ->
parse_options([{name, L} | Rest], State) when is_list(L) ->
    Name = {local, list_to_atom(L)},
    parse_options(Rest, State#mochiweb_socket_server{name=Name});
+parse_options([{name, A} | Rest], State) when A =:= undefined ->
+    parse_options(Rest, State#mochiweb_socket_server{name=A});
parse_options([{name, A} | Rest], State) when is_atom(A) ->
    Name = {local, A},
    parse_options(Rest, State#mochiweb_socket_server{name=Name});
@@ -79,16 +87,32 @@ parse_options([{loop, Loop} | Rest], State) ->
    parse_options(Rest, State#mochiweb_socket_server{loop=Loop});
parse_options([{backlog, Backlog} | Rest], State) ->
    parse_options(Rest, State#mochiweb_socket_server{backlog=Backlog});
+parse_options([{nodelay, NoDelay} | Rest], State) ->
+    parse_options(Rest, State#mochiweb_socket_server{nodelay=NoDelay});
+parse_options([{acceptor_pool_size, Max} | Rest], State) ->
+    MaxInt = ensure_int(Max),
+    parse_options(Rest,
+                  State#mochiweb_socket_server{acceptor_pool_size=MaxInt});
parse_options([{max, Max} | Rest], State) ->
-    MaxInt = case Max of
-                 Max when is_list(Max) ->
-                     list_to_integer(Max);
-                 Max when is_integer(Max) ->
-                     Max
-             end,
-    parse_options(Rest, State#mochiweb_socket_server{max=MaxInt}).
-
-start_server(State=#mochiweb_socket_server{name=Name}) ->
+    error_logger:info_report([{warning, "TODO: max is currently unsupported"},
+                              {max, Max}]),
+    MaxInt = ensure_int(Max),
+    parse_options(Rest, State#mochiweb_socket_server{max=MaxInt});
+parse_options([{ssl, Ssl} | Rest], State) when is_boolean(Ssl) ->
+    parse_options(Rest, State#mochiweb_socket_server{ssl=Ssl});
+parse_options([{ssl_opts, SslOpts} | Rest], State) when is_list(SslOpts) ->
+    SslOpts1 = [{ssl_imp, new} | proplists:delete(ssl_imp, SslOpts)],
+    parse_options(Rest, State#mochiweb_socket_server{ssl_opts=SslOpts1}).
+
+start_server(State=#mochiweb_socket_server{ssl=Ssl, name=Name}) ->
+    case Ssl of
+        true ->
+            application:start(crypto),
+            application:start(public_key),
+            application:start(ssl);
+        false ->
+            void
+    end,
    case Name of
        undefined ->
            gen_server:start_link(?MODULE, State, []);
@@ -96,6 +120,11 @@ start_server(State=#mochiweb_socket_server{name=Name}) ->
            gen_server:start_link(Name, ?MODULE, State, [])
    end.
+ensure_int(N) when is_integer(N) ->
+    N;
+ensure_int(S) when is_list(S) ->
+    list_to_integer(S).
+
ipv6_supported() ->
    case (catch inet:getaddr("localhost", inet6)) of
        {ok, _Addr} ->
@@ -104,15 +133,15 @@ ipv6_supported() ->
        false
    end.
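
The options handled by parse_options/2 above can be exercised together. A hedged sketch of an HTTPS listener configuration (the certificate paths and the my_handler module are placeholders, and it assumes the options-list clause of start/1 implied by the semicolon on the record-matching clause):

    %% Illustrative only: configure an SSL listener via the new options.
    Options = [{port, 8443},
               {name, https_server},
               %% my_handler:loop/1 is hypothetical; it receives the
               %% accepted socket from an acceptor.
               {loop, fun my_handler:loop/1},
               {nodelay, true},
               {acceptor_pool_size, 32},
               {ssl, true},
               %% placeholder certificate/key paths
               {ssl_opts, [{certfile, "priv/server_cert.pem"},
                           {keyfile, "priv/server_key.pem"}]}],
    mochiweb_socket_server:start(Options).
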
-init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog}) -> +init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog, nodelay=NoDelay}) -> process_flag(trap_exit, true), BaseOpts = [binary, {reuseaddr, true}, {packet, 0}, {backlog, Backlog}, - {recbuf, 8192}, + {recbuf, ?RECBUF_SIZE}, {active, false}, - {nodelay, true}], + {nodelay, NoDelay}], Opts = case Ip of any -> case ipv6_supported() of % IPv4, and IPv6 if supported @@ -124,7 +153,7 @@ init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog}) -> {_, _, _, _, _, _, _, _} -> % IPv6 [inet6, {ip, Ip} | BaseOpts] end, - case gen_tcp_listen(Port, Opts, State) of + case listen(Port, Opts, State) of {stop, eacces} -> case Port < 1024 of true -> @@ -132,7 +161,7 @@ init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog}) -> {ok, _} -> case fdsrv:bind_socket(tcp, Port) of {ok, Fd} -> - gen_tcp_listen(Port, [{fd, Fd} | Opts], State); + listen(Port, [{fd, Fd} | Opts], State); _ -> {stop, fdsrv_bind_failed} end; @@ -146,47 +175,33 @@ init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog}) -> Other end. -gen_tcp_listen(Port, Opts, State) -> - case gen_tcp:listen(Port, Opts) of +new_acceptor_pool(Listen, + State=#mochiweb_socket_server{acceptor_pool=Pool, + acceptor_pool_size=Size, + loop=Loop}) -> + F = fun (_, S) -> + Pid = mochiweb_acceptor:start_link(self(), Listen, Loop), + sets:add_element(Pid, S) + end, + Pool1 = lists:foldl(F, Pool, lists:seq(1, Size)), + State#mochiweb_socket_server{acceptor_pool=Pool1}. + +listen(Port, Opts, State=#mochiweb_socket_server{ssl=Ssl, ssl_opts=SslOpts}) -> + case mochiweb_socket:listen(Ssl, Port, Opts, SslOpts) of {ok, Listen} -> - {ok, ListenPort} = inet:port(Listen), - {ok, new_acceptor(State#mochiweb_socket_server{listen=Listen, - port=ListenPort})}; + {ok, ListenPort} = mochiweb_socket:port(Listen), + {ok, new_acceptor_pool( + Listen, + State#mochiweb_socket_server{listen=Listen, + port=ListenPort})}; {error, Reason} -> {stop, Reason} end. -new_acceptor(State=#mochiweb_socket_server{max=0}) -> - io:format("Not accepting new connections~n"), - State#mochiweb_socket_server{acceptor=null}; -new_acceptor(State=#mochiweb_socket_server{listen=Listen,loop=Loop}) -> - Pid = proc_lib:spawn_link(?MODULE, acceptor_loop, - [{self(), Listen, Loop}]), - State#mochiweb_socket_server{acceptor=Pid}. - -call_loop({M, F}, Socket) -> - M:F(Socket); -call_loop(Loop, Socket) -> - Loop(Socket). - -acceptor_loop({Server, Listen, Loop}) -> - case catch gen_tcp:accept(Listen) of - {ok, Socket} -> - gen_server:cast(Server, {accepted, self()}), - call_loop(Loop, Socket); - {error, closed} -> - exit({error, closed}); - Other -> - error_logger:error_report( - [{application, mochiweb}, - "Accept failed error", - lists:flatten(io_lib:format("~p", [Other]))]), - exit({error, accept_failed}) - end. - - do_get(port, #mochiweb_socket_server{port=Port}) -> - Port. + Port; +do_get(active_sockets, #mochiweb_socket_server{active_sockets=ActiveSockets}) -> + ActiveSockets. handle_call({get, Property}, _From, State) -> Res = do_get(Property, State), @@ -195,16 +210,15 @@ handle_call(_Message, _From, State) -> Res = error, {reply, Res, State}. 
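
Since do_get/2 now answers for both port and active_sockets, a running server can be polled for load. A sketch, assuming the server was registered as https_server (a placeholder name) and that the exported get/2 wraps the {get, Property} gen_server call handled above:

    %% Sketch: read the listen port and the live connection count.
    Port = mochiweb_socket_server:get(https_server, port),
    Active = mochiweb_socket_server:get(https_server, active_sockets).
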
-handle_cast({accepted, Pid}, - State=#mochiweb_socket_server{acceptor=Pid, max=Max}) -> - % io:format("accepted ~p~n", [Pid]), - State1 = State#mochiweb_socket_server{max=Max - 1}, - {noreply, new_acceptor(State1)}; +handle_cast({accepted, Pid, _Timing}, + State=#mochiweb_socket_server{active_sockets=ActiveSockets}) -> + State1 = State#mochiweb_socket_server{active_sockets=1 + ActiveSockets}, + {noreply, recycle_acceptor(Pid, State1)}; handle_cast(stop, State) -> {stop, normal, State}. terminate(_Reason, #mochiweb_socket_server{listen=Listen, port=Port}) -> - gen_tcp:close(Listen), + mochiweb_socket:close(Listen), case Port < 1024 of true -> catch fdsrv:stop(), @@ -216,33 +230,43 @@ terminate(_Reason, #mochiweb_socket_server{listen=Listen, port=Port}) -> code_change(_OldVsn, State, _Extra) -> State. -handle_info({'EXIT', Pid, normal}, - State=#mochiweb_socket_server{acceptor=Pid}) -> - % io:format("normal acceptor down~n"), - {noreply, new_acceptor(State)}; +recycle_acceptor(Pid, State=#mochiweb_socket_server{ + acceptor_pool=Pool, + listen=Listen, + loop=Loop, + active_sockets=ActiveSockets}) -> + case sets:is_element(Pid, Pool) of + true -> + Acceptor = mochiweb_acceptor:start_link(self(), Listen, Loop), + Pool1 = sets:add_element(Acceptor, sets:del_element(Pid, Pool)), + State#mochiweb_socket_server{acceptor_pool=Pool1}; + false -> + State#mochiweb_socket_server{active_sockets=ActiveSockets - 1} + end. + +handle_info({'EXIT', Pid, normal}, State) -> + {noreply, recycle_acceptor(Pid, State)}; handle_info({'EXIT', Pid, Reason}, - State=#mochiweb_socket_server{acceptor=Pid}) -> - error_logger:error_report({?MODULE, ?LINE, - {acceptor_error, Reason}}), - timer:sleep(100), - {noreply, new_acceptor(State)}; -handle_info({'EXIT', _LoopPid, Reason}, - State=#mochiweb_socket_server{acceptor=Pid, max=Max}) -> - case Reason of - normal -> - ok; - _ -> + State=#mochiweb_socket_server{acceptor_pool=Pool}) -> + case sets:is_element(Pid, Pool) of + true -> + %% If there was an unexpected error accepting, log and sleep. error_logger:error_report({?MODULE, ?LINE, - {child_error, Reason}}) + {acceptor_error, Reason}}), + timer:sleep(100); + false -> + ok end, - State1 = State#mochiweb_socket_server{max=Max + 1}, - State2 = case Pid of - null -> - new_acceptor(State1); - _ -> - State1 - end, - {noreply, State2}; + {noreply, recycle_acceptor(Pid, State)}; handle_info(Info, State) -> error_logger:info_report([{'INFO', Info}, {'State', State}]), {noreply, State}. + + + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_sup.erl b/src/mochiweb/mochiweb_sup.erl index 5cb525b5..af7df9b3 100644 --- a/src/mochiweb/mochiweb_sup.erl +++ b/src/mochiweb/mochiweb_sup.erl @@ -32,3 +32,10 @@ upgrade() -> init([]) -> Processes = [], {ok, {{one_for_one, 10, 10}, Processes}}. + +%% +%% Tests +%% +-include_lib("eunit/include/eunit.hrl"). +-ifdef(TEST). +-endif. diff --git a/src/mochiweb/mochiweb_util.erl b/src/mochiweb/mochiweb_util.erl index 73cacea4..62ff0d06 100644 --- a/src/mochiweb/mochiweb_util.erl +++ b/src/mochiweb/mochiweb_util.erl @@ -9,10 +9,11 @@ -export([path_split/1]). -export([urlsplit/1, urlsplit_path/1, urlunsplit/1, urlunsplit_path/1]). -export([guess_mime/1, parse_header/1]). --export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2]). +-export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2, cmd_status/1]). -export([record_to_proplist/2, record_to_proplist/3]). -export([safe_relative_path/1, partition/2]). --export([test/0]). 
+-export([parse_qvalues/1, pick_accepted_encodings/3]).
+-export([make_io/1]).

-define(PERCENT, 37). % $\%
-define(FULLSTOP, 46). % $\.
@@ -114,11 +115,32 @@ cmd(Argv) ->
%% @spec cmd_string([string()]) -> string()
%% @doc Create a shell quoted command string from a list of arguments.
cmd_string(Argv) ->
-    join([shell_quote(X) || X <- Argv], " ").
+    string:join([shell_quote(X) || X <- Argv], " ").
+
+%% @spec cmd_status([string()]) -> {ExitStatus::integer(), Stdout::binary()}
+%% @doc Accumulate the output and exit status from the given application,
+%% which will be spawned with cmd_port/2.
+cmd_status(Argv) ->
+    Port = cmd_port(Argv, [exit_status, stderr_to_stdout,
+                           use_stdio, binary]),
+    try cmd_loop(Port, [])
+    after catch port_close(Port)
+    end.
+
+%% @spec cmd_loop(port(), list()) -> {ExitStatus::integer(), Stdout::binary()}
+%% @doc Accumulate the output and exit status from a port.
+cmd_loop(Port, Acc) ->
+    receive
+        {Port, {exit_status, Status}} ->
+            {Status, iolist_to_binary(lists:reverse(Acc))};
+        {Port, {data, Data}} ->
+            cmd_loop(Port, [Data | Acc])
+    end.

-%% @spec join([string()], Separator) -> string()
-%% @doc Join a list of strings together with the given separator
-%% string or char.
+%% @spec join([iolist()], iolist()) -> iolist()
+%% @doc Join a list of strings or binaries together with the given separator
+%% string, char, or binary. The output is flattened, but may be an
+%% iolist() instead of a string() if any of the inputs are binary().
join([], _Separator) ->
    [];
join([S], _Separator) ->
@@ -159,10 +181,11 @@ quote_plus([C | Rest], Acc) ->
%% @spec urlencode([{Key, Value}]) -> string()
%% @doc URL encode the property list.
urlencode(Props) ->
-    RevPairs = lists:foldl(fun ({K, V}, Acc) ->
-                                   [[quote_plus(K), $=, quote_plus(V)] | Acc]
-                           end, [], Props),
-    lists:flatten(revjoin(RevPairs, $&, [])).
+    Pairs = lists:foldr(
+              fun ({K, V}, Acc) ->
+                      [quote_plus(K) ++ "=" ++ quote_plus(V) | Acc]
+              end, [], Props),
+    string:join(Pairs, "&").

%% @spec parse_qs(string() | binary()) -> [{Key, Value}]
%% @doc Parse a query string or application/x-www-form-urlencoded.
@@ -233,20 +256,31 @@ urlsplit(Url) ->
    {Scheme, Netloc, Path, Query, Fragment}.

urlsplit_scheme(Url) ->
-    urlsplit_scheme(Url, []).
+    case urlsplit_scheme(Url, []) of
+        no_scheme ->
+            {"", Url};
+        Res ->
+            Res
+    end.

-urlsplit_scheme([], Acc) ->
-    {"", lists:reverse(Acc)};
-urlsplit_scheme(":" ++ Rest, Acc) ->
+urlsplit_scheme([C | Rest], Acc) when ((C >= $a andalso C =< $z) orelse
+                                       (C >= $A andalso C =< $Z) orelse
+                                       (C >= $0 andalso C =< $9) orelse
+                                       C =:= $+ orelse C =:= $- orelse
+                                       C =:= $.) ->
+    urlsplit_scheme(Rest, [C | Acc]);
+urlsplit_scheme([$: | Rest], Acc=[_ | _]) ->
    {string:to_lower(lists:reverse(Acc)), Rest};
-urlsplit_scheme([C | Rest], Acc) ->
-    urlsplit_scheme(Rest, [C | Acc]).
+urlsplit_scheme(_Rest, _Acc) ->
+    no_scheme.

urlsplit_netloc("//" ++ Rest) ->
    urlsplit_netloc(Rest, []);
urlsplit_netloc(Path) ->
    {"", Path}.
+urlsplit_netloc("", Acc) ->
+    {lists:reverse(Acc), ""};
urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# ->
    {lists:reverse(Acc), Rest};
urlsplit_netloc([C | Rest], Acc) ->
    urlsplit_netloc(Rest, [C | Acc]).
@@ -311,67 +345,11 @@ urlsplit_query([C | Rest], Acc) ->
%% @spec guess_mime(string()) -> string()
%% @doc Guess the mime type of a file by the extension of its filename.
guess_mime(File) ->
-    case filename:extension(File) of
-        ".html" ->
-            "text/html";
-        ".xhtml" ->
-            "application/xhtml+xml";
-        ".xml" ->
-            "application/xml";
-        ".css" ->
-            "text/css";
-        ".js" ->
-            "application/x-javascript";
-        ".jpg" ->
-            "image/jpeg";
-        ".gif" ->
-            "image/gif";
-        ".png" ->
-            "image/png";
-        ".swf" ->
-            "application/x-shockwave-flash";
-        ".zip" ->
-            "application/zip";
-        ".bz2" ->
-            "application/x-bzip2";
-        ".gz" ->
-            "application/x-gzip";
-        ".tar" ->
-            "application/x-tar";
-        ".tgz" ->
-            "application/x-gzip";
-        ".txt" ->
+    case mochiweb_mime:from_extension(filename:extension(File)) of
+        undefined ->
            "text/plain";
-        ".doc" ->
-            "application/msword";
-        ".pdf" ->
-            "application/pdf";
-        ".xls" ->
-            "application/vnd.ms-excel";
-        ".rtf" ->
-            "application/rtf";
-        ".mov" ->
-            "video/quicktime";
-        ".mp3" ->
-            "audio/mpeg";
-        ".z" ->
-            "application/x-compress";
-        ".wav" ->
-            "audio/x-wav";
-        ".ico" ->
-            "image/x-icon";
-        ".bmp" ->
-            "image/bmp";
-        ".m4a" ->
-            "audio/mpeg";
-        ".m3u" ->
-            "audio/x-mpegurl";
-        ".exe" ->
-            "application/octet-stream";
-        ".csv" ->
-            "text/csv";
-        _ ->
-            "text/plain"
+        Mime ->
+            Mime
    end.

%% @spec parse_header(string()) -> {Type, [{K, V}]}
@@ -435,44 +413,267 @@ shell_quote([C | Rest], Acc) when C =:= $\" orelse C =:= $\` orelse
shell_quote([C | Rest], Acc) ->
    shell_quote(Rest, [C | Acc]).

-test() ->
-    test_join(),
-    test_quote_plus(),
-    test_unquote(),
-    test_urlencode(),
-    test_parse_qs(),
-    test_urlsplit_path(),
-    test_urlunsplit_path(),
-    test_urlsplit(),
-    test_urlunsplit(),
-    test_path_split(),
-    test_guess_mime(),
-    test_parse_header(),
-    test_shell_quote(),
-    test_cmd(),
-    test_cmd_string(),
-    test_partition(),
-    test_safe_relative_path(),
+%% @spec parse_qvalues(string()) -> [qvalue()] | invalid_qvalue_string
+%% @type qvalue() = {media_type() | encoding() , float()}.
+%% @type media_type() = string().
+%% @type encoding() = string().
+%%
+%% @doc Parses a list (given as a string) of elements with Q values associated
+%% to them. Elements are separated by commas and each element is separated
+%% from its Q value by a semicolon. Q values are optional but when missing
+%% the value of an element is considered as 1.0. A Q value is always in the
+%% range [0.0, 1.0]. A Q value list is used for example as the value of the
+%% HTTP "Accept" and "Accept-Encoding" headers.
+%%
+%% Q values are described in section 3.9 of the RFC 2616 (HTTP 1.1).
+%%
+%% Example:
+%%
+%% parse_qvalues("gzip; q=0.5, deflate, identity;q=0.0") ->
+%%    [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}]
+%%
+parse_qvalues(QValuesStr) ->
+    try
+        lists:map(
+            fun(Pair) ->
+                [Type | Params] = string:tokens(Pair, ";"),
+                NormParams = normalize_media_params(Params),
+                {Q, NonQParams} = extract_q(NormParams),
+                {string:join([string:strip(Type) | NonQParams], ";"), Q}
+            end,
+            string:tokens(string:to_lower(QValuesStr), ",")
+        )
+    catch
+        _Type:_Error ->
+            invalid_qvalue_string
+    end.
+
+normalize_media_params(Params) ->
+    {ok, Re} = re:compile("\\s"),
+    normalize_media_params(Re, Params, []).
+
+normalize_media_params(_Re, [], Acc) ->
+    lists:reverse(Acc);
+normalize_media_params(Re, [Param | Rest], Acc) ->
+    NormParam = re:replace(Param, Re, "", [global, {return, list}]),
+    normalize_media_params(Re, Rest, [NormParam | Acc]).
+
+extract_q(NormParams) ->
+    {ok, KVRe} = re:compile("^([^=]+)=([^=]+)$"),
+    {ok, QRe} = re:compile("^((?:0|1)(?:\\.\\d{1,3})?)$"),
+    extract_q(KVRe, QRe, NormParams, []).
+
+extract_q(_KVRe, _QRe, [], Acc) ->
+    {1.0, lists:reverse(Acc)};
+extract_q(KVRe, QRe, [Param | Rest], Acc) ->
+    case re:run(Param, KVRe, [{capture, [1, 2], list}]) of
+        {match, [Name, Value]} ->
+            case Name of
+                "q" ->
+                    {match, [Q]} = re:run(Value, QRe, [{capture, [1], list}]),
+                    QVal = case Q of
+                        "0" ->
+                            0.0;
+                        "1" ->
+                            1.0;
+                        Else ->
+                            list_to_float(Else)
+                    end,
+                    case QVal < 0.0 orelse QVal > 1.0 of
+                        false ->
+                            {QVal, lists:reverse(Acc) ++ Rest}
+                    end;
+                _ ->
+                    extract_q(KVRe, QRe, Rest, [Param | Acc])
+            end
+    end.
+
+%% @spec pick_accepted_encodings([qvalue()], [encoding()], encoding()) ->
+%%    [encoding()]
+%%
+%% @doc Determines which encodings specified in the given Q values list are
+%% valid according to a list of supported encodings and a default encoding.
+%%
+%% The returned list of encodings is sorted, in descending order, according
+%% to the Q values of the given list. The last element of this list is the
+%% given default encoding unless this encoding is explicitly or implicitly
+%% marked with a Q value of 0.0 in the given Q values list.
+%% Note: encodings with the same Q value are kept in the same order as
+%% found in the input Q values list.
+%%
+%% This encoding picking process is described in section 14.3 of the
+%% RFC 2616 (HTTP 1.1).
+%%
+%% Example:
+%%
+%% pick_accepted_encodings(
+%%     [{"gzip", 0.5}, {"deflate", 1.0}],
+%%     ["gzip", "identity"],
+%%     "identity"
+%% ) ->
+%%     ["gzip", "identity"]
+%%
+pick_accepted_encodings(AcceptedEncs, SupportedEncs, DefaultEnc) ->
+    SortedQList = lists:reverse(
+        lists:sort(fun({_, Q1}, {_, Q2}) -> Q1 < Q2 end, AcceptedEncs)
+    ),
+    {Accepted, Refused} = lists:foldr(
+        fun({E, Q}, {A, R}) ->
+            case Q > 0.0 of
+                true ->
+                    {[E | A], R};
+                false ->
+                    {A, [E | R]}
+            end
+        end,
+        {[], []},
+        SortedQList
+    ),
+    Refused1 = lists:foldr(
+        fun(Enc, Acc) ->
+            case Enc of
+                "*" ->
+                    lists:subtract(SupportedEncs, Accepted) ++ Acc;
+                _ ->
+                    [Enc | Acc]
+            end
+        end,
+        [],
+        Refused
+    ),
+    Accepted1 = lists:foldr(
+        fun(Enc, Acc) ->
+            case Enc of
+                "*" ->
+                    lists:subtract(SupportedEncs, Accepted ++ Refused1) ++ Acc;
+                _ ->
+                    [Enc | Acc]
+            end
+        end,
+        [],
+        Accepted
+    ),
+    Accepted2 = case lists:member(DefaultEnc, Accepted1) of
+        true ->
+            Accepted1;
+        false ->
+            Accepted1 ++ [DefaultEnc]
+    end,
+    [E || E <- Accepted2, lists:member(E, SupportedEncs),
+        not lists:member(E, Refused1)].
+
+make_io(Atom) when is_atom(Atom) ->
+    atom_to_list(Atom);
+make_io(Integer) when is_integer(Integer) ->
+    integer_to_list(Integer);
+make_io(Io) when is_list(Io); is_binary(Io) ->
+    Io.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+make_io_test() ->
+    ?assertEqual(
+       <<"atom">>,
+       iolist_to_binary(make_io(atom))),
+    ?assertEqual(
+       <<"20">>,
+       iolist_to_binary(make_io(20))),
+    ?assertEqual(
+       <<"list">>,
+       iolist_to_binary(make_io("list"))),
+    ?assertEqual(
+       <<"binary">>,
+       iolist_to_binary(make_io(<<"binary">>))),
+    ok.
+
+-record(test_record, {field1=f1, field2=f2}).
+record_to_proplist_test() ->
+    ?assertEqual(
+       [{'__record', test_record},
+        {field1, f1},
+        {field2, f2}],
+       record_to_proplist(#test_record{}, record_info(fields, test_record))),
+    ?assertEqual(
+       [{'typekey', test_record},
+        {field1, f1},
+        {field2, f2}],
+       record_to_proplist(#test_record{},
+                          record_info(fields, test_record),
+                          typekey)),
    ok.

-test_shell_quote() ->
-    "\"foo \\$bar\\\"\\`' baz\"" = shell_quote("foo $bar\"`' baz"),
+shell_quote_test() ->
+    ?assertEqual(
+       "\"foo \\$bar\\\"\\`' baz\"",
+       shell_quote("foo $bar\"`' baz")),
+    ok.
+ +cmd_port_test_spool(Port, Acc) -> + receive + {Port, eof} -> + Acc; + {Port, {data, {eol, Data}}} -> + cmd_port_test_spool(Port, ["\n", Data | Acc]); + {Port, Unknown} -> + throw({unknown, Unknown}) + after 100 -> + throw(timeout) + end. + +cmd_port_test() -> + Port = cmd_port(["echo", "$bling$ `word`!"], + [eof, stream, {line, 4096}]), + Res = try lists:append(lists:reverse(cmd_port_test_spool(Port, []))) + after catch port_close(Port) + end, + self() ! {Port, wtf}, + try cmd_port_test_spool(Port, []) + catch throw:{unknown, wtf} -> ok + end, + try cmd_port_test_spool(Port, []) + catch throw:timeout -> ok + end, + ?assertEqual( + "$bling$ `word`!\n", + Res). + +cmd_test() -> + ?assertEqual( + "$bling$ `word`!\n", + cmd(["echo", "$bling$ `word`!"])), ok. -test_cmd() -> - "$bling$ `word`!\n" = cmd(["echo", "$bling$ `word`!"]), +cmd_string_test() -> + ?assertEqual( + "\"echo\" \"\\$bling\\$ \\`word\\`!\"", + cmd_string(["echo", "$bling$ `word`!"])), ok. -test_cmd_string() -> - "\"echo\" \"\\$bling\\$ \\`word\\`!\"" = cmd_string(["echo", "$bling$ `word`!"]), +cmd_status_test() -> + ?assertEqual( + {0, <<"$bling$ `word`!\n">>}, + cmd_status(["echo", "$bling$ `word`!"])), ok. -test_parse_header() -> - {"multipart/form-data", [{"boundary", "AaB03x"}]} = - parse_header("multipart/form-data; boundary=AaB03x"), + +parse_header_test() -> + ?assertEqual( + {"multipart/form-data", [{"boundary", "AaB03x"}]}, + parse_header("multipart/form-data; boundary=AaB03x")), + %% This tests (currently) intentionally broken behavior + ?assertEqual( + {"multipart/form-data", + [{"b", ""}, + {"cgi", "is"}, + {"broken", "true\"e"}]}, + parse_header("multipart/form-data;b=;cgi=\"i\\s;broken=true\"e;=z;z")), ok. -test_guess_mime() -> +guess_mime_test() -> "text/plain" = guess_mime(""), "text/plain" = guess_mime(".text"), "application/zip" = guess_mime(".zip"), @@ -481,19 +682,22 @@ test_guess_mime() -> "application/xhtml+xml" = guess_mime("x.xhtml"), ok. -test_path_split() -> +path_split_test() -> {"", "foo/bar"} = path_split("/foo/bar"), {"foo", "bar"} = path_split("foo/bar"), {"bar", ""} = path_split("bar"), ok. -test_urlsplit() -> +urlsplit_test() -> {"", "", "/foo", "", "bar?baz"} = urlsplit("/foo#bar?baz"), {"http", "host:port", "/foo", "", "bar?baz"} = urlsplit("http://host:port/foo#bar?baz"), + {"http", "host", "", "", ""} = urlsplit("http://host"), + {"", "", "/wiki/Category:Fruit", "", ""} = + urlsplit("/wiki/Category:Fruit"), ok. -test_urlsplit_path() -> +urlsplit_path_test() -> {"/foo/bar", "", ""} = urlsplit_path("/foo/bar"), {"/foo", "baz", ""} = urlsplit_path("/foo?baz"), {"/foo", "", "bar?baz"} = urlsplit_path("/foo#bar?baz"), @@ -502,13 +706,13 @@ test_urlsplit_path() -> {"/foo", "bar?baz", "baz"} = urlsplit_path("/foo?bar?baz#baz"), ok. -test_urlunsplit() -> +urlunsplit_test() -> "/foo#bar?baz" = urlunsplit({"", "", "/foo", "", "bar?baz"}), "http://host:port/foo#bar?baz" = urlunsplit({"http", "host:port", "/foo", "", "bar?baz"}), ok. -test_urlunsplit_path() -> +urlunsplit_path_test() -> "/foo/bar" = urlunsplit_path({"/foo/bar", "", ""}), "/foo?baz" = urlunsplit_path({"/foo", "baz", ""}), "/foo#bar?baz" = urlunsplit_path({"/foo", "", "bar?baz"}), @@ -517,16 +721,28 @@ test_urlunsplit_path() -> "/foo?bar?baz#baz" = urlunsplit_path({"/foo", "bar?baz", "baz"}), ok. 
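
cmd_status_test above only exercises a zero exit status, but because cmd_port/2 is opened with exit_status and stderr_to_stdout, failures come back the same way. A hedged sketch (assumes a POSIX sh on the PATH):

    %% Sketch: a failing command yields its exit code with captured output.
    {1, <<>>} = mochiweb_util:cmd_status(["sh", "-c", "exit 1"]),
    {2, <<"oops\n">>} = mochiweb_util:cmd_status(["sh", "-c", "echo oops; exit 2"]).
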
-test_join() -> - "foo,bar,baz" = join(["foo", "bar", "baz"], $,), - "foo,bar,baz" = join(["foo", "bar", "baz"], ","), - "foo bar" = join([["foo", " bar"]], ","), - "foo bar,baz" = join([["foo", " bar"], "baz"], ","), - "foo" = join(["foo"], ","), - "foobarbaz" = join(["foo", "bar", "baz"], ""), +join_test() -> + ?assertEqual("foo,bar,baz", + join(["foo", "bar", "baz"], $,)), + ?assertEqual("foo,bar,baz", + join(["foo", "bar", "baz"], ",")), + ?assertEqual("foo bar", + join([["foo", " bar"]], ",")), + ?assertEqual("foo bar,baz", + join([["foo", " bar"], "baz"], ",")), + ?assertEqual("foo", + join(["foo"], ",")), + ?assertEqual("foobarbaz", + join(["foo", "bar", "baz"], "")), + ?assertEqual("foo" ++ [<<>>] ++ "bar" ++ [<<>>] ++ "baz", + join(["foo", "bar", "baz"], <<>>)), + ?assertEqual("foobar" ++ [<<"baz">>], + join(["foo", "bar", <<"baz">>], "")), + ?assertEqual("", + join([], "any")), ok. -test_quote_plus() -> +quote_plus_test() -> "foo" = quote_plus(foo), "1" = quote_plus(1), "1.1" = quote_plus(1.1), @@ -535,26 +751,45 @@ test_quote_plus() -> "foo%0A" = quote_plus("foo\n"), "foo%0A" = quote_plus("foo\n"), "foo%3B%26%3D" = quote_plus("foo;&="), + "foo%3B%26%3D" = quote_plus(<<"foo;&=">>), ok. -test_unquote() -> - "foo bar" = unquote("foo+bar"), - "foo bar" = unquote("foo%20bar"), - "foo\r\n" = unquote("foo%0D%0A"), +unquote_test() -> + ?assertEqual("foo bar", + unquote("foo+bar")), + ?assertEqual("foo bar", + unquote("foo%20bar")), + ?assertEqual("foo\r\n", + unquote("foo%0D%0A")), + ?assertEqual("foo\r\n", + unquote(<<"foo%0D%0A">>)), ok. -test_urlencode() -> +urlencode_test() -> "foo=bar&baz=wibble+%0D%0A&z=1" = urlencode([{foo, "bar"}, {"baz", "wibble \r\n"}, {z, 1}]), ok. -test_parse_qs() -> - [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}] = - parse_qs("foo=bar&baz=wibble+%0D%0A&z=1"), +parse_qs_test() -> + ?assertEqual( + [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}], + parse_qs("foo=bar&baz=wibble+%0D%0a&z=1")), + ?assertEqual( + [{"", "bar"}, {"baz", "wibble \r\n"}, {"z", ""}], + parse_qs("=bar&baz=wibble+%0D%0a&z=")), + ?assertEqual( + [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}], + parse_qs(<<"foo=bar&baz=wibble+%0D%0a&z=1">>)), + ?assertEqual( + [], + parse_qs("")), + ?assertEqual( + [{"foo", ""}, {"bar", ""}, {"baz", ""}], + parse_qs("foo;bar&baz")), ok. -test_partition() -> +partition_test() -> {"foo", "", ""} = partition("foo", "/"), {"foo", "/", "bar"} = partition("foo/bar", "/"), {"foo", "/", ""} = partition("foo/", "/"), @@ -562,7 +797,7 @@ test_partition() -> {"f", "oo/ba", "r"} = partition("foo/bar", "oo/ba"), ok. -test_safe_relative_path() -> +safe_relative_path_test() -> "foo" = safe_relative_path("foo"), "foo/" = safe_relative_path("foo/"), "foo" = safe_relative_path("foo/bar/.."), @@ -575,3 +810,164 @@ test_safe_relative_path() -> undefined = safe_relative_path("foo/../.."), undefined = safe_relative_path("foo//"), ok. 
+ +parse_qvalues_test() -> + [] = parse_qvalues(""), + [{"identity", 0.0}] = parse_qvalues("identity;q=0"), + [{"identity", 0.0}] = parse_qvalues("identity ;q=0"), + [{"identity", 0.0}] = parse_qvalues(" identity; q =0 "), + [{"identity", 0.0}] = parse_qvalues("identity ; q = 0"), + [{"identity", 0.0}] = parse_qvalues("identity ; q= 0.0"), + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip,deflate,identity;q=0.0" + ), + [{"deflate", 1.0}, {"gzip", 1.0}, {"identity", 0.0}] = parse_qvalues( + "deflate,gzip,identity;q=0.0" + ), + [{"gzip", 1.0}, {"deflate", 1.0}, {"gzip", 1.0}, {"identity", 0.0}] = + parse_qvalues("gzip,deflate,gzip,identity;q=0"), + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip, deflate , identity; q=0.0" + ), + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=1, deflate;q=1.0, identity;q=0.0" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=0.5, deflate;q=1.0, identity;q=0" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=0.5, deflate , identity;q=0.0" + ), + [{"gzip", 0.5}, {"deflate", 0.8}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=0.5, deflate;q=0.8, identity;q=0.0" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 1.0}] = parse_qvalues( + "gzip; q=0.5,deflate,identity" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 1.0}, {"identity", 1.0}] = + parse_qvalues("gzip; q=0.5,deflate,identity, identity "), + [{"text/html;level=1", 1.0}, {"text/plain", 0.5}] = + parse_qvalues("text/html;level=1, text/plain;q=0.5"), + [{"text/html;level=1", 0.3}, {"text/plain", 1.0}] = + parse_qvalues("text/html;level=1;q=0.3, text/plain"), + [{"text/html;level=1", 0.3}, {"text/plain", 1.0}] = + parse_qvalues("text/html; level = 1; q = 0.3, text/plain"), + [{"text/html;level=1", 0.3}, {"text/plain", 1.0}] = + parse_qvalues("text/html;q=0.3;level=1, text/plain"), + invalid_qvalue_string = parse_qvalues("gzip; q=1.1, deflate"), + invalid_qvalue_string = parse_qvalues("gzip; q=0.5, deflate;q=2"), + invalid_qvalue_string = parse_qvalues("gzip, deflate;q=AB"), + invalid_qvalue_string = parse_qvalues("gzip; q=2.1, deflate"), + invalid_qvalue_string = parse_qvalues("gzip; q=0.1234, deflate"), + invalid_qvalue_string = parse_qvalues("text/html;level=1;q=0.3, text/html;level"), + ok. 
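
The invalid_qvalue_string assertions above work because extract_q/4 deliberately has no true branch for an out-of-range Q: the badmatch it raises is caught by the try in parse_qvalues/1. A sketch of how parse_qvalues/1 and pick_accepted_encodings/3 compose into a full Accept-Encoding negotiation step (choose_encoding is an illustrative name, not part of this change):

    %% Sketch: pick a response encoding from a raw Accept-Encoding header.
    choose_encoding(Header) ->
        case mochiweb_util:parse_qvalues(Header) of
            invalid_qvalue_string ->
                "identity";  %% fall back on an unparseable header
            QList ->
                case mochiweb_util:pick_accepted_encodings(
                       QList, ["gzip", "identity"], "identity") of
                    [] -> none;        %% client refused all we support
                    [Enc | _] -> Enc
                end
        end.
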
+ +pick_accepted_encodings_test() -> + ["identity"] = pick_accepted_encodings( + [], + ["gzip", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 1.0}], + ["gzip", "identity"], + "identity" + ), + ["identity"] = pick_accepted_encodings( + [{"gzip", 0.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.5}, {"deflate", 1.0}], + ["gzip", "identity"], + "identity" + ), + ["identity"] = pick_accepted_encodings( + [{"gzip", 0.0}, {"deflate", 0.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip", "deflate", "identity"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "deflate", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 1.0}, {"deflate", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "gzip", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 1.0}, {"gzip", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 0.9}, {"gzip", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + [] = pick_accepted_encodings( + [{"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate", "identity"] = pick_accepted_encodings( + [{"*", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate", "identity"] = pick_accepted_encodings( + [{"*", 0.6}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 0.6}, {"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "gzip"] = pick_accepted_encodings( + [{"gzip", 0.5}, {"deflate", 1.0}, {"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"deflate", 0.0}, {"*", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"*", 1.0}, {"deflate", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ok. + +-endif. diff --git a/src/mochiweb/reloader.erl b/src/mochiweb/reloader.erl index 6835f8f9..c0f5de88 100644 --- a/src/mochiweb/reloader.erl +++ b/src/mochiweb/reloader.erl @@ -13,7 +13,9 @@ -export([start/0, start_link/0]). -export([stop/0]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). - +-export([all_changed/0]). +-export([is_changed/1]). +-export([reload_modules/1]). -record(state, {last, tref}). %% External API @@ -74,8 +76,37 @@ terminate(_Reason, State) -> code_change(_Vsn, State, _Extra) -> {ok, State}. 
+%% @spec reload_modules([atom()]) -> [{module, atom()} | {error, term()}]
+%% @doc code:purge/1 and code:load_file/1 the given list of modules in order,
+%% returning the results of code:load_file/1.
+reload_modules(Modules) ->
+    [begin code:purge(M), code:load_file(M) end || M <- Modules].
+
+%% @spec all_changed() -> [atom()]
+%% @doc Return a list of beam modules that have changed.
+all_changed() ->
+    [M || {M, Fn} <- code:all_loaded(), is_list(Fn), is_changed(M)].
+
+%% @spec is_changed(atom()) -> boolean()
+%% @doc Returns true if the loaded module is a beam with a vsn attribute
+%% whose value does not match the on-disk beam file; returns false otherwise.
+is_changed(M) ->
+    try
+        module_vsn(M:module_info()) =/= module_vsn(code:get_object_code(M))
+    catch _:_ ->
+            false
+    end.
+
%% Internal API

+module_vsn({M, Beam, _Fn}) ->
+    {ok, {M, Vsn}} = beam_lib:version(Beam),
+    Vsn;
+module_vsn(L) when is_list(L) ->
+    {_, Attrs} = lists:keyfind(attributes, 1, L),
+    {_, Vsn} = lists:keyfind(vsn, 1, Attrs),
+    Vsn.
+
doit(From, To) ->
    [case file:read_file_info(Filename) of
         {ok, #file_info{mtime = Mtime}} when Mtime >= From, Mtime < To ->
@@ -121,3 +152,10 @@ reload(Module) ->

stamp() ->
    erlang:localtime().
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.
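
The reloader exports added above compose into a development one-liner. A sketch of interactive use (the module name in the result is illustrative):

    %% Sketch: after recompiling beams on disk, pick up every change at once.
    1> reloader:reload_modules(reloader:all_changed()).
    [{module, my_module}]
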