Diffstat (limited to 'apps')
155 files changed, 33444 insertions, 0 deletions
diff --git a/apps/couch/c_src/couch_icu_driver.c b/apps/couch/c_src/couch_icu_driver.c new file mode 100644 index 00000000..1afe8eac --- /dev/null +++ b/apps/couch/c_src/couch_icu_driver.c @@ -0,0 +1,177 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +*/ + +// This file is the C port driver for Erlang. It provides a low overhead +// means of calling into C code, however coding errors in this module can +// crash the entire Erlang server. + +#ifdef DARWIN +#define U_HIDE_DRAFT_API 1 +#define U_DISABLE_RENAMING 1 +#endif + +#include "erl_driver.h" +#include "unicode/ucol.h" +#include "unicode/ucasemap.h" +#ifndef WIN32 +#include <string.h> // for memcpy +#endif + +typedef struct { + ErlDrvPort port; + UCollator* collNoCase; + UCollator* coll; +} couch_drv_data; + +static void couch_drv_stop(ErlDrvData data) +{ + couch_drv_data* pData = (couch_drv_data*)data; + if (pData->coll) { + ucol_close(pData->coll); + } + if (pData->collNoCase) { + ucol_close(pData->collNoCase); + } + driver_free((char*)pData); +} + +static ErlDrvData couch_drv_start(ErlDrvPort port, char *buff) +{ + UErrorCode status = U_ZERO_ERROR; + couch_drv_data* pData = (couch_drv_data*)driver_alloc(sizeof(couch_drv_data)); + + if (pData == NULL) + return ERL_DRV_ERROR_GENERAL; + + pData->port = port; + + pData->coll = ucol_open("", &status); + if (U_FAILURE(status)) { + couch_drv_stop((ErlDrvData)pData); + return ERL_DRV_ERROR_GENERAL; + } + + pData->collNoCase = ucol_open("", &status); + if (U_FAILURE(status)) { + couch_drv_stop((ErlDrvData)pData); + return ERL_DRV_ERROR_GENERAL; + } + + ucol_setAttribute(pData->collNoCase, UCOL_STRENGTH, UCOL_PRIMARY, &status); + if (U_FAILURE(status)) { + couch_drv_stop((ErlDrvData)pData); + return ERL_DRV_ERROR_GENERAL; + } + + return (ErlDrvData)pData; +} + +static int return_control_result(void* pLocalResult, int localLen, char **ppRetBuf, int returnLen) +{ + if (*ppRetBuf == NULL || localLen > returnLen) { + *ppRetBuf = (char*)driver_alloc_binary(localLen); + if(*ppRetBuf == NULL) { + return -1; + } + } + memcpy(*ppRetBuf, pLocalResult, localLen); + return localLen; +} + +static int couch_drv_control(ErlDrvData drv_data, unsigned int command, char *pBuf, + int bufLen, char **rbuf, int rlen) +{ + + couch_drv_data* pData = (couch_drv_data*)drv_data; + switch(command) { + case 0: // COLLATE + case 1: // COLLATE_NO_CASE: + { + UErrorCode status = U_ZERO_ERROR; + int collResult; + char response; + UCharIterator iterA; + UCharIterator iterB; + int32_t length; + + // 2 strings are in the buffer, consecutively + // The strings begin first with a 32 bit integer byte length, then the actual + // string bytes follow. + + // first 32bits are the length + memcpy(&length, pBuf, sizeof(length)); + pBuf += sizeof(length); + + // point the iterator at it. + uiter_setUTF8(&iterA, pBuf, length); + + pBuf += length; // now on to string b + + // first 32bits are the length + memcpy(&length, pBuf, sizeof(length)); + pBuf += sizeof(length); + + // point the iterator at it. 
+ uiter_setUTF8(&iterB, pBuf, length); + + if (command == 0) // COLLATE + collResult = ucol_strcollIter(pData->coll, &iterA, &iterB, &status); + else // COLLATE_NO_CASE + collResult = ucol_strcollIter(pData->collNoCase, &iterA, &iterB, &status); + + if (collResult < 0) + response = 0; //lt + else if (collResult > 0) + response = 2; //gt + else + response = 1; //eq + + return return_control_result(&response, sizeof(response), rbuf, rlen); + } + + default: + return -1; + } +} + +ErlDrvEntry couch_driver_entry = { + NULL, /* F_PTR init, N/A */ + couch_drv_start, /* L_PTR start, called when port is opened */ + couch_drv_stop, /* F_PTR stop, called when port is closed */ + NULL, /* F_PTR output, called when erlang has sent */ + NULL, /* F_PTR ready_input, called when input descriptor ready */ + NULL, /* F_PTR ready_output, called when output descriptor ready */ + "couch_icu_driver", /* char *driver_name, the argument to open_port */ + NULL, /* F_PTR finish, called when unloaded */ + NULL, /* Not used */ + couch_drv_control, /* F_PTR control, port_command callback */ + NULL, /* F_PTR timeout, reserved */ + NULL, /* F_PTR outputv, reserved */ + NULL, /* F_PTR ready_async */ + NULL, /* F_PTR flush */ + NULL, /* F_PTR call */ + NULL, /* F_PTR event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, /* Reserved -- Used by emulator internally */ + NULL, /* F_PTR process_exit */ +}; + +DRIVER_INIT(couch_icu_driver) /* must match name in driver_entry */ +{ + return &couch_driver_entry; +} diff --git a/apps/couch/include/couch_db.hrl b/apps/couch/include/couch_db.hrl new file mode 100644 index 00000000..a35745ef --- /dev/null +++ b/apps/couch/include/couch_db.hrl @@ -0,0 +1,294 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(LOCAL_DOC_PREFIX, "_local/"). +-define(DESIGN_DOC_PREFIX0, "_design"). +-define(DESIGN_DOC_PREFIX, "_design/"). + +-define(MIN_STR, <<"">>). +-define(MAX_STR, <<255>>). % illegal utf string + +-define(JSON_ENCODE(V), couch_util:json_encode(V)). +-define(JSON_DECODE(V), couch_util:json_decode(V)). + +-define(b2l(V), binary_to_list(V)). +-define(l2b(V), list_to_binary(V)). + +-define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>). + +-define(LOG_DEBUG(Format, Args), + case couch_log:debug_on() of + true -> + gen_event:sync_notify(error_logger, + {self(), couch_debug, {Format, Args}}); + false -> ok + end). + +-define(LOG_INFO(Format, Args), + case couch_log:info_on() of + true -> + gen_event:sync_notify(error_logger, + {self(), couch_info, {Format, Args}}); + false -> ok + end). + +-define(LOG_ERROR(Format, Args), + gen_event:sync_notify(error_logger, + {self(), couch_error, {Format, Args}})). + + +-record(rev_info, + { + rev, + seq = 0, + deleted = false, + body_sp = nil % stream pointer + }). + +-record(doc_info, + { + id = <<"">>, + high_seq = 0, + revs = [] % rev_info + }). 
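Referring back to the control callback in couch_icu_driver.c above: the driver expects both strings in one buffer, each preceded by its byte length, and answers with a single byte. The sketch below shows roughly how the Erlang side (couch_util:collate in the wider tree, not part of this hunk) drives it; the Port argument, the nocase option name and the omission of error handling are assumptions for illustration only.

%% Sketch only. Port is assumed to have been opened elsewhere, e.g.
%%   ok = erl_ddll:load_driver(code:priv_dir(couch), "couch_icu_driver"),
%%   Port = open_port({spawn, "couch_icu_driver"}, []).
collate(Port, A, B, Options) when is_binary(A), is_binary(B) ->
    Command = case lists:member(nocase, Options) of
        true  -> 1;   % COLLATE_NO_CASE
        false -> 0    % COLLATE
    end,
    SizeA = byte_size(A),
    SizeB = byte_size(B),
    %% each string is preceded by its byte length as a native-endian 32-bit
    %% integer, matching the memcpy(&length, pBuf, sizeof(length)) reads in
    %% couch_drv_control()
    Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>,
    [Result] = erlang:port_control(Port, Command, Bin),
    Result - 1.   % driver replies 0 (lt), 1 (eq) or 2 (gt); map to -1/0/1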
+ +-record(full_doc_info, + {id = <<"">>, + update_seq = 0, + deleted = false, + rev_tree = [] + }). + +-record(httpd, + {mochi_req, + peer, + method, + path_parts, + db_url_handlers, + user_ctx, + req_body = undefined, + design_url_handlers, + auth, + default_fun, + url_handlers + }). + + +-record(doc, + { + id = <<"">>, + revs = {0, []}, + + % the json body object. + body = {[]}, + + atts = [], % attachments + + deleted = false, + + % key/value tuple of meta information, provided when using special options: + % couch_db:open_doc(Db, Id, Options). + meta = [] + }). + + +-record(att, + { + name, + type, + att_len, + disk_len, % length of the attachment in its identity form + % (that is, without a content encoding applied to it) + % differs from att_len when encoding /= identity + md5= <<>>, + revpos=0, + data, + encoding=identity % currently supported values are: + % identity, gzip + % additional values to support in the future: + % deflate, compress + }). + + +-record(user_ctx, + { + name=null, + roles=[], + handler + }). + +% This should be updated anytime a header change happens that requires more +% than filling in new defaults. +% +% As long the changes are limited to new header fields (with inline +% defaults) added to the end of the record, then there is no need to increment +% the disk revision number. +% +% if the disk revision is incremented, then new upgrade logic will need to be +% added to couch_db_updater:init_db. + +-define(LATEST_DISK_VERSION, 5). + +-record(db_header, + {disk_version = ?LATEST_DISK_VERSION, + update_seq = 0, + unused = 0, + fulldocinfo_by_id_btree_state = nil, + docinfo_by_seq_btree_state = nil, + local_docs_btree_state = nil, + purge_seq = 0, + purged_docs = nil, + security_ptr = nil, + revs_limit = 1000 + }). + +-record(db, + {main_pid = nil, + update_pid = nil, + compactor_pid = nil, + instance_start_time, % number of microsecs since jan 1 1970 as a binary string + fd, + fd_ref_counter, + header = #db_header{}, + committed_update_seq, + fulldocinfo_by_id_btree, + docinfo_by_seq_btree, + local_docs_btree, + update_seq, + name, + filepath, + validate_doc_funs = [], + security = [], + security_ptr = nil, + user_ctx = #user_ctx{}, + waiting_delayed_commit = nil, + revs_limit = 1000, + fsync_options = [], + is_sys_db = false + }). + + +-record(view_query_args, { + start_key, + end_key, + start_docid = ?MIN_STR, + end_docid = ?MAX_STR, + + direction = fwd, + inclusive_end=true, % aka a closed-interval + + limit = 10000000000, % Huge number to simplify logic + skip = 0, + + group_level = 0, + + view_type = nil, + include_docs = false, + stale = false, + multi_get = false, + callback = nil, + list = nil +}). + +-record(view_fold_helper_funs, { + reduce_count, + passed_end, + start_response, + send_row +}). + +-record(reduce_fold_helper_funs, { + start_response, + send_row +}). + +-record(extern_resp_args, { + code = 200, + stop = false, + data = <<>>, + ctype = "application/json", + headers = [] +}). + +-record(group, { + sig=nil, + db=nil, + fd=nil, + name, + def_lang, + design_options=[], + views, + id_btree=nil, + current_seq=0, + purge_seq=0, + query_server=nil, + waiting_delayed_commit=nil + }). + +-record(view, + {id_num, + map_names=[], + def, + btree=nil, + reduce_funs=[], + options=[] + }). + +-record(index_header, + {seq=0, + purge_seq=0, + id_btree_state=nil, + view_states=nil + }). 
+ +-record(http_db, { + url, + auth = [], + resource = "", + headers = [ + {"User-Agent", "CouchDB/"++couch_server:get_version()}, + {"Accept", "application/json"}, + {"Accept-Encoding", "gzip"} + ], + qs = [], + method = get, + body = nil, + options = [ + {response_format,binary}, + {inactivity_timeout, 30000}, + {max_sessions, list_to_integer( + couch_config:get("replicator", "max_http_sessions", "10") + )}, + {max_pipeline_size, list_to_integer( + couch_config:get("replicator", "max_http_pipeline_size", "10") + )} + ], + retries = 10, + pause = 500, + conn = nil +}). + +% small value used in revision trees to indicate the revision isn't stored +-define(REV_MISSING, []). + +-record(changes_args, { + feed = "normal", + dir = fwd, + since = 0, + limit = 1000000000000000, + style = main_only, + heartbeat, + timeout, + filter = "", + include_docs = false +}). + diff --git a/apps/couch/priv/couchspawnkillable.sh b/apps/couch/priv/couchspawnkillable.sh new file mode 100644 index 00000000..f8d042e3 --- /dev/null +++ b/apps/couch/priv/couchspawnkillable.sh @@ -0,0 +1,20 @@ +#! /bin/sh -e + +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# The purpose of this script is to echo an OS specific command before launching +# the actual process. This provides a way for Erlang to hard-kill its external +# processes. + +echo "kill -9 $$" +exec $* diff --git a/apps/couch/priv/stat_descriptions.cfg b/apps/couch/priv/stat_descriptions.cfg new file mode 100644 index 00000000..5c972ddf --- /dev/null +++ b/apps/couch/priv/stat_descriptions.cfg @@ -0,0 +1,51 @@ +%% Licensed under the Apache License, Version 2.0 (the "License"); you may not +%% use this file except in compliance with the License. You may obtain a copy of +%% the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations under +%% the License. + +% Style guide for descriptions: Start with a lowercase letter & do not add +% a trailing full-stop / period +% Please keep this in alphabetical order + +{couchdb, database_writes, "number of times a database was changed"}. +{couchdb, database_reads, "number of times a document was read from a database"}. +{couchdb, open_databases, "number of open databases"}. +{couchdb, open_os_files, "number of file descriptors CouchDB has open"}. +{couchdb, request_time, "length of a request inside CouchDB without MochiWeb"}. +{couchdb, auth_cache_hits, "number of authentication cache hits"}. +{couchdb, auth_cache_misses, "number of authentication cache misses"}. + +{httpd, bulk_requests, "number of bulk requests"}. +{httpd, requests, "number of HTTP requests"}. +{httpd, temporary_view_reads, "number of temporary view reads"}. +{httpd, view_reads, "number of view reads"}. 
+{httpd, clients_requesting_changes, "number of clients for continuous _changes"}. + +{httpd_request_methods, 'COPY', "number of HTTP COPY requests"}. +{httpd_request_methods, 'DELETE', "number of HTTP DELETE requests"}. +{httpd_request_methods, 'GET', "number of HTTP GET requests"}. +{httpd_request_methods, 'HEAD', "number of HTTP HEAD requests"}. +{httpd_request_methods, 'MOVE', "number of HTTP MOVE requests"}. +{httpd_request_methods, 'POST', "number of HTTP POST requests"}. +{httpd_request_methods, 'PUT', "number of HTTP PUT requests"}. + +{httpd_status_codes, '200', "number of HTTP 200 OK responses"}. +{httpd_status_codes, '201', "number of HTTP 201 Created responses"}. +{httpd_status_codes, '202', "number of HTTP 202 Accepted responses"}. +{httpd_status_codes, '301', "number of HTTP 301 Moved Permanently responses"}. +{httpd_status_codes, '304', "number of HTTP 304 Not Modified responses"}. +{httpd_status_codes, '400', "number of HTTP 400 Bad Request responses"}. +{httpd_status_codes, '401', "number of HTTP 401 Unauthorized responses"}. +{httpd_status_codes, '403', "number of HTTP 403 Forbidden responses"}. +{httpd_status_codes, '404', "number of HTTP 404 Not Found responses"}. +{httpd_status_codes, '405', "number of HTTP 405 Method Not Allowed responses"}. +{httpd_status_codes, '409', "number of HTTP 409 Conflict responses"}. +{httpd_status_codes, '412', "number of HTTP 412 Precondition Failed responses"}. +{httpd_status_codes, '500', "number of HTTP 500 Internal Server Error responses"}. diff --git a/apps/couch/rebar.config b/apps/couch/rebar.config new file mode 100644 index 00000000..787e0337 --- /dev/null +++ b/apps/couch/rebar.config @@ -0,0 +1,7 @@ +{so_name, "couch_icu_driver.so"}. +{port_envs, [ + {"DRV_CFLAGS", "$DRV_CFLAGS -DPIC -O2 -fno-common"}, + {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}, + {"linux", "DRV_LDFLAGS", "$DRV_LDFLAGS -lcrypt"} +]}. +{lib_dirs, ["../"]}. diff --git a/apps/couch/src/couch.app.src b/apps/couch/src/couch.app.src new file mode 100644 index 00000000..2e25d55a --- /dev/null +++ b/apps/couch/src/couch.app.src @@ -0,0 +1,25 @@ +{application, couch, [ + {description, "Apache CouchDB"}, + {vsn, "1.0.1"}, + {registered, [ + couch_config, + couch_db_update, + couch_db_update_notifier_sup, + couch_external_manager, + couch_httpd, + couch_log, + couch_primary_services, + couch_query_servers, + couch_rep_sup, + couch_secondary_services, + couch_server, + couch_server_sup, + couch_stats_aggregator, + couch_stats_collector, + couch_task_status, + couch_view + ]}, + {mod, {couch_app, []}}, + {applications, [kernel, stdlib, crypto, sasl, inets, oauth, ibrowse, + mochiweb, ssl]} +]}. diff --git a/apps/couch/src/couch.erl b/apps/couch/src/couch.erl new file mode 100644 index 00000000..956e9489 --- /dev/null +++ b/apps/couch/src/couch.erl @@ -0,0 +1,39 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch). + +-compile(export_all). + +start() -> + application:start(couch). + +stop() -> + application:stop(couch). 
+ +restart() -> + case stop() of + ok -> + start(); + {error, {not_started,couch}} -> + start(); + {error, Reason} -> + {error, Reason} + end. + +reload() -> + case supervisor:terminate_child(couch_server_sup, couch_config) of + ok -> + supervisor:restart_child(couch_server_sup, couch_config); + {error, Reason} -> + {error, Reason} + end. diff --git a/apps/couch/src/couch_app.erl b/apps/couch/src/couch_app.erl new file mode 100644 index 00000000..232953d9 --- /dev/null +++ b/apps/couch/src/couch_app.erl @@ -0,0 +1,56 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_app). + +-behaviour(application). + +-include("couch_db.hrl"). + +-export([start/2, stop/1]). + +start(_Type, DefaultIniFiles) -> + IniFiles = get_ini_files(DefaultIniFiles), + case start_apps([crypto, public_key, sasl, inets, oauth, ssl, ibrowse, mochiweb]) of + ok -> + couch_server_sup:start_link(IniFiles); + {error, Reason} -> + {error, Reason} + end. + +stop(_) -> + ok. + +get_ini_files(Default) -> + case init:get_argument(couch_ini) of + error -> + Default; + {ok, [[]]} -> + Default; + {ok, [Values]} -> + Values + end. + +start_apps([]) -> + ok; +start_apps([App|Rest]) -> + case application:start(App) of + ok -> + start_apps(Rest); + {error, {already_started, App}} -> + start_apps(Rest); + {error, _Reason} when App =:= public_key -> + % ignore on R12B5 + start_apps(Rest); + {error, _Reason} -> + {error, {app_would_not_start, App}} + end. diff --git a/apps/couch/src/couch_auth_cache.erl b/apps/couch/src/couch_auth_cache.erl new file mode 100644 index 00000000..078bfcc1 --- /dev/null +++ b/apps/couch/src/couch_auth_cache.erl @@ -0,0 +1,410 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_auth_cache). +-behaviour(gen_server). + +% public API +-export([get_user_creds/1]). + +% gen_server API +-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]). +-export([code_change/3, terminate/2]). + +-include("couch_db.hrl"). +-include("couch_js_functions.hrl"). + +-define(STATE, auth_state_ets). +-define(BY_USER, auth_by_user_ets). +-define(BY_ATIME, auth_by_atime_ets). + +-record(state, { + max_cache_size = 0, + cache_size = 0, + db_notifier = nil +}). + + +-spec get_user_creds(UserName::string() | binary()) -> + Credentials::list() | nil. 
+ +get_user_creds(UserName) when is_list(UserName) -> + get_user_creds(?l2b(UserName)); + +get_user_creds(UserName) -> + UserCreds = case couch_config:get("admins", ?b2l(UserName)) of + "-hashed-" ++ HashedPwdAndSalt -> + % the name is an admin, now check to see if there is a user doc + % which has a matching name, salt, and password_sha + [HashedPwd, Salt] = string:tokens(HashedPwdAndSalt, ","), + case get_from_cache(UserName) of + nil -> + [{<<"roles">>, [<<"_admin">>]}, + {<<"salt">>, ?l2b(Salt)}, + {<<"password_sha">>, ?l2b(HashedPwd)}]; + UserProps when is_list(UserProps) -> + DocRoles = couch_util:get_value(<<"roles">>, UserProps), + [{<<"roles">>, [<<"_admin">> | DocRoles]}, + {<<"salt">>, ?l2b(Salt)}, + {<<"password_sha">>, ?l2b(HashedPwd)}] + end; + _Else -> + get_from_cache(UserName) + end, + validate_user_creds(UserCreds). + + +get_from_cache(UserName) -> + exec_if_auth_db( + fun(_AuthDb) -> + maybe_refresh_cache(), + case ets:lookup(?BY_USER, UserName) of + [] -> + gen_server:call(?MODULE, {fetch, UserName}, infinity); + [{UserName, {Credentials, _ATime}}] -> + couch_stats_collector:increment({couchdb, auth_cache_hits}), + gen_server:cast(?MODULE, {cache_hit, UserName}), + Credentials + end + end, + nil + ). + + +validate_user_creds(nil) -> + nil; +validate_user_creds(UserCreds) -> + case couch_util:get_value(<<"_conflicts">>, UserCreds) of + undefined -> + ok; + _ConflictList -> + throw({unauthorized, + <<"User document conflicts must be resolved before the document", + " is used for authentication purposes.">> + }) + end, + UserCreds. + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + ?STATE = ets:new(?STATE, [set, protected, named_table]), + ?BY_USER = ets:new(?BY_USER, [set, protected, named_table]), + ?BY_ATIME = ets:new(?BY_ATIME, [ordered_set, private, named_table]), + AuthDbName = couch_config:get("couch_httpd_auth", "authentication_db"), + true = ets:insert(?STATE, {auth_db_name, ?l2b(AuthDbName)}), + true = ets:insert(?STATE, {auth_db, open_auth_db()}), + process_flag(trap_exit, true), + ok = couch_config:register( + fun("couch_httpd_auth", "auth_cache_size", SizeList) -> + Size = list_to_integer(SizeList), + ok = gen_server:call(?MODULE, {new_max_cache_size, Size}, infinity) + end + ), + ok = couch_config:register( + fun("couch_httpd_auth", "authentication_db", DbName) -> + ok = gen_server:call(?MODULE, {new_auth_db, ?l2b(DbName)}, infinity) + end + ), + {ok, Notifier} = couch_db_update_notifier:start_link(fun handle_db_event/1), + State = #state{ + db_notifier = Notifier, + max_cache_size = list_to_integer( + couch_config:get("couch_httpd_auth", "auth_cache_size", "50") + ) + }, + {ok, State}. + + +handle_db_event({Event, DbName}) -> + [{auth_db_name, AuthDbName}] = ets:lookup(?STATE, auth_db_name), + case DbName =:= AuthDbName of + true -> + case Event of + deleted -> gen_server:call(?MODULE, auth_db_deleted, infinity); + created -> gen_server:call(?MODULE, auth_db_created, infinity); + _Else -> ok + end; + false -> + ok + end. 
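get_user_creds/1 above returns nil for an unknown user and otherwise a proplist carrying at least the roles, salt and password_sha fields (with the "_admin" role merged in for server admins). A hypothetical caller might unpack it as follows; <<"jan">> is a stand-in user name, not something defined by this diff.

%% Sketch of a caller of couch_auth_cache:get_user_creds/1.
case couch_auth_cache:get_user_creds(<<"jan">>) of
    nil ->
        unknown_user;
    UserProps when is_list(UserProps) ->
        Roles       = couch_util:get_value(<<"roles">>, UserProps, []),
        Salt        = couch_util:get_value(<<"salt">>, UserProps),
        PasswordSha = couch_util:get_value(<<"password_sha">>, UserProps),
        {Roles, Salt, PasswordSha}
end.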
+ + +handle_call({new_auth_db, AuthDbName}, _From, State) -> + NewState = clear_cache(State), + true = ets:insert(?STATE, {auth_db_name, AuthDbName}), + true = ets:insert(?STATE, {auth_db, open_auth_db()}), + {reply, ok, NewState}; + +handle_call(auth_db_deleted, _From, State) -> + NewState = clear_cache(State), + true = ets:insert(?STATE, {auth_db, nil}), + {reply, ok, NewState}; + +handle_call(auth_db_created, _From, State) -> + NewState = clear_cache(State), + true = ets:insert(?STATE, {auth_db, open_auth_db()}), + {reply, ok, NewState}; + +handle_call({new_max_cache_size, NewSize}, _From, State) -> + case NewSize >= State#state.cache_size of + true -> + ok; + false -> + lists:foreach( + fun(_) -> + LruTime = ets:last(?BY_ATIME), + [{LruTime, UserName}] = ets:lookup(?BY_ATIME, LruTime), + true = ets:delete(?BY_ATIME, LruTime), + true = ets:delete(?BY_USER, UserName) + end, + lists:seq(1, State#state.cache_size - NewSize) + ) + end, + NewState = State#state{ + max_cache_size = NewSize, + cache_size = erlang:min(NewSize, State#state.cache_size) + }, + {reply, ok, NewState}; + +handle_call({fetch, UserName}, _From, State) -> + {Credentials, NewState} = case ets:lookup(?BY_USER, UserName) of + [{UserName, {Creds, ATime}}] -> + couch_stats_collector:increment({couchdb, auth_cache_hits}), + cache_hit(UserName, Creds, ATime), + {Creds, State}; + [] -> + couch_stats_collector:increment({couchdb, auth_cache_misses}), + Creds = get_user_props_from_db(UserName), + State1 = add_cache_entry(UserName, Creds, erlang:now(), State), + {Creds, State1} + end, + {reply, Credentials, NewState}; + +handle_call(refresh, _From, State) -> + exec_if_auth_db(fun refresh_entries/1), + {reply, ok, State}. + + +handle_cast({cache_hit, UserName}, State) -> + case ets:lookup(?BY_USER, UserName) of + [{UserName, {Credentials, ATime}}] -> + cache_hit(UserName, Credentials, ATime); + _ -> + ok + end, + {noreply, State}. + + +handle_info(_Msg, State) -> + {noreply, State}. + + +terminate(_Reason, #state{db_notifier = Notifier}) -> + couch_db_update_notifier:stop(Notifier), + exec_if_auth_db(fun(AuthDb) -> catch couch_db:close(AuthDb) end), + true = ets:delete(?BY_USER), + true = ets:delete(?BY_ATIME), + true = ets:delete(?STATE). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +clear_cache(State) -> + exec_if_auth_db(fun(AuthDb) -> catch couch_db:close(AuthDb) end), + true = ets:delete_all_objects(?BY_USER), + true = ets:delete_all_objects(?BY_ATIME), + State#state{cache_size = 0}. + + +add_cache_entry(UserName, Credentials, ATime, State) -> + case State#state.cache_size >= State#state.max_cache_size of + true -> + free_mru_cache_entry(); + false -> + ok + end, + true = ets:insert(?BY_ATIME, {ATime, UserName}), + true = ets:insert(?BY_USER, {UserName, {Credentials, ATime}}), + State#state{cache_size = couch_util:get_value(size, ets:info(?BY_USER))}. + + +free_mru_cache_entry() -> + case ets:last(?BY_ATIME) of + '$end_of_table' -> + ok; % empty cache + LruTime -> + [{LruTime, UserName}] = ets:lookup(?BY_ATIME, LruTime), + true = ets:delete(?BY_ATIME, LruTime), + true = ets:delete(?BY_USER, UserName) + end. + + +cache_hit(UserName, Credentials, ATime) -> + NewATime = erlang:now(), + true = ets:delete(?BY_ATIME, ATime), + true = ets:insert(?BY_ATIME, {NewATime, UserName}), + true = ets:insert(?BY_USER, {UserName, {Credentials, NewATime}}). 
+ + +refresh_entries(AuthDb) -> + case reopen_auth_db(AuthDb) of + nil -> + ok; + AuthDb2 -> + case AuthDb2#db.update_seq > AuthDb#db.update_seq of + true -> + {ok, _, _} = couch_db:enum_docs_since( + AuthDb2, + AuthDb#db.update_seq, + fun(DocInfo, _, _) -> refresh_entry(AuthDb2, DocInfo) end, + AuthDb#db.update_seq, + [] + ), + true = ets:insert(?STATE, {auth_db, AuthDb2}); + false -> + ok + end + end. + + +refresh_entry(Db, #doc_info{high_seq = DocSeq} = DocInfo) -> + case is_user_doc(DocInfo) of + {true, UserName} -> + case ets:lookup(?BY_USER, UserName) of + [] -> + ok; + [{UserName, {_OldCreds, ATime}}] -> + {ok, Doc} = couch_db:open_doc(Db, DocInfo, [conflicts, deleted]), + NewCreds = user_creds(Doc), + true = ets:insert(?BY_USER, {UserName, {NewCreds, ATime}}) + end; + false -> + ok + end, + {ok, DocSeq}. + + +user_creds(#doc{deleted = true}) -> + nil; +user_creds(#doc{} = Doc) -> + {Creds} = couch_query_servers:json_doc(Doc), + Creds. + + +is_user_doc(#doc_info{id = <<"org.couchdb.user:", UserName/binary>>}) -> + {true, UserName}; +is_user_doc(_) -> + false. + + +maybe_refresh_cache() -> + case cache_needs_refresh() of + true -> + ok = gen_server:call(?MODULE, refresh, infinity); + false -> + ok + end. + + +cache_needs_refresh() -> + exec_if_auth_db( + fun(AuthDb) -> + case reopen_auth_db(AuthDb) of + nil -> + false; + AuthDb2 -> + AuthDb2#db.update_seq > AuthDb#db.update_seq + end + end, + false + ). + + +reopen_auth_db(AuthDb) -> + case (catch gen_server:call(AuthDb#db.main_pid, get_db, infinity)) of + {ok, AuthDb2} -> + AuthDb2; + _ -> + nil + end. + + +exec_if_auth_db(Fun) -> + exec_if_auth_db(Fun, ok). + +exec_if_auth_db(Fun, DefRes) -> + case ets:lookup(?STATE, auth_db) of + [{auth_db, #db{} = AuthDb}] -> + Fun(AuthDb); + _ -> + DefRes + end. + + +open_auth_db() -> + [{auth_db_name, DbName}] = ets:lookup(?STATE, auth_db_name), + {ok, AuthDb} = ensure_users_db_exists(DbName, [sys_db]), + AuthDb. + + +get_user_props_from_db(UserName) -> + exec_if_auth_db( + fun(AuthDb) -> + Db = reopen_auth_db(AuthDb), + DocId = <<"org.couchdb.user:", UserName/binary>>, + try + {ok, Doc} = couch_db:open_doc(Db, DocId, [conflicts]), + {DocProps} = couch_query_servers:json_doc(Doc), + DocProps + catch + _:_Error -> + nil + end + end, + nil + ). + +ensure_users_db_exists(DbName, Options) -> + Options1 = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}} | Options], + case couch_db:open(DbName, Options1) of + {ok, Db} -> + ensure_auth_ddoc_exists(Db, <<"_design/_auth">>), + {ok, Db}; + _Error -> + {ok, Db} = couch_db:create(DbName, Options1), + ok = ensure_auth_ddoc_exists(Db, <<"_design/_auth">>), + {ok, Db} + end. + +ensure_auth_ddoc_exists(Db, DDocId) -> + case couch_db:open_doc(Db, DDocId) of + {not_found, _Reason} -> + {ok, AuthDesign} = auth_design_doc(DDocId), + {ok, _Rev} = couch_db:update_doc(Db, AuthDesign, []); + _ -> + ok + end, + ok. + +auth_design_doc(DocId) -> + DocProps = [ + {<<"_id">>, DocId}, + {<<"language">>,<<"javascript">>}, + {<<"validate_doc_update">>, ?AUTH_DB_DOC_VALIDATE_FUNCTION} + ], + {ok, couch_doc:from_json_obj({DocProps})}. diff --git a/apps/couch/src/couch_btree.erl b/apps/couch/src/couch_btree.erl new file mode 100644 index 00000000..0e47bac7 --- /dev/null +++ b/apps/couch/src/couch_btree.erl @@ -0,0 +1,676 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_btree). + +-export([open/2, open/3, query_modify/4, add/2, add_remove/3]). +-export([fold/4, full_reduce/1, final_reduce/2, foldl/3, foldl/4]). +-export([fold_reduce/4, lookup/2, get_state/1, set_options/2]). + +-define(CHUNK_THRESHOLD, 16#4ff). + +-record(btree, + {fd, + root, + extract_kv = fun({Key, Value}) -> {Key, Value} end, + assemble_kv = fun(Key, Value) -> {Key, Value} end, + less = fun(A, B) -> A < B end, + reduce = nil + }). + +extract(#btree{extract_kv=Extract}, Value) -> + Extract(Value). + +assemble(#btree{assemble_kv=Assemble}, Key, Value) -> + Assemble(Key, Value). + +less(#btree{less=Less}, A, B) -> + Less(A, B). + +% pass in 'nil' for State if a new Btree. +open(State, Fd) -> + {ok, #btree{root=State, fd=Fd}}. + +set_options(Bt, []) -> + Bt; +set_options(Bt, [{split, Extract}|Rest]) -> + set_options(Bt#btree{extract_kv=Extract}, Rest); +set_options(Bt, [{join, Assemble}|Rest]) -> + set_options(Bt#btree{assemble_kv=Assemble}, Rest); +set_options(Bt, [{less, Less}|Rest]) -> + set_options(Bt#btree{less=Less}, Rest); +set_options(Bt, [{reduce, Reduce}|Rest]) -> + set_options(Bt#btree{reduce=Reduce}, Rest). + +open(State, Fd, Options) -> + {ok, set_options(#btree{root=State, fd=Fd}, Options)}. + +get_state(#btree{root=Root}) -> + Root. + +final_reduce(#btree{reduce=Reduce}, Val) -> + final_reduce(Reduce, Val); +final_reduce(Reduce, {[], []}) -> + Reduce(reduce, []); +final_reduce(_Bt, {[], [Red]}) -> + Red; +final_reduce(Reduce, {[], Reductions}) -> + Reduce(rereduce, Reductions); +final_reduce(Reduce, {KVs, Reductions}) -> + Red = Reduce(reduce, KVs), + final_reduce(Reduce, {[], [Red | Reductions]}). + +fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) -> + Dir = couch_util:get_value(dir, Options, fwd), + StartKey = couch_util:get_value(start_key, Options), + EndKey = couch_util:get_value(end_key, Options), + KeyGroupFun = couch_util:get_value(key_group_fun, Options, fun(_,_) -> true end), + {StartKey2, EndKey2} = + case Dir of + rev -> {EndKey, StartKey}; + fwd -> {StartKey, EndKey} + end, + try + {ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} = + reduce_stream_node(Bt, Dir, Root, StartKey2, EndKey2, undefined, [], [], + KeyGroupFun, Fun, Acc), + if GroupedKey2 == undefined -> + {ok, Acc2}; + true -> + case Fun(GroupedKey2, {GroupedKVsAcc2, GroupedRedsAcc2}, Acc2) of + {ok, Acc3} -> {ok, Acc3}; + {stop, Acc3} -> {ok, Acc3} + end + end + catch + throw:{stop, AccDone} -> {ok, AccDone} + end. + +full_reduce(#btree{root=nil,reduce=Reduce}) -> + {ok, Reduce(reduce, [])}; +full_reduce(#btree{root={_P, Red}}) -> + {ok, Red}. + +% wraps a 2 arity function with the proper 3 arity function +convert_fun_arity(Fun) when is_function(Fun, 2) -> + fun(KV, _Reds, AccIn) -> Fun(KV, AccIn) end; +convert_fun_arity(Fun) when is_function(Fun, 3) -> + Fun. 
% Already arity 3 + +make_key_in_end_range_function(#btree{less=Less}, fwd, Options) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + case couch_util:get_value(end_key, Options) of + undefined -> + fun(_Key) -> true end; + LastKey -> + fun(Key) -> not Less(LastKey, Key) end + end; + EndKey -> + fun(Key) -> Less(Key, EndKey) end + end; +make_key_in_end_range_function(#btree{less=Less}, rev, Options) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + case couch_util:get_value(end_key, Options) of + undefined -> + fun(_Key) -> true end; + LastKey -> + fun(Key) -> not Less(Key, LastKey) end + end; + EndKey -> + fun(Key) -> Less(EndKey, Key) end + end. + + +foldl(Bt, Fun, Acc) -> + fold(Bt, Fun, Acc, []). + +foldl(Bt, Fun, Acc, Options) -> + fold(Bt, Fun, Acc, Options). + + +fold(#btree{root=nil}, _Fun, Acc, _Options) -> + {ok, {[], []}, Acc}; +fold(#btree{root=Root}=Bt, Fun, Acc, Options) -> + Dir = couch_util:get_value(dir, Options, fwd), + InRange = make_key_in_end_range_function(Bt, Dir, Options), + Result = + case couch_util:get_value(start_key, Options) of + undefined -> + stream_node(Bt, [], Bt#btree.root, InRange, Dir, + convert_fun_arity(Fun), Acc); + StartKey -> + stream_node(Bt, [], Bt#btree.root, StartKey, InRange, Dir, + convert_fun_arity(Fun), Acc) + end, + case Result of + {ok, Acc2}-> + {_P, FullReduction} = Root, + {ok, {[], [FullReduction]}, Acc2}; + {stop, LastReduction, Acc2} -> + {ok, LastReduction, Acc2} + end. + +add(Bt, InsertKeyValues) -> + add_remove(Bt, InsertKeyValues, []). + +add_remove(Bt, InsertKeyValues, RemoveKeys) -> + {ok, [], Bt2} = query_modify(Bt, [], InsertKeyValues, RemoveKeys), + {ok, Bt2}. + +query_modify(Bt, LookupKeys, InsertValues, RemoveKeys) -> + #btree{root=Root} = Bt, + InsertActions = lists:map( + fun(KeyValue) -> + {Key, Value} = extract(Bt, KeyValue), + {insert, Key, Value} + end, InsertValues), + RemoveActions = [{remove, Key, nil} || Key <- RemoveKeys], + FetchActions = [{fetch, Key, nil} || Key <- LookupKeys], + SortFun = + fun({OpA, A, _}, {OpB, B, _}) -> + case A == B of + % A and B are equal, sort by op. + true -> op_order(OpA) < op_order(OpB); + false -> + less(Bt, A, B) + end + end, + Actions = lists:sort(SortFun, lists:append([InsertActions, RemoveActions, FetchActions])), + {ok, KeyPointers, QueryResults, Bt2} = modify_node(Bt, Root, Actions, []), + {ok, NewRoot, Bt3} = complete_root(Bt2, KeyPointers), + {ok, QueryResults, Bt3#btree{root=NewRoot}}. + +% for ordering different operatations with the same key. +% fetch < remove < insert +op_order(fetch) -> 1; +op_order(remove) -> 2; +op_order(insert) -> 3. + +lookup(#btree{root=Root, less=Less}=Bt, Keys) -> + SortedKeys = lists:sort(Less, Keys), + {ok, SortedResults} = lookup(Bt, Root, SortedKeys), + % We want to return the results in the same order as the keys were input + % but we may have changed the order when we sorted. So we need to put the + % order back into the results. + couch_util:reorder_results(Keys, SortedResults). + +lookup(_Bt, nil, Keys) -> + {ok, [{Key, not_found} || Key <- Keys]}; +lookup(Bt, {Pointer, _Reds}, Keys) -> + {NodeType, NodeList} = get_node(Bt, Pointer), + case NodeType of + kp_node -> + lookup_kpnode(Bt, list_to_tuple(NodeList), 1, Keys, []); + kv_node -> + lookup_kvnode(Bt, list_to_tuple(NodeList), 1, Keys, []) + end. 
+ +lookup_kpnode(_Bt, _NodeTuple, _LowerBound, [], Output) -> + {ok, lists:reverse(Output)}; +lookup_kpnode(_Bt, NodeTuple, LowerBound, Keys, Output) when tuple_size(NodeTuple) < LowerBound -> + {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; +lookup_kpnode(Bt, NodeTuple, LowerBound, [FirstLookupKey | _] = LookupKeys, Output) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, tuple_size(NodeTuple), FirstLookupKey), + {Key, PointerInfo} = element(N, NodeTuple), + SplitFun = fun(LookupKey) -> not less(Bt, Key, LookupKey) end, + case lists:splitwith(SplitFun, LookupKeys) of + {[], GreaterQueries} -> + lookup_kpnode(Bt, NodeTuple, N + 1, GreaterQueries, Output); + {LessEqQueries, GreaterQueries} -> + {ok, Results} = lookup(Bt, PointerInfo, LessEqQueries), + lookup_kpnode(Bt, NodeTuple, N + 1, GreaterQueries, lists:reverse(Results, Output)) + end. + + +lookup_kvnode(_Bt, _NodeTuple, _LowerBound, [], Output) -> + {ok, lists:reverse(Output)}; +lookup_kvnode(_Bt, NodeTuple, LowerBound, Keys, Output) when tuple_size(NodeTuple) < LowerBound -> + % keys not found + {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; +lookup_kvnode(Bt, NodeTuple, LowerBound, [LookupKey | RestLookupKeys], Output) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, tuple_size(NodeTuple), LookupKey), + {Key, Value} = element(N, NodeTuple), + case less(Bt, LookupKey, Key) of + true -> + % LookupKey is less than Key + lookup_kvnode(Bt, NodeTuple, N, RestLookupKeys, [{LookupKey, not_found} | Output]); + false -> + case less(Bt, Key, LookupKey) of + true -> + % LookupKey is greater than Key + lookup_kvnode(Bt, NodeTuple, N+1, RestLookupKeys, [{LookupKey, not_found} | Output]); + false -> + % LookupKey is equal to Key + lookup_kvnode(Bt, NodeTuple, N, RestLookupKeys, [{LookupKey, {ok, assemble(Bt, LookupKey, Value)}} | Output]) + end + end. + + +complete_root(Bt, []) -> + {ok, nil, Bt}; +complete_root(Bt, [{_Key, PointerInfo}])-> + {ok, PointerInfo, Bt}; +complete_root(Bt, KPs) -> + {ok, ResultKeyPointers, Bt2} = write_node(Bt, kp_node, KPs), + complete_root(Bt2, ResultKeyPointers). + +%%%%%%%%%%%%% The chunkify function sucks! %%%%%%%%%%%%% +% It is inaccurate as it does not account for compression when blocks are +% written. Plus with the "case byte_size(term_to_binary(InList)) of" code +% it's probably really inefficient. + +chunkify(InList) -> + case byte_size(term_to_binary(InList)) of + Size when Size > ?CHUNK_THRESHOLD -> + NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1), + ChunkThreshold = Size div NumberOfChunksLikely, + chunkify(InList, ChunkThreshold, [], 0, []); + _Else -> + [InList] + end. + +chunkify([], _ChunkThreshold, [], 0, OutputChunks) -> + lists:reverse(OutputChunks); +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> + lists:reverse([lists:reverse(OutList) | OutputChunks]); +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> + case byte_size(term_to_binary(InElement)) of + Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> + chunkify(RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); + Size -> + chunkify(RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) + end. 
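The public couch_btree API shown above (open/set_options, add_remove, lookup, foldl, full_reduce) composes as in the sketch below. couch_file:open/2 and the file path are assumptions from the wider tree, not part of this hunk; the reduce function simply counts entries so that full_reduce/1 yields the number of stored key/value pairs.

%% Sketch only; couch_file:open/2 is assumed from the rest of the source tree.
{ok, Fd} = couch_file:open("/tmp/example.btree", [create, overwrite]),
ReduceFun = fun(reduce, KVs)    -> length(KVs);
               (rereduce, Reds) -> lists:sum(Reds)
            end,
{ok, Bt0} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}]),
{ok, Bt1} = couch_btree:add_remove(Bt0, [{1, one}, {2, two}, {3, three}], []),
[{ok, {2, two}}, not_found] = couch_btree:lookup(Bt1, [2, 4]),   % results follow input key order
{ok, 3} = couch_btree:full_reduce(Bt1),                          % reduce fun counts the stored KVs
{ok, _Reds, RevKeys} = couch_btree:foldl(Bt1, fun({K, _V}, Acc) -> {ok, [K | Acc]} end, []),
[3, 2, 1] = RevKeys.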
+ +modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> + case RootPointerInfo of + nil -> + NodeType = kv_node, + NodeList = []; + {Pointer, _Reds} -> + {NodeType, NodeList} = get_node(Bt, Pointer) + end, + NodeTuple = list_to_tuple(NodeList), + + {ok, NewNodeList, QueryOutput2, Bt2} = + case NodeType of + kp_node -> modify_kpnode(Bt, NodeTuple, 1, Actions, [], QueryOutput); + kv_node -> modify_kvnode(Bt, NodeTuple, 1, Actions, [], QueryOutput) + end, + case NewNodeList of + [] -> % no nodes remain + {ok, [], QueryOutput2, Bt2}; + NodeList -> % nothing changed + {LastKey, _LastValue} = element(tuple_size(NodeTuple), NodeTuple), + {ok, [{LastKey, RootPointerInfo}], QueryOutput2, Bt2}; + _Else2 -> + {ok, ResultList, Bt3} = write_node(Bt2, NodeType, NewNodeList), + {ok, ResultList, QueryOutput2, Bt3} + end. + +reduce_node(#btree{reduce=nil}, _NodeType, _NodeList) -> + []; +reduce_node(#btree{reduce=R}, kp_node, NodeList) -> + R(rereduce, [Red || {_K, {_P, Red}} <- NodeList]); +reduce_node(#btree{reduce=R}=Bt, kv_node, NodeList) -> + R(reduce, [assemble(Bt, K, V) || {K, V} <- NodeList]). + + +get_node(#btree{fd = Fd}, NodePos) -> + {ok, {NodeType, NodeList}} = couch_file:pread_term(Fd, NodePos), + {NodeType, NodeList}. + +write_node(Bt, NodeType, NodeList) -> + % split up nodes into smaller sizes + NodeListList = chunkify(NodeList), + % now write out each chunk and return the KeyPointer pairs for those nodes + ResultList = [ + begin + {ok, Pointer} = couch_file:append_term(Bt#btree.fd, {NodeType, ANodeList}), + {LastKey, _} = lists:last(ANodeList), + {LastKey, {Pointer, reduce_node(Bt, NodeType, ANodeList)}} + end + || + ANodeList <- NodeListList + ], + {ok, ResultList, Bt}. + +modify_kpnode(Bt, {}, _LowerBound, Actions, [], QueryOutput) -> + modify_node(Bt, nil, Actions, QueryOutput); +modify_kpnode(Bt, NodeTuple, LowerBound, [], ResultNode, QueryOutput) -> + {ok, lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, + tuple_size(NodeTuple), [])), QueryOutput, Bt}; +modify_kpnode(Bt, NodeTuple, LowerBound, + [{_, FirstActionKey, _}|_]=Actions, ResultNode, QueryOutput) -> + Sz = tuple_size(NodeTuple), + N = find_first_gteq(Bt, NodeTuple, LowerBound, Sz, FirstActionKey), + case N =:= Sz of + true -> + % perform remaining actions on last node + {_, PointerInfo} = element(Sz, NodeTuple), + {ok, ChildKPs, QueryOutput2, Bt2} = + modify_node(Bt, PointerInfo, Actions, QueryOutput), + NodeList = lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, + Sz - 1, ChildKPs)), + {ok, NodeList, QueryOutput2, Bt2}; + false -> + {NodeKey, PointerInfo} = element(N, NodeTuple), + SplitFun = fun({_ActionType, ActionKey, _ActionValue}) -> + not less(Bt, NodeKey, ActionKey) + end, + {LessEqQueries, GreaterQueries} = lists:splitwith(SplitFun, Actions), + {ok, ChildKPs, QueryOutput2, Bt2} = + modify_node(Bt, PointerInfo, LessEqQueries, QueryOutput), + ResultNode2 = lists:reverse(ChildKPs, bounded_tuple_to_revlist(NodeTuple, + LowerBound, N - 1, ResultNode)), + modify_kpnode(Bt2, NodeTuple, N+1, GreaterQueries, ResultNode2, QueryOutput2) + end. + +bounded_tuple_to_revlist(_Tuple, Start, End, Tail) when Start > End -> + Tail; +bounded_tuple_to_revlist(Tuple, Start, End, Tail) -> + bounded_tuple_to_revlist(Tuple, Start+1, End, [element(Start, Tuple)|Tail]). + +bounded_tuple_to_list(Tuple, Start, End, Tail) -> + bounded_tuple_to_list2(Tuple, Start, End, [], Tail). 
+ +bounded_tuple_to_list2(_Tuple, Start, End, Acc, Tail) when Start > End -> + lists:reverse(Acc, Tail); +bounded_tuple_to_list2(Tuple, Start, End, Acc, Tail) -> + bounded_tuple_to_list2(Tuple, Start + 1, End, [element(Start, Tuple) | Acc], Tail). + +find_first_gteq(_Bt, _Tuple, Start, End, _Key) when Start == End -> + End; +find_first_gteq(Bt, Tuple, Start, End, Key) -> + Mid = Start + ((End - Start) div 2), + {TupleKey, _} = element(Mid, Tuple), + case less(Bt, TupleKey, Key) of + true -> + find_first_gteq(Bt, Tuple, Mid+1, End, Key); + false -> + find_first_gteq(Bt, Tuple, Start, Mid, Key) + end. + +modify_kvnode(Bt, NodeTuple, LowerBound, [], ResultNode, QueryOutput) -> + {ok, lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, tuple_size(NodeTuple), [])), QueryOutput, Bt}; +modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} | RestActions], ResultNode, QueryOutput) when LowerBound > tuple_size(NodeTuple) -> + case ActionType of + insert -> + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + remove -> + % just drop the action + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, ResultNode, QueryOutput); + fetch -> + % the key/value must not exist in the tree + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) + end; +modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} | RestActions], AccNode, QueryOutput) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, tuple_size(NodeTuple), ActionKey), + {Key, Value} = element(N, NodeTuple), + ResultNode = bounded_tuple_to_revlist(NodeTuple, LowerBound, N - 1, AccNode), + case less(Bt, ActionKey, Key) of + true -> + case ActionType of + insert -> + % ActionKey is less than the Key, so insert + modify_kvnode(Bt, NodeTuple, N, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + remove -> + % ActionKey is less than the Key, just drop the action + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, QueryOutput); + fetch -> + % ActionKey is less than the Key, the key/value must not exist in the tree + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) + end; + false -> + % ActionKey and Key are maybe equal. + case less(Bt, Key, ActionKey) of + false -> + case ActionType of + insert -> + modify_kvnode(Bt, NodeTuple, N+1, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + remove -> + modify_kvnode(Bt, NodeTuple, N+1, RestActions, ResultNode, QueryOutput); + fetch -> + % ActionKey is equal to the Key, insert into the QueryOuput, but re-process the node + % since an identical action key can follow it. + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, [{ok, assemble(Bt, Key, Value)} | QueryOutput]) + end; + true -> + modify_kvnode(Bt, NodeTuple, N + 1, [{ActionType, ActionKey, ActionValue} | RestActions], [{Key, Value} | ResultNode], QueryOutput) + end + end. 
+ + +reduce_stream_node(_Bt, _Dir, nil, _KeyStart, _KeyEnd, GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, _KeyGroupFun, _Fun, Acc) -> + {ok, Acc, GroupedRedsAcc, GroupedKVsAcc, GroupedKey}; +reduce_stream_node(Bt, Dir, {P, _R}, KeyStart, KeyEnd, GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> + case get_node(Bt, P) of + {kp_node, NodeList} -> + reduce_stream_kp_node(Bt, Dir, NodeList, KeyStart, KeyEnd, GroupedKey, + GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc); + {kv_node, KVs} -> + reduce_stream_kv_node(Bt, Dir, KVs, KeyStart, KeyEnd, GroupedKey, + GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc) + end. + +reduce_stream_kv_node(Bt, Dir, KVs, KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + KeyGroupFun, Fun, Acc) -> + + GTEKeyStartKVs = + case KeyStart of + undefined -> + KVs; + _ -> + lists:dropwhile(fun({Key,_}) -> less(Bt, Key, KeyStart) end, KVs) + end, + KVs2 = + case KeyEnd of + undefined -> + GTEKeyStartKVs; + _ -> + lists:takewhile( + fun({Key,_}) -> + not less(Bt, KeyEnd, Key) + end, GTEKeyStartKVs) + end, + reduce_stream_kv_node2(Bt, adjust_dir(Dir, KVs2), GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + KeyGroupFun, Fun, Acc). + + +reduce_stream_kv_node2(_Bt, [], GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + _KeyGroupFun, _Fun, Acc) -> + {ok, Acc, GroupedRedsAcc, GroupedKVsAcc, GroupedKey}; +reduce_stream_kv_node2(Bt, [{Key, Value}| RestKVs], GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> + case GroupedKey of + undefined -> + reduce_stream_kv_node2(Bt, RestKVs, Key, + [assemble(Bt,Key,Value)], [], KeyGroupFun, Fun, Acc); + _ -> + + case KeyGroupFun(GroupedKey, Key) of + true -> + reduce_stream_kv_node2(Bt, RestKVs, GroupedKey, + [assemble(Bt,Key,Value)|GroupedKVsAcc], GroupedRedsAcc, KeyGroupFun, + Fun, Acc); + false -> + case Fun(GroupedKey, {GroupedKVsAcc, GroupedRedsAcc}, Acc) of + {ok, Acc2} -> + reduce_stream_kv_node2(Bt, RestKVs, Key, [assemble(Bt,Key,Value)], + [], KeyGroupFun, Fun, Acc2); + {stop, Acc2} -> + throw({stop, Acc2}) + end + end + end. + +reduce_stream_kp_node(Bt, Dir, NodeList, KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + KeyGroupFun, Fun, Acc) -> + Nodes = + case KeyStart of + undefined -> + NodeList; + _ -> + lists:dropwhile( + fun({Key,_}) -> + less(Bt, Key, KeyStart) + end, NodeList) + end, + NodesInRange = + case KeyEnd of + undefined -> + Nodes; + _ -> + {InRange, MaybeInRange} = lists:splitwith( + fun({Key,_}) -> + less(Bt, Key, KeyEnd) + end, Nodes), + InRange ++ case MaybeInRange of [] -> []; [FirstMaybe|_] -> [FirstMaybe] end + end, + reduce_stream_kp_node2(Bt, Dir, adjust_dir(Dir, NodesInRange), KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc). 
+ + +reduce_stream_kp_node2(Bt, Dir, [{_Key, NodeInfo} | RestNodeList], KeyStart, KeyEnd, + undefined, [], [], KeyGroupFun, Fun, Acc) -> + {ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} = + reduce_stream_node(Bt, Dir, NodeInfo, KeyStart, KeyEnd, undefined, + [], [], KeyGroupFun, Fun, Acc), + reduce_stream_kp_node2(Bt, Dir, RestNodeList, KeyStart, KeyEnd, GroupedKey2, + GroupedKVsAcc2, GroupedRedsAcc2, KeyGroupFun, Fun, Acc2); +reduce_stream_kp_node2(Bt, Dir, NodeList, KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> + {Grouped0, Ungrouped0} = lists:splitwith(fun({Key,_}) -> + KeyGroupFun(GroupedKey, Key) end, NodeList), + {GroupedNodes, UngroupedNodes} = + case Grouped0 of + [] -> + {Grouped0, Ungrouped0}; + _ -> + [FirstGrouped | RestGrouped] = lists:reverse(Grouped0), + {RestGrouped, [FirstGrouped | Ungrouped0]} + end, + GroupedReds = [R || {_, {_,R}} <- GroupedNodes], + case UngroupedNodes of + [{_Key, NodeInfo}|RestNodes] -> + {ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} = + reduce_stream_node(Bt, Dir, NodeInfo, KeyStart, KeyEnd, GroupedKey, + GroupedKVsAcc, GroupedReds ++ GroupedRedsAcc, KeyGroupFun, Fun, Acc), + reduce_stream_kp_node2(Bt, Dir, RestNodes, KeyStart, KeyEnd, GroupedKey2, + GroupedKVsAcc2, GroupedRedsAcc2, KeyGroupFun, Fun, Acc2); + [] -> + {ok, Acc, GroupedReds ++ GroupedRedsAcc, GroupedKVsAcc, GroupedKey} + end. + +adjust_dir(fwd, List) -> + List; +adjust_dir(rev, List) -> + lists:reverse(List). + +stream_node(Bt, Reds, {Pointer, _Reds}, StartKey, InRange, Dir, Fun, Acc) -> + {NodeType, NodeList} = get_node(Bt, Pointer), + case NodeType of + kp_node -> + stream_kp_node(Bt, Reds, adjust_dir(Dir, NodeList), StartKey, InRange, Dir, Fun, Acc); + kv_node -> + stream_kv_node(Bt, Reds, adjust_dir(Dir, NodeList), StartKey, InRange, Dir, Fun, Acc) + end. + +stream_node(Bt, Reds, {Pointer, _Reds}, InRange, Dir, Fun, Acc) -> + {NodeType, NodeList} = get_node(Bt, Pointer), + case NodeType of + kp_node -> + stream_kp_node(Bt, Reds, adjust_dir(Dir, NodeList), InRange, Dir, Fun, Acc); + kv_node -> + stream_kv_node2(Bt, Reds, [], adjust_dir(Dir, NodeList), InRange, Dir, Fun, Acc) + end. + +stream_kp_node(_Bt, _Reds, [], _InRange, _Dir, _Fun, Acc) -> + {ok, Acc}; +stream_kp_node(Bt, Reds, [{_Key, {Pointer, Red}} | Rest], InRange, Dir, Fun, Acc) -> + case stream_node(Bt, Reds, {Pointer, Red}, InRange, Dir, Fun, Acc) of + {ok, Acc2} -> + stream_kp_node(Bt, [Red | Reds], Rest, InRange, Dir, Fun, Acc2); + {stop, LastReds, Acc2} -> + {stop, LastReds, Acc2} + end. + +drop_nodes(_Bt, Reds, _StartKey, []) -> + {Reds, []}; +drop_nodes(Bt, Reds, StartKey, [{NodeKey, {Pointer, Red}} | RestKPs]) -> + case less(Bt, NodeKey, StartKey) of + true -> drop_nodes(Bt, [Red | Reds], StartKey, RestKPs); + false -> {Reds, [{NodeKey, {Pointer, Red}} | RestKPs]} + end. 
+ +stream_kp_node(Bt, Reds, KPs, StartKey, InRange, Dir, Fun, Acc) -> + {NewReds, NodesToStream} = + case Dir of + fwd -> + % drop all nodes sorting before the key + drop_nodes(Bt, Reds, StartKey, KPs); + rev -> + % keep all nodes sorting before the key, AND the first node to sort after + RevKPs = lists:reverse(KPs), + case lists:splitwith(fun({Key, _Pointer}) -> less(Bt, Key, StartKey) end, RevKPs) of + {_RevsBefore, []} -> + % everything sorts before it + {Reds, KPs}; + {RevBefore, [FirstAfter | Drop]} -> + {[Red || {_K,{_P,Red}} <- Drop] ++ Reds, + [FirstAfter | lists:reverse(RevBefore)]} + end + end, + case NodesToStream of + [] -> + {ok, Acc}; + [{_Key, {Pointer, Red}} | Rest] -> + case stream_node(Bt, NewReds, {Pointer, Red}, StartKey, InRange, Dir, Fun, Acc) of + {ok, Acc2} -> + stream_kp_node(Bt, [Red | NewReds], Rest, InRange, Dir, Fun, Acc2); + {stop, LastReds, Acc2} -> + {stop, LastReds, Acc2} + end + end. + +stream_kv_node(Bt, Reds, KVs, StartKey, InRange, Dir, Fun, Acc) -> + DropFun = + case Dir of + fwd -> + fun({Key, _}) -> less(Bt, Key, StartKey) end; + rev -> + fun({Key, _}) -> less(Bt, StartKey, Key) end + end, + {LTKVs, GTEKVs} = lists:splitwith(DropFun, KVs), + AssembleLTKVs = [assemble(Bt,K,V) || {K,V} <- LTKVs], + stream_kv_node2(Bt, Reds, AssembleLTKVs, GTEKVs, InRange, Dir, Fun, Acc). + +stream_kv_node2(_Bt, _Reds, _PrevKVs, [], _InRange, _Dir, _Fun, Acc) -> + {ok, Acc}; +stream_kv_node2(Bt, Reds, PrevKVs, [{K,V} | RestKVs], InRange, Dir, Fun, Acc) -> + case InRange(K) of + false -> + {stop, {PrevKVs, Reds}, Acc}; + true -> + AssembledKV = assemble(Bt, K, V), + case Fun(AssembledKV, {PrevKVs, Reds}, Acc) of + {ok, Acc2} -> + stream_kv_node2(Bt, Reds, [AssembledKV | PrevKVs], RestKVs, InRange, Dir, Fun, Acc2); + {stop, Acc2} -> + {stop, {PrevKVs, Reds}, Acc2} + end + end. diff --git a/apps/couch/src/couch_changes.erl b/apps/couch/src/couch_changes.erl new file mode 100644 index 00000000..3a5bc4f8 --- /dev/null +++ b/apps/couch/src/couch_changes.erl @@ -0,0 +1,270 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_changes). +-include("couch_db.hrl"). + +-export([handle_changes/3]). + +%% @type Req -> #httpd{} | {json_req, JsonObj()} +handle_changes(#changes_args{style=Style}=Args1, Req, Db) -> + Args = Args1#changes_args{filter= + make_filter_fun(Args1#changes_args.filter, Style, Req, Db)}, + StartSeq = case Args#changes_args.dir of + rev -> + couch_db:get_update_seq(Db); + fwd -> + Args#changes_args.since + end, + if Args#changes_args.feed == "continuous" orelse + Args#changes_args.feed == "longpoll" -> + fun(Callback) -> + Self = self(), + {ok, Notify} = couch_db_update_notifier:start_link( + fun({_, DbName}) when DbName == Db#db.name -> + Self ! 
db_updated; + (_) -> + ok + end + ), + start_sending_changes(Callback, Args#changes_args.feed), + {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback), + try + keep_sending_changes( + Args, + Callback, + Db, + StartSeq, + <<"">>, + Timeout, + TimeoutFun + ) + after + couch_db_update_notifier:stop(Notify), + get_rest_db_updated() % clean out any remaining update messages + end + end; + true -> + fun(Callback) -> + start_sending_changes(Callback, Args#changes_args.feed), + {ok, {_, LastSeq, _Prepend, _, _, _, _, _}} = + send_changes( + Args#changes_args{feed="normal"}, + Callback, + Db, + StartSeq, + <<"">> + ), + end_sending_changes(Callback, LastSeq, Args#changes_args.feed) + end + end. + +%% @type Req -> #httpd{} | {json_req, JsonObj()} +make_filter_fun(FilterName, Style, Req, Db) -> + case [list_to_binary(couch_httpd:unquote(Part)) + || Part <- string:tokens(FilterName, "/")] of + [] -> + fun(#doc_info{revs=[#rev_info{rev=Rev}|_]=Revs}) -> + case Style of + main_only -> + [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]}]; + all_docs -> + [{[{<<"rev">>, couch_doc:rev_to_str(R)}]} + || #rev_info{rev=R} <- Revs] + end + end; + [DName, FName] -> + DesignId = <<"_design/", DName/binary>>, + DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []), + % validate that the ddoc has the filter fun + #doc{body={Props}} = DDoc, + couch_util:get_nested_json_value({Props}, [<<"filters">>, FName]), + fun(DocInfo) -> + DocInfos = + case Style of + main_only -> + [DocInfo]; + all_docs -> + [DocInfo#doc_info{revs=[Rev]}|| Rev <- DocInfo#doc_info.revs] + end, + Docs = [Doc || {ok, Doc} <- [ + couch_db:open_doc(Db, DocInfo2, [deleted, conflicts]) + || DocInfo2 <- DocInfos]], + {ok, Passes} = couch_query_servers:filter_docs( + Req, Db, DDoc, FName, Docs + ), + [{[{<<"rev">>, couch_doc:rev_to_str({RevPos,RevId})}]} + || {Pass, #doc{revs={RevPos,[RevId|_]}}} + <- lists:zip(Passes, Docs), Pass == true] + end; + _Else -> + throw({bad_request, + "filter parameter must be of the form `designname/filtername`"}) + end. + +get_changes_timeout(Args, Callback) -> + #changes_args{ + heartbeat = Heartbeat, + timeout = Timeout, + feed = ResponseType + } = Args, + DefaultTimeout = list_to_integer( + couch_config:get("httpd", "changes_timeout", "60000") + ), + case Heartbeat of + undefined -> + case Timeout of + undefined -> + {DefaultTimeout, fun() -> stop end}; + infinity -> + {infinity, fun() -> stop end}; + _ -> + {lists:min([DefaultTimeout, Timeout]), fun() -> stop end} + end; + true -> + {DefaultTimeout, fun() -> Callback(timeout, ResponseType), ok end}; + _ -> + {lists:min([DefaultTimeout, Heartbeat]), + fun() -> Callback(timeout, ResponseType), ok end} + end. + +start_sending_changes(_Callback, "continuous") -> + ok; +start_sending_changes(Callback, ResponseType) -> + Callback(start, ResponseType). + +send_changes(Args, Callback, Db, StartSeq, Prepend) -> + #changes_args{ + style = Style, + include_docs = IncludeDocs, + limit = Limit, + feed = ResponseType, + dir = Dir, + filter = FilterFun + } = Args, + couch_db:changes_since( + Db, + Style, + StartSeq, + fun changes_enumerator/2, + [{dir, Dir}], + {Db, StartSeq, Prepend, FilterFun, Callback, ResponseType, Limit, + IncludeDocs} + ). 
+ +keep_sending_changes(Args, Callback, Db, StartSeq, Prepend, Timeout, + TimeoutFun) -> + #changes_args{ + feed = ResponseType, + limit = Limit + } = Args, + % ?LOG_INFO("send_changes start ~p",[StartSeq]), + {ok, {_, EndSeq, Prepend2, _, _, _, NewLimit, _}} = send_changes( + Args#changes_args{dir=fwd}, Callback, Db, StartSeq, Prepend + ), + % ?LOG_INFO("send_changes last ~p",[EndSeq]), + couch_db:close(Db), + if Limit > NewLimit, ResponseType == "longpoll" -> + end_sending_changes(Callback, EndSeq, ResponseType); + true -> + case wait_db_updated(Timeout, TimeoutFun) of + updated -> + % ?LOG_INFO("wait_db_updated updated ~p",[{Db#db.name, EndSeq}]), + case couch_db:open(Db#db.name, [{user_ctx, Db#db.user_ctx}]) of + {ok, Db2} -> + keep_sending_changes( + Args#changes_args{limit=NewLimit}, + Callback, + Db2, + EndSeq, + Prepend2, + Timeout, + TimeoutFun + ); + _Else -> + end_sending_changes(Callback, EndSeq, ResponseType) + end; + stop -> + % ?LOG_INFO("wait_db_updated stop ~p",[{Db#db.name, EndSeq}]), + end_sending_changes(Callback, EndSeq, ResponseType) + end + end. + +end_sending_changes(Callback, EndSeq, ResponseType) -> + Callback({stop, EndSeq}, ResponseType). + +changes_enumerator(DocInfo, {Db, _, _, FilterFun, Callback, "continuous", + Limit, IncludeDocs}) -> + + #doc_info{id=Id, high_seq=Seq, + revs=[#rev_info{deleted=Del,rev=Rev}|_]} = DocInfo, + Results0 = FilterFun(DocInfo), + Results = [Result || Result <- Results0, Result /= null], + Go = if Limit =< 1 -> stop; true -> ok end, + case Results of + [] -> + {Go, {Db, Seq, nil, FilterFun, Callback, "continuous", Limit, + IncludeDocs} + }; + _ -> + ChangesRow = changes_row(Db, Seq, Id, Del, Results, Rev, IncludeDocs), + Callback({change, ChangesRow, <<"">>}, "continuous"), + {Go, {Db, Seq, nil, FilterFun, Callback, "continuous", Limit - 1, + IncludeDocs} + } + end; +changes_enumerator(DocInfo, {Db, _, Prepend, FilterFun, Callback, ResponseType, + Limit, IncludeDocs}) -> + + #doc_info{id=Id, high_seq=Seq, revs=[#rev_info{deleted=Del,rev=Rev}|_]} + = DocInfo, + Results0 = FilterFun(DocInfo), + Results = [Result || Result <- Results0, Result /= null], + Go = if Limit =< 1 -> stop; true -> ok end, + case Results of + [] -> + {Go, {Db, Seq, Prepend, FilterFun, Callback, ResponseType, Limit, + IncludeDocs} + }; + _ -> + ChangesRow = changes_row(Db, Seq, Id, Del, Results, Rev, IncludeDocs), + Callback({change, ChangesRow, Prepend}, ResponseType), + {Go, {Db, Seq, <<",\n">>, FilterFun, Callback, ResponseType, Limit - 1, + IncludeDocs} + } + end. + + +changes_row(Db, Seq, Id, Del, Results, Rev, true) -> + {[{<<"seq">>, Seq}, {<<"id">>, Id}, {<<"changes">>, Results}] ++ + deleted_item(Del) ++ couch_httpd_view:doc_member(Db, {Id, Rev})}; +changes_row(_, Seq, Id, Del, Results, _, false) -> + {[{<<"seq">>, Seq}, {<<"id">>, Id}, {<<"changes">>, Results}] ++ + deleted_item(Del)}. + +deleted_item(true) -> [{deleted, true}]; +deleted_item(_) -> []. + +% waits for a db_updated msg, if there are multiple msgs, collects them. +wait_db_updated(Timeout, TimeoutFun) -> + receive db_updated -> get_rest_db_updated() + after Timeout -> + case TimeoutFun() of + ok -> wait_db_updated(Timeout, TimeoutFun); + stop -> stop + end + end. + +get_rest_db_updated() -> + receive db_updated -> get_rest_db_updated() + after 0 -> updated + end. 
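
A minimal sketch (not part of the patch) of driving handle_changes/3 from Erlang. The db name and the printing callback are illustrative; #changes_args{} is the record from couch_db.hrl, and the callback shapes (start, {change, Row, Prepend}, {stop, Seq}, timeout) are the ones emitted by the functions above.

    print_changes(DbName) ->
        {ok, Db} = couch_db:open_int(DbName, []),
        ChangesFun = couch_changes:handle_changes(#changes_args{feed = "normal"}, nil, Db),
        ChangesFun(fun
            (start, _ResponseType) ->
                ok;
            ({change, {ChangeProps}, _Prepend}, _ResponseType) ->
                io:format("change: ~p~n", [ChangeProps]);
            ({stop, EndSeq}, _ResponseType) ->
                io:format("caught up at seq ~p~n", [EndSeq]);
            (timeout, _ResponseType) ->
                ok
        end).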
diff --git a/apps/couch/src/couch_config.erl b/apps/couch/src/couch_config.erl new file mode 100644 index 00000000..be53e3a3 --- /dev/null +++ b/apps/couch/src/couch_config.erl @@ -0,0 +1,243 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% Reads CouchDB's ini file and gets queried for configuration parameters. +% This module is initialized with a list of ini files that it consecutively +% reads Key/Value pairs from and saves them in an ets table. If more an one +% ini file is specified, the last one is used to write changes that are made +% with store/2 back to that ini file. + +-module(couch_config). +-behaviour(gen_server). + +-include("couch_db.hrl"). + + +-export([start_link/1, stop/0]). +-export([all/0, get/1, get/2, get/3, set/3, set/4, delete/2, delete/3]). +-export([register/1, register/2]). +-export([parse_ini_file/1]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-record(config, { + notify_funs=[], + write_filename=undefined +}). + + +start_link(IniFiles) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, IniFiles, []). + +stop() -> + gen_server:cast(?MODULE, stop). + + +all() -> + lists:sort(gen_server:call(?MODULE, all, infinity)). + + +get(Section) when is_binary(Section) -> + ?MODULE:get(?b2l(Section)); +get(Section) -> + Matches = ets:match(?MODULE, {{Section, '$1'}, '$2'}), + [{Key, Value} || [Key, Value] <- Matches]. + +get(Section, Key) -> + ?MODULE:get(Section, Key, undefined). + +get(Section, Key, Default) when is_binary(Section) and is_binary(Key) -> + ?MODULE:get(?b2l(Section), ?b2l(Key), Default); +get(Section, Key, Default) -> + case ets:lookup(?MODULE, {Section, Key}) of + [] -> Default; + [{_, Match}] -> Match + end. + +set(Section, Key, Value) -> + ?MODULE:set(Section, Key, Value, true). + +set(Section, Key, Value, Persist) when is_binary(Section) and is_binary(Key) -> + ?MODULE:set(?b2l(Section), ?b2l(Key), Value, Persist); +set(Section, Key, Value, Persist) -> + gen_server:call(?MODULE, {set, Section, Key, Value, Persist}). + + +delete(Section, Key) when is_binary(Section) and is_binary(Key) -> + delete(?b2l(Section), ?b2l(Key)); +delete(Section, Key) -> + delete(Section, Key, true). + +delete(Section, Key, Persist) when is_binary(Section) and is_binary(Key) -> + delete(?b2l(Section), ?b2l(Key), Persist); +delete(Section, Key, Persist) -> + gen_server:call(?MODULE, {delete, Section, Key, Persist}). + + +register(Fun) -> + ?MODULE:register(Fun, self()). + +register(Fun, Pid) -> + gen_server:call(?MODULE, {register, Fun, Pid}). + + +init(IniFiles) -> + ets:new(?MODULE, [named_table, set, protected]), + lists:map(fun(IniFile) -> + {ok, ParsedIniValues} = parse_ini_file(IniFile), + ets:insert(?MODULE, ParsedIniValues) + end, IniFiles), + WriteFile = case IniFiles of + [_|_] -> lists:last(IniFiles); + _ -> undefined + end, + {ok, #config{write_filename=WriteFile}}. + + +terminate(_Reason, _State) -> + ok. 
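
A hedged usage sketch for the public couch_config API above; the ini path is hypothetical and must exist, since parse_ini_file/1 throws startup_error for missing files, and set/4 with Persist = false only touches the ets table.

    {ok, _Pid} = couch_config:start_link(["priv/local.ini"]),
    Port = couch_config:get("httpd", "port", "5984"),
    ok = couch_config:set("httpd", "port", "5985", false),
    ok = couch_config:register(fun(Section, Key, Value) ->
        io:format("~s/~s changed to ~p~n", [Section, Key, Value])
    end).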
+ + +handle_call(all, _From, Config) -> + Resp = lists:sort((ets:tab2list(?MODULE))), + {reply, Resp, Config}; +handle_call({set, Sec, Key, Val, Persist}, From, Config) -> + true = ets:insert(?MODULE, {{Sec, Key}, Val}), + case {Persist, Config#config.write_filename} of + {true, undefined} -> + ok; + {true, FileName} -> + couch_config_writer:save_to_file({{Sec, Key}, Val}, FileName); + _ -> + ok + end, + spawn_link(fun() -> + [catch F(Sec, Key, Val, Persist) || {_Pid, F} <- Config#config.notify_funs], + gen_server:reply(From, ok) + end), + {noreply, Config}; +handle_call({delete, Sec, Key, Persist}, From, Config) -> + true = ets:delete(?MODULE, {Sec,Key}), + case {Persist, Config#config.write_filename} of + {true, undefined} -> + ok; + {true, FileName} -> + couch_config_writer:save_to_file({{Sec, Key}, ""}, FileName); + _ -> + ok + end, + spawn_link(fun() -> + [catch F(Sec, Key, deleted, Persist) || {_Pid, F} <- Config#config.notify_funs], + gen_server:reply(From, ok) + end), + {noreply, Config}; +handle_call({register, Fun, Pid}, _From, #config{notify_funs=PidFuns}=Config) -> + erlang:monitor(process, Pid), + % convert 1 and 2 arity to 3 arity + Fun2 = + case Fun of + _ when is_function(Fun, 1) -> + fun(Section, _Key, _Value, _Persist) -> Fun(Section) end; + _ when is_function(Fun, 2) -> + fun(Section, Key, _Value, _Persist) -> Fun(Section, Key) end; + _ when is_function(Fun, 3) -> + fun(Section, Key, Value, _Persist) -> Fun(Section, Key, Value) end; + _ when is_function(Fun, 4) -> + Fun + end, + {reply, ok, Config#config{notify_funs=[{Pid, Fun2} | PidFuns]}}. + + +handle_cast(stop, State) -> + {stop, normal, State}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', _, _, DownPid, _}, #config{notify_funs=PidFuns}=Config) -> + % remove any funs registered by the downed process + FilteredPidFuns = [{Pid,Fun} || {Pid,Fun} <- PidFuns, Pid /= DownPid], + {noreply, Config#config{notify_funs=FilteredPidFuns}}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +parse_ini_file(IniFile) -> + IniFilename = couch_util:abs_pathname(IniFile), + IniBin = + case file:read_file(IniFilename) of + {ok, IniBin0} -> + IniBin0; + {error, enoent} -> + Fmt = "Couldn't find server configuration file ~s.", + Msg = ?l2b(io_lib:format(Fmt, [IniFilename])), + ?LOG_ERROR("~s~n", [Msg]), + throw({startup_error, Msg}) + end, + + Lines = re:split(IniBin, "\r\n|\n|\r|\032", [{return, list}]), + {_, ParsedIniValues} = + lists:foldl(fun(Line, {AccSectionName, AccValues}) -> + case string:strip(Line) of + "[" ++ Rest -> + case re:split(Rest, "\\]", [{return, list}]) of + [NewSectionName, ""] -> + {NewSectionName, AccValues}; + _Else -> % end bracket not at end, ignore this line + {AccSectionName, AccValues} + end; + ";" ++ _Comment -> + {AccSectionName, AccValues}; + Line2 -> + case re:split(Line2, "\s?=\s?", [{return, list}]) of + [Value] -> + MultiLineValuePart = case re:run(Line, "^ \\S", []) of + {match, _} -> + true; + _ -> + false + end, + case {MultiLineValuePart, AccValues} of + {true, [{{_, ValueName}, PrevValue} | AccValuesRest]} -> + % remove comment + case re:split(Value, " ;|\t;", [{return, list}]) of + [[]] -> + % empty line + {AccSectionName, AccValues}; + [LineValue | _Rest] -> + E = {{AccSectionName, ValueName}, + PrevValue ++ " " ++ LineValue}, + {AccSectionName, [E | AccValuesRest]} + end; + _ -> + {AccSectionName, AccValues} + end; + [""|_LineValues] -> % line begins with "=", ignore + {AccSectionName, AccValues}; + [ValueName|LineValues] -> % yeehaw, got a line! 
+ RemainingLine = couch_util:implode(LineValues, "="), + % removes comments + case re:split(RemainingLine, " ;|\t;", [{return, list}]) of + [[]] -> + % empty line means delete this key + ets:delete(?MODULE, {AccSectionName, ValueName}), + {AccSectionName, AccValues}; + [LineValue | _Rest] -> + {AccSectionName, + [{{AccSectionName, ValueName}, LineValue} | AccValues]} + end + end + end + end, {"", []}, Lines), + {ok, ParsedIniValues}. + diff --git a/apps/couch/src/couch_config_writer.erl b/apps/couch/src/couch_config_writer.erl new file mode 100644 index 00000000..c8691d79 --- /dev/null +++ b/apps/couch/src/couch_config_writer.erl @@ -0,0 +1,79 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% @doc Saves a Key/Value pair to a ini file. The Key consists of a Section +%% and Option combination. If that combination is found in the ini file +%% the new value replaces the old value. If only the Section is found the +%% Option and value combination is appended to the Section. If the Section +%% does not yet exist in the ini file, it is added and the Option/Value +%% pair is appended. +%% @see couch_config + +-module(couch_config_writer). + +-export([save_to_file/2]). + +%% @spec save_to_file( +%% Config::{{Section::string(), Option::string()}, Value::string()}, +%% File::filename()) -> ok +%% @doc Saves a Section/Key/Value triple to the ini file File::filename() +save_to_file({{Section, Key}, Value}, File) -> + {ok, OldFileContents} = file:read_file(File), + Lines = re:split(OldFileContents, "\r\n|\n|\r|\032", [{return, list}]), + + SectionLine = "[" ++ Section ++ "]", + {ok, Pattern} = re:compile(["^(", Key, "\\s*=)|\\[[a-zA-Z0-9\_-]*\\]"]), + + NewLines = process_file_lines(Lines, [], SectionLine, Pattern, Key, Value), + NewFileContents = reverse_and_add_newline(strip_empty_lines(NewLines), []), + ok = file:write_file(File, NewFileContents). + + +process_file_lines([Section|Rest], SeenLines, Section, Pattern, Key, Value) -> + process_section_lines(Rest, [Section|SeenLines], Pattern, Key, Value); + +process_file_lines([Line|Rest], SeenLines, Section, Pattern, Key, Value) -> + process_file_lines(Rest, [Line|SeenLines], Section, Pattern, Key, Value); + +process_file_lines([], SeenLines, Section, _Pattern, Key, Value) -> + % Section wasn't found. Append it with the option here. + [Key ++ " = " ++ Value, Section, "" | strip_empty_lines(SeenLines)]. + + +process_section_lines([Line|Rest], SeenLines, Pattern, Key, Value) -> + case re:run(Line, Pattern, [{capture, all_but_first}]) of + nomatch -> % Found nothing interesting. Move on. + process_section_lines(Rest, [Line|SeenLines], Pattern, Key, Value); + {match, []} -> % Found another section. Append the option here. + lists:reverse(Rest) ++ + [Line, "", Key ++ " = " ++ Value | strip_empty_lines(SeenLines)]; + {match, _} -> % Found the option itself. Replace it. + lists:reverse(Rest) ++ [Key ++ " = " ++ Value | SeenLines] + end; + +process_section_lines([], SeenLines, _Pattern, Key, Value) -> + % Found end of file within the section. 
Append the option here. + [Key ++ " = " ++ Value | strip_empty_lines(SeenLines)]. + + +reverse_and_add_newline([Line|Rest], Content) -> + reverse_and_add_newline(Rest, [Line, "\n", Content]); + +reverse_and_add_newline([], Content) -> + Content. + + +strip_empty_lines(["" | Rest]) -> + strip_empty_lines(Rest); + +strip_empty_lines(All) -> + All. diff --git a/apps/couch/src/couch_db.erl b/apps/couch/src/couch_db.erl new file mode 100644 index 00000000..7678f6ca --- /dev/null +++ b/apps/couch/src/couch_db.erl @@ -0,0 +1,1195 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_db). +-behaviour(gen_server). + +-export([open/2,open_int/2,close/1,create/2,start_compact/1,get_db_info/1,get_design_docs/1]). +-export([open_ref_counted/2,is_idle/1,monitor/1,count_changes_since/2]). +-export([update_doc/3,update_doc/4,update_docs/4,update_docs/2,update_docs/3,delete_doc/3]). +-export([get_doc_info/2,open_doc/2,open_doc/3,open_doc_revs/4]). +-export([set_revs_limit/2,get_revs_limit/1]). +-export([get_missing_revs/2,name/1,doc_to_tree/1,get_update_seq/1,get_committed_update_seq/1]). +-export([enum_docs/4,enum_docs_since/5]). +-export([enum_docs_since_reduce_to_count/1,enum_docs_reduce_to_count/1]). +-export([increment_update_seq/1,get_purge_seq/1,purge_docs/2,get_last_purged/1]). +-export([start_link/3,open_doc_int/3,ensure_full_commit/1]). +-export([set_security/2,get_security/1]). +-export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]). +-export([changes_since/5,changes_since/6,read_doc/2,new_revid/1]). +-export([check_is_admin/1, check_is_reader/1]). + +-include("couch_db.hrl"). + + +start_link(DbName, Filepath, Options) -> + case open_db_file(Filepath, Options) of + {ok, Fd} -> + StartResult = gen_server:start_link(couch_db, {DbName, Filepath, Fd, Options}, []), + unlink(Fd), + StartResult; + Else -> + Else + end. + +open_db_file(Filepath, Options) -> + case couch_file:open(Filepath, Options) of + {ok, Fd} -> + {ok, Fd}; + {error, enoent} -> + % couldn't find file. is there a compact version? This can happen if + % crashed during the file switch. + case couch_file:open(Filepath ++ ".compact") of + {ok, Fd} -> + ?LOG_INFO("Found ~s~s compaction file, using as primary storage.", [Filepath, ".compact"]), + ok = file:rename(Filepath ++ ".compact", Filepath), + ok = couch_file:sync(Fd), + {ok, Fd}; + {error, enoent} -> + {not_found, no_db_file} + end; + Error -> + Error + end. + + +create(DbName, Options) -> + couch_server:create(DbName, Options). + +% this is for opening a database for internal purposes like the replicator +% or the view indexer. it never throws a reader error. +open_int(DbName, Options) -> + couch_server:open(DbName, Options). + +% this should be called anytime an http request opens the database. 
+% it ensures that the http userCtx is a valid reader +open(DbName, Options) -> + case couch_server:open(DbName, Options) of + {ok, Db} -> + try + check_is_reader(Db), + {ok, Db} + catch + throw:Error -> + close(Db), + throw(Error) + end; + Else -> Else + end. + +ensure_full_commit(#db{update_pid=UpdatePid,instance_start_time=StartTime}) -> + ok = gen_server:call(UpdatePid, full_commit, infinity), + {ok, StartTime}. + +close(#db{fd_ref_counter=RefCntr}) -> + couch_ref_counter:drop(RefCntr). + +open_ref_counted(MainPid, OpenedPid) -> + gen_server:call(MainPid, {open_ref_count, OpenedPid}). + +is_idle(MainPid) -> + gen_server:call(MainPid, is_idle). + +monitor(#db{main_pid=MainPid}) -> + erlang:monitor(process, MainPid). + +start_compact(#db{update_pid=Pid}) -> + gen_server:call(Pid, start_compact). + +delete_doc(Db, Id, Revisions) -> + DeletedDocs = [#doc{id=Id, revs=[Rev], deleted=true} || Rev <- Revisions], + {ok, [Result]} = update_docs(Db, DeletedDocs, []), + {ok, Result}. + +open_doc(Db, IdOrDocInfo) -> + open_doc(Db, IdOrDocInfo, []). + +open_doc(Db, Id, Options) -> + increment_stat(Db, {couchdb, database_reads}), + case open_doc_int(Db, Id, Options) of + {ok, #doc{deleted=true}=Doc} -> + case lists:member(deleted, Options) of + true -> + apply_open_options({ok, Doc},Options); + false -> + {not_found, deleted} + end; + Else -> + apply_open_options(Else,Options) + end. + +apply_open_options({ok, Doc},Options) -> + apply_open_options2(Doc,Options); +apply_open_options(Else,_Options) -> + Else. + +apply_open_options2(Doc,[]) -> + {ok, Doc}; +apply_open_options2(#doc{atts=Atts,revs=Revs}=Doc, + [{atts_since, PossibleAncestors}|Rest]) -> + RevPos = find_ancestor_rev_pos(Revs, PossibleAncestors), + apply_open_options2(Doc#doc{atts=[A#att{data= + if AttPos>RevPos -> Data; true -> stub end} + || #att{revpos=AttPos,data=Data}=A <- Atts]}, Rest); +apply_open_options2(Doc,[_|Rest]) -> + apply_open_options2(Doc,Rest). + + +find_ancestor_rev_pos({_, []}, _AttsSinceRevs) -> + 0; +find_ancestor_rev_pos(_DocRevs, []) -> + 0; +find_ancestor_rev_pos({RevPos, [RevId|Rest]}, AttsSinceRevs) -> + case lists:member({RevPos, RevId}, AttsSinceRevs) of + true -> + RevPos; + false -> + find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs) + end. + +open_doc_revs(Db, Id, Revs, Options) -> + increment_stat(Db, {couchdb, database_reads}), + [{ok, Results}] = open_doc_revs_int(Db, [{Id, Revs}], Options), + {ok, [apply_open_options(Result, Options) || Result <- Results]}. + +% Each returned result is a list of tuples: +% {Id, MissingRevs, PossibleAncestors} +% if no revs are missing, it's omitted from the results. +get_missing_revs(Db, IdRevsList) -> + Results = get_full_doc_infos(Db, [Id1 || {Id1, _Revs} <- IdRevsList]), + {ok, find_missing(IdRevsList, Results)}. 
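
An illustrative read path over the functions above; the db and doc names are made up, and #user_ctx{} is the record from couch_db.hrl. open/2 runs the reader check, and open_doc/3 applies the options (here asking for conflict metadata).

    UserCtx = #user_ctx{name = <<"bob">>, roles = []},
    {ok, Db} = couch_db:open(<<"testdb">>, [{user_ctx, UserCtx}]),
    {ok, #doc{body = Body, revs = {Pos, [RevId | _]}}} =
        couch_db:open_doc(Db, <<"docid">>, [conflicts, deleted_conflicts]),
    couch_db:close(Db).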
+ +find_missing([], []) -> + []; +find_missing([{Id, Revs}|RestIdRevs], [{ok, FullInfo} | RestLookupInfo]) -> + case couch_key_tree:find_missing(FullInfo#full_doc_info.rev_tree, Revs) of + [] -> + find_missing(RestIdRevs, RestLookupInfo); + MissingRevs -> + #doc_info{revs=RevsInfo} = couch_doc:to_doc_info(FullInfo), + LeafRevs = [Rev || #rev_info{rev=Rev} <- RevsInfo], + % Find the revs that are possible parents of this rev + PossibleAncestors = + lists:foldl(fun({LeafPos, LeafRevId}, Acc) -> + % this leaf is a "possible ancenstor" of the missing + % revs if this LeafPos lessthan any of the missing revs + case lists:any(fun({MissingPos, _}) -> + LeafPos < MissingPos end, MissingRevs) of + true -> + [{LeafPos, LeafRevId} | Acc]; + false -> + Acc + end + end, [], LeafRevs), + [{Id, MissingRevs, PossibleAncestors} | + find_missing(RestIdRevs, RestLookupInfo)] + end; +find_missing([{Id, Revs}|RestIdRevs], [not_found | RestLookupInfo]) -> + [{Id, Revs, []} | find_missing(RestIdRevs, RestLookupInfo)]. + +get_doc_info(Db, Id) -> + case get_full_doc_info(Db, Id) of + {ok, DocInfo} -> + {ok, couch_doc:to_doc_info(DocInfo)}; + Else -> + Else + end. + +% returns {ok, DocInfo} or not_found +get_full_doc_info(Db, Id) -> + [Result] = get_full_doc_infos(Db, [Id]), + Result. + +get_full_doc_infos(Db, Ids) -> + couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids). + +increment_update_seq(#db{update_pid=UpdatePid}) -> + gen_server:call(UpdatePid, increment_update_seq). + +purge_docs(#db{update_pid=UpdatePid}, IdsRevs) -> + gen_server:call(UpdatePid, {purge_docs, IdsRevs}). + +get_committed_update_seq(#db{committed_update_seq=Seq}) -> + Seq. + +get_update_seq(#db{update_seq=Seq})-> + Seq. + +get_purge_seq(#db{header=#db_header{purge_seq=PurgeSeq}})-> + PurgeSeq. + +get_last_purged(#db{header=#db_header{purged_docs=nil}}) -> + {ok, []}; +get_last_purged(#db{fd=Fd, header=#db_header{purged_docs=PurgedPointer}}) -> + couch_file:pread_term(Fd, PurgedPointer). + +get_db_info(Db) -> + #db{fd=Fd, + header=#db_header{disk_version=DiskVersion}, + compactor_pid=Compactor, + update_seq=SeqNum, + name=Name, + fulldocinfo_by_id_btree=FullDocBtree, + instance_start_time=StartTime, + committed_update_seq=CommittedUpdateSeq} = Db, + {ok, Size} = couch_file:bytes(Fd), + {ok, {Count, DelCount}} = couch_btree:full_reduce(FullDocBtree), + InfoList = [ + {db_name, Name}, + {doc_count, Count}, + {doc_del_count, DelCount}, + {update_seq, SeqNum}, + {purge_seq, couch_db:get_purge_seq(Db)}, + {compact_running, Compactor/=nil}, + {disk_size, Size}, + {instance_start_time, StartTime}, + {disk_format_version, DiskVersion}, + {committed_update_seq, CommittedUpdateSeq} + ], + {ok, InfoList}. + +get_design_docs(#db{fulldocinfo_by_id_btree=Btree}=Db) -> + {ok,_, Docs} = couch_btree:fold(Btree, + fun(#full_doc_info{id= <<"_design/",_/binary>>}=FullDocInfo, _Reds, AccDocs) -> + {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []), + {ok, [Doc | AccDocs]}; + (_, _Reds, AccDocs) -> + {stop, AccDocs} + end, + [], [{start_key, <<"_design/">>}, {end_key_gt, <<"_design0">>}]), + {ok, Docs}. 
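
A small sketch of the metadata accessors above (db name hypothetical); get_db_info/1 returns a proplist, and get_design_docs/1 folds the id btree over the _design/ key range.

    {ok, Db} = couch_db:open_int(<<"testdb">>, []),
    {ok, Info} = couch_db:get_db_info(Db),
    DocCount = couch_util:get_value(doc_count, Info),
    UpdateSeq = couch_db:get_update_seq(Db),
    {ok, DesignDocs} = couch_db:get_design_docs(Db).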
+ +check_is_admin(#db{user_ctx=#user_ctx{name=Name,roles=Roles}}=Db) -> + {Admins} = get_admins(Db), + AdminRoles = [<<"_admin">> | couch_util:get_value(<<"roles">>, Admins, [])], + AdminNames = couch_util:get_value(<<"names">>, Admins,[]), + case AdminRoles -- Roles of + AdminRoles -> % same list, not an admin role + case AdminNames -- [Name] of + AdminNames -> % same names, not an admin + throw({unauthorized, <<"You are not a db or server admin.">>}); + _ -> + ok + end; + _ -> + ok + end. + +check_is_reader(#db{user_ctx=#user_ctx{name=Name,roles=Roles}=UserCtx}=Db) -> + case (catch check_is_admin(Db)) of + ok -> ok; + _ -> + {Readers} = get_readers(Db), + ReaderRoles = couch_util:get_value(<<"roles">>, Readers,[]), + WithAdminRoles = [<<"_admin">> | ReaderRoles], + ReaderNames = couch_util:get_value(<<"names">>, Readers,[]), + case ReaderRoles ++ ReaderNames of + [] -> ok; % no readers == public access + _Else -> + case WithAdminRoles -- Roles of + WithAdminRoles -> % same list, not an reader role + case ReaderNames -- [Name] of + ReaderNames -> % same names, not a reader + ?LOG_DEBUG("Not a reader: UserCtx ~p vs Names ~p Roles ~p",[UserCtx, ReaderNames, WithAdminRoles]), + throw({unauthorized, <<"You are not authorized to access this db.">>}); + _ -> + ok + end; + _ -> + ok + end + end + end. + +get_admins(#db{security=SecProps}) -> + couch_util:get_value(<<"admins">>, SecProps, {[]}). + +get_readers(#db{security=SecProps}) -> + couch_util:get_value(<<"readers">>, SecProps, {[]}). + +get_security(#db{security=SecProps}) -> + {SecProps}. + +set_security(#db{update_pid=Pid}=Db, {NewSecProps}) when is_list(NewSecProps) -> + check_is_admin(Db), + ok = validate_security_object(NewSecProps), + ok = gen_server:call(Pid, {set_security, NewSecProps}, infinity), + {ok, _} = ensure_full_commit(Db), + ok; +set_security(_, _) -> + throw(bad_request). + +validate_security_object(SecProps) -> + Admins = couch_util:get_value(<<"admins">>, SecProps, {[]}), + Readers = couch_util:get_value(<<"readers">>, SecProps, {[]}), + ok = validate_names_and_roles(Admins), + ok = validate_names_and_roles(Readers), + ok. + +% validate user input +validate_names_and_roles({Props}) when is_list(Props) -> + case couch_util:get_value(<<"names">>,Props,[]) of + Ns when is_list(Ns) -> + [throw("names must be a JSON list of strings") ||N <- Ns, not is_binary(N)], + Ns; + _ -> throw("names must be a JSON list of strings") + end, + case couch_util:get_value(<<"roles">>,Props,[]) of + Rs when is_list(Rs) -> + [throw("roles must be a JSON list of strings") ||R <- Rs, not is_binary(R)], + Rs; + _ -> throw("roles must be a JSON list of strings") + end, + ok. + +get_revs_limit(#db{revs_limit=Limit}) -> + Limit. + +set_revs_limit(#db{update_pid=Pid}=Db, Limit) when Limit > 0 -> + check_is_admin(Db), + gen_server:call(Pid, {set_revs_limit, Limit}, infinity); +set_revs_limit(_Db, _Limit) -> + throw(invalid_revs_limit). + +name(#db{name=Name}) -> + Name. + +update_doc(Db, Doc, Options) -> + update_doc(Db, Doc, Options, interactive_edit). + +update_doc(Db, Doc, Options, UpdateType) -> + case update_docs(Db, [Doc], Options, UpdateType) of + {ok, [{ok, NewRev}]} -> + {ok, NewRev}; + {ok, [{{_Id, _Rev}, Error}]} -> + throw(Error); + {ok, [Error]} -> + throw(Error); + {ok, []} -> + % replication success + {Pos, [RevId | _]} = Doc#doc.revs, + {ok, {Pos, RevId}} + end. + +update_docs(Db, Docs) -> + update_docs(Db, Docs, []). + +% group_alike_docs groups the sorted documents into sublist buckets, by id. 
+% ([DocA, DocA, DocB, DocC], []) -> [[DocA, DocA], [DocB], [DocC]] +group_alike_docs(Docs) -> + Sorted = lists:sort(fun(#doc{id=A},#doc{id=B})-> A < B end, Docs), + group_alike_docs(Sorted, []). + +group_alike_docs([], Buckets) -> + lists:reverse(Buckets); +group_alike_docs([Doc|Rest], []) -> + group_alike_docs(Rest, [[Doc]]); +group_alike_docs([Doc|Rest], [Bucket|RestBuckets]) -> + [#doc{id=BucketId}|_] = Bucket, + case Doc#doc.id == BucketId of + true -> + % add to existing bucket + group_alike_docs(Rest, [[Doc|Bucket]|RestBuckets]); + false -> + % add to new bucket + group_alike_docs(Rest, [[Doc]|[Bucket|RestBuckets]]) + end. + +validate_doc_update(#db{}=Db, #doc{id= <<"_design/",_/binary>>}, _GetDiskDocFun) -> + catch check_is_admin(Db); +validate_doc_update(#db{validate_doc_funs=[]}, _Doc, _GetDiskDocFun) -> + ok; +validate_doc_update(_Db, #doc{id= <<"_local/",_/binary>>}, _GetDiskDocFun) -> + ok; +validate_doc_update(Db, Doc, GetDiskDocFun) -> + DiskDoc = GetDiskDocFun(), + JsonCtx = couch_util:json_user_ctx(Db), + SecObj = get_security(Db), + try [case Fun(Doc, DiskDoc, JsonCtx, SecObj) of + ok -> ok; + Error -> throw(Error) + end || Fun <- Db#db.validate_doc_funs], + ok + catch + throw:Error -> + Error + end. + + +prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc, + OldFullDocInfo, LeafRevsDict, AllowConflict) -> + case Revs of + [PrevRev|_] -> + case dict:find({RevStart, PrevRev}, LeafRevsDict) of + {ok, {Deleted, DiskSp, DiskRevs}} -> + case couch_doc:has_stubs(Doc) of + true -> + DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs), + Doc2 = couch_doc:merge_stubs(Doc, DiskDoc), + {validate_doc_update(Db, Doc2, fun() -> DiskDoc end), Doc2}; + false -> + LoadDiskDoc = fun() -> make_doc(Db,Id,Deleted,DiskSp,DiskRevs) end, + {validate_doc_update(Db, Doc, LoadDiskDoc), Doc} + end; + error when AllowConflict -> + couch_doc:merge_stubs(Doc, #doc{}), % will generate error if + % there are stubs + {validate_doc_update(Db, Doc, fun() -> nil end), Doc}; + error -> + {conflict, Doc} + end; + [] -> + % new doc, and we have existing revs. + % reuse existing deleted doc + if OldFullDocInfo#full_doc_info.deleted orelse AllowConflict -> + {validate_doc_update(Db, Doc, fun() -> nil end), Doc}; + true -> + {conflict, Doc} + end + end. 
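
validate_doc_update/3 above calls each entry of #db.validate_doc_funs with the new doc, the doc currently on disk (or nil), the user context as EJSON, and the security object; anything other than ok is reported back as that document's error. A hedged illustration of the expected shape (in CouchDB these funs normally wrap design-document validate_doc_update functions via the query servers):

    ValidateFun = fun(#doc{body = {Props}}, _DiskDoc, _UserCtxJson, _SecObj) ->
        case couch_util:get_value(<<"type">>, Props) of
            undefined -> {forbidden, <<"documents must have a type field">>};
            _ -> ok
        end
    end.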
+ + + +prep_and_validate_updates(_Db, [], [], _AllowConflict, AccPrepped, + AccFatalErrors) -> + {AccPrepped, AccFatalErrors}; +prep_and_validate_updates(Db, [DocBucket|RestBuckets], [not_found|RestLookups], + AllowConflict, AccPrepped, AccErrors) -> + [#doc{id=Id}|_]=DocBucket, + % no existing revs are known, + {PreppedBucket, AccErrors3} = lists:foldl( + fun(#doc{revs=Revs}=Doc, {AccBucket, AccErrors2}) -> + case couch_doc:has_stubs(Doc) of + true -> + couch_doc:merge_stubs(Doc, #doc{}); % will throw exception + false -> ok + end, + case Revs of + {0, []} -> + case validate_doc_update(Db, Doc, fun() -> nil end) of + ok -> + {[Doc | AccBucket], AccErrors2}; + Error -> + {AccBucket, [{{Id, {0, []}}, Error} | AccErrors2]} + end; + _ -> + % old revs specified but none exist, a conflict + {AccBucket, [{{Id, Revs}, conflict} | AccErrors2]} + end + end, + {[], AccErrors}, DocBucket), + + prep_and_validate_updates(Db, RestBuckets, RestLookups, AllowConflict, + [PreppedBucket | AccPrepped], AccErrors3); +prep_and_validate_updates(Db, [DocBucket|RestBuckets], + [{ok, #full_doc_info{rev_tree=OldRevTree}=OldFullDocInfo}|RestLookups], + AllowConflict, AccPrepped, AccErrors) -> + Leafs = couch_key_tree:get_all_leafs(OldRevTree), + LeafRevsDict = dict:from_list([{{Start, RevId}, {Deleted, Sp, Revs}} || + {{Deleted, Sp, _Seq}, {Start, [RevId|_]}=Revs} <- Leafs]), + {PreppedBucket, AccErrors3} = lists:foldl( + fun(Doc, {Docs2Acc, AccErrors2}) -> + case prep_and_validate_update(Db, Doc, OldFullDocInfo, + LeafRevsDict, AllowConflict) of + {ok, Doc2} -> + {[Doc2 | Docs2Acc], AccErrors2}; + {Error, #doc{id=Id,revs=Revs}} -> + % Record the error + {Docs2Acc, [{{Id, Revs}, Error} |AccErrors2]} + end + end, + {[], AccErrors}, DocBucket), + prep_and_validate_updates(Db, RestBuckets, RestLookups, AllowConflict, + [PreppedBucket | AccPrepped], AccErrors3). + + +update_docs(#db{update_pid=UpdatePid}=Db, Docs, Options) -> + update_docs(#db{update_pid=UpdatePid}=Db, Docs, Options, interactive_edit). + + +prep_and_validate_replicated_updates(_Db, [], [], AccPrepped, AccErrors) -> + Errors2 = [{{Id, {Pos, Rev}}, Error} || + {#doc{id=Id,revs={Pos,[Rev|_]}}, Error} <- AccErrors], + {lists:reverse(AccPrepped), lists:reverse(Errors2)}; +prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldInfo], AccPrepped, AccErrors) -> + case OldInfo of + not_found -> + {ValidatedBucket, AccErrors3} = lists:foldl( + fun(Doc, {AccPrepped2, AccErrors2}) -> + case couch_doc:has_stubs(Doc) of + true -> + couch_doc:merge_stubs(Doc, #doc{}); % will throw exception + false -> ok + end, + case validate_doc_update(Db, Doc, fun() -> nil end) of + ok -> + {[Doc | AccPrepped2], AccErrors2}; + Error -> + {AccPrepped2, [{Doc, Error} | AccErrors2]} + end + end, + {[], AccErrors}, Bucket), + prep_and_validate_replicated_updates(Db, RestBuckets, RestOldInfo, [ValidatedBucket | AccPrepped], AccErrors3); + {ok, #full_doc_info{rev_tree=OldTree}} -> + NewRevTree = lists:foldl( + fun(NewDoc, AccTree) -> + {NewTree, _} = couch_key_tree:merge(AccTree, [couch_db:doc_to_tree(NewDoc)]), + NewTree + end, + OldTree, Bucket), + Leafs = couch_key_tree:get_all_leafs_full(NewRevTree), + LeafRevsFullDict = dict:from_list( [{{Start, RevId}, FullPath} || {Start, [{RevId, _}|_]}=FullPath <- Leafs]), + {ValidatedBucket, AccErrors3} = + lists:foldl( + fun(#doc{id=Id,revs={Pos, [RevId|_]}}=Doc, {AccValidated, AccErrors2}) -> + case dict:find({Pos, RevId}, LeafRevsFullDict) of + {ok, {Start, Path}} -> + % our unflushed doc is a leaf node. 
Go back on the path + % to find the previous rev that's on disk. + + LoadPrevRevFun = fun() -> + make_first_doc_on_disk(Db,Id,Start-1, tl(Path)) + end, + + case couch_doc:has_stubs(Doc) of + true -> + DiskDoc = LoadPrevRevFun(), + Doc2 = couch_doc:merge_stubs(Doc, DiskDoc), + GetDiskDocFun = fun() -> DiskDoc end; + false -> + Doc2 = Doc, + GetDiskDocFun = LoadPrevRevFun + end, + + case validate_doc_update(Db, Doc2, GetDiskDocFun) of + ok -> + {[Doc2 | AccValidated], AccErrors2}; + Error -> + {AccValidated, [{Doc, Error} | AccErrors2]} + end; + _ -> + % this doc isn't a leaf or already exists in the tree. + % ignore but consider it a success. + {AccValidated, AccErrors2} + end + end, + {[], AccErrors}, Bucket), + prep_and_validate_replicated_updates(Db, RestBuckets, RestOldInfo, + [ValidatedBucket | AccPrepped], AccErrors3) + end. + + + +new_revid(#doc{body=Body,revs={OldStart,OldRevs}, + atts=Atts,deleted=Deleted}) -> + case [{N, T, M} || #att{name=N,type=T,md5=M} <- Atts, M =/= <<>>] of + Atts2 when length(Atts) =/= length(Atts2) -> + % We must have old style non-md5 attachments + ?l2b(integer_to_list(couch_util:rand32())); + Atts2 -> + OldRev = case OldRevs of [] -> 0; [OldRev0|_] -> OldRev0 end, + couch_util:md5(term_to_binary([Deleted, OldStart, OldRev, Body, Atts2])) + end. + +new_revs([], OutBuckets, IdRevsAcc) -> + {lists:reverse(OutBuckets), IdRevsAcc}; +new_revs([Bucket|RestBuckets], OutBuckets, IdRevsAcc) -> + {NewBucket, IdRevsAcc3} = lists:mapfoldl( + fun(#doc{id=Id,revs={Start, RevIds}}=Doc, IdRevsAcc2)-> + NewRevId = new_revid(Doc), + {Doc#doc{revs={Start+1, [NewRevId | RevIds]}}, + [{{Id, {Start, RevIds}}, {ok, {Start+1, NewRevId}}} | IdRevsAcc2]} + end, IdRevsAcc, Bucket), + new_revs(RestBuckets, [NewBucket|OutBuckets], IdRevsAcc3). + +check_dup_atts(#doc{atts=Atts}=Doc) -> + Atts2 = lists:sort(fun(#att{name=N1}, #att{name=N2}) -> N1 < N2 end, Atts), + check_dup_atts2(Atts2), + Doc. + +check_dup_atts2([#att{name=N}, #att{name=N} | _]) -> + throw({bad_request, <<"Duplicate attachments">>}); +check_dup_atts2([_ | Rest]) -> + check_dup_atts2(Rest); +check_dup_atts2(_) -> + ok. + + +update_docs(Db, Docs, Options, replicated_changes) -> + increment_stat(Db, {couchdb, database_writes}), + DocBuckets = group_alike_docs(Docs), + + case (Db#db.validate_doc_funs /= []) orelse + lists:any( + fun(#doc{id= <<?DESIGN_DOC_PREFIX, _/binary>>}) -> true; + (#doc{atts=Atts}) -> + Atts /= [] + end, Docs) of + true -> + Ids = [Id || [#doc{id=Id}|_] <- DocBuckets], + ExistingDocs = get_full_doc_infos(Db, Ids), + + {DocBuckets2, DocErrors} = + prep_and_validate_replicated_updates(Db, DocBuckets, ExistingDocs, [], []), + DocBuckets3 = [Bucket || [_|_]=Bucket <- DocBuckets2]; % remove empty buckets + false -> + DocErrors = [], + DocBuckets3 = DocBuckets + end, + DocBuckets4 = [[doc_flush_atts(check_dup_atts(Doc), Db#db.fd) + || Doc <- Bucket] || Bucket <- DocBuckets3], + {ok, []} = write_and_commit(Db, DocBuckets4, [], [merge_conflicts | Options]), + {ok, DocErrors}; + +update_docs(Db, Docs, Options, interactive_edit) -> + increment_stat(Db, {couchdb, database_writes}), + AllOrNothing = lists:member(all_or_nothing, Options), + % go ahead and generate the new revision ids for the documents. 
+ % separate out the NonRep documents from the rest of the documents + {Docs2, NonRepDocs} = lists:foldl( + fun(#doc{id=Id}=Doc, {DocsAcc, NonRepDocsAcc}) -> + case Id of + <<?LOCAL_DOC_PREFIX, _/binary>> -> + {DocsAcc, [Doc | NonRepDocsAcc]}; + Id-> + {[Doc | DocsAcc], NonRepDocsAcc} + end + end, {[], []}, Docs), + + DocBuckets = group_alike_docs(Docs2), + + case (Db#db.validate_doc_funs /= []) orelse + lists:any( + fun(#doc{id= <<?DESIGN_DOC_PREFIX, _/binary>>}) -> + true; + (#doc{atts=Atts}) -> + Atts /= [] + end, Docs2) of + true -> + % lookup the doc by id and get the most recent + Ids = [Id || [#doc{id=Id}|_] <- DocBuckets], + ExistingDocInfos = get_full_doc_infos(Db, Ids), + + {DocBucketsPrepped, PreCommitFailures} = prep_and_validate_updates(Db, + DocBuckets, ExistingDocInfos, AllOrNothing, [], []), + + % strip out any empty buckets + DocBuckets2 = [Bucket || [_|_] = Bucket <- DocBucketsPrepped]; + false -> + PreCommitFailures = [], + DocBuckets2 = DocBuckets + end, + + if (AllOrNothing) and (PreCommitFailures /= []) -> + {aborted, lists:map( + fun({{Id,{Pos, [RevId|_]}}, Error}) -> + {{Id, {Pos, RevId}}, Error}; + ({{Id,{0, []}}, Error}) -> + {{Id, {0, <<>>}}, Error} + end, PreCommitFailures)}; + true -> + Options2 = if AllOrNothing -> [merge_conflicts]; + true -> [] end ++ Options, + DocBuckets3 = [[ + doc_flush_atts(set_new_att_revpos( + check_dup_atts(Doc)), Db#db.fd) + || Doc <- B] || B <- DocBuckets2], + {DocBuckets4, IdRevs} = new_revs(DocBuckets3, [], []), + + {ok, CommitResults} = write_and_commit(Db, DocBuckets4, NonRepDocs, Options2), + + ResultsDict = dict:from_list(IdRevs ++ CommitResults ++ PreCommitFailures), + {ok, lists:map( + fun(#doc{id=Id,revs={Pos, RevIds}}) -> + {ok, Result} = dict:find({Id, {Pos, RevIds}}, ResultsDict), + Result + end, Docs)} + end. + +% Returns the first available document on disk. Input list is a full rev path +% for the doc. +make_first_doc_on_disk(_Db, _Id, _Pos, []) -> + nil; +make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) -> + make_first_doc_on_disk(Db, Id, Pos - 1, RestPath); +make_first_doc_on_disk(Db, Id, Pos, [{_Rev, {IsDel, Sp, _Seq}} |_]=DocPath) -> + Revs = [Rev || {Rev, _} <- DocPath], + make_doc(Db, Id, IsDel, Sp, {Pos, Revs}). + +set_commit_option(Options) -> + CommitSettings = { + [true || O <- Options, O==full_commit orelse O==delay_commit], + couch_config:get("couchdb", "delayed_commits", "false") + }, + case CommitSettings of + {[true], _} -> + Options; % user requested explicit commit setting, do not change it + {_, "true"} -> + Options; % delayed commits are enabled, do nothing + {_, "false"} -> + [full_commit|Options]; + {_, Else} -> + ?LOG_ERROR("[couchdb] delayed_commits setting must be true/false, not ~p", + [Else]), + [full_commit|Options] + end. + +collect_results(UpdatePid, MRef, ResultsAcc) -> + receive + {result, UpdatePid, Result} -> + collect_results(UpdatePid, MRef, [Result | ResultsAcc]); + {done, UpdatePid} -> + {ok, ResultsAcc}; + {retry, UpdatePid} -> + retry; + {'DOWN', MRef, _, _, Reason} -> + exit(Reason) + end. + +write_and_commit(#db{update_pid=UpdatePid, user_ctx=Ctx}=Db, DocBuckets, + NonRepDocs, Options0) -> + Options = set_commit_option(Options0), + MergeConflicts = lists:member(merge_conflicts, Options), + FullCommit = lists:member(full_commit, Options), + MRef = erlang:monitor(process, UpdatePid), + try + UpdatePid ! 
{update_docs, self(), DocBuckets, NonRepDocs, MergeConflicts, FullCommit}, + case collect_results(UpdatePid, MRef, []) of + {ok, Results} -> {ok, Results}; + retry -> + % This can happen if the db file we wrote to was swapped out by + % compaction. Retry by reopening the db and writing to the current file + {ok, Db2} = open_ref_counted(Db#db.main_pid, Ctx), + DocBuckets2 = [[doc_flush_atts(Doc, Db2#db.fd) || Doc <- Bucket] || Bucket <- DocBuckets], + % We only retry once + close(Db2), + UpdatePid ! {update_docs, self(), DocBuckets2, NonRepDocs, MergeConflicts, FullCommit}, + case collect_results(UpdatePid, MRef, []) of + {ok, Results} -> {ok, Results}; + retry -> throw({update_error, compaction_retry}) + end + end + after + erlang:demonitor(MRef, [flush]) + end. + + +set_new_att_revpos(#doc{revs={RevPos,_Revs},atts=Atts}=Doc) -> + Doc#doc{atts= lists:map(fun(#att{data={_Fd,_Sp}}=Att) -> + % already commited to disk, do not set new rev + Att; + (Att) -> + Att#att{revpos=RevPos+1} + end, Atts)}. + + +doc_flush_atts(Doc, Fd) -> + Doc#doc{atts=[flush_att(Fd, Att) || Att <- Doc#doc.atts]}. + +check_md5(_NewSig, <<>>) -> ok; +check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok; +check_md5(_, _) -> throw(md5_mismatch). + +flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd -> + % already written to our file, nothing to write + Att; + +flush_att(Fd, #att{data={OtherFd,StreamPointer}, md5=InMd5}=Att) -> + {NewStreamData, Len, _IdentityLen, Md5, IdentityMd5} = + couch_stream:copy_to_new_stream(OtherFd, StreamPointer, Fd), + check_md5(IdentityMd5, InMd5), + Att#att{data={Fd, NewStreamData}, md5=Md5, att_len=Len, disk_len=Len}; + +flush_att(Fd, #att{data=Data}=Att) when is_binary(Data) -> + with_stream(Fd, Att, fun(OutputStream) -> + couch_stream:write(OutputStream, Data) + end); + +flush_att(Fd, #att{data=Fun,att_len=undefined}=Att) when is_function(Fun) -> + with_stream(Fd, Att, fun(OutputStream) -> + % Fun(MaxChunkSize, WriterFun) must call WriterFun + % once for each chunk of the attachment, + Fun(4096, + % WriterFun({Length, Binary}, State) + % WriterFun({0, _Footers}, State) + % Called with Length == 0 on the last time. + % WriterFun returns NewState. + fun({0, Footers}, _) -> + F = mochiweb_headers:from_binary(Footers), + case mochiweb_headers:get_value("Content-MD5", F) of + undefined -> + ok; + Md5 -> + {md5, base64:decode(Md5)} + end; + ({_Length, Chunk}, _) -> + couch_stream:write(OutputStream, Chunk) + end, ok) + end); + +flush_att(Fd, #att{data=Fun,att_len=AttLen}=Att) when is_function(Fun) -> + with_stream(Fd, Att, fun(OutputStream) -> + write_streamed_attachment(OutputStream, Fun, AttLen) + end). + +% From RFC 2616 3.6.1 - Chunked Transfer Coding +% +% In other words, the origin server is willing to accept +% the possibility that the trailer fields might be silently +% discarded along the path to the client. +% +% I take this to mean that if "Trailers: Content-MD5\r\n" +% is present in the request, but there is no Content-MD5 +% trailer, we're free to ignore this inconsistency and +% pretend that no Content-MD5 exists. 
+with_stream(Fd, #att{md5=InMd5,type=Type,encoding=Enc}=Att, Fun) -> + {ok, OutputStream} = case (Enc =:= identity) andalso + couch_util:compressible_att_type(Type) of + true -> + CompLevel = list_to_integer( + couch_config:get("attachments", "compression_level", "0") + ), + couch_stream:open(Fd, gzip, [{compression_level, CompLevel}]); + _ -> + couch_stream:open(Fd) + end, + ReqMd5 = case Fun(OutputStream) of + {md5, FooterMd5} -> + case InMd5 of + md5_in_footer -> FooterMd5; + _ -> InMd5 + end; + _ -> + InMd5 + end, + {StreamInfo, Len, IdentityLen, Md5, IdentityMd5} = + couch_stream:close(OutputStream), + check_md5(IdentityMd5, ReqMd5), + {AttLen, DiskLen, NewEnc} = case Enc of + identity -> + case {Md5, IdentityMd5} of + {Same, Same} -> + {Len, IdentityLen, identity}; + _ -> + {Len, IdentityLen, gzip} + end; + gzip -> + case {Att#att.att_len, Att#att.disk_len} of + {AL, DL} when AL =:= undefined orelse DL =:= undefined -> + % Compressed attachment uploaded through the standalone API. + {Len, Len, gzip}; + {AL, DL} -> + % This case is used for efficient push-replication, where a + % compressed attachment is located in the body of multipart + % content-type request. + {AL, DL, gzip} + end + end, + Att#att{ + data={Fd,StreamInfo}, + att_len=AttLen, + disk_len=DiskLen, + md5=Md5, + encoding=NewEnc + }. + + +write_streamed_attachment(_Stream, _F, 0) -> + ok; +write_streamed_attachment(Stream, F, LenLeft) -> + Bin = F(), + ok = couch_stream:write(Stream, Bin), + write_streamed_attachment(Stream, F, LenLeft - size(Bin)). + +enum_docs_since_reduce_to_count(Reds) -> + couch_btree:final_reduce( + fun couch_db_updater:btree_by_seq_reduce/2, Reds). + +enum_docs_reduce_to_count(Reds) -> + {Count, _DelCount} = couch_btree:final_reduce( + fun couch_db_updater:btree_by_id_reduce/2, Reds), + Count. + +changes_since(Db, Style, StartSeq, Fun, Acc) -> + changes_since(Db, Style, StartSeq, Fun, [], Acc). + +changes_since(Db, Style, StartSeq, Fun, Options, Acc) -> + Wrapper = fun(DocInfo, _Offset, Acc2) -> + #doc_info{revs=Revs} = DocInfo, + DocInfo2 = + case Style of + main_only -> + DocInfo; + all_docs -> + % remove revs before the seq + DocInfo#doc_info{revs=[RevInfo || + #rev_info{seq=RevSeq}=RevInfo <- Revs, StartSeq < RevSeq]} + end, + Fun(DocInfo2, Acc2) + end, + {ok, _LastReduction, AccOut} = couch_btree:fold(Db#db.docinfo_by_seq_btree, + Wrapper, Acc, [{start_key, StartSeq + 1}] ++ Options), + {ok, AccOut}. + +count_changes_since(Db, SinceSeq) -> + {ok, Changes} = + couch_btree:fold_reduce(Db#db.docinfo_by_seq_btree, + fun(_SeqStart, PartialReds, 0) -> + {ok, couch_btree:final_reduce(Db#db.docinfo_by_seq_btree, PartialReds)} + end, + 0, [{start_key, SinceSeq + 1}]), + Changes. + +enum_docs_since(Db, SinceSeq, InFun, Acc, Options) -> + {ok, LastReduction, AccOut} = couch_btree:fold(Db#db.docinfo_by_seq_btree, InFun, Acc, [{start_key, SinceSeq + 1} | Options]), + {ok, enum_docs_since_reduce_to_count(LastReduction), AccOut}. + +enum_docs(Db, InFun, InAcc, Options) -> + {ok, LastReduce, OutAcc} = couch_btree:fold(Db#db.fulldocinfo_by_id_btree, InFun, InAcc, Options), + {ok, enum_docs_reduce_to_count(LastReduce), OutAcc}. 
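
A sketch of folding the by-seq index through changes_since/5 above; like any couch_btree fold fun, the enumerator returns {ok, Acc} to continue (or {stop, Acc} to halt). The db handle is assumed to come from an earlier open.

    {ok, IdSeqs} = couch_db:changes_since(Db, main_only, 0,
        fun(#doc_info{id = Id, high_seq = Seq}, Acc) ->
            {ok, [{Id, Seq} | Acc]}
        end,
        []).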
+ +% server functions + +init({DbName, Filepath, Fd, Options}) -> + {ok, UpdaterPid} = gen_server:start_link(couch_db_updater, {self(), DbName, Filepath, Fd, Options}, []), + {ok, #db{fd_ref_counter=RefCntr}=Db} = gen_server:call(UpdaterPid, get_db), + couch_ref_counter:add(RefCntr), + case lists:member(sys_db, Options) of + true -> + ok; + false -> + couch_stats_collector:track_process_count({couchdb, open_databases}) + end, + process_flag(trap_exit, true), + {ok, Db}. + +terminate(_Reason, Db) -> + couch_util:shutdown_sync(Db#db.update_pid), + ok. + +handle_call({open_ref_count, OpenerPid}, _, #db{fd_ref_counter=RefCntr}=Db) -> + ok = couch_ref_counter:add(RefCntr, OpenerPid), + {reply, {ok, Db}, Db}; +handle_call(is_idle, _From, #db{fd_ref_counter=RefCntr, compactor_pid=Compact, + waiting_delayed_commit=Delay}=Db) -> + % Idle means no referrers. Unless in the middle of a compaction file switch, + % there are always at least 2 referrers, couch_db_updater and us. + {reply, (Delay == nil) andalso (Compact == nil) andalso (couch_ref_counter:count(RefCntr) == 2), Db}; +handle_call({db_updated, NewDb}, _From, #db{fd_ref_counter=OldRefCntr}) -> + #db{fd_ref_counter=NewRefCntr}=NewDb, + case NewRefCntr =:= OldRefCntr of + true -> ok; + false -> + couch_ref_counter:add(NewRefCntr), + couch_ref_counter:drop(OldRefCntr) + end, + {reply, ok, NewDb}; +handle_call(get_db, _From, Db) -> + {reply, {ok, Db}, Db}. + + +handle_cast(Msg, Db) -> + ?LOG_ERROR("Bad cast message received for db ~s: ~p", [Db#db.name, Msg]), + exit({error, Msg}). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info({'EXIT', _Pid, normal}, Db) -> + {noreply, Db}; +handle_info({'EXIT', _Pid, Reason}, Server) -> + {stop, Reason, Server}; +handle_info(Msg, Db) -> + ?LOG_ERROR("Bad message received for db ~s: ~p", [Db#db.name, Msg]), + exit({error, Msg}). + + +%%% Internal function %%% +open_doc_revs_int(Db, IdRevs, Options) -> + Ids = [Id || {Id, _Revs} <- IdRevs], + LookupResults = get_full_doc_infos(Db, Ids), + lists:zipwith( + fun({Id, Revs}, Lookup) -> + case Lookup of + {ok, #full_doc_info{rev_tree=RevTree}} -> + {FoundRevs, MissingRevs} = + case Revs of + all -> + {couch_key_tree:get_all_leafs(RevTree), []}; + _ -> + case lists:member(latest, Options) of + true -> + couch_key_tree:get_key_leafs(RevTree, Revs); + false -> + couch_key_tree:get(RevTree, Revs) + end + end, + FoundResults = + lists:map(fun({Value, {Pos, [Rev|_]}=FoundRevPath}) -> + case Value of + ?REV_MISSING -> + % we have the rev in our list but know nothing about it + {{not_found, missing}, {Pos, Rev}}; + {IsDeleted, SummaryPtr, _UpdateSeq} -> + {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)} + end + end, FoundRevs), + Results = FoundResults ++ [{{not_found, missing}, MissingRev} || MissingRev <- MissingRevs], + {ok, Results}; + not_found when Revs == all -> + {ok, []}; + not_found -> + {ok, [{{not_found, missing}, Rev} || Rev <- Revs]} + end + end, + IdRevs, LookupResults). 
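
open_doc_revs_int/3 above backs the public open_doc_revs/4; each element of the result list is either {ok, #doc{}} or {{not_found, missing}, Rev}. A hedged example fetching every leaf revision of one (hypothetical) document:

    {ok, Results} = couch_db:open_doc_revs(Db, <<"docid">>, all, []),
    LeafDocs = [Doc || {ok, Doc} <- Results],
    Missing  = [Rev || {{not_found, missing}, Rev} <- Results].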
+ +open_doc_int(Db, <<?LOCAL_DOC_PREFIX, _/binary>> = Id, _Options) -> + case couch_btree:lookup(Db#db.local_docs_btree, [Id]) of + [{ok, {_, {Rev, BodyData}}}] -> + {ok, #doc{id=Id, revs={0, [list_to_binary(integer_to_list(Rev))]}, body=BodyData}}; + [not_found] -> + {not_found, missing} + end; +open_doc_int(Db, #doc_info{id=Id,revs=[RevInfo|_]}=DocInfo, Options) -> + #rev_info{deleted=IsDeleted,rev={Pos,RevId},body_sp=Bp} = RevInfo, + Doc = make_doc(Db, Id, IsDeleted, Bp, {Pos,[RevId]}), + {ok, Doc#doc{meta=doc_meta_info(DocInfo, [], Options)}}; +open_doc_int(Db, #full_doc_info{id=Id,rev_tree=RevTree}=FullDocInfo, Options) -> + #doc_info{revs=[#rev_info{deleted=IsDeleted,rev=Rev,body_sp=Bp}|_]} = + DocInfo = couch_doc:to_doc_info(FullDocInfo), + {[{_, RevPath}], []} = couch_key_tree:get(RevTree, [Rev]), + Doc = make_doc(Db, Id, IsDeleted, Bp, RevPath), + {ok, Doc#doc{meta=doc_meta_info(DocInfo, RevTree, Options)}}; +open_doc_int(Db, Id, Options) -> + case get_full_doc_info(Db, Id) of + {ok, FullDocInfo} -> + open_doc_int(Db, FullDocInfo, Options); + not_found -> + {not_found, missing} + end. + +doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTree, Options) -> + case lists:member(revs_info, Options) of + false -> []; + true -> + {[{Pos, RevPath}],[]} = + couch_key_tree:get_full_key_paths(RevTree, [Rev]), + + [{revs_info, Pos, lists:map( + fun({Rev1, {true, _Sp, _UpdateSeq}}) -> + {Rev1, deleted}; + ({Rev1, {false, _Sp, _UpdateSeq}}) -> + {Rev1, available}; + ({Rev1, ?REV_MISSING}) -> + {Rev1, missing} + end, RevPath)}] + end ++ + case lists:member(conflicts, Options) of + false -> []; + true -> + case [Rev1 || #rev_info{rev=Rev1,deleted=false} <- RestInfo] of + [] -> []; + ConflictRevs -> [{conflicts, ConflictRevs}] + end + end ++ + case lists:member(deleted_conflicts, Options) of + false -> []; + true -> + case [Rev1 || #rev_info{rev=Rev1,deleted=true} <- RestInfo] of + [] -> []; + DelConflictRevs -> [{deleted_conflicts, DelConflictRevs}] + end + end ++ + case lists:member(local_seq, Options) of + false -> []; + true -> [{local_seq, Seq}] + end. + +read_doc(#db{fd=Fd}, OldStreamPointer) when is_tuple(OldStreamPointer) -> + % 09 UPGRADE CODE + couch_stream:old_read_term(Fd, OldStreamPointer); +read_doc(#db{fd=Fd}, Pos) -> + couch_file:pread_term(Fd, Pos). + + +doc_to_tree(#doc{revs={Start, RevIds}}=Doc) -> + [Tree] = doc_to_tree_simple(Doc, lists:reverse(RevIds)), + {Start - length(RevIds) + 1, Tree}. + + +doc_to_tree_simple(Doc, [RevId]) -> + [{RevId, Doc, []}]; +doc_to_tree_simple(Doc, [RevId | Rest]) -> + [{RevId, ?REV_MISSING, doc_to_tree_simple(Doc, Rest)}]. 
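
A worked trace of doc_to_tree/1 above: #doc.revs stores rev ids newest-first, so the list is reversed and rebuilt as a single path whose root sits at Start - length(RevIds) + 1, with the document only at the leaf and ?REV_MISSING placeholders on the ancestors.

    %% doc_to_tree(#doc{revs = {3, [<<"c">>, <<"b">>, <<"a">>]}} = Doc)
    %%   =:= {1, {<<"a">>, ?REV_MISSING,
    %%            [{<<"b">>, ?REV_MISSING,
    %%              [{<<"c">>, Doc, []}]}]}}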
+ + +make_doc(#db{fd=Fd}=Db, Id, Deleted, Bp, RevisionPath) -> + {BodyData, Atts} = + case Bp of + nil -> + {[], []}; + _ -> + {ok, {BodyData0, Atts0}} = read_doc(Db, Bp), + {BodyData0, + lists:map( + fun({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) -> + #att{name=Name, + type=Type, + att_len=AttLen, + disk_len=DiskLen, + md5=Md5, + revpos=RevPos, + data={Fd,Sp}, + encoding= + case Enc of + true -> + % 0110 UPGRADE CODE + gzip; + false -> + % 0110 UPGRADE CODE + identity; + _ -> + Enc + end + }; + ({Name,Type,Sp,AttLen,RevPos,Md5}) -> + #att{name=Name, + type=Type, + att_len=AttLen, + disk_len=AttLen, + md5=Md5, + revpos=RevPos, + data={Fd,Sp}}; + ({Name,{Type,Sp,AttLen}}) -> + #att{name=Name, + type=Type, + att_len=AttLen, + disk_len=AttLen, + md5= <<>>, + revpos=0, + data={Fd,Sp}} + end, Atts0)} + end, + #doc{ + id = Id, + revs = RevisionPath, + body = BodyData, + atts = Atts, + deleted = Deleted + }. + + +increment_stat(#db{is_sys_db = true}, _Stat) -> + ok; +increment_stat(#db{}, Stat) -> + couch_stats_collector:increment(Stat). diff --git a/apps/couch/src/couch_db_update_notifier.erl b/apps/couch/src/couch_db_update_notifier.erl new file mode 100644 index 00000000..150eb31b --- /dev/null +++ b/apps/couch/src/couch_db_update_notifier.erl @@ -0,0 +1,73 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% +% This causes an OS process to spawned and it is notified every time a database +% is updated. +% +% The notifications are in the form of a the database name sent as a line of +% text to the OS processes stdout. +% + +-module(couch_db_update_notifier). + +-behaviour(gen_event). + +-export([start_link/1, notify/1]). +-export([init/1, terminate/2, handle_event/2, handle_call/2, handle_info/2, code_change/3,stop/1]). + +-include("couch_db.hrl"). + +start_link(Exec) -> + couch_event_sup:start_link(couch_db_update, {couch_db_update_notifier, make_ref()}, Exec). + +notify(Event) -> + gen_event:notify(couch_db_update, Event). + +stop(Pid) -> + couch_event_sup:stop(Pid). + +init(Exec) when is_list(Exec) -> % an exe + couch_os_process:start_link(Exec, []); +init(Else) -> + {ok, Else}. + +terminate(_Reason, Pid) when is_pid(Pid) -> + couch_os_process:stop(Pid), + ok; +terminate(_Reason, _State) -> + ok. + +handle_event(Event, Fun) when is_function(Fun, 1) -> + Fun(Event), + {ok, Fun}; +handle_event(Event, {Fun, FunAcc}) -> + FunAcc2 = Fun(Event, FunAcc), + {ok, {Fun, FunAcc2}}; +handle_event({EventAtom, DbName}, Pid) -> + Obj = {[{type, list_to_binary(atom_to_list(EventAtom))}, {db, DbName}]}, + ok = couch_os_process:send(Pid, Obj), + {ok, Pid}. + +handle_call(_Request, State) -> + {reply, ok, State}. + +handle_info({'EXIT', Pid, Reason}, Pid) -> + ?LOG_ERROR("Update notification process ~p died: ~p", [Pid, Reason]), + remove_handler; +handle_info({'EXIT', _, _}, Pid) -> + %% the db_update event manager traps exits and forwards this message to all + %% its handlers. Just ignore as it wasn't our os_process that exited. + {ok, Pid}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
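
Besides an external executable, init/1 above accepts an Erlang fun (or a {Fun, Acc} pair) as the handler; couch_changes uses exactly this to be told about updates. A minimal illustrative subscription:

    {ok, Notifier} = couch_db_update_notifier:start_link(
        fun({Event, DbName}) ->
            io:format("db ~s: ~p~n", [DbName, Event])
        end),
    %% ... later, detach the handler
    couch_db_update_notifier:stop(Notifier).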
diff --git a/apps/couch/src/couch_db_update_notifier_sup.erl b/apps/couch/src/couch_db_update_notifier_sup.erl new file mode 100644 index 00000000..4d730fc7 --- /dev/null +++ b/apps/couch/src/couch_db_update_notifier_sup.erl @@ -0,0 +1,63 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% +% This causes an OS process to spawned and it is notified every time a database +% is updated. +% +% The notifications are in the form of a the database name sent as a line of +% text to the OS processes stdout. +% + +-module(couch_db_update_notifier_sup). + +-behaviour(supervisor). + +-export([start_link/0,init/1]). + +start_link() -> + supervisor:start_link({local, couch_db_update_notifier_sup}, + couch_db_update_notifier_sup, []). + +init([]) -> + ok = couch_config:register( + fun("update_notification", Key, Value) -> reload_config(Key, Value) end + ), + + UpdateNotifierExes = couch_config:get("update_notification"), + + {ok, + {{one_for_one, 10, 3600}, + lists:map(fun({Name, UpdateNotifierExe}) -> + {Name, + {couch_db_update_notifier, start_link, [UpdateNotifierExe]}, + permanent, + 1000, + supervisor, + [couch_db_update_notifier]} + end, UpdateNotifierExes)}}. + +%% @doc when update_notification configuration changes, terminate the process +%% for that notifier and start a new one with the updated config +reload_config(Id, Exe) -> + ChildSpec = { + Id, + {couch_db_update_notifier, start_link, [Exe]}, + permanent, + 1000, + supervisor, + [couch_db_update_notifier] + }, + supervisor:terminate_child(couch_db_update_notifier_sup, Id), + supervisor:delete_child(couch_db_update_notifier_sup, Id), + supervisor:start_child(couch_db_update_notifier_sup, ChildSpec). + diff --git a/apps/couch/src/couch_db_updater.erl b/apps/couch/src/couch_db_updater.erl new file mode 100644 index 00000000..19a4c165 --- /dev/null +++ b/apps/couch/src/couch_db_updater.erl @@ -0,0 +1,879 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_db_updater). +-behaviour(gen_server). + +-export([btree_by_id_reduce/2,btree_by_seq_reduce/2]). +-export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]). + +-include("couch_db.hrl"). 
+ + +init({MainPid, DbName, Filepath, Fd, Options}) -> + process_flag(trap_exit, true), + case lists:member(create, Options) of + true -> + % create a new header and writes it to the file + Header = #db_header{}, + ok = couch_file:write_header(Fd, Header), + % delete any old compaction files that might be hanging around + RootDir = couch_config:get("couchdb", "database_dir", "."), + couch_file:delete(RootDir, Filepath ++ ".compact"); + false -> + ok = couch_file:upgrade_old_header(Fd, <<$g, $m, $k, 0>>), % 09 UPGRADE CODE + case couch_file:read_header(Fd) of + {ok, Header} -> + ok; + no_valid_header -> + % create a new header and writes it to the file + Header = #db_header{}, + ok = couch_file:write_header(Fd, Header), + % delete any old compaction files that might be hanging around + file:delete(Filepath ++ ".compact") + end + end, + + Db = init_db(DbName, Filepath, Fd, Header), + Db2 = refresh_validate_doc_funs(Db), + {ok, Db2#db{main_pid = MainPid, is_sys_db = lists:member(sys_db, Options)}}. + + +terminate(_Reason, Db) -> + couch_file:close(Db#db.fd), + couch_util:shutdown_sync(Db#db.compactor_pid), + couch_util:shutdown_sync(Db#db.fd_ref_counter), + ok. + +handle_call(get_db, _From, Db) -> + {reply, {ok, Db}, Db}; +handle_call(full_commit, _From, #db{waiting_delayed_commit=nil}=Db) -> + {reply, ok, Db}; % no data waiting, return ok immediately +handle_call(full_commit, _From, Db) -> + {reply, ok, commit_data(Db)}; % commit the data and return ok +handle_call(increment_update_seq, _From, Db) -> + Db2 = commit_data(Db#db{update_seq=Db#db.update_seq+1}), + ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}), + couch_db_update_notifier:notify({updated, Db#db.name}), + {reply, {ok, Db2#db.update_seq}, Db2}; + +handle_call({set_security, NewSec}, _From, Db) -> + {ok, Ptr} = couch_file:append_term(Db#db.fd, NewSec), + Db2 = commit_data(Db#db{security=NewSec, security_ptr=Ptr, + update_seq=Db#db.update_seq+1}), + ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}), + {reply, ok, Db2}; + +handle_call({set_revs_limit, Limit}, _From, Db) -> + Db2 = commit_data(Db#db{revs_limit=Limit, + update_seq=Db#db.update_seq+1}), + ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}), + {reply, ok, Db2}; + +handle_call({purge_docs, _IdRevs}, _From, + #db{compactor_pid=Pid}=Db) when Pid /= nil -> + {reply, {error, purge_during_compaction}, Db}; +handle_call({purge_docs, IdRevs}, _From, Db) -> + #db{ + fd=Fd, + fulldocinfo_by_id_btree = DocInfoByIdBTree, + docinfo_by_seq_btree = DocInfoBySeqBTree, + update_seq = LastSeq, + header = Header = #db_header{purge_seq=PurgeSeq} + } = Db, + DocLookups = couch_btree:lookup(DocInfoByIdBTree, + [Id || {Id, _Revs} <- IdRevs]), + + NewDocInfos = lists:zipwith( + fun({_Id, Revs}, {ok, #full_doc_info{rev_tree=Tree}=FullDocInfo}) -> + case couch_key_tree:remove_leafs(Tree, Revs) of + {_, []=_RemovedRevs} -> % no change + nil; + {NewTree, RemovedRevs} -> + {FullDocInfo#full_doc_info{rev_tree=NewTree},RemovedRevs} + end; + (_, not_found) -> + nil + end, + IdRevs, DocLookups), + + SeqsToRemove = [Seq + || {#full_doc_info{update_seq=Seq},_} <- NewDocInfos], + + FullDocInfoToUpdate = [FullInfo + || {#full_doc_info{rev_tree=Tree}=FullInfo,_} + <- NewDocInfos, Tree /= []], + + IdRevsPurged = [{Id, Revs} + || {#full_doc_info{id=Id}, Revs} <- NewDocInfos], + + {DocInfoToUpdate, NewSeq} = lists:mapfoldl( + fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) -> + Tree2 = couch_key_tree:map_leafs( fun(RevInfo) -> + RevInfo#rev_info{seq=SeqAcc + 1} + end, Tree), + 
{couch_doc:to_doc_info(FullInfo#full_doc_info{rev_tree=Tree2}), + SeqAcc + 1} + end, LastSeq, FullDocInfoToUpdate), + + IdsToRemove = [Id || {#full_doc_info{id=Id,rev_tree=[]},_} + <- NewDocInfos], + + {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, + DocInfoToUpdate, SeqsToRemove), + {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, + FullDocInfoToUpdate, IdsToRemove), + {ok, Pointer} = couch_file:append_term(Fd, IdRevsPurged), + + Db2 = commit_data( + Db#db{ + fulldocinfo_by_id_btree = DocInfoByIdBTree2, + docinfo_by_seq_btree = DocInfoBySeqBTree2, + update_seq = NewSeq + 1, + header=Header#db_header{purge_seq=PurgeSeq+1, purged_docs=Pointer}}), + + ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}), + couch_db_update_notifier:notify({updated, Db#db.name}), + {reply, {ok, (Db2#db.header)#db_header.purge_seq, IdRevsPurged}, Db2}; +handle_call(start_compact, _From, Db) -> + case Db#db.compactor_pid of + nil -> + ?LOG_INFO("Starting compaction for db \"~s\"", [Db#db.name]), + Pid = spawn_link(fun() -> start_copy_compact(Db) end), + Db2 = Db#db{compactor_pid=Pid}, + ok = gen_server:call(Db#db.main_pid, {db_updated, Db2}), + {reply, ok, Db2}; + _ -> + % compact currently running, this is a no-op + {reply, ok, Db} + end. + + + +handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) -> + {ok, NewFd} = couch_file:open(CompactFilepath), + {ok, NewHeader} = couch_file:read_header(NewFd), + #db{update_seq=NewSeq} = NewDb = + init_db(Db#db.name, Filepath, NewFd, NewHeader), + unlink(NewFd), + case Db#db.update_seq == NewSeq of + true -> + % suck up all the local docs into memory and write them to the new db + {ok, _, LocalDocs} = couch_btree:foldl(Db#db.local_docs_btree, + fun(Value, _Offset, Acc) -> {ok, [Value | Acc]} end, []), + {ok, NewLocalBtree} = couch_btree:add(NewDb#db.local_docs_btree, LocalDocs), + + NewDb2 = commit_data(NewDb#db{ + local_docs_btree = NewLocalBtree, + main_pid = Db#db.main_pid, + filepath = Filepath, + instance_start_time = Db#db.instance_start_time, + revs_limit = Db#db.revs_limit + }), + + ?LOG_DEBUG("CouchDB swapping files ~s and ~s.", + [Filepath, CompactFilepath]), + RootDir = couch_config:get("couchdb", "database_dir", "."), + couch_file:delete(RootDir, Filepath), + ok = file:rename(CompactFilepath, Filepath), + close_db(Db), + ok = gen_server:call(Db#db.main_pid, {db_updated, NewDb2}), + ?LOG_INFO("Compaction for db \"~s\" completed.", [Db#db.name]), + {noreply, NewDb2#db{compactor_pid=nil}}; + false -> + ?LOG_INFO("Compaction file still behind main file " + "(update seq=~p. compact update seq=~p). Retrying.", + [Db#db.update_seq, NewSeq]), + close_db(NewDb), + Pid = spawn_link(fun() -> start_copy_compact(Db) end), + Db2 = Db#db{compactor_pid=Pid}, + {noreply, Db2} + end. 
+ + +handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, + FullCommit}, Db) -> + GroupedDocs2 = [[{Client, D} || D <- DocGroup] || DocGroup <- GroupedDocs], + if NonRepDocs == [] -> + {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2, + [Client], MergeConflicts, FullCommit); + true -> + GroupedDocs3 = GroupedDocs2, + FullCommit2 = FullCommit, + Clients = [Client] + end, + NonRepDocs2 = [{Client, NRDoc} || NRDoc <- NonRepDocs], + try update_docs_int(Db, GroupedDocs3, NonRepDocs2, MergeConflicts, + FullCommit2) of + {ok, Db2} -> + ok = gen_server:call(Db#db.main_pid, {db_updated, Db2}), + if Db2#db.update_seq /= Db#db.update_seq -> + couch_db_update_notifier:notify({updated, Db2#db.name}); + true -> ok + end, + [catch(ClientPid ! {done, self()}) || ClientPid <- Clients], + {noreply, Db2} + catch + throw: retry -> + [catch(ClientPid ! {retry, self()}) || ClientPid <- Clients], + {noreply, Db} + end; +handle_info(delayed_commit, #db{waiting_delayed_commit=nil}=Db) -> + %no outstanding delayed commits, ignore + {noreply, Db}; +handle_info(delayed_commit, Db) -> + case commit_data(Db) of + Db -> + {noreply, Db}; + Db2 -> + ok = gen_server:call(Db2#db.main_pid, {db_updated, Db2}), + {noreply, Db2} + end; +handle_info({'EXIT', _Pid, normal}, Db) -> + {noreply, Db}; +handle_info({'EXIT', _Pid, Reason}, Db) -> + {stop, Reason, Db}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +merge_updates([], RestB, AccOutGroups) -> + lists:reverse(AccOutGroups, RestB); +merge_updates(RestA, [], AccOutGroups) -> + lists:reverse(AccOutGroups, RestA); +merge_updates([[{_, #doc{id=IdA}}|_]=GroupA | RestA], + [[{_, #doc{id=IdB}}|_]=GroupB | RestB], AccOutGroups) -> + if IdA == IdB -> + merge_updates(RestA, RestB, [GroupA ++ GroupB | AccOutGroups]); + IdA < IdB -> + merge_updates(RestA, [GroupB | RestB], [GroupA | AccOutGroups]); + true -> + merge_updates([GroupA | RestA], RestB, [GroupB | AccOutGroups]) + end. + +collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> + receive + % Only collect updates with the same MergeConflicts flag and without + % local docs. It's easier to just avoid multiple _local doc + % updaters than deal with their possible conflicts, and local docs + % writes are relatively rare. Can be optmized later if really needed. + {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} -> + GroupedDocs2 = [[{Client, Doc} || Doc <- DocGroup] + || DocGroup <- GroupedDocs], + GroupedDocsAcc2 = + merge_updates(GroupedDocsAcc, GroupedDocs2, []), + collect_updates(GroupedDocsAcc2, [Client | ClientsAcc], + MergeConflicts, (FullCommit or FullCommit2)) + after 0 -> + {GroupedDocsAcc, ClientsAcc, FullCommit} + end. + + +btree_by_seq_split(#doc_info{id=Id, high_seq=KeySeq, revs=Revs}) -> + RevInfos = [{Rev, Seq, Bp} || + #rev_info{rev=Rev,seq=Seq,deleted=false,body_sp=Bp} <- Revs], + DeletedRevInfos = [{Rev, Seq, Bp} || + #rev_info{rev=Rev,seq=Seq,deleted=true,body_sp=Bp} <- Revs], + {KeySeq,{Id, RevInfos, DeletedRevInfos}}. + +btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) -> + #doc_info{ + id = Id, + high_seq=KeySeq, + revs = + [#rev_info{rev=Rev,seq=Seq,deleted=false,body_sp = Bp} || + {Rev, Seq, Bp} <- RevInfos] ++ + [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} || + {Rev, Seq, Bp} <- DeletedRevInfos]}; +btree_by_seq_join(KeySeq,{Id, Rev, Bp, Conflicts, DelConflicts, Deleted}) -> + % 09 UPGRADE CODE + % this is the 0.9.0 and earlier by_seq record. 
It's missing the body pointers + % and individual seq nums for conflicts that are currently in the index, + % meaning the filtered _changes api will not work except for on main docs. + % Simply compact a 0.9.0 database to upgrade the index. + #doc_info{ + id=Id, + high_seq=KeySeq, + revs = [#rev_info{rev=Rev,seq=KeySeq,deleted=Deleted,body_sp=Bp}] ++ + [#rev_info{rev=Rev1,seq=KeySeq,deleted=false} || Rev1 <- Conflicts] ++ + [#rev_info{rev=Rev2,seq=KeySeq,deleted=true} || Rev2 <- DelConflicts]}. + +btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq, + deleted=Deleted, rev_tree=Tree}) -> + DiskTree = + couch_key_tree:map( + fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) -> + {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq}; + (_RevId, ?REV_MISSING) -> + ?REV_MISSING + end, Tree), + {Id, {Seq, if Deleted -> 1; true -> 0 end, DiskTree}}. + +btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) -> + Tree = + couch_key_tree:map( + fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) -> + {IsDeleted == 1, BodyPointer, UpdateSeq}; + (_RevId, ?REV_MISSING) -> + ?REV_MISSING; + (_RevId, {IsDeleted, BodyPointer}) -> + % 09 UPGRADE CODE + % this is the 0.9.0 and earlier rev info record. It's missing the seq + % nums, which means couchdb will sometimes reexamine unchanged + % documents with the _changes API. + % This is fixed by compacting the database. + {IsDeleted == 1, BodyPointer, HighSeq} + end, DiskTree), + + #full_doc_info{id=Id, update_seq=HighSeq, deleted=Deleted==1, rev_tree=Tree}. + +btree_by_id_reduce(reduce, FullDocInfos) -> + % count the number of not deleted documents + {length([1 || #full_doc_info{deleted=false} <- FullDocInfos]), + length([1 || #full_doc_info{deleted=true} <- FullDocInfos])}; +btree_by_id_reduce(rereduce, Reds) -> + {lists:sum([Count || {Count,_} <- Reds]), + lists:sum([DelCount || {_, DelCount} <- Reds])}. + +btree_by_seq_reduce(reduce, DocInfos) -> + % count the number of documents + length(DocInfos); +btree_by_seq_reduce(rereduce, Reds) -> + lists:sum(Reds). + +simple_upgrade_record(Old, New) when tuple_size(Old) =:= tuple_size(New) -> + Old; +simple_upgrade_record(Old, New) when tuple_size(Old) < tuple_size(New) -> + OldSz = tuple_size(Old), + NewValuesTail = + lists:sublist(tuple_to_list(New), OldSz + 1, tuple_size(New) - OldSz), + list_to_tuple(tuple_to_list(Old) ++ NewValuesTail). 
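simple_upgrade_record/2 above pads an older, shorter record with the trailing default values taken from the current record definition. A worked sketch with made-up field values (both tuple shapes are hypothetical):

    Old = {db_header, 3, 42},        % hypothetical pre-upgrade header, 3 fields
    New = {db_header, 5, 0, nil},    % current defaults, 4 fields
    {db_header, 3, 42, nil} = simple_upgrade_record(Old, New).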
+ + +init_db(DbName, Filepath, Fd, Header0) -> + Header1 = simple_upgrade_record(Header0, #db_header{}), + Header = + case element(2, Header1) of + 1 -> Header1#db_header{unused = 0, security_ptr = nil}; % 0.9 + 2 -> Header1#db_header{unused = 0, security_ptr = nil}; % post 0.9 and pre 0.10 + 3 -> Header1#db_header{security_ptr = nil}; % post 0.9 and pre 0.10 + 4 -> Header1#db_header{security_ptr = nil}; % 0.10 and pre 0.11 + ?LATEST_DISK_VERSION -> Header1; + _ -> throw({database_disk_version_error, "Incorrect disk header version"}) + end, + + {ok, FsyncOptions} = couch_util:parse_term( + couch_config:get("couchdb", "fsync_options", + "[before_header, after_header, on_file_open]")), + + case lists:member(on_file_open, FsyncOptions) of + true -> ok = couch_file:sync(Fd); + _ -> ok + end, + + {ok, IdBtree} = couch_btree:open(Header#db_header.fulldocinfo_by_id_btree_state, Fd, + [{split, fun(X) -> btree_by_id_split(X) end}, + {join, fun(X,Y) -> btree_by_id_join(X,Y) end}, + {reduce, fun(X,Y) -> btree_by_id_reduce(X,Y) end}]), + {ok, SeqBtree} = couch_btree:open(Header#db_header.docinfo_by_seq_btree_state, Fd, + [{split, fun(X) -> btree_by_seq_split(X) end}, + {join, fun(X,Y) -> btree_by_seq_join(X,Y) end}, + {reduce, fun(X,Y) -> btree_by_seq_reduce(X,Y) end}]), + {ok, LocalDocsBtree} = couch_btree:open(Header#db_header.local_docs_btree_state, Fd), + case Header#db_header.security_ptr of + nil -> + Security = [], + SecurityPtr = nil; + SecurityPtr -> + {ok, Security} = couch_file:pread_term(Fd, SecurityPtr) + end, + % convert start time tuple to microsecs and store as a binary string + {MegaSecs, Secs, MicroSecs} = now(), + StartTime = ?l2b(io_lib:format("~p", + [(MegaSecs*1000000*1000000) + (Secs*1000000) + MicroSecs])), + {ok, RefCntr} = couch_ref_counter:start([Fd]), + #db{ + update_pid=self(), + fd=Fd, + fd_ref_counter = RefCntr, + header=Header, + fulldocinfo_by_id_btree = IdBtree, + docinfo_by_seq_btree = SeqBtree, + local_docs_btree = LocalDocsBtree, + committed_update_seq = Header#db_header.update_seq, + update_seq = Header#db_header.update_seq, + name = DbName, + filepath = Filepath, + security = Security, + security_ptr = SecurityPtr, + instance_start_time = StartTime, + revs_limit = Header#db_header.revs_limit, + fsync_options = FsyncOptions + }. + + +close_db(#db{fd_ref_counter = RefCntr}) -> + couch_ref_counter:drop(RefCntr). + + +refresh_validate_doc_funs(Db) -> + {ok, DesignDocs} = couch_db:get_design_docs(Db), + ProcessDocFuns = lists:flatmap( + fun(DesignDoc) -> + case couch_doc:get_validate_doc_fun(DesignDoc) of + nil -> []; + Fun -> [Fun] + end + end, DesignDocs), + Db#db{validate_doc_funs=ProcessDocFuns}. + +% rev tree functions + +flush_trees(_Db, [], AccFlushedTrees) -> + {ok, lists:reverse(AccFlushedTrees)}; +flush_trees(#db{fd=Fd,header=Header}=Db, + [InfoUnflushed | RestUnflushed], AccFlushed) -> + #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed, + Flushed = couch_key_tree:map( + fun(_Rev, Value) -> + case Value of + #doc{atts=Atts,deleted=IsDeleted}=Doc -> + % this node value is actually an unwritten document summary, + % write to disk. + % make sure the Fd in the written bins is the same Fd we are + % and convert bins, removing the FD. + % All bins should have been written to disk already. + DiskAtts = + case Atts of + [] -> []; + [#att{data={BinFd, _Sp}} | _ ] when BinFd == Fd -> + [{N,T,P,AL,DL,R,M,E} + || #att{name=N,type=T,data={_,P},md5=M,revpos=R, + att_len=AL,disk_len=DL,encoding=E} + <- Atts]; + _ -> + % BinFd must not equal our Fd. 
This can happen when a database + % is being switched out during a compaction + ?LOG_DEBUG("File where the attachments are written has" + " changed. Possibly retrying.", []), + throw(retry) + end, + {ok, NewSummaryPointer} = + case Header#db_header.disk_version < 4 of + true -> + couch_file:append_term(Fd, {Doc#doc.body, DiskAtts}); + false -> + couch_file:append_term_md5(Fd, {Doc#doc.body, DiskAtts}) + end, + {IsDeleted, NewSummaryPointer, UpdateSeq}; + _ -> + Value + end + end, Unflushed), + flush_trees(Db, RestUnflushed, [InfoUnflushed#full_doc_info{rev_tree=Flushed} | AccFlushed]). + + +send_result(Client, Id, OriginalRevs, NewResult) -> + % used to send a result to the client + catch(Client ! {result, self(), {{Id, OriginalRevs}, NewResult}}). + +merge_rev_trees(_MergeConflicts, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) -> + {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq}; +merge_rev_trees(MergeConflicts, [NewDocs|RestDocsList], + [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) -> + #full_doc_info{id=Id,rev_tree=OldTree,deleted=OldDeleted,update_seq=OldSeq} + = OldDocInfo, + NewRevTree = lists:foldl( + fun({Client, #doc{revs={Pos,[_Rev|PrevRevs]}}=NewDoc}, AccTree) -> + if not MergeConflicts -> + case couch_key_tree:merge(AccTree, [couch_db:doc_to_tree(NewDoc)]) of + {_NewTree, conflicts} when (not OldDeleted) -> + send_result(Client, Id, {Pos-1,PrevRevs}, conflict), + AccTree; + {NewTree, conflicts} when PrevRevs /= [] -> + % Check to be sure if prev revision was specified, it's + % a leaf node in the tree + Leafs = couch_key_tree:get_all_leafs(AccTree), + IsPrevLeaf = lists:any(fun({_, {LeafPos, [LeafRevId|_]}}) -> + {LeafPos, LeafRevId} == {Pos-1, hd(PrevRevs)} + end, Leafs), + if IsPrevLeaf -> + NewTree; + true -> + send_result(Client, Id, {Pos-1,PrevRevs}, conflict), + AccTree + end; + {NewTree, no_conflicts} when AccTree == NewTree -> + % the tree didn't change at all + % meaning we are saving a rev that's already + % been editted again. + if (Pos == 1) and OldDeleted -> + % this means we are recreating a brand new document + % into a state that already existed before. + % put the rev into a subsequent edit of the deletion + #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} = + couch_doc:to_doc_info(OldDocInfo), + NewRevId = couch_db:new_revid( + NewDoc#doc{revs={OldPos, [OldRev]}}), + NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}}, + {NewTree2, _} = couch_key_tree:merge(AccTree, + [couch_db:doc_to_tree(NewDoc2)]), + % we changed the rev id, this tells the caller we did + send_result(Client, Id, {Pos-1,PrevRevs}, + {ok, {OldPos + 1, NewRevId}}), + NewTree2; + true -> + send_result(Client, Id, {Pos-1,PrevRevs}, conflict), + AccTree + end; + {NewTree, _} -> + NewTree + end; + true -> + {NewTree, _} = couch_key_tree:merge(AccTree, + [couch_db:doc_to_tree(NewDoc)]), + NewTree + end + end, + OldTree, NewDocs), + if NewRevTree == OldTree -> + % nothing changed + merge_rev_trees(MergeConflicts, RestDocsList, RestOldInfo, AccNewInfos, + AccRemoveSeqs, AccSeq); + true -> + % we have updated the document, give it a new seq # + NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree}, + RemoveSeqs = case OldSeq of + 0 -> AccRemoveSeqs; + _ -> [OldSeq | AccRemoveSeqs] + end, + merge_rev_trees(MergeConflicts, RestDocsList, RestOldInfo, + [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1) + end. 
+ + + +new_index_entries([], AccById, AccBySeq) -> + {AccById, AccBySeq}; +new_index_entries([FullDocInfo|RestInfos], AccById, AccBySeq) -> + #doc_info{revs=[#rev_info{deleted=Deleted}|_]} = DocInfo = + couch_doc:to_doc_info(FullDocInfo), + new_index_entries(RestInfos, + [FullDocInfo#full_doc_info{deleted=Deleted}|AccById], + [DocInfo|AccBySeq]). + + +stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) -> + [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} || + #full_doc_info{rev_tree=Tree}=Info <- DocInfos]. + +update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> + #db{ + fulldocinfo_by_id_btree = DocInfoByIdBTree, + docinfo_by_seq_btree = DocInfoBySeqBTree, + update_seq = LastSeq + } = Db, + Ids = [Id || [{_Client, #doc{id=Id}}|_] <- DocsList], + % lookup up the old documents, if they exist. + OldDocLookups = couch_btree:lookup(DocInfoByIdBTree, Ids), + OldDocInfos = lists:zipwith( + fun(_Id, {ok, FullDocInfo}) -> + FullDocInfo; + (Id, not_found) -> + #full_doc_info{id=Id} + end, + Ids, OldDocLookups), + % Merge the new docs into the revision trees. + {ok, NewDocInfos0, RemoveSeqs, NewSeq} = merge_rev_trees( + MergeConflicts, DocsList, OldDocInfos, [], [], LastSeq), + + NewFullDocInfos = stem_full_doc_infos(Db, NewDocInfos0), + + % All documents are now ready to write. + + {ok, Db2} = update_local_docs(Db, NonRepDocs), + + % Write out the document summaries (the bodies are stored in the nodes of + % the trees, the attachments are already written to disk) + {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []), + + {IndexFullDocInfos, IndexDocInfos} = + new_index_entries(FlushedFullDocInfos, [], []), + + % and the indexes + {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexFullDocInfos, []), + {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, IndexDocInfos, RemoveSeqs), + + Db3 = Db2#db{ + fulldocinfo_by_id_btree = DocInfoByIdBTree2, + docinfo_by_seq_btree = DocInfoBySeqBTree2, + update_seq = NewSeq}, + + % Check if we just updated any design documents, and update the validation + % funs if we did. + case [1 || <<"_design/",_/binary>> <- Ids] of + [] -> + Db4 = Db3; + _ -> + Db4 = refresh_validate_doc_funs(Db3) + end, + + {ok, commit_data(Db4, not FullCommit)}. + + +update_local_docs(#db{local_docs_btree=Btree}=Db, Docs) -> + Ids = [Id || {_Client, #doc{id=Id}} <- Docs], + OldDocLookups = couch_btree:lookup(Btree, Ids), + BtreeEntries = lists:zipwith( + fun({Client, #doc{id=Id,deleted=Delete,revs={0,PrevRevs},body=Body}}, OldDocLookup) -> + case PrevRevs of + [RevStr|_] -> + PrevRev = list_to_integer(?b2l(RevStr)); + [] -> + PrevRev = 0 + end, + OldRev = + case OldDocLookup of + {ok, {_, {OldRev0, _}}} -> OldRev0; + not_found -> 0 + end, + case OldRev == PrevRev of + true -> + case Delete of + false -> + send_result(Client, Id, {0, PrevRevs}, {ok, + {0, ?l2b(integer_to_list(PrevRev + 1))}}), + {update, {Id, {PrevRev + 1, Body}}}; + true -> + send_result(Client, Id, {0, PrevRevs}, + {ok, {0, <<"0">>}}), + {remove, Id} + end; + false -> + send_result(Client, Id, {0, PrevRevs}, conflict), + ignore + end + end, Docs, OldDocLookups), + + BtreeIdsRemove = [Id || {remove, Id} <- BtreeEntries], + BtreeIdsUpdate = [{Key, Val} || {update, {Key, Val}} <- BtreeEntries], + + {ok, Btree2} = + couch_btree:add_remove(Btree, BtreeIdsUpdate, BtreeIdsRemove), + + {ok, Db#db{local_docs_btree = Btree2}}. + + +commit_data(Db) -> + commit_data(Db, false). 
+ +db_to_header(Db, Header) -> + Header#db_header{ + update_seq = Db#db.update_seq, + docinfo_by_seq_btree_state = couch_btree:get_state(Db#db.docinfo_by_seq_btree), + fulldocinfo_by_id_btree_state = couch_btree:get_state(Db#db.fulldocinfo_by_id_btree), + local_docs_btree_state = couch_btree:get_state(Db#db.local_docs_btree), + security_ptr = Db#db.security_ptr, + revs_limit = Db#db.revs_limit}. + +commit_data(#db{waiting_delayed_commit=nil} = Db, true) -> + Db#db{waiting_delayed_commit=erlang:send_after(1000,self(),delayed_commit)}; +commit_data(Db, true) -> + Db; +commit_data(Db, _) -> + #db{ + fd = Fd, + filepath = Filepath, + header = OldHeader, + fsync_options = FsyncOptions, + waiting_delayed_commit = Timer + } = Db, + if is_reference(Timer) -> erlang:cancel_timer(Timer); true -> ok end, + case db_to_header(Db, OldHeader) of + OldHeader -> + Db#db{waiting_delayed_commit=nil}; + Header -> + case lists:member(before_header, FsyncOptions) of + true -> ok = couch_file:sync(Filepath); + _ -> ok + end, + + ok = couch_file:write_header(Fd, Header), + + case lists:member(after_header, FsyncOptions) of + true -> ok = couch_file:sync(Filepath); + _ -> ok + end, + + Db#db{waiting_delayed_commit=nil, + header=Header, + committed_update_seq=Db#db.update_seq} + end. + + +copy_doc_attachments(#db{fd=SrcFd}=SrcDb, {Pos,_RevId}, SrcSp, DestFd) -> + {ok, {BodyData, BinInfos}} = couch_db:read_doc(SrcDb, SrcSp), + % copy the bin values + NewBinInfos = lists:map( + fun({Name, {Type, BinSp, AttLen}}) when is_tuple(BinSp) orelse BinSp == null -> + % 09 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity}; + ({Name, {Type, BinSp, AttLen}}) -> + % 09 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity}; + ({Name, Type, BinSp, AttLen, _RevPos, <<>>}) when + is_tuple(BinSp) orelse BinSp == null -> + % 09 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, AttLen, Md5, identity}; + ({Name, Type, BinSp, AttLen, RevPos, Md5}) -> + % 010 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, RevPos, Md5, identity}; + ({Name, Type, BinSp, AttLen, DiskLen, RevPos, Md5, Enc1}) -> + {NewBinSp, AttLen, _, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + Enc = case Enc1 of + true -> + % 0110 UPGRADE CODE + gzip; + false -> + % 0110 UPGRADE CODE + identity; + _ -> + Enc1 + end, + {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc} + end, BinInfos), + {BodyData, NewBinInfos}. + +copy_rev_tree_attachments(SrcDb, DestFd, Tree) -> + couch_key_tree:map( + fun(Rev, {IsDel, Sp, Seq}, leaf) -> + DocBody = copy_doc_attachments(SrcDb, Rev, Sp, DestFd), + {IsDel, DocBody, Seq}; + (_, _, branch) -> + ?REV_MISSING + end, Tree). 
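Whether commit_data/2 above issues a sync before and/or after the header write is controlled by the fsync_options list parsed in init_db. A small sketch of that parse, using the default option string quoted there:

    % the resulting atoms are the ones init_db and commit_data/2 test for
    {ok, [before_header, after_header, on_file_open]} =
        couch_util:parse_term("[before_header, after_header, on_file_open]").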
+ + +copy_docs(Db, #db{fd=DestFd}=NewDb, InfoBySeq, Retry) -> + Ids = [Id || #doc_info{id=Id} <- InfoBySeq], + LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids), + + % write out the attachments + NewFullDocInfos0 = lists:map( + fun({ok, #full_doc_info{rev_tree=RevTree}=Info}) -> + Info#full_doc_info{rev_tree=copy_rev_tree_attachments(Db, DestFd, RevTree)} + end, LookupResults), + % write out the docs + % we do this in 2 stages so the docs are written out contiguously, making + % view indexing and replication faster. + NewFullDocInfos1 = lists:map( + fun(#full_doc_info{rev_tree=RevTree}=Info) -> + Info#full_doc_info{rev_tree=couch_key_tree:map_leafs( + fun(_Key, {IsDel, DocBody, Seq}) -> + {ok, Pos} = couch_file:append_term_md5(DestFd, DocBody), + {IsDel, Pos, Seq} + end, RevTree)} + end, NewFullDocInfos0), + + NewFullDocInfos = stem_full_doc_infos(Db, NewFullDocInfos1), + NewDocInfos = [couch_doc:to_doc_info(Info) || Info <- NewFullDocInfos], + RemoveSeqs = + case Retry of + false -> + []; + true -> + % We are retrying a compaction, meaning the documents we are copying may + % already exist in our file and must be removed from the by_seq index. + Existing = couch_btree:lookup(NewDb#db.fulldocinfo_by_id_btree, Ids), + [Seq || {ok, #full_doc_info{update_seq=Seq}} <- Existing] + end, + + {ok, DocInfoBTree} = couch_btree:add_remove( + NewDb#db.docinfo_by_seq_btree, NewDocInfos, RemoveSeqs), + {ok, FullDocInfoBTree} = couch_btree:add_remove( + NewDb#db.fulldocinfo_by_id_btree, NewFullDocInfos, []), + NewDb#db{ fulldocinfo_by_id_btree=FullDocInfoBTree, + docinfo_by_seq_btree=DocInfoBTree}. + + + +copy_compact(Db, NewDb0, Retry) -> + FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header], + NewDb = NewDb0#db{fsync_options=FsyncOptions}, + TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq), + EnumBySeqFun = + fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) -> + couch_task_status:update("Copied ~p of ~p changes (~p%)", + [TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]), + if TotalCopied rem 1000 =:= 0 -> + NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry), + if TotalCopied rem 10000 =:= 0 -> + {ok, {commit_data(NewDb2#db{update_seq=Seq}), [], TotalCopied + 1}}; + true -> + {ok, {NewDb2#db{update_seq=Seq}, [], TotalCopied + 1}} + end; + true -> + {ok, {AccNewDb, [DocInfo | AccUncopied], TotalCopied + 1}} + end + end, + + couch_task_status:set_update_frequency(500), + + {ok, _, {NewDb2, Uncopied, TotalChanges}} = + couch_btree:foldl(Db#db.docinfo_by_seq_btree, EnumBySeqFun, + {NewDb, [], 0}, + [{start_key, NewDb#db.update_seq + 1}]), + + couch_task_status:update("Flushing"), + + NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry), + + % copy misc header values + if NewDb3#db.security /= Db#db.security -> + {ok, Ptr} = couch_file:append_term(NewDb3#db.fd, Db#db.security), + NewDb4 = NewDb3#db{security=Db#db.security, security_ptr=Ptr}; + true -> + NewDb4 = NewDb3 + end, + + commit_data(NewDb4#db{update_seq=Db#db.update_seq}). 
+ +start_copy_compact(#db{name=Name,filepath=Filepath}=Db) -> + CompactFile = Filepath ++ ".compact", + ?LOG_DEBUG("Compaction process spawned for db \"~s\"", [Name]), + case couch_file:open(CompactFile) of + {ok, Fd} -> + couch_task_status:add_task(<<"Database Compaction">>, <<Name/binary, " retry">>, <<"Starting">>), + Retry = true, + {ok, Header} = couch_file:read_header(Fd); + {error, enoent} -> + couch_task_status:add_task(<<"Database Compaction">>, Name, <<"Starting">>), + {ok, Fd} = couch_file:open(CompactFile, [create]), + Retry = false, + ok = couch_file:write_header(Fd, Header=#db_header{}) + end, + NewDb = init_db(Name, CompactFile, Fd, Header), + unlink(Fd), + NewDb2 = copy_compact(Db, NewDb, Retry), + close_db(NewDb2), + gen_server:cast(Db#db.update_pid, {compact_done, CompactFile}). + diff --git a/apps/couch/src/couch_doc.erl b/apps/couch/src/couch_doc.erl new file mode 100644 index 00000000..d15cd7de --- /dev/null +++ b/apps/couch/src/couch_doc.erl @@ -0,0 +1,508 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_doc). + +-export([to_doc_info/1,to_doc_info_path/1,parse_rev/1,parse_revs/1,rev_to_str/1,revs_to_strs/1]). +-export([att_foldl/3,att_foldl_decode/3,get_validate_doc_fun/1]). +-export([from_json_obj/1,to_json_obj/2,has_stubs/1, merge_stubs/2]). +-export([validate_docid/1]). +-export([doc_from_multi_part_stream/2]). +-export([doc_to_multi_part_stream/5, len_doc_to_multi_part_stream/4]). + +-include("couch_db.hrl"). + +% helpers used by to_json_obj +to_json_rev(0, []) -> + []; +to_json_rev(Start, [FirstRevId|_]) -> + [{<<"_rev">>, ?l2b([integer_to_list(Start),"-",revid_to_str(FirstRevId)])}]. + +to_json_body(true, {Body}) -> + Body ++ [{<<"_deleted">>, true}]; +to_json_body(false, {Body}) -> + Body. + +to_json_revisions(Options, Start, RevIds) -> + case lists:member(revs, Options) of + false -> []; + true -> + [{<<"_revisions">>, {[{<<"start">>, Start}, + {<<"ids">>, [revid_to_str(R) ||R <- RevIds]}]}}] + end. + +revid_to_str(RevId) when size(RevId) =:= 16 -> + ?l2b(couch_util:to_hex(RevId)); +revid_to_str(RevId) -> + RevId. + +rev_to_str({Pos, RevId}) -> + ?l2b([integer_to_list(Pos),"-",revid_to_str(RevId)]). + + +revs_to_strs([]) -> + []; +revs_to_strs([{Pos, RevId}| Rest]) -> + [rev_to_str({Pos, RevId}) | revs_to_strs(Rest)]. + +to_json_meta(Meta) -> + lists:map( + fun({revs_info, Start, RevsInfo}) -> + {JsonRevsInfo, _Pos} = lists:mapfoldl( + fun({RevId, Status}, PosAcc) -> + JsonObj = {[{<<"rev">>, rev_to_str({PosAcc, RevId})}, + {<<"status">>, ?l2b(atom_to_list(Status))}]}, + {JsonObj, PosAcc - 1} + end, Start, RevsInfo), + {<<"_revs_info">>, JsonRevsInfo}; + ({local_seq, Seq}) -> + {<<"_local_seq">>, Seq}; + ({conflicts, Conflicts}) -> + {<<"_conflicts">>, revs_to_strs(Conflicts)}; + ({deleted_conflicts, DConflicts}) -> + {<<"_deleted_conflicts">>, revs_to_strs(DConflicts)} + end, Meta). 
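rev_to_str/1 above renders a revision as the position, a dash, and the rev id, hex-encoding the id when it is a 16-byte binary. An illustrative evaluation with a made-up rev id:

    <<"2-00000000000000000000000000000001">> = rev_to_str({2, <<1:128>>}).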
+ +to_json_attachments(Attachments, Options) -> + to_json_attachments( + Attachments, + lists:member(attachments, Options), + lists:member(follows, Options), + lists:member(att_encoding_info, Options) + ). + +to_json_attachments([], _OutputData, _DataToFollow, _ShowEncInfo) -> + []; +to_json_attachments(Atts, OutputData, DataToFollow, ShowEncInfo) -> + AttProps = lists:map( + fun(#att{disk_len=DiskLen, att_len=AttLen, encoding=Enc}=Att) -> + {Att#att.name, {[ + {<<"content_type">>, Att#att.type}, + {<<"revpos">>, Att#att.revpos} + ] ++ + if not OutputData orelse Att#att.data == stub -> + [{<<"length">>, DiskLen}, {<<"stub">>, true}]; + true -> + if DataToFollow -> + [{<<"length">>, DiskLen}, {<<"follows">>, true}]; + true -> + AttData = case Enc of + gzip -> + zlib:gunzip(att_to_bin(Att)); + identity -> + att_to_bin(Att) + end, + [{<<"data">>, base64:encode(AttData)}] + end + end ++ + case {ShowEncInfo, Enc} of + {false, _} -> + []; + {true, identity} -> + []; + {true, _} -> + [ + {<<"encoding">>, couch_util:to_binary(Enc)}, + {<<"encoded_length">>, AttLen} + ] + end + }} + end, Atts), + [{<<"_attachments">>, {AttProps}}]. + +to_json_obj(#doc{id=Id,deleted=Del,body=Body,revs={Start, RevIds}, + meta=Meta}=Doc,Options)-> + {[{<<"_id">>, Id}] + ++ to_json_rev(Start, RevIds) + ++ to_json_body(Del, Body) + ++ to_json_revisions(Options, Start, RevIds) + ++ to_json_meta(Meta) + ++ to_json_attachments(Doc#doc.atts, Options) + }. + +from_json_obj({Props}) -> + transfer_fields(Props, #doc{body=[]}); + +from_json_obj(_Other) -> + throw({bad_request, "Document must be a JSON object"}). + +parse_revid(RevId) when size(RevId) =:= 32 -> + RevInt = erlang:list_to_integer(?b2l(RevId), 16), + <<RevInt:128>>; +parse_revid(RevId) when length(RevId) =:= 32 -> + RevInt = erlang:list_to_integer(RevId, 16), + <<RevInt:128>>; +parse_revid(RevId) when is_binary(RevId) -> + RevId; +parse_revid(RevId) when is_list(RevId) -> + ?l2b(RevId). + + +parse_rev(Rev) when is_binary(Rev) -> + parse_rev(?b2l(Rev)); +parse_rev(Rev) when is_list(Rev) -> + SplitRev = lists:splitwith(fun($-) -> false; (_) -> true end, Rev), + case SplitRev of + {Pos, [$- | RevId]} -> {list_to_integer(Pos), parse_revid(RevId)}; + _Else -> throw({bad_request, <<"Invalid rev format">>}) + end; +parse_rev(_BadRev) -> + throw({bad_request, <<"Invalid rev format">>}). + +parse_revs([]) -> + []; +parse_revs([Rev | Rest]) -> + [parse_rev(Rev) | parse_revs(Rest)]. + + +validate_docid(Id) when is_binary(Id) -> + case Id of + <<"_design/", _/binary>> -> ok; + <<"_local/", _/binary>> -> ok; + <<"_", _/binary>> -> + throw({bad_request, <<"Only reserved document ids may start with underscore.">>}); + _Else -> ok + end; +validate_docid(Id) -> + ?LOG_DEBUG("Document id is not a string: ~p", [Id]), + throw({bad_request, <<"Document id must be a string">>}). 
+ +transfer_fields([], #doc{body=Fields}=Doc) -> + % convert fields back to json object + Doc#doc{body={lists:reverse(Fields)}}; + +transfer_fields([{<<"_id">>, Id} | Rest], Doc) -> + validate_docid(Id), + transfer_fields(Rest, Doc#doc{id=Id}); + +transfer_fields([{<<"_rev">>, Rev} | Rest], #doc{revs={0, []}}=Doc) -> + {Pos, RevId} = parse_rev(Rev), + transfer_fields(Rest, + Doc#doc{revs={Pos, [RevId]}}); + +transfer_fields([{<<"_rev">>, _Rev} | Rest], Doc) -> + % we already got the rev from the _revisions + transfer_fields(Rest,Doc); + +transfer_fields([{<<"_attachments">>, {JsonBins}} | Rest], Doc) -> + Atts = lists:map(fun({Name, {BinProps}}) -> + case couch_util:get_value(<<"stub">>, BinProps) of + true -> + Type = couch_util:get_value(<<"content_type">>, BinProps), + RevPos = couch_util:get_value(<<"revpos">>, BinProps, nil), + DiskLen = couch_util:get_value(<<"length">>, BinProps), + {Enc, EncLen} = att_encoding_info(BinProps), + #att{name=Name, data=stub, type=Type, att_len=EncLen, + disk_len=DiskLen, encoding=Enc, revpos=RevPos}; + _ -> + Type = couch_util:get_value(<<"content_type">>, BinProps, + ?DEFAULT_ATTACHMENT_CONTENT_TYPE), + RevPos = couch_util:get_value(<<"revpos">>, BinProps, 0), + case couch_util:get_value(<<"follows">>, BinProps) of + true -> + DiskLen = couch_util:get_value(<<"length">>, BinProps), + {Enc, EncLen} = att_encoding_info(BinProps), + #att{name=Name, data=follows, type=Type, encoding=Enc, + att_len=EncLen, disk_len=DiskLen, revpos=RevPos}; + _ -> + Value = couch_util:get_value(<<"data">>, BinProps), + Bin = base64:decode(Value), + LenBin = size(Bin), + #att{name=Name, data=Bin, type=Type, att_len=LenBin, + disk_len=LenBin, revpos=RevPos} + end + end + end, JsonBins), + transfer_fields(Rest, Doc#doc{atts=Atts}); + +transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc) -> + RevIds = couch_util:get_value(<<"ids">>, Props), + Start = couch_util:get_value(<<"start">>, Props), + if not is_integer(Start) -> + throw({doc_validation, "_revisions.start isn't an integer."}); + not is_list(RevIds) -> + throw({doc_validation, "_revisions.ids isn't a array."}); + true -> + ok + end, + [throw({doc_validation, "RevId isn't a string"}) || + RevId <- RevIds, not is_binary(RevId)], + RevIds2 = [parse_revid(RevId) || RevId <- RevIds], + transfer_fields(Rest, Doc#doc{revs={Start, RevIds2}}); + +transfer_fields([{<<"_deleted">>, B} | Rest], Doc) when is_boolean(B) -> + transfer_fields(Rest, Doc#doc{deleted=B}); + +% ignored fields +transfer_fields([{<<"_revs_info">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); +transfer_fields([{<<"_local_seq">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); +transfer_fields([{<<"_conflicts">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); +transfer_fields([{<<"_deleted_conflicts">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); + +% unknown special field +transfer_fields([{<<"_",Name/binary>>, _} | _], _) -> + throw({doc_validation, + ?l2b(io_lib:format("Bad special document member: _~s", [Name]))}); + +transfer_fields([Field | Rest], #doc{body=Fields}=Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}). + +att_encoding_info(BinProps) -> + DiskLen = couch_util:get_value(<<"length">>, BinProps), + case couch_util:get_value(<<"encoding">>, BinProps) of + undefined -> + {identity, DiskLen}; + Enc -> + EncodedLen = couch_util:get_value(<<"encoded_length">>, BinProps, DiskLen), + {list_to_existing_atom(?b2l(Enc)), EncodedLen} + end. 
+ +to_doc_info(FullDocInfo) -> + {DocInfo, _Path} = to_doc_info_path(FullDocInfo), + DocInfo. + +max_seq([], Max) -> + Max; +max_seq([#rev_info{seq=Seq}|Rest], Max) -> + max_seq(Rest, if Max > Seq -> Max; true -> Seq end). + +to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) -> + RevInfosAndPath = + [{#rev_info{deleted=Del,body_sp=Bp,seq=Seq,rev={Pos,RevId}}, Path} || + {{Del, Bp, Seq},{Pos, [RevId|_]}=Path} <- + couch_key_tree:get_all_leafs(Tree)], + SortedRevInfosAndPath = lists:sort( + fun({#rev_info{deleted=DeletedA,rev=RevA}, _PathA}, + {#rev_info{deleted=DeletedB,rev=RevB}, _PathB}) -> + % sort descending by {not deleted, rev} + {not DeletedA, RevA} > {not DeletedB, RevB} + end, RevInfosAndPath), + [{_RevInfo, WinPath}|_] = SortedRevInfosAndPath, + RevInfos = [RevInfo || {RevInfo, _Path} <- SortedRevInfosAndPath], + {#doc_info{id=Id, high_seq=max_seq(RevInfos, 0), revs=RevInfos}, WinPath}. + + + + +att_foldl(#att{data=Bin}, Fun, Acc) when is_binary(Bin) -> + Fun(Bin, Acc); +att_foldl(#att{data={Fd,Sp},att_len=Len}, Fun, Acc) when is_tuple(Sp) orelse Sp == null -> + % 09 UPGRADE CODE + couch_stream:old_foldl(Fd, Sp, Len, Fun, Acc); +att_foldl(#att{data={Fd,Sp},md5=Md5}, Fun, Acc) -> + couch_stream:foldl(Fd, Sp, Md5, Fun, Acc); +att_foldl(#att{data=DataFun,att_len=Len}, Fun, Acc) when is_function(DataFun) -> + fold_streamed_data(DataFun, Len, Fun, Acc). + +att_foldl_decode(#att{data={Fd,Sp},md5=Md5,encoding=Enc}, Fun, Acc) -> + couch_stream:foldl_decode(Fd, Sp, Md5, Enc, Fun, Acc); +att_foldl_decode(#att{data=Fun2,att_len=Len, encoding=identity}, Fun, Acc) -> + fold_streamed_data(Fun2, Len, Fun, Acc). + +att_to_bin(#att{data=Bin}) when is_binary(Bin) -> + Bin; +att_to_bin(#att{data=Iolist}) when is_list(Iolist) -> + iolist_to_binary(Iolist); +att_to_bin(#att{data={_Fd,_Sp}}=Att) -> + iolist_to_binary( + lists:reverse(att_foldl( + Att, + fun(Bin,Acc) -> [Bin|Acc] end, + [] + )) + ); +att_to_bin(#att{data=DataFun, att_len=Len}) when is_function(DataFun)-> + iolist_to_binary( + lists:reverse(fold_streamed_data( + DataFun, + Len, + fun(Data, Acc) -> [Data | Acc] end, + [] + )) + ). + +get_validate_doc_fun(#doc{body={Props}}=DDoc) -> + case couch_util:get_value(<<"validate_doc_update">>, Props) of + undefined -> + nil; + _Else -> + fun(EditDoc, DiskDoc, Ctx, SecObj) -> + couch_query_servers:validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) + end + end. + + +has_stubs(#doc{atts=Atts}) -> + has_stubs(Atts); +has_stubs([]) -> + false; +has_stubs([#att{data=stub}|_]) -> + true; +has_stubs([_Att|Rest]) -> + has_stubs(Rest). + +merge_stubs(#doc{id=Id,atts=MemBins}=StubsDoc, #doc{atts=DiskBins}) -> + BinDict = dict:from_list([{Name, Att} || #att{name=Name}=Att <- DiskBins]), + MergedBins = lists:map( + fun(#att{name=Name, data=stub, revpos=StubRevPos}) -> + case dict:find(Name, BinDict) of + {ok, #att{revpos=DiskRevPos}=DiskAtt} + when DiskRevPos == StubRevPos orelse StubRevPos == nil -> + DiskAtt; + _ -> + throw({missing_stub, + <<"id:", Id/binary, ", name:", Name/binary>>}) + end; + (Att) -> + Att + end, MemBins), + StubsDoc#doc{atts= MergedBins}. + +fold_streamed_data(_RcvFun, 0, _Fun, Acc) -> + Acc; +fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> + Bin = RcvFun(), + ResultAcc = Fun(Bin, Acc), + fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). 
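The sort in to_doc_info_path/1 orders leaf revisions descending by {not Deleted, Rev}, so a live revision always outranks a deleted one and, among live ones, the higher rev wins. A sketch of the comparison it relies on, with hypothetical rev ids:

    % a live leaf (Deleted = false) beats a deleted leaf (Deleted = true)
    true = {not false, {2, <<"a">>}} > {not true, {2, <<"b">>}}.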
+ +len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, SendEncodedAtts) -> + AttsSize = lists:foldl(fun(#att{data=Data} = Att, AccAttsSize) -> + case Data of + stub -> + AccAttsSize; + _ -> + AccAttsSize + + 4 + % "\r\n\r\n" + case SendEncodedAtts of + true -> + Att#att.att_len; + _ -> + Att#att.disk_len + end + + 4 + % "\r\n--" + size(Boundary) + end + end, 0, Atts), + if AttsSize == 0 -> + {<<"application/json">>, iolist_size(JsonBytes)}; + true -> + {<<"multipart/related; boundary=\"", Boundary/binary, "\"">>, + 2 + % "--" + size(Boundary) + + 36 + % "\r\ncontent-type: application/json\r\n\r\n" + iolist_size(JsonBytes) + + 4 + % "\r\n--" + size(Boundary) + + + AttsSize + + 2 % "--" + } + end. + +doc_to_multi_part_stream(Boundary, JsonBytes, Atts, WriteFun, + SendEncodedAtts) -> + case lists:any(fun(#att{data=Data})-> Data /= stub end, Atts) of + true -> + WriteFun([<<"--", Boundary/binary, + "\r\ncontent-type: application/json\r\n\r\n">>, + JsonBytes, <<"\r\n--", Boundary/binary>>]), + atts_to_mp(Atts, Boundary, WriteFun, SendEncodedAtts); + false -> + WriteFun(JsonBytes) + end. + +atts_to_mp([], _Boundary, WriteFun, _SendEncAtts) -> + WriteFun(<<"--">>); +atts_to_mp([#att{data=stub} | RestAtts], Boundary, WriteFun, + SendEncodedAtts) -> + atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts); +atts_to_mp([Att | RestAtts], Boundary, WriteFun, + SendEncodedAtts) -> + WriteFun(<<"\r\n\r\n">>), + AttFun = case SendEncodedAtts of + false -> + fun att_foldl_decode/3; + true -> + fun att_foldl/3 + end, + AttFun(Att, fun(Data, _) -> WriteFun(Data) end, ok), + WriteFun(<<"\r\n--", Boundary/binary>>), + atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts). + + +doc_from_multi_part_stream(ContentType, DataFun) -> + Self = self(), + Parser = spawn_link(fun() -> + couch_httpd:parse_multipart_request(ContentType, DataFun, + fun(Next)-> mp_parse_doc(Next, []) end), + unlink(Self) + end), + Parser ! {get_doc_bytes, self()}, + receive + {doc_bytes, DocBytes} -> + Doc = from_json_obj(?JSON_DECODE(DocBytes)), + % go through the attachments looking for 'follows' in the data, + % replace with function that reads the data from MIME stream. + ReadAttachmentDataFun = fun() -> + Parser ! {get_bytes, self()}, + receive {bytes, Bytes} -> Bytes end + end, + Atts2 = lists:map( + fun(#att{data=follows}=A) -> + A#att{data=ReadAttachmentDataFun}; + (A) -> + A + end, Doc#doc.atts), + {ok, Doc#doc{atts=Atts2}} + end. + +mp_parse_doc({headers, H}, []) -> + case couch_util:get_value("content-type", H) of + {"application/json", _} -> + fun (Next) -> + mp_parse_doc(Next, []) + end + end; +mp_parse_doc({body, Bytes}, AccBytes) -> + fun (Next) -> + mp_parse_doc(Next, [Bytes | AccBytes]) + end; +mp_parse_doc(body_end, AccBytes) -> + receive {get_doc_bytes, From} -> + From ! {doc_bytes, lists:reverse(AccBytes)} + end, + fun (Next) -> + mp_parse_atts(Next) + end. + +mp_parse_atts(eof) -> + ok; +mp_parse_atts({headers, _H}) -> + fun (Next) -> + mp_parse_atts(Next) + end; +mp_parse_atts({body, Bytes}) -> + receive {get_bytes, From} -> + From ! {bytes, Bytes} + end, + fun (Next) -> + mp_parse_atts(Next) + end; +mp_parse_atts(body_end) -> + fun (Next) -> + mp_parse_atts(Next) + end. + + diff --git a/apps/couch/src/couch_event_sup.erl b/apps/couch/src/couch_event_sup.erl new file mode 100644 index 00000000..6fd6963a --- /dev/null +++ b/apps/couch/src/couch_event_sup.erl @@ -0,0 +1,69 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% The purpose of this module is to allow event handlers to particpate in Erlang +%% supervisor trees. It provide a monitorable process that crashes if the event +%% handler fails. The process, when shutdown, deregisters the event handler. + +-module(couch_event_sup). +-behaviour(gen_server). + +-include("couch_db.hrl"). + +-export([start_link/3,start_link/4, stop/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2,code_change/3]). + +% +% Instead calling the +% ok = gen_event:add_sup_handler(error_logger, my_log, Args) +% +% do this: +% {ok, LinkedPid} = couch_event_sup:start_link(error_logger, my_log, Args) +% +% The benefit is the event is now part of the process tree, and can be +% started, restarted and shutdown consistently like the rest of the server +% components. +% +% And now if the "event" crashes, the supervisor is notified and can restart +% the event handler. +% +% Use this form to named process: +% {ok, LinkedPid} = couch_event_sup:start_link({local, my_log}, error_logger, my_log, Args) +% + +start_link(EventMgr, EventHandler, Args) -> + gen_server:start_link(couch_event_sup, {EventMgr, EventHandler, Args}, []). + +start_link(ServerName, EventMgr, EventHandler, Args) -> + gen_server:start_link(ServerName, couch_event_sup, {EventMgr, EventHandler, Args}, []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +init({EventMgr, EventHandler, Args}) -> + ok = gen_event:add_sup_handler(EventMgr, EventHandler, Args), + {ok, {EventMgr, EventHandler}}. + +terminate(_Reason, _State) -> + ok. + +handle_call(_Whatever, _From, State) -> + {ok, State}. + +handle_cast(stop, State) -> + {stop, normal, State}. + +handle_info({gen_event_EXIT, _Handler, Reason}, State) -> + {stop, Reason, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/couch/src/couch_external_manager.erl b/apps/couch/src/couch_external_manager.erl new file mode 100644 index 00000000..7e401389 --- /dev/null +++ b/apps/couch/src/couch_external_manager.erl @@ -0,0 +1,101 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_external_manager). +-behaviour(gen_server). + +-export([start_link/0, execute/2, config_change/2]). +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). + +-include("couch_db.hrl"). + +start_link() -> + gen_server:start_link({local, couch_external_manager}, + couch_external_manager, [], []). + +execute(UrlName, JsonReq) -> + Pid = gen_server:call(couch_external_manager, {get, UrlName}), + case Pid of + {error, Reason} -> + Reason; + _ -> + couch_external_server:execute(Pid, JsonReq) + end. 
+ +config_change("external", UrlName) -> + gen_server:call(couch_external_manager, {config, UrlName}). + +% gen_server API + +init([]) -> + process_flag(trap_exit, true), + Handlers = ets:new(couch_external_manager_handlers, [set, private]), + couch_config:register(fun config_change/2), + {ok, Handlers}. + +terminate(_Reason, Handlers) -> + ets:foldl(fun({_UrlName, Pid}, nil) -> + couch_external_server:stop(Pid), + nil + end, nil, Handlers), + ok. + +handle_call({get, UrlName}, _From, Handlers) -> + case ets:lookup(Handlers, UrlName) of + [] -> + case couch_config:get("external", UrlName, nil) of + nil -> + Msg = lists:flatten( + io_lib:format("No server configured for ~p.", [UrlName])), + {reply, {error, {unknown_external_server, ?l2b(Msg)}}, Handlers}; + Command -> + {ok, NewPid} = couch_external_server:start_link(UrlName, Command), + true = ets:insert(Handlers, {UrlName, NewPid}), + {reply, NewPid, Handlers} + end; + [{UrlName, Pid}] -> + {reply, Pid, Handlers} + end; +handle_call({config, UrlName}, _From, Handlers) -> + % A newly added handler and a handler that had it's command + % changed are treated exactly the same. + + % Shutdown the old handler. + case ets:lookup(Handlers, UrlName) of + [{UrlName, Pid}] -> + couch_external_server:stop(Pid); + [] -> + ok + end, + % Wait for next request to boot the handler. + {reply, ok, Handlers}. + +handle_cast(_Whatever, State) -> + {noreply, State}. + +handle_info({'EXIT', Pid, normal}, Handlers) -> + ?LOG_INFO("EXTERNAL: Server ~p terminated normally", [Pid]), + % The process terminated normally without us asking - Remove Pid from the + % handlers table so we don't attempt to reuse it + ets:match_delete(Handlers, {'_', Pid}), + {noreply, Handlers}; + +handle_info({'EXIT', Pid, Reason}, Handlers) -> + ?LOG_INFO("EXTERNAL: Server ~p died. (reason: ~p)", [Pid, Reason]), + % Remove Pid from the handlers table so we don't try closing + % it a second time in terminate/2. + ets:match_delete(Handlers, {'_', Pid}), + {stop, normal, Handlers}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + diff --git a/apps/couch/src/couch_external_server.erl b/apps/couch/src/couch_external_server.erl new file mode 100644 index 00000000..045fcee9 --- /dev/null +++ b/apps/couch/src/couch_external_server.erl @@ -0,0 +1,69 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_external_server). +-behaviour(gen_server). + +-export([start_link/2, stop/1, execute/2]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]). + +-include("couch_db.hrl"). + +% External API + +start_link(Name, Command) -> + gen_server:start_link(couch_external_server, [Name, Command], []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +execute(Pid, JsonReq) -> + gen_server:call(Pid, {execute, JsonReq}, infinity). 
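couch_external_manager:execute/2 above looks the handler name up in the "external" config section and starts a couch_external_server for it on first use. A hedged call sketch; the "myquery" name is hypothetical and must be configured, and JsonReq stands in for the EJSON request object the HTTP layer builds:

    JsonReq = {[]},  % placeholder request object
    % returns the external process's response, or an {unknown_external_server, _}
    % error term when "myquery" has no configured command
    Response = couch_external_manager:execute("myquery", JsonReq).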
+ +% Gen Server Handlers + +init([Name, Command]) -> + ?LOG_INFO("EXTERNAL: Starting process for: ~s", [Name]), + ?LOG_INFO("COMMAND: ~s", [Command]), + process_flag(trap_exit, true), + Timeout = list_to_integer(couch_config:get("couchdb", "os_process_timeout", + "5000")), + {ok, Pid} = couch_os_process:start_link(Command, [{timeout, Timeout}]), + couch_config:register(fun("couchdb", "os_process_timeout", NewTimeout) -> + couch_os_process:set_timeout(Pid, list_to_integer(NewTimeout)) + end), + {ok, {Name, Command, Pid}}. + +terminate(_Reason, {_Name, _Command, Pid}) -> + couch_os_process:stop(Pid), + ok. + +handle_call({execute, JsonReq}, _From, {Name, Command, Pid}) -> + {reply, couch_os_process:prompt(Pid, JsonReq), {Name, Command, Pid}}. + +handle_info({'EXIT', _Pid, normal}, State) -> + {noreply, State}; +handle_info({'EXIT', Pid, Reason}, {Name, Command, Pid}) -> + ?LOG_INFO("EXTERNAL: Process for ~s exiting. (reason: ~w)", [Name, Reason]), + {stop, Reason, {Name, Command, Pid}}. + +handle_cast(stop, {Name, Command, Pid}) -> + ?LOG_INFO("EXTERNAL: Shutting down ~s", [Name]), + exit(Pid, normal), + {stop, normal, {Name, Command, Pid}}; +handle_cast(_Whatever, State) -> + {noreply, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + diff --git a/apps/couch/src/couch_file.erl b/apps/couch/src/couch_file.erl new file mode 100644 index 00000000..0a891712 --- /dev/null +++ b/apps/couch/src/couch_file.erl @@ -0,0 +1,588 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_file). +-behaviour(gen_server). + +-include("couch_db.hrl"). + +-define(SIZE_BLOCK, 4096). + +-record(file, { + fd, + tail_append_begin = 0, % 09 UPGRADE CODE + eof = 0 + }). + +-export([open/1, open/2, close/1, bytes/1, sync/1, append_binary/2,old_pread/3]). +-export([append_term/2, pread_term/2, pread_iolist/2, write_header/2]). +-export([pread_binary/2, read_header/1, truncate/2, upgrade_old_header/2]). +-export([append_term_md5/2,append_binary_md5/2]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([delete/2,delete/3,init_delete_dir/1]). + +%%---------------------------------------------------------------------- +%% Args: Valid Options are [create] and [create,overwrite]. +%% Files are opened in read/write mode. +%% Returns: On success, {ok, Fd} +%% or {error, Reason} if the file could not be opened. +%%---------------------------------------------------------------------- + +open(Filepath) -> + open(Filepath, []). + +open(Filepath, Options) -> + case gen_server:start_link(couch_file, + {Filepath, Options, self(), Ref = make_ref()}, []) of + {ok, Fd} -> + {ok, Fd}; + ignore -> + % get the error + receive + {Ref, Pid, Error} -> + case process_info(self(), trap_exit) of + {trap_exit, true} -> receive {'EXIT', Pid, _} -> ok end; + {trap_exit, false} -> ok + end, + Error + end; + Error -> + Error + end. + + +%%---------------------------------------------------------------------- +%% Purpose: To append an Erlang term to the end of the file. 
+%% Args: Erlang term to serialize and append to the file. +%% Returns: {ok, Pos} where Pos is the file offset to the beginning the +%% serialized term. Use pread_term to read the term back. +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +append_term(Fd, Term) -> + append_binary(Fd, term_to_binary(Term)). + +append_term_md5(Fd, Term) -> + append_binary_md5(Fd, term_to_binary(Term)). + + +%%---------------------------------------------------------------------- +%% Purpose: To append an Erlang binary to the end of the file. +%% Args: Erlang term to serialize and append to the file. +%% Returns: {ok, Pos} where Pos is the file offset to the beginning the +%% serialized term. Use pread_term to read the term back. +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +append_binary(Fd, Bin) -> + Size = iolist_size(Bin), + gen_server:call(Fd, {append_bin, + [<<0:1/integer,Size:31/integer>>, Bin]}, infinity). + +append_binary_md5(Fd, Bin) -> + Size = iolist_size(Bin), + gen_server:call(Fd, {append_bin, + [<<1:1/integer,Size:31/integer>>, couch_util:md5(Bin), Bin]}, infinity). + + +%%---------------------------------------------------------------------- +%% Purpose: Reads a term from a file that was written with append_term +%% Args: Pos, the offset into the file where the term is serialized. +%% Returns: {ok, Term} +%% or {error, Reason}. +%%---------------------------------------------------------------------- + + +pread_term(Fd, Pos) -> + {ok, Bin} = pread_binary(Fd, Pos), + {ok, binary_to_term(Bin)}. + + +%%---------------------------------------------------------------------- +%% Purpose: Reads a binrary from a file that was written with append_binary +%% Args: Pos, the offset into the file where the term is serialized. +%% Returns: {ok, Term} +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +pread_binary(Fd, Pos) -> + {ok, L} = pread_iolist(Fd, Pos), + {ok, iolist_to_binary(L)}. + + +pread_iolist(Fd, Pos) -> + gen_server:call(Fd, {pread_iolist, Pos}, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: The length of a file, in bytes. +%% Returns: {ok, Bytes} +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +% length in bytes +bytes(Fd) -> + gen_server:call(Fd, bytes, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: Truncate a file to the number of bytes. +%% Returns: ok +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +truncate(Fd, Pos) -> + gen_server:call(Fd, {truncate, Pos}, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: Ensure all bytes written to the file are flushed to disk. +%% Returns: ok +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +sync(Filepath) when is_list(Filepath) -> + {ok, Fd} = file:open(Filepath, [append, raw]), + try file:sync(Fd) after file:close(Fd) end; +sync(Fd) -> + gen_server:call(Fd, sync, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: Close the file. 
+%% Returns: ok +%%---------------------------------------------------------------------- +close(Fd) -> + MRef = erlang:monitor(process, Fd), + try + catch unlink(Fd), + catch exit(Fd, shutdown), + receive + {'DOWN', MRef, _, _, _} -> + ok + end + after + erlang:demonitor(MRef, [flush]) + end. + + +delete(RootDir, Filepath) -> + delete(RootDir, Filepath, true). + + +delete(RootDir, Filepath, Async) -> + DelFile = filename:join([RootDir,".delete", ?b2l(couch_uuids:random())]), + case file:rename(Filepath, DelFile) of + ok -> + if (Async) -> + spawn(file, delete, [DelFile]), + ok; + true -> + file:delete(DelFile) + end; + Error -> + Error + end. + + +init_delete_dir(RootDir) -> + Dir = filename:join(RootDir,".delete"), + % note: ensure_dir requires an actual filename companent, which is the + % reason for "foo". + filelib:ensure_dir(filename:join(Dir,"foo")), + filelib:fold_files(Dir, ".*", true, + fun(Filename, _) -> + ok = file:delete(Filename) + end, ok). + + +% 09 UPGRADE CODE +old_pread(Fd, Pos, Len) -> + {ok, <<RawBin:Len/binary>>, false} = gen_server:call(Fd, {pread, Pos, Len}, infinity), + {ok, RawBin}. + +% 09 UPGRADE CODE +upgrade_old_header(Fd, Sig) -> + gen_server:call(Fd, {upgrade_old_header, Sig}, infinity). + + +read_header(Fd) -> + case gen_server:call(Fd, find_header, infinity) of + {ok, Bin} -> + {ok, binary_to_term(Bin)}; + Else -> + Else + end. + +write_header(Fd, Data) -> + Bin = term_to_binary(Data), + Md5 = couch_util:md5(Bin), + % now we assemble the final header binary and write to disk + FinalBin = <<Md5/binary, Bin/binary>>, + gen_server:call(Fd, {write_header, FinalBin}, infinity). + + + + +init_status_error(ReturnPid, Ref, Error) -> + ReturnPid ! {Ref, self(), Error}, + ignore. + +% server functions + +init({Filepath, Options, ReturnPid, Ref}) -> + process_flag(trap_exit, true), + case lists:member(create, Options) of + true -> + filelib:ensure_dir(Filepath), + case file:open(Filepath, [read, append, raw, binary]) of + {ok, Fd} -> + {ok, Length} = file:position(Fd, eof), + case Length > 0 of + true -> + % this means the file already exists and has data. + % FYI: We don't differentiate between empty files and non-existant + % files here. + case lists:member(overwrite, Options) of + true -> + {ok, 0} = file:position(Fd, 0), + ok = file:truncate(Fd), + ok = file:sync(Fd), + maybe_track_open_os_files(Options), + {ok, #file{fd=Fd}}; + false -> + ok = file:close(Fd), + init_status_error(ReturnPid, Ref, file_exists) + end; + false -> + maybe_track_open_os_files(Options), + {ok, #file{fd=Fd}} + end; + Error -> + init_status_error(ReturnPid, Ref, Error) + end; + false -> + % open in read mode first, so we don't create the file if it doesn't exist. + case file:open(Filepath, [read, raw]) of + {ok, Fd_Read} -> + {ok, Fd} = file:open(Filepath, [read, append, raw, binary]), + ok = file:close(Fd_Read), + maybe_track_open_os_files(Options), + {ok, Length} = file:position(Fd, eof), + {ok, #file{fd=Fd, eof=Length}}; + Error -> + init_status_error(ReturnPid, Ref, Error) + end + end. + +maybe_track_open_os_files(FileOptions) -> + case lists:member(sys_db, FileOptions) of + true -> + ok; + false -> + couch_stats_collector:track_process_count({couchdb, open_os_files}) + end. + +terminate(_Reason, _Fd) -> + ok. 
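The functions documented above are the couch_file API used by the rest of the server. A minimal usage sketch, not taken from the patch (the path and the stored terms are made up):

    example() ->
        {ok, Fd} = couch_file:open("/tmp/example.couch", [create, overwrite]),
        % append_term/2 returns the offset where the term starts; keep it to
        % read the term back later with pread_term/2.
        {ok, Pos} = couch_file:append_term(Fd, {example_record, <<"hello">>}),
        ok = couch_file:sync(Fd),
        {ok, {example_record, <<"hello">>}} = couch_file:pread_term(Fd, Pos),
        % headers are written block-aligned and found again by scanning
        % backwards from the end of the file.
        ok = couch_file:write_header(Fd, {example_header, Pos}),
        {ok, {example_header, Pos}} = couch_file:read_header(Fd),
        couch_file:close(Fd).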
+ + +handle_call({pread_iolist, Pos}, _From, File) -> + {LenIolist, NextPos} = read_raw_iolist_int(File, Pos, 4), + case iolist_to_binary(LenIolist) of + <<1:1/integer,Len:31/integer>> -> % an MD5-prefixed term + {Md5AndIoList, _} = read_raw_iolist_int(File, NextPos, Len+16), + {Md5, IoList} = extract_md5(Md5AndIoList), + case couch_util:md5(IoList) of + Md5 -> + {reply, {ok, IoList}, File}; + _ -> + {stop, file_corruption, {error,file_corruption}, File} + end; + <<0:1/integer,Len:31/integer>> -> + {Iolist, _} = read_raw_iolist_int(File, NextPos, Len), + {reply, {ok, Iolist}, File} + end; +handle_call({pread, Pos, Bytes}, _From, #file{fd=Fd,tail_append_begin=TailAppendBegin}=File) -> + {ok, Bin} = file:pread(Fd, Pos, Bytes), + {reply, {ok, Bin, Pos >= TailAppendBegin}, File}; +handle_call(bytes, _From, #file{eof=Length}=File) -> + {reply, {ok, Length}, File}; +handle_call(sync, _From, #file{fd=Fd}=File) -> + {reply, file:sync(Fd), File}; +handle_call({truncate, Pos}, _From, #file{fd=Fd}=File) -> + {ok, Pos} = file:position(Fd, Pos), + case file:truncate(Fd) of + ok -> + {reply, ok, File#file{eof=Pos}}; + Error -> + {reply, Error, File} + end; +handle_call({append_bin, Bin}, _From, #file{fd=Fd, eof=Pos}=File) -> + Blocks = make_blocks(Pos rem ?SIZE_BLOCK, Bin), + case file:write(Fd, Blocks) of + ok -> + {reply, {ok, Pos}, File#file{eof=Pos+iolist_size(Blocks)}}; + Error -> + {reply, Error, File} + end; +handle_call({write_header, Bin}, _From, #file{fd=Fd, eof=Pos}=File) -> + BinSize = size(Bin), + case Pos rem ?SIZE_BLOCK of + 0 -> + Padding = <<>>; + BlockOffset -> + Padding = <<0:(8*(?SIZE_BLOCK-BlockOffset))>> + end, + FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(5, [Bin])], + case file:write(Fd, FinalBin) of + ok -> + {reply, ok, File#file{eof=Pos+iolist_size(FinalBin)}}; + Error -> + {reply, Error, File} + end; + + +handle_call({upgrade_old_header, Prefix}, _From, #file{fd=Fd}=File) -> + case (catch read_old_header(Fd, Prefix)) of + {ok, Header} -> + TailAppendBegin = File#file.eof, + Bin = term_to_binary(Header), + Md5 = couch_util:md5(Bin), + % now we assemble the final header binary and write to disk + FinalBin = <<Md5/binary, Bin/binary>>, + {reply, ok, _} = handle_call({write_header, FinalBin}, ok, File), + ok = write_old_header(Fd, <<"upgraded">>, TailAppendBegin), + {reply, ok, File#file{tail_append_begin=TailAppendBegin}}; + _Error -> + case (catch read_old_header(Fd, <<"upgraded">>)) of + {ok, TailAppendBegin} -> + {reply, ok, File#file{tail_append_begin = TailAppendBegin}}; + _Error2 -> + {reply, ok, File} + end + end; + + +handle_call(find_header, _From, #file{fd=Fd, eof=Pos}=File) -> + {reply, find_header(Fd, Pos div ?SIZE_BLOCK), File}. + +% 09 UPGRADE CODE +-define(HEADER_SIZE, 2048). % size of each segment of the doubly written header + +% 09 UPGRADE CODE +read_old_header(Fd, Prefix) -> + {ok, Bin} = file:pread(Fd, 0, 2*(?HEADER_SIZE)), + <<Bin1:(?HEADER_SIZE)/binary, Bin2:(?HEADER_SIZE)/binary>> = Bin, + Result = + % read the first header + case extract_header(Prefix, Bin1) of + {ok, Header1} -> + case extract_header(Prefix, Bin2) of + {ok, Header2} -> + case Header1 == Header2 of + true -> + % Everything is completely normal! + {ok, Header1}; + false -> + % To get here we must have two different header versions with signatures intact. + % It's weird but possible (a commit failure right at the 2k boundary). Log it and take the first. 
+ ?LOG_INFO("Header version differences.~nPrimary Header: ~p~nSecondary Header: ~p", [Header1, Header2]), + {ok, Header1} + end; + Error -> + % error reading second header. It's ok, but log it. + ?LOG_INFO("Secondary header corruption (error: ~p). Using primary header.", [Error]), + {ok, Header1} + end; + Error -> + % error reading primary header + case extract_header(Prefix, Bin2) of + {ok, Header2} -> + % log corrupt primary header. It's ok since the secondary is still good. + ?LOG_INFO("Primary header corruption (error: ~p). Using secondary header.", [Error]), + {ok, Header2}; + _ -> + % error reading secondary header too + % return the error, no need to log anything as the caller will be responsible for dealing with the error. + Error + end + end, + case Result of + {ok, {pointer_to_header_data, Ptr}} -> + pread_term(Fd, Ptr); + _ -> + Result + end. + +% 09 UPGRADE CODE +extract_header(Prefix, Bin) -> + SizeOfPrefix = size(Prefix), + SizeOfTermBin = ?HEADER_SIZE - + SizeOfPrefix - + 16, % md5 sig + + <<HeaderPrefix:SizeOfPrefix/binary, TermBin:SizeOfTermBin/binary, Sig:16/binary>> = Bin, + + % check the header prefix + case HeaderPrefix of + Prefix -> + % check the integrity signature + case couch_util:md5(TermBin) == Sig of + true -> + Header = binary_to_term(TermBin), + {ok, Header}; + false -> + header_corrupt + end; + _ -> + unknown_header_type + end. + + +% 09 UPGRADE CODE +write_old_header(Fd, Prefix, Data) -> + TermBin = term_to_binary(Data), + % the size of all the bytes written to the header, including the md5 signature (16 bytes) + FilledSize = byte_size(Prefix) + byte_size(TermBin) + 16, + {TermBin2, FilledSize2} = + case FilledSize > ?HEADER_SIZE of + true -> + % too big! + {ok, Pos} = append_binary(Fd, TermBin), + PtrBin = term_to_binary({pointer_to_header_data, Pos}), + {PtrBin, byte_size(Prefix) + byte_size(PtrBin) + 16}; + false -> + {TermBin, FilledSize} + end, + ok = file:sync(Fd), + % pad out the header with zeros, then take the md5 hash + PadZeros = <<0:(8*(?HEADER_SIZE - FilledSize2))>>, + Sig = couch_util:md5([TermBin2, PadZeros]), + % now we assemble the final header binary and write to disk + WriteBin = <<Prefix/binary, TermBin2/binary, PadZeros/binary, Sig/binary>>, + ?HEADER_SIZE = size(WriteBin), % sanity check + DblWriteBin = [WriteBin, WriteBin], + ok = file:pwrite(Fd, 0, DblWriteBin), + ok = file:sync(Fd). + + +handle_cast(close, Fd) -> + {stop,normal,Fd}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info({'EXIT', _, normal}, Fd) -> + {noreply, Fd}; +handle_info({'EXIT', _, Reason}, Fd) -> + {stop, Reason, Fd}. + + +find_header(_Fd, -1) -> + no_valid_header; +find_header(Fd, Block) -> + case (catch load_header(Fd, Block)) of + {ok, Bin} -> + {ok, Bin}; + _Error -> + find_header(Fd, Block -1) + end. + +load_header(Fd, Block) -> + {ok, <<1>>} = file:pread(Fd, Block*?SIZE_BLOCK, 1), + {ok, <<HeaderLen:32/integer>>} = file:pread(Fd, (Block*?SIZE_BLOCK) + 1, 4), + TotalBytes = calculate_total_read_len(1, HeaderLen), + {ok, <<RawBin:TotalBytes/binary>>} = + file:pread(Fd, (Block*?SIZE_BLOCK) + 5, TotalBytes), + <<Md5Sig:16/binary, HeaderBin/binary>> = + iolist_to_binary(remove_block_prefixes(1, RawBin)), + Md5Sig = couch_util:md5(HeaderBin), + {ok, HeaderBin}. + +-spec read_raw_iolist_int(#file{}, Pos::non_neg_integer(), Len::non_neg_integer()) -> + {Data::iolist(), CurPos::non_neg_integer()}. 
+read_raw_iolist_int(#file{fd=Fd, tail_append_begin=TAB}, Pos, Len) -> + BlockOffset = Pos rem ?SIZE_BLOCK, + TotalBytes = calculate_total_read_len(BlockOffset, Len), + {ok, <<RawBin:TotalBytes/binary>>} = file:pread(Fd, Pos, TotalBytes), + if Pos >= TAB -> + {remove_block_prefixes(BlockOffset, RawBin), Pos + TotalBytes}; + true -> + % 09 UPGRADE CODE + <<ReturnBin:Len/binary, _/binary>> = RawBin, + {[ReturnBin], Pos + Len} + end. + +-spec extract_md5(iolist()) -> {binary(), iolist()}. +extract_md5(FullIoList) -> + {Md5List, IoList} = split_iolist(FullIoList, 16, []), + {iolist_to_binary(Md5List), IoList}. + +calculate_total_read_len(0, FinalLen) -> + calculate_total_read_len(1, FinalLen) + 1; +calculate_total_read_len(BlockOffset, FinalLen) -> + case ?SIZE_BLOCK - BlockOffset of + BlockLeft when BlockLeft >= FinalLen -> + FinalLen; + BlockLeft -> + FinalLen + ((FinalLen - BlockLeft) div (?SIZE_BLOCK -1)) + + if ((FinalLen - BlockLeft) rem (?SIZE_BLOCK -1)) =:= 0 -> 0; + true -> 1 end + end. + +remove_block_prefixes(_BlockOffset, <<>>) -> + []; +remove_block_prefixes(0, <<_BlockPrefix,Rest/binary>>) -> + remove_block_prefixes(1, Rest); +remove_block_prefixes(BlockOffset, Bin) -> + BlockBytesAvailable = ?SIZE_BLOCK - BlockOffset, + case size(Bin) of + Size when Size > BlockBytesAvailable -> + <<DataBlock:BlockBytesAvailable/binary,Rest/binary>> = Bin, + [DataBlock | remove_block_prefixes(0, Rest)]; + _Size -> + [Bin] + end. + +make_blocks(_BlockOffset, []) -> + []; +make_blocks(0, IoList) -> + [<<0>> | make_blocks(1, IoList)]; +make_blocks(BlockOffset, IoList) -> + case split_iolist(IoList, (?SIZE_BLOCK - BlockOffset), []) of + {Begin, End} -> + [Begin | make_blocks(0, End)]; + _SplitRemaining -> + IoList + end. + +%% @doc Returns a tuple where the first element contains the leading SplitAt +%% bytes of the original iolist, and the 2nd element is the tail. If SplitAt +%% is larger than byte_size(IoList), return the difference. +-spec split_iolist(IoList::iolist(), SplitAt::non_neg_integer(), Acc::list()) -> + {iolist(), iolist()} | non_neg_integer(). +split_iolist(List, 0, BeginAcc) -> + {lists:reverse(BeginAcc), List}; +split_iolist([], SplitAt, _BeginAcc) -> + SplitAt; +split_iolist([<<Bin/binary>> | Rest], SplitAt, BeginAcc) when SplitAt > byte_size(Bin) -> + split_iolist(Rest, SplitAt - byte_size(Bin), [Bin | BeginAcc]); +split_iolist([<<Bin/binary>> | Rest], SplitAt, BeginAcc) -> + <<Begin:SplitAt/binary,End/binary>> = Bin, + split_iolist([End | Rest], 0, [Begin | BeginAcc]); +split_iolist([Sublist| Rest], SplitAt, BeginAcc) when is_list(Sublist) -> + case split_iolist(Sublist, SplitAt, BeginAcc) of + {Begin, End} -> + {Begin, [End | Rest]}; + SplitRemaining -> + split_iolist(Rest, SplitAt - (SplitAt - SplitRemaining), [Sublist | BeginAcc]) + end; +split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) -> + split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]). diff --git a/apps/couch/src/couch_httpd.erl b/apps/couch/src/couch_httpd.erl new file mode 100644 index 00000000..8a5c699a --- /dev/null +++ b/apps/couch/src/couch_httpd.erl @@ -0,0 +1,988 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd). +-include("couch_db.hrl"). + +-export([start_link/0, stop/0, handle_request/7]). + +-export([header_value/2,header_value/3,qs_value/2,qs_value/3,qs/1,path/1,absolute_uri/2,body_length/1]). +-export([verify_is_server_admin/1,unquote/1,quote/1,recv/2,recv_chunked/4,error_info/1]). +-export([make_fun_spec_strs/1, make_arity_1_fun/1]). +-export([parse_form/1,json_body/1,json_body_obj/1,body/1,doc_etag/1, make_etag/1, etag_respond/3]). +-export([primary_header_value/2,partition/1,serve_file/3,serve_file/4, server_header/0]). +-export([start_chunked_response/3,send_chunk/2,log_request/2]). +-export([start_response_length/4, send/2]). +-export([start_json_response/2, start_json_response/3, end_json_response/1]). +-export([send_response/4,send_method_not_allowed/2,send_error/4, send_redirect/2,send_chunked_error/2]). +-export([send_json/2,send_json/3,send_json/4,last_chunk/1,parse_multipart_request/3]). +-export([accepted_encodings/1,handle_request_int/5,validate_referer/1,validate_ctype/2]). + +start_link() -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. + + BindAddress = couch_config:get("httpd", "bind_address", any), + Port = couch_config:get("httpd", "port", "5984"), + MaxConnections = couch_config:get("httpd", "max_connections", "2048"), + VirtualHosts = couch_config:get("vhosts"), + VhostGlobals = re:split( + couch_config:get("httpd", "vhost_global_handlers", ""), + ", ?", + [{return, list}] + ), + DefaultSpec = "{couch_httpd_db, handle_request}", + DefaultFun = make_arity_1_fun( + couch_config:get("httpd", "default_handler", DefaultSpec) + ), + + UrlHandlersList = lists:map( + fun({UrlKey, SpecStr}) -> + {?l2b(UrlKey), make_arity_1_fun(SpecStr)} + end, couch_config:get("httpd_global_handlers")), + + DbUrlHandlersList = lists:map( + fun({UrlKey, SpecStr}) -> + {?l2b(UrlKey), make_arity_2_fun(SpecStr)} + end, couch_config:get("httpd_db_handlers")), + + DesignUrlHandlersList = lists:map( + fun({UrlKey, SpecStr}) -> + {?l2b(UrlKey), make_arity_3_fun(SpecStr)} + end, couch_config:get("httpd_design_handlers")), + + UrlHandlers = dict:from_list(UrlHandlersList), + DbUrlHandlers = dict:from_list(DbUrlHandlersList), + DesignUrlHandlers = dict:from_list(DesignUrlHandlersList), + Loop = fun(Req)-> + apply(?MODULE, handle_request, [ + Req, DefaultFun, UrlHandlers, DbUrlHandlers, DesignUrlHandlers, + VirtualHosts, VhostGlobals + ]) + end, + + % and off we go + + {ok, Pid} = case mochiweb_http:start([ + {loop, Loop}, + {name, ?MODULE}, + {ip, BindAddress}, + {port, Port}, + {max, MaxConnections} + ]) of + {ok, MochiPid} -> {ok, MochiPid}; + {error, Reason} -> + io:format("Failure to start Mochiweb: ~s~n",[Reason]), + throw({error, Reason}) + end, + + ok = couch_config:register( + fun("httpd", "bind_address") -> + ?MODULE:stop(); + ("httpd", "port") -> + ?MODULE:stop(); + ("httpd", "max_connections") -> + ?MODULE:stop(); + ("httpd", "default_handler") -> + ?MODULE:stop(); + ("httpd_global_handlers", _) -> + ?MODULE:stop(); + 
("httpd_db_handlers", _) -> + ?MODULE:stop(); + ("vhosts", _) -> + ?MODULE:stop() + end, Pid), + + {ok, Pid}. + +% SpecStr is a string like "{my_module, my_fun}" +% or "{my_module, my_fun, <<"my_arg">>}" +make_arity_1_fun(SpecStr) -> + case couch_util:parse_term(SpecStr) of + {ok, {Mod, Fun, SpecArg}} -> + fun(Arg) -> Mod:Fun(Arg, SpecArg) end; + {ok, {Mod, Fun}} -> + fun(Arg) -> Mod:Fun(Arg) end + end. + +make_arity_2_fun(SpecStr) -> + case couch_util:parse_term(SpecStr) of + {ok, {Mod, Fun, SpecArg}} -> + fun(Arg1, Arg2) -> Mod:Fun(Arg1, Arg2, SpecArg) end; + {ok, {Mod, Fun}} -> + fun(Arg1, Arg2) -> Mod:Fun(Arg1, Arg2) end + end. + +make_arity_3_fun(SpecStr) -> + case couch_util:parse_term(SpecStr) of + {ok, {Mod, Fun, SpecArg}} -> + fun(Arg1, Arg2, Arg3) -> Mod:Fun(Arg1, Arg2, Arg3, SpecArg) end; + {ok, {Mod, Fun}} -> + fun(Arg1, Arg2, Arg3) -> Mod:Fun(Arg1, Arg2, Arg3) end + end. + +% SpecStr is "{my_module, my_fun}, {my_module2, my_fun2}" +make_fun_spec_strs(SpecStr) -> + re:split(SpecStr, "(?<=})\\s*,\\s*(?={)", [{return, list}]). + +stop() -> + mochiweb_http:stop(?MODULE). + +%% + +% if there's a vhost definition that matches the request, redirect internally +redirect_to_vhost(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers, VhostTarget) -> + + Path = MochiReq:get(raw_path), + Target = VhostTarget ++ Path, + ?LOG_DEBUG("Vhost Target: '~p'~n", [Target]), + % build a new mochiweb request + MochiReq1 = mochiweb_request:new(MochiReq:get(socket), + MochiReq:get(method), + Target, + MochiReq:get(version), + MochiReq:get(headers)), + % cleanup, It force mochiweb to reparse raw uri. + MochiReq1:cleanup(), + + handle_request_int(MochiReq1, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers). + +handle_request(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers, VirtualHosts, VhostGlobals) -> + + % grab Host from Req + Vhost = MochiReq:get_header_value("Host"), + + % find Vhost in config + case couch_util:get_value(Vhost, VirtualHosts) of + undefined -> % business as usual + handle_request_int(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers); + VhostTarget -> + case vhost_global(VhostGlobals, MochiReq) of + true ->% global handler for vhosts + handle_request_int(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers); + _Else -> + % do rewrite + redirect_to_vhost(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers, VhostTarget) + end + end. + + +handle_request_int(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers) -> + Begin = now(), + AuthenticationSrcs = make_fun_spec_strs( + couch_config:get("httpd", "authentication_handlers")), + % for the path, use the raw path with the query string and fragment + % removed, but URL quoting left intact + RawUri = MochiReq:get(raw_path), + {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri), + + HandlerKey = + case mochiweb_util:partition(Path, "/") of + {"", "", ""} -> + <<"/">>; % Special case the root url handler + {FirstPart, _, _} -> + list_to_binary(FirstPart) + end, + ?LOG_DEBUG("~p ~s ~p~nHeaders: ~p", [ + MochiReq:get(method), + RawUri, + MochiReq:get(version), + mochiweb_headers:to_list(MochiReq:get(headers)) + ]), + + Method1 = + case MochiReq:get(method) of + % already an atom + Meth when is_atom(Meth) -> Meth; + + % Non standard HTTP verbs aren't atoms (COPY, MOVE etc) so convert when + % possible (if any module references the atom, then it's existing). 
+ Meth -> couch_util:to_existing_atom(Meth) + end, + increment_method_stats(Method1), + + % allow broken HTTP clients to fake a full method vocabulary with an X-HTTP-METHOD-OVERRIDE header + MethodOverride = MochiReq:get_primary_header_value("X-HTTP-Method-Override"), + Method2 = case lists:member(MethodOverride, ["GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT", "COPY"]) of + true -> + ?LOG_INFO("MethodOverride: ~s (real method was ~s)", [MethodOverride, Method1]), + case Method1 of + 'POST' -> couch_util:to_existing_atom(MethodOverride); + _ -> + % Ignore X-HTTP-Method-Override when the original verb isn't POST. + % I'd like to send a 406 error to the client, but that'd require a nasty refactor. + % throw({not_acceptable, <<"X-HTTP-Method-Override may only be used with POST requests.">>}) + Method1 + end; + _ -> Method1 + end, + + % alias HEAD to GET as mochiweb takes care of stripping the body + Method = case Method2 of + 'HEAD' -> 'GET'; + Other -> Other + end, + + HttpReq = #httpd{ + mochi_req = MochiReq, + peer = MochiReq:get(peer), + method = Method, + path_parts = [list_to_binary(couch_httpd:unquote(Part)) + || Part <- string:tokens(Path, "/")], + db_url_handlers = DbUrlHandlers, + design_url_handlers = DesignUrlHandlers, + default_fun = DefaultFun, + url_handlers = UrlHandlers + }, + + HandlerFun = couch_util:dict_find(HandlerKey, UrlHandlers, DefaultFun), + + {ok, Resp} = + try + case authenticate_request(HttpReq, AuthenticationSrcs) of + #httpd{} = Req -> + HandlerFun(Req); + Response -> + Response + end + catch + throw:{http_head_abort, Resp0} -> + {ok, Resp0}; + throw:{invalid_json, S} -> + ?LOG_ERROR("attempted upload of invalid JSON (set log_level to debug to log it)", []), + ?LOG_DEBUG("Invalid JSON: ~p",[S]), + send_error(HttpReq, {bad_request, "invalid UTF-8 JSON"}); + throw:unacceptable_encoding -> + ?LOG_ERROR("unsupported encoding method for the response", []), + send_error(HttpReq, {not_acceptable, "unsupported encoding"}); + throw:bad_accept_encoding_value -> + ?LOG_ERROR("received invalid Accept-Encoding header", []), + send_error(HttpReq, bad_request); + exit:normal -> + exit(normal); + throw:Error -> + ?LOG_DEBUG("Minor error in HTTP request: ~p",[Error]), + ?LOG_DEBUG("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, Error); + error:badarg -> + ?LOG_ERROR("Badarg error in HTTP request",[]), + ?LOG_INFO("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, badarg); + error:function_clause -> + ?LOG_ERROR("function_clause error in HTTP request",[]), + ?LOG_INFO("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, function_clause); + Tag:Error -> + ?LOG_ERROR("Uncaught error in HTTP request: ~p",[{Tag, Error}]), + ?LOG_INFO("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, Error) + end, + RequestTime = round(timer:now_diff(now(), Begin)/1000), + couch_stats_collector:record({couchdb, request_time}, RequestTime), + couch_stats_collector:increment({httpd, requests}), + {ok, Resp}. + +% Try authentication handlers in order until one sets a user_ctx +% the auth funs also have the option of returning a response +% move this to couch_httpd_auth? 
+authenticate_request(#httpd{user_ctx=#user_ctx{}} = Req, _AuthSrcs) -> + Req; +authenticate_request(#httpd{} = Req, []) -> + case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of + "true" -> + throw({unauthorized, <<"Authentication required.">>}); + "false" -> + Req#httpd{user_ctx=#user_ctx{}} + end; +authenticate_request(#httpd{} = Req, [AuthSrc|Rest]) -> + AuthFun = make_arity_1_fun(AuthSrc), + R = case AuthFun(Req) of + #httpd{user_ctx=#user_ctx{}=UserCtx}=Req2 -> + Req2#httpd{user_ctx=UserCtx#user_ctx{handler=?l2b(AuthSrc)}}; + Else -> Else + end, + authenticate_request(R, Rest); +authenticate_request(Response, _AuthSrcs) -> + Response. + +increment_method_stats(Method) -> + couch_stats_collector:increment({httpd_request_methods, Method}). + +% if so, then it will not be rewritten, but will run as a normal couchdb request. +% normally you'd use this for _uuids _utils and a few of the others you want to keep available on vhosts. You can also use it to make databases 'global'. +vhost_global(VhostGlobals, MochiReq) -> + "/" ++ Path = MochiReq:get(path), + Front = case partition(Path) of + {"", "", ""} -> + "/"; % Special case the root url handler + {FirstPart, _, _} -> + FirstPart + end, + [true] == [true||V <- VhostGlobals, V == Front]. + +validate_referer(Req) -> + Host = host_for_request(Req), + Referer = header_value(Req, "Referer", fail), + case Referer of + fail -> + throw({bad_request, <<"Referer header required.">>}); + Referer -> + {_,RefererHost,_,_,_} = mochiweb_util:urlsplit(Referer), + if + RefererHost =:= Host -> ok; + true -> throw({bad_request, <<"Referer header must match host.">>}) + end + end. + +validate_ctype(Req, Ctype) -> + case couch_httpd:header_value(Req, "Content-Type") of + undefined -> + throw({bad_ctype, "Content-Type must be "++Ctype}); + ReqCtype -> + % ?LOG_ERROR("Ctype ~p ReqCtype ~p",[Ctype,ReqCtype]), + case re:split(ReqCtype, ";", [{return, list}]) of + [Ctype] -> ok; + [Ctype, _Rest] -> ok; + _Else -> + throw({bad_ctype, "Content-Type must be "++Ctype}) + end + end. + +% Utilities + +partition(Path) -> + mochiweb_util:partition(Path, "/"). + +header_value(#httpd{mochi_req=MochiReq}, Key) -> + MochiReq:get_header_value(Key). + +header_value(#httpd{mochi_req=MochiReq}, Key, Default) -> + case MochiReq:get_header_value(Key) of + undefined -> Default; + Value -> Value + end. + +primary_header_value(#httpd{mochi_req=MochiReq}, Key) -> + MochiReq:get_primary_header_value(Key). + +accepted_encodings(#httpd{mochi_req=MochiReq}) -> + case MochiReq:accepted_encodings(["gzip", "identity"]) of + bad_accept_encoding_value -> + throw(bad_accept_encoding_value); + [] -> + throw(unacceptable_encoding); + EncList -> + EncList + end. + +serve_file(Req, RelativePath, DocumentRoot) -> + serve_file(Req, RelativePath, DocumentRoot, []). + +serve_file(#httpd{mochi_req=MochiReq}=Req, RelativePath, DocumentRoot, ExtraHeaders) -> + {ok, MochiReq:serve_file(RelativePath, DocumentRoot, + server_header() ++ couch_httpd_auth:cookie_auth_header(Req, []) ++ ExtraHeaders)}. + +qs_value(Req, Key) -> + qs_value(Req, Key, undefined). + +qs_value(Req, Key, Default) -> + couch_util:get_value(Key, qs(Req), Default). + +qs(#httpd{mochi_req=MochiReq}) -> + MochiReq:parse_qs(). + +path(#httpd{mochi_req=MochiReq}) -> + MochiReq:get(path). 
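Each entry configured in "authentication_handlers" becomes an arity-1 fun via make_arity_1_fun/1, and authenticate_request/2 above relies on a simple contract: return the #httpd{} record with user_ctx set to accept the request, return it unchanged to fall through to the next handler, or return a response term to stop processing. A sketch of a conforming handler; the function and header names are illustrative only:

    example_header_auth_handler(#httpd{}=Req) ->
        case couch_httpd:header_value(Req, "X-Example-User") of
            undefined ->
                % no credentials: leave user_ctx unset so the next handler runs
                Req;
            Name ->
                Req#httpd{user_ctx=#user_ctx{name=?l2b(Name)}}
        end.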
+ +host_for_request(#httpd{mochi_req=MochiReq}) -> + XHost = couch_config:get("httpd", "x_forwarded_host", "X-Forwarded-Host"), + case MochiReq:get_header_value(XHost) of + undefined -> + case MochiReq:get_header_value("Host") of + undefined -> + {ok, {Address, Port}} = inet:sockname(MochiReq:get(socket)), + inet_parse:ntoa(Address) ++ ":" ++ integer_to_list(Port); + Value1 -> + Value1 + end; + Value -> Value + end. + +absolute_uri(#httpd{mochi_req=MochiReq}=Req, Path) -> + Host = host_for_request(Req), + XSsl = couch_config:get("httpd", "x_forwarded_ssl", "X-Forwarded-Ssl"), + Scheme = case MochiReq:get_header_value(XSsl) of + "on" -> "https"; + _ -> + XProto = couch_config:get("httpd", "x_forwarded_proto", "X-Forwarded-Proto"), + case MochiReq:get_header_value(XProto) of + % Restrict to "https" and "http" schemes only + "https" -> "https"; + _ -> "http" + end + end, + Scheme ++ "://" ++ Host ++ Path. + +unquote(UrlEncodedString) -> + mochiweb_util:unquote(UrlEncodedString). + +quote(UrlDecodedString) -> + mochiweb_util:quote_plus(UrlDecodedString). + +parse_form(#httpd{mochi_req=MochiReq}) -> + mochiweb_multipart:parse_form(MochiReq). + +recv(#httpd{mochi_req=MochiReq}, Len) -> + MochiReq:recv(Len). + +recv_chunked(#httpd{mochi_req=MochiReq}, MaxChunkSize, ChunkFun, InitState) -> + % Fun is called once with each chunk + % Fun({Length, Binary}, State) + % called with Length == 0 on the last time. + MochiReq:stream_body(MaxChunkSize, ChunkFun, InitState). + +body_length(Req) -> + case header_value(Req, "Transfer-Encoding") of + undefined -> + case header_value(Req, "Content-Length") of + undefined -> undefined; + Length -> list_to_integer(Length) + end; + "chunked" -> chunked; + Unknown -> {unknown_transfer_encoding, Unknown} + end. + +body(#httpd{mochi_req=MochiReq, req_body=ReqBody}) -> + case ReqBody of + undefined -> + % Maximum size of document PUT request body (4GB) + MaxSize = list_to_integer( + couch_config:get("couchdb", "max_document_size", "4294967296")), + MochiReq:recv_body(MaxSize); + _Else -> + ReqBody + end. + +json_body(Httpd) -> + ?JSON_DECODE(body(Httpd)). + +json_body_obj(Httpd) -> + case json_body(Httpd) of + {Props} -> {Props}; + _Else -> + throw({bad_request, "Request body must be a JSON object"}) + end. + + + +doc_etag(#doc{revs={Start, [DiskRev|_]}}) -> + "\"" ++ ?b2l(couch_doc:rev_to_str({Start, DiskRev})) ++ "\"". + +make_etag(Term) -> + <<SigInt:128/integer>> = couch_util:md5(term_to_binary(Term)), + list_to_binary("\"" ++ lists:flatten(io_lib:format("~.36B",[SigInt])) ++ "\""). + +etag_match(Req, CurrentEtag) when is_binary(CurrentEtag) -> + etag_match(Req, binary_to_list(CurrentEtag)); + +etag_match(Req, CurrentEtag) -> + EtagsToMatch = string:tokens( + couch_httpd:header_value(Req, "If-None-Match", ""), ", "), + lists:member(CurrentEtag, EtagsToMatch). + +etag_respond(Req, CurrentEtag, RespFun) -> + case etag_match(Req, CurrentEtag) of + true -> + % the client has this in their cache. + couch_httpd:send_response(Req, 304, [{"Etag", CurrentEtag}], <<>>); + false -> + % Run the function. + RespFun() + end. + +verify_is_server_admin(#httpd{user_ctx=UserCtx}) -> + verify_is_server_admin(UserCtx); +verify_is_server_admin(#user_ctx{roles=Roles}) -> + case lists:member(<<"_admin">>, Roles) of + true -> ok; + false -> throw({unauthorized, <<"You are not a server admin.">>}) + end. 
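make_etag/1 above hashes the term with MD5 and renders the digest in base 36, and etag_respond/3 compares that value against the request's If-None-Match list. An illustrative use from a handler (the payload is made up):

    send_with_etag(Req, Info) ->
        Etag = couch_httpd:make_etag(Info),
        couch_httpd:etag_respond(Req, Etag, fun() ->
            % only reached when the client does not already hold this ETag;
            % otherwise etag_respond/3 answers 304 on our behalf.
            couch_httpd:send_json(Req, 200, [{"Etag", Etag}], {[{ok, true}]})
        end).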
+ +log_request(#httpd{mochi_req=MochiReq,peer=Peer}, Code) -> + ?LOG_INFO("~s - - ~p ~s ~B", [ + Peer, + couch_util:to_existing_atom(MochiReq:get(method)), + MochiReq:get(raw_path), + couch_util:to_integer(Code) + ]). + + +start_response_length(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Length) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_codes, Code}), + Resp = MochiReq:start_response_length({Code, Headers ++ server_header() ++ couch_httpd_auth:cookie_auth_header(Req, Headers), Length}), + case MochiReq:get(method) of + 'HEAD' -> throw({http_head_abort, Resp}); + _ -> ok + end, + {ok, Resp}. + +send(Resp, Data) -> + Resp:send(Data), + {ok, Resp}. + +no_resp_conn_header([]) -> + true; +no_resp_conn_header([{Hdr, _}|Rest]) -> + case string:to_lower(Hdr) of + "connection" -> false; + _ -> no_resp_conn_header(Rest) + end. + +http_1_0_keep_alive(Req, Headers) -> + KeepOpen = Req:should_close() == false, + IsHttp10 = Req:get(version) == {1, 0}, + NoRespHeader = no_resp_conn_header(Headers), + case KeepOpen andalso IsHttp10 andalso NoRespHeader of + true -> [{"Connection", "Keep-Alive"} | Headers]; + false -> Headers + end. + +start_chunked_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_codes, Code}), + Headers2 = http_1_0_keep_alive(MochiReq, Headers), + Resp = MochiReq:respond({Code, Headers2 ++ server_header() ++ couch_httpd_auth:cookie_auth_header(Req, Headers2), chunked}), + case MochiReq:get(method) of + 'HEAD' -> throw({http_head_abort, Resp}); + _ -> ok + end, + {ok, Resp}. + +send_chunk(Resp, Data) -> + case iolist_size(Data) of + 0 -> ok; % do nothing + _ -> Resp:write_chunk(Data) + end, + {ok, Resp}. + +last_chunk(Resp) -> + Resp:write_chunk([]), + {ok, Resp}. + +send_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Body) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_codes, Code}), + Headers2 = http_1_0_keep_alive(MochiReq, Headers), + if Code >= 400 -> + ?LOG_DEBUG("httpd ~p error response:~n ~s", [Code, Body]); + true -> ok + end, + {ok, MochiReq:respond({Code, Headers2 ++ server_header() ++ couch_httpd_auth:cookie_auth_header(Req, Headers2), Body})}. + +send_method_not_allowed(Req, Methods) -> + send_error(Req, 405, [{"Allow", Methods}], <<"method_not_allowed">>, ?l2b("Only " ++ Methods ++ " allowed")). + +send_json(Req, Value) -> + send_json(Req, 200, Value). + +send_json(Req, Code, Value) -> + send_json(Req, Code, [], Value). + +send_json(Req, Code, Headers, Value) -> + DefaultHeaders = [ + {"Content-Type", negotiate_content_type(Req)}, + {"Cache-Control", "must-revalidate"} + ], + Body = list_to_binary( + [start_jsonp(Req), ?JSON_ENCODE(Value), end_jsonp(), $\n] + ), + send_response(Req, Code, DefaultHeaders ++ Headers, Body). + +start_json_response(Req, Code) -> + start_json_response(Req, Code, []). + +start_json_response(Req, Code, Headers) -> + DefaultHeaders = [ + {"Content-Type", negotiate_content_type(Req)}, + {"Cache-Control", "must-revalidate"} + ], + start_jsonp(Req), % Validate before starting chunked. + %start_chunked_response(Req, Code, DefaultHeaders ++ Headers). + {ok, Resp} = start_chunked_response(Req, Code, DefaultHeaders ++ Headers), + case start_jsonp(Req) of + [] -> ok; + Start -> send_chunk(Resp, Start) + end, + {ok, Resp}. + +end_json_response(Resp) -> + send_chunk(Resp, end_jsonp() ++ [$\n]), + last_chunk(Resp). 
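The chunked-response helpers above pair up as start_chunked_response/3, any number of send_chunk/2 calls, and a final last_chunk/1, which writes the empty chunk that terminates the body; http_1_0_keep_alive/2 only adds "Connection: Keep-Alive" for HTTP/1.0 clients that asked to keep the socket open. A small sketch, not part of the patch:

    stream_plain_text(Req, Lines) ->
        {ok, Resp} = couch_httpd:start_chunked_response(Req, 200,
            [{"Content-Type", "text/plain;charset=utf-8"}]),
        % send_chunk/2 skips empty iolists, so a handler cannot accidentally
        % emit the terminating zero-length chunk early.
        lists:foreach(fun(Line) -> couch_httpd:send_chunk(Resp, [Line, $\n]) end, Lines),
        couch_httpd:last_chunk(Resp).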
+ +start_jsonp(Req) -> + case get(jsonp) of + undefined -> put(jsonp, qs_value(Req, "callback", no_jsonp)); + _ -> ok + end, + case get(jsonp) of + no_jsonp -> []; + [] -> []; + CallBack -> + try + % make sure jsonp is configured on (default off) + case couch_config:get("httpd", "allow_jsonp", "false") of + "true" -> + validate_callback(CallBack), + CallBack ++ "("; + _Else -> + % this could throw an error message, but instead we just ignore the + % jsonp parameter + % throw({bad_request, <<"JSONP must be configured before using.">>}) + put(jsonp, no_jsonp), + [] + end + catch + Error -> + put(jsonp, no_jsonp), + throw(Error) + end + end. + +end_jsonp() -> + Resp = case get(jsonp) of + no_jsonp -> []; + [] -> []; + _ -> ");" + end, + put(jsonp, undefined), + Resp. + +validate_callback(CallBack) when is_binary(CallBack) -> + validate_callback(binary_to_list(CallBack)); +validate_callback([]) -> + ok; +validate_callback([Char | Rest]) -> + case Char of + _ when Char >= $a andalso Char =< $z -> ok; + _ when Char >= $A andalso Char =< $Z -> ok; + _ when Char >= $0 andalso Char =< $9 -> ok; + _ when Char == $. -> ok; + _ when Char == $_ -> ok; + _ when Char == $[ -> ok; + _ when Char == $] -> ok; + _ -> + throw({bad_request, invalid_callback}) + end, + validate_callback(Rest). + + +error_info({Error, Reason}) when is_list(Reason) -> + error_info({Error, ?l2b(Reason)}); +error_info(bad_request) -> + {400, <<"bad_request">>, <<>>}; +error_info({bad_request, Reason}) -> + {400, <<"bad_request">>, Reason}; +error_info({query_parse_error, Reason}) -> + {400, <<"query_parse_error">>, Reason}; +% Prior art for md5 mismatch resulting in a 400 is from AWS S3 +error_info(md5_mismatch) -> + {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>}; +error_info(not_found) -> + {404, <<"not_found">>, <<"missing">>}; +error_info({not_found, Reason}) -> + {404, <<"not_found">>, Reason}; +error_info({not_acceptable, Reason}) -> + {406, <<"not_acceptable">>, Reason}; +error_info(conflict) -> + {409, <<"conflict">>, <<"Document update conflict.">>}; +error_info({forbidden, Msg}) -> + {403, <<"forbidden">>, Msg}; +error_info({unauthorized, Msg}) -> + {401, <<"unauthorized">>, Msg}; +error_info(file_exists) -> + {412, <<"file_exists">>, <<"The database could not be " + "created, the file already exists.">>}; +error_info({bad_ctype, Reason}) -> + {415, <<"bad_content_type">>, Reason}; +error_info({error, illegal_database_name}) -> + {400, <<"illegal_database_name">>, <<"Only lowercase characters (a-z), " + "digits (0-9), and any of the characters _, $, (, ), +, -, and / " + "are allowed. Must begin with a letter.">>}; +error_info({missing_stub, Reason}) -> + {412, <<"missing_stub">>, Reason}; +error_info({Error, Reason}) -> + {500, couch_util:to_binary(Error), couch_util:to_binary(Reason)}; +error_info(Error) -> + {500, <<"unknown_error">>, couch_util:to_binary(Error)}. + +error_headers(#httpd{mochi_req=MochiReq}=Req, Code, ErrorStr, ReasonStr) -> + if Code == 401 -> + % this is where the basic auth popup is triggered + case MochiReq:get_header_value("X-CouchDB-WWW-Authenticate") of + undefined -> + case couch_config:get("httpd", "WWW-Authenticate", nil) of + nil -> + % If the client is a browser and the basic auth popup isn't turned on + % redirect to the session page. 
+ case ErrorStr of + <<"unauthorized">> -> + case couch_config:get("couch_httpd_auth", "authentication_redirect", nil) of + nil -> {Code, []}; + AuthRedirect -> + case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of + "true" -> + % send the browser popup header no matter what if we are require_valid_user + {Code, [{"WWW-Authenticate", "Basic realm=\"server\""}]}; + _False -> + % if the accept header matches html, then do the redirect. else proceed as usual. + Accepts = case MochiReq:get_header_value("Accept") of + undefined -> + % According to the HTTP 1.1 spec, if the Accept + % header is missing, it means the client accepts + % all media types. + "html"; + Else -> + Else + end, + case re:run(Accepts, "\\bhtml\\b", + [{capture, none}, caseless]) of + nomatch -> + {Code, []}; + match -> + AuthRedirectBin = ?l2b(AuthRedirect), + UrlReturn = ?l2b(couch_util:url_encode(MochiReq:get(path))), + UrlReason = ?l2b(couch_util:url_encode(ReasonStr)), + {302, [{"Location", couch_httpd:absolute_uri(Req, <<AuthRedirectBin/binary,"?return=",UrlReturn/binary,"&reason=",UrlReason/binary>>)}]} + end + end + end; + _Else -> + {Code, []} + end; + Type -> + {Code, [{"WWW-Authenticate", Type}]} + end; + Type -> + {Code, [{"WWW-Authenticate", Type}]} + end; + true -> + {Code, []} + end. + +send_error(_Req, {already_sent, Resp, _Error}) -> + {ok, Resp}; + +send_error(Req, Error) -> + {Code, ErrorStr, ReasonStr} = error_info(Error), + {Code1, Headers} = error_headers(Req, Code, ErrorStr, ReasonStr), + send_error(Req, Code1, Headers, ErrorStr, ReasonStr). + +send_error(Req, Code, ErrorStr, ReasonStr) -> + send_error(Req, Code, [], ErrorStr, ReasonStr). + +send_error(Req, Code, Headers, ErrorStr, ReasonStr) -> + send_json(Req, Code, Headers, + {[{<<"error">>, ErrorStr}, + {<<"reason">>, ReasonStr}]}). + +% give the option for list functions to output html or other raw errors +send_chunked_error(Resp, {_Error, {[{<<"body">>, Reason}]}}) -> + send_chunk(Resp, Reason), + last_chunk(Resp); + +send_chunked_error(Resp, Error) -> + {Code, ErrorStr, ReasonStr} = error_info(Error), + JsonError = {[{<<"code">>, Code}, + {<<"error">>, ErrorStr}, + {<<"reason">>, ReasonStr}]}, + send_chunk(Resp, ?l2b([$\n,?JSON_ENCODE(JsonError),$\n])), + last_chunk(Resp). + +send_redirect(Req, Path) -> + Headers = [{"Location", couch_httpd:absolute_uri(Req, Path)}], + send_response(Req, 301, Headers, <<>>). + +negotiate_content_type(#httpd{mochi_req=MochiReq}) -> + %% Determine the appropriate Content-Type header for a JSON response + %% depending on the Accept header in the request. A request that explicitly + %% lists the correct JSON MIME type will get that type, otherwise the + %% response will have the generic MIME type "text/plain" + AcceptedTypes = case MochiReq:get_header_value("Accept") of + undefined -> []; + AcceptHeader -> string:tokens(AcceptHeader, ", ") + end, + case lists:member("application/json", AcceptedTypes) of + true -> "application/json"; + false -> "text/plain;charset=utf-8" + end. + +server_header() -> + OTPVersion = "R" ++ integer_to_list(erlang:system_info(compat_rel)) ++ "B", + [{"Server", "CouchDB/" ++ couch_server:get_version() ++ + " (Erlang OTP/" ++ OTPVersion ++ ")"}]. + + +-record(mp, {boundary, buffer, data_fun, callback}). 
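Handlers do not normally build error responses themselves: they throw one of the terms recognized by error_info/1 above, and the catch clauses in handle_request_int/5 translate the throw into the mapped status code with a JSON {"error": ..., "reason": ...} body. A sketch of that pathway; the query parameter is made up:

    parse_limit_qs(Req) ->
        case couch_httpd:qs_value(Req, "limit") of
            undefined ->
                undefined;
            Value ->
                case (catch list_to_integer(Value)) of
                    N when is_integer(N), N >= 0 ->
                        N;
                    _ ->
                        % mapped by error_info/1 to a 400 "query_parse_error"
                        throw({query_parse_error,
                               <<"limit must be a non-negative integer">>})
                end
        end.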
+ + +parse_multipart_request(ContentType, DataFun, Callback) -> + Boundary0 = iolist_to_binary(get_boundary(ContentType)), + Boundary = <<"\r\n--", Boundary0/binary>>, + Mp = #mp{boundary= Boundary, + buffer= <<>>, + data_fun=DataFun, + callback=Callback}, + {Mp2, _NilCallback} = read_until(Mp, <<"--", Boundary0/binary>>, + fun(Next)-> nil_callback(Next) end), + #mp{buffer=Buffer, data_fun=DataFun2, callback=Callback2} = + parse_part_header(Mp2), + {Buffer, DataFun2, Callback2}. + +nil_callback(_Data)-> + fun(Next) -> nil_callback(Next) end. + +get_boundary({"multipart/" ++ _, Opts}) -> + case couch_util:get_value("boundary", Opts) of + S when is_list(S) -> + S + end; +get_boundary(ContentType) -> + {"multipart/" ++ _ , Opts} = mochiweb_util:parse_header(ContentType), + get_boundary({"multipart/", Opts}). + + + +split_header(<<>>) -> + []; +split_header(Line) -> + {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end, + binary_to_list(Line)), + [{string:to_lower(string:strip(Name)), + mochiweb_util:parse_header(Value)}]. + +read_until(#mp{data_fun=DataFun, buffer=Buffer}=Mp, Pattern, Callback) -> + case find_in_binary(Pattern, Buffer) of + not_found -> + Callback2 = Callback(Buffer), + {Buffer2, DataFun2} = DataFun(), + Buffer3 = iolist_to_binary(Buffer2), + read_until(Mp#mp{data_fun=DataFun2,buffer=Buffer3}, Pattern, Callback2); + {partial, 0} -> + {NewData, DataFun2} = DataFun(), + read_until(Mp#mp{data_fun=DataFun2, + buffer= iolist_to_binary([Buffer,NewData])}, + Pattern, Callback); + {partial, Skip} -> + <<DataChunk:Skip/binary, Rest/binary>> = Buffer, + Callback2 = Callback(DataChunk), + {NewData, DataFun2} = DataFun(), + read_until(Mp#mp{data_fun=DataFun2, + buffer= iolist_to_binary([Rest | NewData])}, + Pattern, Callback2); + {exact, 0} -> + PatternLen = size(Pattern), + <<_:PatternLen/binary, Rest/binary>> = Buffer, + {Mp#mp{buffer= Rest}, Callback}; + {exact, Skip} -> + PatternLen = size(Pattern), + <<DataChunk:Skip/binary, _:PatternLen/binary, Rest/binary>> = Buffer, + Callback2 = Callback(DataChunk), + {Mp#mp{buffer= Rest}, Callback2} + end. + + +parse_part_header(#mp{callback=UserCallBack}=Mp) -> + {Mp2, AccCallback} = read_until(Mp, <<"\r\n\r\n">>, + fun(Next) -> acc_callback(Next, []) end), + HeaderData = AccCallback(get_data), + + Headers = + lists:foldl(fun(Line, Acc) -> + split_header(Line) ++ Acc + end, [], re:split(HeaderData,<<"\r\n">>, [])), + NextCallback = UserCallBack({headers, Headers}), + parse_part_body(Mp2#mp{callback=NextCallback}). + +parse_part_body(#mp{boundary=Prefix, callback=Callback}=Mp) -> + {Mp2, WrappedCallback} = read_until(Mp, Prefix, + fun(Data) -> body_callback_wrapper(Data, Callback) end), + Callback2 = WrappedCallback(get_callback), + Callback3 = Callback2(body_end), + case check_for_last(Mp2#mp{callback=Callback3}) of + {last, #mp{callback=Callback3}=Mp3} -> + Mp3#mp{callback=Callback3(eof)}; + {more, Mp3} -> + parse_part_header(Mp3) + end. + +acc_callback(get_data, Acc)-> + iolist_to_binary(lists:reverse(Acc)); +acc_callback(Data, Acc)-> + fun(Next) -> acc_callback(Next, [Data | Acc]) end. + +body_callback_wrapper(get_callback, Callback) -> + Callback; +body_callback_wrapper(Data, Callback) -> + Callback2 = Callback({body, Data}), + fun(Next) -> body_callback_wrapper(Next, Callback2) end. 
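parse_multipart_request/3 above streams a multipart body without buffering whole parts: DataFun yields the next chunk as {Data, NextDataFun}, and the user callback is invoked with {headers, Headers}, {body, Data}, body_end and finally eof, each call returning the fun to use for the following event so state can be carried in the closure. A sketch of a conforming callback that merely logs the events (illustrative only):

    log_part_events(Event) ->
        case Event of
            {headers, Headers} -> ?LOG_DEBUG("part headers: ~p", [Headers]);
            {body, Data} -> ?LOG_DEBUG("part body chunk: ~p bytes", [iolist_size(Data)]);
            body_end -> ?LOG_DEBUG("end of part", []);
            eof -> ?LOG_DEBUG("end of multipart body", [])
        end,
        % return the callback to use for the next event
        fun(Next) -> log_part_events(Next) end.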
+ + +check_for_last(#mp{buffer=Buffer, data_fun=DataFun}=Mp) -> + case Buffer of + <<"--",_/binary>> -> {last, Mp}; + <<_, _, _/binary>> -> {more, Mp}; + _ -> % not long enough + {Data, DataFun2} = DataFun(), + check_for_last(Mp#mp{buffer= <<Buffer/binary, Data/binary>>, + data_fun = DataFun2}) + end. + +find_in_binary(B, Data) when size(B) > 0 -> + case size(Data) - size(B) of + Last when Last < 0 -> + partial_find(B, Data, 0, size(Data)); + Last -> + find_in_binary(B, size(B), Data, 0, Last) + end. + +find_in_binary(B, BS, D, N, Last) when N =< Last-> + case D of + <<_:N/binary, B:BS/binary, _/binary>> -> + {exact, N}; + _ -> + find_in_binary(B, BS, D, 1 + N, Last) + end; +find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last -> + partial_find(B, D, N, BS - 1). + +partial_find(_B, _D, _N, 0) -> + not_found; +partial_find(B, D, N, K) -> + <<B1:K/binary, _/binary>> = B, + case D of + <<_Skip:N/binary, B1/binary>> -> + {partial, N}; + _ -> + partial_find(B, D, 1 + N, K - 1) + end. + + diff --git a/apps/couch/src/couch_httpd_auth.erl b/apps/couch/src/couch_httpd_auth.erl new file mode 100644 index 00000000..7023e7f3 --- /dev/null +++ b/apps/couch/src/couch_httpd_auth.erl @@ -0,0 +1,349 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_auth). +-include("couch_db.hrl"). + +-export([default_authentication_handler/1,special_test_authentication_handler/1]). +-export([cookie_authentication_handler/1]). +-export([null_authentication_handler/1]). +-export([proxy_authentification_handler/1]). +-export([cookie_auth_header/2]). +-export([handle_session_req/1]). + +-import(couch_httpd, [header_value/2, send_json/2,send_json/4, send_method_not_allowed/2]). + +special_test_authentication_handler(Req) -> + case header_value(Req, "WWW-Authenticate") of + "X-Couch-Test-Auth " ++ NamePass -> + % NamePass is a colon separated string: "joe schmoe:a password". + [Name, Pass] = re:split(NamePass, ":", [{return, list}]), + case {Name, Pass} of + {"Jan Lehnardt", "apple"} -> ok; + {"Christopher Lenz", "dog food"} -> ok; + {"Noah Slater", "biggiesmalls endian"} -> ok; + {"Chris Anderson", "mp3"} -> ok; + {"Damien Katz", "pecan pie"} -> ok; + {_, _} -> + throw({unauthorized, <<"Name or password is incorrect.">>}) + end, + Req#httpd{user_ctx=#user_ctx{name=?l2b(Name)}}; + _ -> + % No X-Couch-Test-Auth credentials sent, give admin access so the + % previous authentication can be restored after the test + Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}} + end. + +basic_name_pw(Req) -> + AuthorizationHeader = header_value(Req, "Authorization"), + case AuthorizationHeader of + "Basic " ++ Base64Value -> + case string:tokens(?b2l(base64:decode(Base64Value)),":") of + ["_", "_"] -> + % special name and pass to be logged out + nil; + [User, Pass] -> + {User, Pass}; + [User] -> + {User, ""}; + _ -> + nil + end; + _ -> + nil + end. 
+
+default_authentication_handler(Req) ->
+    case basic_name_pw(Req) of
+    {User, Pass} ->
+        case couch_auth_cache:get_user_creds(User) of
+            nil ->
+                throw({unauthorized, <<"Name or password is incorrect.">>});
+            UserProps ->
+                UserSalt = couch_util:get_value(<<"salt">>, UserProps, <<>>),
+                PasswordHash = hash_password(?l2b(Pass), UserSalt),
+                ExpectedHash = couch_util:get_value(<<"password_sha">>, UserProps, nil),
+                case couch_util:verify(ExpectedHash, PasswordHash) of
+                    true ->
+                        Req#httpd{user_ctx=#user_ctx{
+                            name=?l2b(User),
+                            roles=couch_util:get_value(<<"roles">>, UserProps, [])
+                        }};
+                    _Else ->
+                        throw({unauthorized, <<"Name or password is incorrect.">>})
+                end
+        end;
+    nil ->
+        case couch_server:has_admins() of
+        true ->
+            Req;
+        false ->
+            case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of
+                "true" -> Req;
+                % If no admins, and no user required, then everyone is admin!
+                % Yay, admin party!
+                _ -> Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}}
+            end
+        end
+    end.
+
+null_authentication_handler(Req) ->
+    Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}}.
+
+%% @doc proxy auth handler.
+%
+% This handler allows creation of a userCtx object from a user authenticated remotely.
+% The client just passes specific headers to CouchDB and the handler creates the userCtx.
+% Header names can be defined in local.ini. By default they are:
+%
+%   * X-Auth-CouchDB-UserName : contains the username (x_auth_username in the
+%   couch_httpd_auth section)
+%   * X-Auth-CouchDB-Roles : contains the user roles, a comma-separated list of
+%   roles (x_auth_roles in the couch_httpd_auth section)
+%   * X-Auth-CouchDB-Token : token used to authenticate the request (x_auth_token
+%   in the couch_httpd_auth section). The token is an HMAC-SHA1 of the username,
+%   keyed with the secret from the couch_httpd_auth section of the ini; the secret
+%   must be the same on the client and the CouchDB node. The token is optional
+%   unless proxy_use_secret in the couch_httpd_auth section is set to true.
+%
+proxy_authentification_handler(Req) ->
+    case proxy_auth_user(Req) of
+        nil -> Req;
+        Req2 -> Req2
+    end.
+
+proxy_auth_user(Req) ->
+    XHeaderUserName = couch_config:get("couch_httpd_auth", "x_auth_username",
+                                "X-Auth-CouchDB-UserName"),
+    XHeaderRoles = couch_config:get("couch_httpd_auth", "x_auth_roles",
+                                "X-Auth-CouchDB-Roles"),
+    XHeaderToken = couch_config:get("couch_httpd_auth", "x_auth_token",
+                                "X-Auth-CouchDB-Token"),
+    case header_value(Req, XHeaderUserName) of
+        undefined -> nil;
+        UserName ->
+            Roles = case header_value(Req, XHeaderRoles) of
+                undefined -> [];
+                Else ->
+                    [?l2b(R) || R <- string:tokens(Else, ",")]
+            end,
+            case couch_config:get("couch_httpd_auth", "proxy_use_secret", "false") of
+                "true" ->
+                    case couch_config:get("couch_httpd_auth", "secret", nil) of
+                        nil ->
+                            Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), roles=Roles}};
+                        Secret ->
+                            ExpectedToken = couch_util:to_hex(crypto:sha_mac(Secret, UserName)),
+                            case header_value(Req, XHeaderToken) of
+                                Token when Token == ExpectedToken ->
+                                    Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName),
+                                                        roles=Roles}};
+                                _ -> nil
+                            end
+                    end;
+                _ ->
+                    Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), roles=Roles}}
+            end
+    end.
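For a proxy sitting in front of CouchDB, the token check above means the expected X-Auth-CouchDB-Token value is simply the hex-encoded HMAC-SHA1 of the user name keyed with the shared secret. A sketch of how the proxy side could compute it (names are illustrative):

    proxy_token(SharedSecret, UserName) ->
        % must match ExpectedToken in proxy_auth_user/1:
        % hex(HMAC-SHA1(secret, username))
        couch_util:to_hex(crypto:sha_mac(SharedSecret, UserName)).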
+ + +cookie_authentication_handler(#httpd{mochi_req=MochiReq}=Req) -> + case MochiReq:get_cookie_value("AuthSession") of + undefined -> Req; + [] -> Req; + Cookie -> + [User, TimeStr | HashParts] = try + AuthSession = couch_util:decodeBase64Url(Cookie), + [_A, _B | _Cs] = string:tokens(?b2l(AuthSession), ":") + catch + _:_Error -> + Reason = <<"Malformed AuthSession cookie. Please clear your cookies.">>, + throw({bad_request, Reason}) + end, + % Verify expiry and hash + CurrentTime = make_cookie_time(), + case couch_config:get("couch_httpd_auth", "secret", nil) of + nil -> + ?LOG_ERROR("cookie auth secret is not set",[]), + Req; + SecretStr -> + Secret = ?l2b(SecretStr), + case couch_auth_cache:get_user_creds(User) of + nil -> Req; + UserProps -> + UserSalt = couch_util:get_value(<<"salt">>, UserProps, <<"">>), + FullSecret = <<Secret/binary, UserSalt/binary>>, + ExpectedHash = crypto:sha_mac(FullSecret, User ++ ":" ++ TimeStr), + Hash = ?l2b(string:join(HashParts, ":")), + Timeout = to_int(couch_config:get("couch_httpd_auth", "timeout", 600)), + ?LOG_DEBUG("timeout ~p", [Timeout]), + case (catch erlang:list_to_integer(TimeStr, 16)) of + TimeStamp when CurrentTime < TimeStamp + Timeout -> + case couch_util:verify(ExpectedHash, Hash) of + true -> + TimeLeft = TimeStamp + Timeout - CurrentTime, + ?LOG_DEBUG("Successful cookie auth as: ~p", [User]), + Req#httpd{user_ctx=#user_ctx{ + name=?l2b(User), + roles=couch_util:get_value(<<"roles">>, UserProps, []) + }, auth={FullSecret, TimeLeft < Timeout*0.9}}; + _Else -> + Req + end; + _Else -> + Req + end + end + end + end. + +cookie_auth_header(#httpd{user_ctx=#user_ctx{name=null}}, _Headers) -> []; +cookie_auth_header(#httpd{user_ctx=#user_ctx{name=User}, auth={Secret, true}}, Headers) -> + % Note: we only set the AuthSession cookie if: + % * a valid AuthSession cookie has been received + % * we are outside a 10% timeout window + % * and if an AuthSession cookie hasn't already been set e.g. by a login + % or logout handler. + % The login and logout handlers need to set the AuthSession cookie + % themselves. + CookieHeader = couch_util:get_value("Set-Cookie", Headers, ""), + Cookies = mochiweb_cookies:parse_cookie(CookieHeader), + AuthSession = couch_util:get_value("AuthSession", Cookies), + if AuthSession == undefined -> + TimeStamp = make_cookie_time(), + [cookie_auth_cookie(?b2l(User), Secret, TimeStamp)]; + true -> + [] + end; +cookie_auth_header(_Req, _Headers) -> []. + +cookie_auth_cookie(User, Secret, TimeStamp) -> + SessionData = User ++ ":" ++ erlang:integer_to_list(TimeStamp, 16), + Hash = crypto:sha_mac(Secret, SessionData), + mochiweb_cookies:cookie("AuthSession", + couch_util:encodeBase64Url(SessionData ++ ":" ++ ?b2l(Hash)), + [{path, "/"}, {http_only, true}]). % TODO add {secure, true} when SSL is detected + +hash_password(Password, Salt) -> + ?l2b(couch_util:to_hex(crypto:sha(<<Password/binary, Salt/binary>>))). + +ensure_cookie_auth_secret() -> + case couch_config:get("couch_httpd_auth", "secret", nil) of + nil -> + NewSecret = ?b2l(couch_uuids:random()), + couch_config:set("couch_httpd_auth", "secret", NewSecret), + NewSecret; + Secret -> Secret + end. 
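Putting the pieces above together, the AuthSession cookie value is base64url("Name:TimestampInHex:Hash"), where Hash is an HMAC-SHA1 of "Name:TimestampInHex" keyed with the server secret concatenated with the user's salt; cookie_authentication_handler/1 re-derives the hash and rejects the cookie once the timestamp plus the configured timeout has passed. A sketch of the construction, mirroring cookie_auth_cookie/3 (argument values illustrative; User is a plain string, Secret and UserSalt are binaries):

    example_auth_session(User, Secret, UserSalt) ->
        FullSecret = <<Secret/binary, UserSalt/binary>>,
        TimeStamp = make_cookie_time(),
        SessionData = User ++ ":" ++ erlang:integer_to_list(TimeStamp, 16),
        Hash = crypto:sha_mac(FullSecret, SessionData),
        % this is exactly the value cookie_auth_cookie/3 stores in the cookie
        couch_util:encodeBase64Url(SessionData ++ ":" ++ ?b2l(Hash)).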
+ +% session handlers +% Login handler with user db +% TODO this should also allow a JSON POST +handle_session_req(#httpd{method='POST', mochi_req=MochiReq}=Req) -> + ReqBody = MochiReq:recv_body(), + Form = case MochiReq:get_primary_header_value("content-type") of + % content type should be json + "application/x-www-form-urlencoded" ++ _ -> + mochiweb_util:parse_qs(ReqBody); + _ -> + [] + end, + UserName = ?l2b(couch_util:get_value("name", Form, "")), + Password = ?l2b(couch_util:get_value("password", Form, "")), + ?LOG_DEBUG("Attempt Login: ~s",[UserName]), + User = case couch_auth_cache:get_user_creds(UserName) of + nil -> []; + Result -> Result + end, + UserSalt = couch_util:get_value(<<"salt">>, User, <<>>), + PasswordHash = hash_password(Password, UserSalt), + ExpectedHash = couch_util:get_value(<<"password_sha">>, User, nil), + case couch_util:verify(ExpectedHash, PasswordHash) of + true -> + % setup the session cookie + Secret = ?l2b(ensure_cookie_auth_secret()), + CurrentTime = make_cookie_time(), + Cookie = cookie_auth_cookie(?b2l(UserName), <<Secret/binary, UserSalt/binary>>, CurrentTime), + % TODO document the "next" feature in Futon + {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of + nil -> + {200, [Cookie]}; + Redirect -> + {302, [Cookie, {"Location", couch_httpd:absolute_uri(Req, Redirect)}]} + end, + send_json(Req#httpd{req_body=ReqBody}, Code, Headers, + {[ + {ok, true}, + {name, couch_util:get_value(<<"name">>, User, null)}, + {roles, couch_util:get_value(<<"roles">>, User, [])} + ]}); + _Else -> + % clear the session + Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}, {http_only, true}]), + send_json(Req, 401, [Cookie], {[{error, <<"unauthorized">>},{reason, <<"Name or password is incorrect.">>}]}) + end; +% get user info +% GET /_session +handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req) -> + Name = UserCtx#user_ctx.name, + ForceLogin = couch_httpd:qs_value(Req, "basic", "false"), + case {Name, ForceLogin} of + {null, "true"} -> + throw({unauthorized, <<"Please login.">>}); + {Name, _} -> + send_json(Req, {[ + % remove this ok + {ok, true}, + {<<"userCtx">>, {[ + {name, Name}, + {roles, UserCtx#user_ctx.roles} + ]}}, + {info, {[ + {authentication_db, ?l2b(couch_config:get("couch_httpd_auth", "authentication_db"))}, + {authentication_handlers, [auth_name(H) || H <- couch_httpd:make_fun_spec_strs( + couch_config:get("httpd", "authentication_handlers"))]} + ] ++ maybe_value(authenticated, UserCtx#user_ctx.handler, fun(Handler) -> + auth_name(?b2l(Handler)) + end)}} + ]}) + end; +% logout by deleting the session +handle_session_req(#httpd{method='DELETE'}=Req) -> + Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}, {http_only, true}]), + {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of + nil -> + {200, [Cookie]}; + Redirect -> + {302, [Cookie, {"Location", couch_httpd:absolute_uri(Req, Redirect)}]} + end, + send_json(Req, Code, Headers, {[{ok, true}]}); +handle_session_req(Req) -> + send_method_not_allowed(Req, "GET,HEAD,POST,DELETE"). + +maybe_value(_Key, undefined, _Fun) -> []; +maybe_value(Key, Else, Fun) -> + [{Key, Fun(Else)}]. + +auth_name(String) when is_list(String) -> + [_,_,_,_,_,Name|_] = re:split(String, "[\\W_]", [{return, list}]), + ?l2b(Name). + +to_int(Value) when is_binary(Value) -> + to_int(?b2l(Value)); +to_int(Value) when is_list(Value) -> + list_to_integer(Value); +to_int(Value) when is_integer(Value) -> + Value. 
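The login handler above never stores the clear-text password: the user document carries a salt and password_sha, the hex SHA-1 of the password concatenated with that salt, and couch_util:verify/2 compares the stored and recomputed digests. A sketch of that check (function and argument names are illustrative; both password and salt are binaries):

    password_matches(ClearPassword, UserSalt, StoredPasswordSha) ->
        % same construction as hash_password/2 above
        Computed = ?l2b(couch_util:to_hex(
            crypto:sha(<<ClearPassword/binary, UserSalt/binary>>))),
        couch_util:verify(StoredPasswordSha, Computed).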
+
+make_cookie_time() ->
+    {NowMS, NowS, _} = erlang:now(),
+    NowMS * 1000000 + NowS.
diff --git a/apps/couch/src/couch_httpd_db.erl b/apps/couch/src/couch_httpd_db.erl
new file mode 100644
index 00000000..cf4e2120
--- /dev/null
+++ b/apps/couch/src/couch_httpd_db.erl
@@ -0,0 +1,1214 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_httpd_db).
+-include("couch_db.hrl").
+
+-export([handle_request/1, handle_compact_req/2, handle_design_req/2,
+    db_req/2, couch_doc_open/4,handle_changes_req/2,
+    update_doc_result_to_json/1, update_doc_result_to_json/2,
+    handle_design_info_req/3, handle_view_cleanup_req/2]).
+
+-import(couch_httpd,
+    [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2,
+    start_json_response/2,start_json_response/3,
+    send_chunk/2,last_chunk/1,end_json_response/1,
+    start_chunked_response/3, absolute_uri/2, send/2,
+    start_response_length/4]).
+
+-record(doc_query_args, {
+    options = [],
+    rev = nil,
+    open_revs = [],
+    update_type = interactive_edit,
+    atts_since = nil
+}).
+
+% Database request handlers
+handle_request(#httpd{path_parts=[DbName|RestParts],method=Method,
+        db_url_handlers=DbUrlHandlers}=Req)->
+    case {Method, RestParts} of
+    {'PUT', []} ->
+        create_db_req(Req, DbName);
+    {'DELETE', []} ->
+        % if we get ?rev=... the user is using a faulty script where the
+        % document id is empty by accident. Let them recover safely.
+        case couch_httpd:qs_value(Req, "rev", false) of
+        false -> delete_db_req(Req, DbName);
+        _Rev -> throw({bad_request,
+            "You tried to DELETE a database with a ?rev= parameter. "
+            ++ "Did you mean to DELETE a document instead?"})
+        end;
+    {_, []} ->
+        do_db_req(Req, fun db_req/2);
+    {_, [SecondPart|_]} ->
+        Handler = couch_util:dict_find(SecondPart, DbUrlHandlers, fun db_req/2),
+        do_db_req(Req, Handler)
+    end.
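
Illustration only (handler table contents assumed; the real DbUrlHandlers dict is assembled by couch_httpd from configuration): the second path segment selects a per-database handler, and anything that is not a registered underscore prefix falls back to db_req/2, i.e. plain document access.

    example_db_dispatch(SecondPart) ->
        DbUrlHandlers = dict:from_list([
            {<<"_changes">>, fun handle_changes_req/2},
            {<<"_compact">>, fun handle_compact_req/2}
        ]),
        couch_util:dict_find(SecondPart, DbUrlHandlers, fun db_req/2).
    %% example_db_dispatch(<<"_changes">>) returns the changes handler;
    %% example_db_dispatch(<<"mydocid">>) returns fun db_req/2.
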
+ +handle_changes_req(#httpd{method='GET'}=Req, Db) -> + MakeCallback = fun(Resp) -> + fun({change, Change, _}, "continuous") -> + send_chunk(Resp, [?JSON_ENCODE(Change) | "\n"]); + ({change, Change, Prepend}, _) -> + send_chunk(Resp, [Prepend, ?JSON_ENCODE(Change)]); + (start, "continuous") -> + ok; + (start, _) -> + send_chunk(Resp, "{\"results\":[\n"); + ({stop, EndSeq}, "continuous") -> + send_chunk( + Resp, + [?JSON_ENCODE({[{<<"last_seq">>, EndSeq}]}) | "\n"] + ), + end_json_response(Resp); + ({stop, EndSeq}, _) -> + send_chunk( + Resp, + io_lib:format("\n],\n\"last_seq\":~w}\n", [EndSeq]) + ), + end_json_response(Resp); + (timeout, _) -> + send_chunk(Resp, "\n") + end + end, + ChangesArgs = parse_changes_query(Req), + ChangesFun = couch_changes:handle_changes(ChangesArgs, Req, Db), + WrapperFun = case ChangesArgs#changes_args.feed of + "normal" -> + {ok, Info} = couch_db:get_db_info(Db), + CurrentEtag = couch_httpd:make_etag(Info), + fun(FeedChangesFun) -> + couch_httpd:etag_respond( + Req, + CurrentEtag, + fun() -> + {ok, Resp} = couch_httpd:start_json_response( + Req, 200, [{"Etag", CurrentEtag}] + ), + FeedChangesFun(MakeCallback(Resp)) + end + ) + end; + _ -> + % "longpoll" or "continuous" + {ok, Resp} = couch_httpd:start_json_response(Req, 200), + fun(FeedChangesFun) -> + FeedChangesFun(MakeCallback(Resp)) + end + end, + couch_stats_collector:track_process_count( + {httpd, clients_requesting_changes} + ), + WrapperFun(ChangesFun); + +handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_compact_req(#httpd{method='POST',path_parts=[DbName,_,Id|_]}=Req, Db) -> + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_view_compactor:start_compact(DbName, Id), + send_json(Req, 202, {[{ok, true}]}); + +handle_compact_req(#httpd{method='POST'}=Req, Db) -> + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_db:start_compact(Db), + send_json(Req, 202, {[{ok, true}]}); + +handle_compact_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + +handle_view_cleanup_req(#httpd{method='POST'}=Req, Db) -> + % delete unreferenced index files + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_view:cleanup_index_files(Db), + send_json(Req, 202, {[{ok, true}]}); + +handle_view_cleanup_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + + +handle_design_req(#httpd{ + path_parts=[_DbName, _Design, DesignName, <<"_",_/binary>> = Action | _Rest], + design_url_handlers = DesignUrlHandlers + }=Req, Db) -> + % load ddoc + DesignId = <<"_design/", DesignName/binary>>, + DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []), + Handler = couch_util:dict_find(Action, DesignUrlHandlers, fun(_, _, _) -> + throw({not_found, <<"missing handler: ", Action/binary>>}) + end), + Handler(Req, Db, DDoc); + +handle_design_req(Req, Db) -> + db_req(Req, Db). + +handle_design_info_req(#httpd{ + method='GET', + path_parts=[_DbName, _Design, DesignName, _] + }=Req, Db, _DDoc) -> + DesignId = <<"_design/", DesignName/binary>>, + {ok, GroupInfoList} = couch_view:get_group_info(Db, DesignId), + send_json(Req, 200, {[ + {name, DesignName}, + {view_index, {GroupInfoList}} + ]}); + +handle_design_info_req(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET"). 
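
A rough sketch of the framing MakeCallback produces for a "normal" feed, written as a standalone helper (EncodedRows is assumed to be a list of already JSON-encoded row strings, EndSeq an integer). Continuous feeds instead emit one JSON object per line and finish with a bare {"last_seq":N} line, as the callback clauses above show.

    normal_feed_frame(EncodedRows, EndSeq) ->
        ["{\"results\":[\n",
         string:join(EncodedRows, ",\n"),
         io_lib:format("\n],\n\"last_seq\":~w}\n", [EndSeq])].
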
+ +create_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> + ok = couch_httpd:verify_is_server_admin(Req), + case couch_server:create(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + couch_db:close(Db), + DbUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), + send_json(Req, 201, [{"Location", DbUrl}], {[{ok, true}]}); + Error -> + throw(Error) + end. + +delete_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> + ok = couch_httpd:verify_is_server_admin(Req), + case couch_server:delete(DbName, [{user_ctx, UserCtx}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + Error -> + throw(Error) + end. + +do_db_req(#httpd{user_ctx=UserCtx,path_parts=[DbName|_]}=Req, Fun) -> + case couch_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + try + Fun(Req, Db) + after + catch couch_db:close(Db) + end; + Error -> + throw(Error) + end. + +db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> + {ok, DbInfo} = couch_db:get_db_info(Db), + send_json(Req, {DbInfo}); + +db_req(#httpd{method='POST',path_parts=[DbName]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + Doc = couch_doc:from_json_obj(couch_httpd:json_body(Req)), + Doc2 = case Doc#doc.id of + <<"">> -> + Doc#doc{id=couch_uuids:new(), revs={0, []}}; + _ -> + Doc + end, + DocId = Doc2#doc.id, + case couch_httpd:qs_value(Req, "batch") of + "ok" -> + % async_batching + spawn(fun() -> + case catch(couch_db:update_doc(Db, Doc2, [])) of + {ok, _} -> ok; + Error -> + ?LOG_INFO("Batch doc error (~s): ~p",[DocId, Error]) + end + end), + + send_json(Req, 202, [], {[ + {ok, true}, + {id, DocId} + ]}); + _Normal -> + % normal + {ok, NewRev} = couch_db:update_doc(Db, Doc2, []), + DocUrl = absolute_uri( + Req, binary_to_list(<<"/",DbName/binary,"/", DocId/binary>>)), + send_json(Req, 201, [{"Location", DocUrl}], {[ + {ok, true}, + {id, DocId}, + {rev, couch_doc:rev_to_str(NewRev)} + ]}) + end; + + +db_req(#httpd{path_parts=[_DbName]}=Req, _Db) -> + send_method_not_allowed(Req, "DELETE,GET,HEAD,POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_ensure_full_commit">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + UpdateSeq = couch_db:get_update_seq(Db), + CommittedSeq = couch_db:get_committed_update_seq(Db), + {ok, StartTime} = + case couch_httpd:qs_value(Req, "seq") of + undefined -> + couch_db:ensure_full_commit(Db); + RequiredStr -> + RequiredSeq = list_to_integer(RequiredStr), + if RequiredSeq > UpdateSeq -> + throw({bad_request, + "can't do a full commit ahead of current update_seq"}); + RequiredSeq > CommittedSeq -> + couch_db:ensure_full_commit(Db); + true -> + {ok, Db#db.instance_start_time} + end + end, + send_json(Req, 201, {[ + {ok, true}, + {instance_start_time, StartTime} + ]}); + +db_req(#httpd{path_parts=[_,<<"_ensure_full_commit">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> + couch_stats_collector:increment({httpd, bulk_requests}), + couch_httpd:validate_ctype(Req, "application/json"), + {JsonProps} = couch_httpd:json_body_obj(Req), + DocsArray = couch_util:get_value(<<"docs">>, JsonProps), + case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of + "true" -> + Options = [full_commit]; + "false" -> + Options = [delay_commit]; + _ -> + Options = [] + end, + case couch_util:get_value(<<"new_edits">>, JsonProps, true) of + true -> + Docs = lists:map( + fun({ObjProps} = JsonObj) -> + Doc = couch_doc:from_json_obj(JsonObj), + validate_attachment_names(Doc), + Id = case Doc#doc.id of + <<>> 
-> couch_uuids:new(); + Id0 -> Id0 + end, + case couch_util:get_value(<<"_rev">>, ObjProps) of + undefined -> + Revs = {0, []}; + Rev -> + {Pos, RevId} = couch_doc:parse_rev(Rev), + Revs = {Pos, [RevId]} + end, + Doc#doc{id=Id,revs=Revs} + end, + DocsArray), + Options2 = + case couch_util:get_value(<<"all_or_nothing">>, JsonProps) of + true -> [all_or_nothing|Options]; + _ -> Options + end, + case couch_db:update_docs(Db, Docs, Options2) of + {ok, Results} -> + % output the results + DocResults = lists:zipwith(fun update_doc_result_to_json/2, + Docs, Results), + send_json(Req, 201, DocResults); + {aborted, Errors} -> + ErrorsJson = + lists:map(fun update_doc_result_to_json/1, Errors), + send_json(Req, 417, ErrorsJson) + end; + false -> + Docs = lists:map(fun(JsonObj) -> + Doc = couch_doc:from_json_obj(JsonObj), + validate_attachment_names(Doc), + Doc + end, DocsArray), + {ok, Errors} = couch_db:update_docs(Db, Docs, Options, replicated_changes), + ErrorsJson = + lists:map(fun update_doc_result_to_json/1, Errors), + send_json(Req, 201, ErrorsJson) + end; +db_req(#httpd{path_parts=[_,<<"_bulk_docs">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + {IdsRevs} = couch_httpd:json_body_obj(Req), + IdsRevs2 = [{Id, couch_doc:parse_revs(Revs)} || {Id, Revs} <- IdsRevs], + + case couch_db:purge_docs(Db, IdsRevs2) of + {ok, PurgeSeq, PurgedIdsRevs} -> + PurgedIdsRevs2 = [{Id, couch_doc:revs_to_strs(Revs)} || {Id, Revs} <- PurgedIdsRevs], + send_json(Req, 200, {[{<<"purge_seq">>, PurgeSeq}, {<<"purged">>, {PurgedIdsRevs2}}]}); + Error -> + throw(Error) + end; + +db_req(#httpd{path_parts=[_,<<"_purge">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='GET',path_parts=[_,<<"_all_docs">>]}=Req, Db) -> + all_docs_view(Req, Db, nil); + +db_req(#httpd{method='POST',path_parts=[_,<<"_all_docs">>]}=Req, Db) -> + {Fields} = couch_httpd:json_body_obj(Req), + case couch_util:get_value(<<"keys">>, Fields, nil) of + nil -> + ?LOG_DEBUG("POST to _all_docs with no keys member.", []), + all_docs_view(Req, Db, nil); + Keys when is_list(Keys) -> + all_docs_view(Req, Db, Keys); + _ -> + throw({bad_request, "`keys` member must be a array."}) + end; + +db_req(#httpd{path_parts=[_,<<"_all_docs">>]}=Req, _Db) -> + send_method_not_allowed(Req, "GET,HEAD,POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_missing_revs">>]}=Req, Db) -> + {JsonDocIdRevs} = couch_httpd:json_body_obj(Req), + JsonDocIdRevs2 = [{Id, [couch_doc:parse_rev(RevStr) || RevStr <- RevStrs]} || {Id, RevStrs} <- JsonDocIdRevs], + {ok, Results} = couch_db:get_missing_revs(Db, JsonDocIdRevs2), + Results2 = [{Id, couch_doc:revs_to_strs(Revs)} || {Id, Revs, _} <- Results], + send_json(Req, {[ + {missing_revs, {Results2}} + ]}); + +db_req(#httpd{path_parts=[_,<<"_missing_revs">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> + {JsonDocIdRevs} = couch_httpd:json_body_obj(Req), + JsonDocIdRevs2 = + [{Id, couch_doc:parse_revs(RevStrs)} || {Id, RevStrs} <- JsonDocIdRevs], + {ok, Results} = couch_db:get_missing_revs(Db, JsonDocIdRevs2), + Results2 = + lists:map(fun({Id, MissingRevs, PossibleAncestors}) -> + {Id, + {[{missing, couch_doc:revs_to_strs(MissingRevs)}] ++ + if PossibleAncestors == [] -> + []; + true -> + [{possible_ancestors, + couch_doc:revs_to_strs(PossibleAncestors)}] + end}} + end, Results), + 
send_json(Req, {Results2}); + +db_req(#httpd{path_parts=[_,<<"_revs_diff">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='PUT',path_parts=[_,<<"_security">>]}=Req, Db) -> + SecObj = couch_httpd:json_body(Req), + ok = couch_db:set_security(Db, SecObj), + send_json(Req, {[{<<"ok">>, true}]}); + +db_req(#httpd{method='GET',path_parts=[_,<<"_security">>]}=Req, Db) -> + send_json(Req, couch_db:get_security(Db)); + +db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> + send_method_not_allowed(Req, "PUT,GET"); + +db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>]}=Req, + Db) -> + Limit = couch_httpd:json_body(Req), + ok = couch_db:set_revs_limit(Db, Limit), + send_json(Req, {[{<<"ok">>, true}]}); + +db_req(#httpd{method='GET',path_parts=[_,<<"_revs_limit">>]}=Req, Db) -> + send_json(Req, couch_db:get_revs_limit(Db)); + +db_req(#httpd{path_parts=[_,<<"_revs_limit">>]}=Req, _Db) -> + send_method_not_allowed(Req, "PUT,GET"); + +% Special case to enable using an unencoded slash in the URL of design docs, +% as slashes in document IDs must otherwise be URL encoded. +db_req(#httpd{method='GET',mochi_req=MochiReq, path_parts=[DbName,<<"_design/",_/binary>>|_]}=Req, _Db) -> + PathFront = "/" ++ couch_httpd:quote(binary_to_list(DbName)) ++ "/", + [_|PathTail] = re:split(MochiReq:get(raw_path), "_design%2F", + [{return, list}]), + couch_httpd:send_redirect(Req, PathFront ++ "_design/" ++ + mochiweb_util:join(PathTail, "_design%2F")); + +db_req(#httpd{path_parts=[_DbName,<<"_design">>,Name]}=Req, Db) -> + db_doc_req(Req, Db, <<"_design/",Name/binary>>); + +db_req(#httpd{path_parts=[_DbName,<<"_design">>,Name|FileNameParts]}=Req, Db) -> + db_attachment_req(Req, Db, <<"_design/",Name/binary>>, FileNameParts); + + +% Special case to allow for accessing local documents without %2F +% encoding the docid. Throws out requests that don't have the second +% path part or that specify an attachment name. +db_req(#httpd{path_parts=[_DbName, <<"_local">>]}, _Db) -> + throw({bad_request, <<"Invalid _local document id.">>}); + +db_req(#httpd{path_parts=[_DbName, <<"_local/">>]}, _Db) -> + throw({bad_request, <<"Invalid _local document id.">>}); + +db_req(#httpd{path_parts=[_DbName, <<"_local">>, Name]}=Req, Db) -> + db_doc_req(Req, Db, <<"_local/", Name/binary>>); + +db_req(#httpd{path_parts=[_DbName, <<"_local">> | _Rest]}, _Db) -> + throw({bad_request, <<"_local documents do not accept attachments.">>}); + +db_req(#httpd{path_parts=[_, DocId]}=Req, Db) -> + db_doc_req(Req, Db, DocId); + +db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> + db_attachment_req(Req, Db, DocId, FileNameParts). 
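
A hypothetical _bulk_docs body in the EJSON form the bulk-docs handler above consumes (document ids, revisions and values are made up). With "new_edits":false the supplied _rev values are written as given and couch_db:update_docs/4 runs in replicated_changes mode; with the default of true, missing ids get fresh UUIDs and each _rev is parsed into the document's revision tree position.

    example_bulk_docs_body() ->
        {[{<<"docs">>, [
              {[{<<"_id">>, <<"doc1">>}, {<<"value">>, 1}]},
              {[{<<"_id">>, <<"doc2">>}, {<<"_rev">>, <<"1-abc">>}, {<<"value">>, 2}]}
          ]},
          {<<"new_edits">>, false}]}.
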
+ +all_docs_view(Req, Db, Keys) -> + #view_query_args{ + start_key = StartKey, + start_docid = StartDocId, + end_key = EndKey, + end_docid = EndDocId, + limit = Limit, + skip = SkipCount, + direction = Dir, + inclusive_end = Inclusive + } = QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, map), + {ok, Info} = couch_db:get_db_info(Db), + CurrentEtag = couch_httpd:make_etag(Info), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + + TotalRowCount = couch_util:get_value(doc_count, Info), + StartId = if is_binary(StartKey) -> StartKey; + true -> StartDocId + end, + EndId = if is_binary(EndKey) -> EndKey; + true -> EndDocId + end, + FoldAccInit = {Limit, SkipCount, undefined, []}, + UpdateSeq = couch_db:get_update_seq(Db), + JsonParams = case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + [{update_seq, UpdateSeq}]; + _Else -> + [] + end, + case Keys of + nil -> + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, UpdateSeq, + TotalRowCount, #view_fold_helper_funs{ + reduce_count = fun couch_db:enum_docs_reduce_to_count/1 + }), + AdapterFun = fun(#full_doc_info{id=Id}=FullDocInfo, Offset, Acc) -> + case couch_doc:to_doc_info(FullDocInfo) of + #doc_info{revs=[#rev_info{deleted=false, rev=Rev}|_]} -> + FoldlFun({{Id, Id}, {[{rev, couch_doc:rev_to_str(Rev)}]}}, Offset, Acc); + #doc_info{revs=[#rev_info{deleted=true}|_]} -> + {ok, Acc} + end + end, + {ok, LastOffset, FoldResult} = couch_db:enum_docs(Db, + AdapterFun, FoldAccInit, [{start_key, StartId}, {dir, Dir}, + {if Inclusive -> end_key; true -> end_key_gt end, EndId}]), + couch_httpd_view:finish_view_fold(Req, TotalRowCount, LastOffset, FoldResult, JsonParams); + _ -> + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, UpdateSeq, + TotalRowCount, #view_fold_helper_funs{ + reduce_count = fun(Offset) -> Offset end + }), + KeyFoldFun = case Dir of + fwd -> + fun lists:foldl/3; + rev -> + fun lists:foldr/3 + end, + FoldResult = KeyFoldFun( + fun(Key, FoldAcc) -> + DocInfo = (catch couch_db:get_doc_info(Db, Key)), + Doc = case DocInfo of + {ok, #doc_info{id=Id, revs=[#rev_info{deleted=false, rev=Rev}|_]}} -> + {{Id, Id}, {[{rev, couch_doc:rev_to_str(Rev)}]}}; + {ok, #doc_info{id=Id, revs=[#rev_info{deleted=true, rev=Rev}|_]}} -> + {{Id, Id}, {[{rev, couch_doc:rev_to_str(Rev)}, {deleted, true}]}}; + not_found -> + {{Key, error}, not_found}; + _ -> + ?LOG_ERROR("Invalid DocInfo: ~p", [DocInfo]), + throw({error, invalid_doc_info}) + end, + {_, FoldAcc2} = FoldlFun(Doc, 0, FoldAcc), + FoldAcc2 + end, FoldAccInit, Keys), + couch_httpd_view:finish_view_fold(Req, TotalRowCount, 0, FoldResult, JsonParams) + end + end). + +db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> + % check for the existence of the doc to handle the 404 case. 
+ couch_doc_open(Db, DocId, nil, []), + case couch_httpd:qs_value(Req, "rev") of + undefined -> + update_doc(Req, Db, DocId, + couch_doc_from_req(Req, DocId, {[{<<"_deleted">>,true}]})); + Rev -> + update_doc(Req, Db, DocId, + couch_doc_from_req(Req, DocId, + {[{<<"_rev">>, ?l2b(Rev)},{<<"_deleted">>,true}]})) + end; + +db_doc_req(#httpd{method='GET'}=Req, Db, DocId) -> + #doc_query_args{ + rev = Rev, + open_revs = Revs, + options = Options, + atts_since = AttsSince + } = parse_doc_query(Req), + case Revs of + [] -> + Options2 = + if AttsSince /= nil -> + [{atts_since, AttsSince}, attachments | Options]; + true -> Options + end, + Doc = couch_doc_open(Db, DocId, Rev, Options2), + send_doc(Req, Doc, Options2); + _ -> + {ok, Results} = couch_db:open_doc_revs(Db, DocId, Revs, Options), + AcceptedTypes = case couch_httpd:header_value(Req, "Accept") of + undefined -> []; + AcceptHeader -> string:tokens(AcceptHeader, "; ") + end, + case lists:member("multipart/mixed", AcceptedTypes) of + false -> + {ok, Resp} = start_json_response(Req, 200), + send_chunk(Resp, "["), + % We loop through the docs. The first time through the separator + % is whitespace, then a comma on subsequent iterations. + lists:foldl( + fun(Result, AccSeparator) -> + case Result of + {ok, Doc} -> + JsonDoc = couch_doc:to_json_obj(Doc, Options), + Json = ?JSON_ENCODE({[{ok, JsonDoc}]}), + send_chunk(Resp, AccSeparator ++ Json); + {{not_found, missing}, RevId} -> + RevStr = couch_doc:rev_to_str(RevId), + Json = ?JSON_ENCODE({[{"missing", RevStr}]}), + send_chunk(Resp, AccSeparator ++ Json) + end, + "," % AccSeparator now has a comma + end, + "", Results), + send_chunk(Resp, "]"), + end_json_response(Resp); + true -> + send_docs_multipart(Req, Results, Options) + end + end; + + +db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> + couch_httpd:validate_referer(Req), + couch_doc:validate_docid(DocId), + couch_httpd:validate_ctype(Req, "multipart/form-data"), + Form = couch_httpd:parse_form(Req), + case proplists:is_defined("_doc", Form) of + true -> + Json = ?JSON_DECODE(couch_util:get_value("_doc", Form)), + Doc = couch_doc_from_req(Req, DocId, Json); + false -> + Rev = couch_doc:parse_rev(list_to_binary(couch_util:get_value("_rev", Form))), + {ok, [{ok, Doc}]} = couch_db:open_doc_revs(Db, DocId, [Rev], []) + end, + UpdatedAtts = [ + #att{name=validate_attachment_name(Name), + type=list_to_binary(ContentType), + data=Content} || + {Name, {ContentType, _}, Content} <- + proplists:get_all_values("_attachments", Form) + ], + #doc{atts=OldAtts} = Doc, + OldAtts2 = lists:flatmap( + fun(#att{name=OldName}=Att) -> + case [1 || A <- UpdatedAtts, A#att.name == OldName] of + [] -> [Att]; % the attachment wasn't in the UpdatedAtts, return it + _ -> [] % the attachment was in the UpdatedAtts, drop it + end + end, OldAtts), + NewDoc = Doc#doc{ + atts = UpdatedAtts ++ OldAtts2 + }, + {ok, NewRev} = couch_db:update_doc(Db, NewDoc, []), + + send_json(Req, 201, [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewRev)) ++ "\""}], {[ + {ok, true}, + {id, DocId}, + {rev, couch_doc:rev_to_str(NewRev)} + ]}); + +db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> + #doc_query_args{ + update_type = UpdateType + } = parse_doc_query(Req), + couch_doc:validate_docid(DocId), + + Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)), + RespHeaders = [{"Location", Loc}], + case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of + ("multipart/related;" ++ _) = ContentType -> + {ok, Doc0} = 
couch_doc:doc_from_multi_part_stream(ContentType, + fun() -> receive_request_data(Req) end), + Doc = couch_doc_from_req(Req, DocId, Doc0), + update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType); + _Else -> + case couch_httpd:qs_value(Req, "batch") of + "ok" -> + % batch + Doc = couch_doc_from_req(Req, DocId, couch_httpd:json_body(Req)), + + spawn(fun() -> + case catch(couch_db:update_doc(Db, Doc, [])) of + {ok, _} -> ok; + Error -> + ?LOG_INFO("Batch doc error (~s): ~p",[DocId, Error]) + end + end), + send_json(Req, 202, [], {[ + {ok, true}, + {id, DocId} + ]}); + _Normal -> + % normal + Body = couch_httpd:json_body(Req), + Doc = couch_doc_from_req(Req, DocId, Body), + update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType) + end + end; + +db_doc_req(#httpd{method='COPY'}=Req, Db, SourceDocId) -> + SourceRev = + case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of + missing_rev -> nil; + Rev -> Rev + end, + {TargetDocId, TargetRevs} = parse_copy_destination_header(Req), + % open old doc + Doc = couch_doc_open(Db, SourceDocId, SourceRev, []), + % save new doc + {ok, NewTargetRev} = couch_db:update_doc(Db, + Doc#doc{id=TargetDocId, revs=TargetRevs}, []), + % respond + send_json(Req, 201, + [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewTargetRev)) ++ "\""}], + update_doc_result_to_json(TargetDocId, {ok, NewTargetRev})); + +db_doc_req(Req, _Db, _DocId) -> + send_method_not_allowed(Req, "DELETE,GET,HEAD,POST,PUT,COPY"). + + +send_doc(Req, Doc, Options) -> + case Doc#doc.meta of + [] -> + DiskEtag = couch_httpd:doc_etag(Doc), + % output etag only when we have no meta + couch_httpd:etag_respond(Req, DiskEtag, fun() -> + send_doc_efficiently(Req, Doc, [{"Etag", DiskEtag}], Options) + end); + _ -> + send_doc_efficiently(Req, Doc, [], Options) + end. + + +send_doc_efficiently(Req, #doc{atts=[]}=Doc, Headers, Options) -> + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)); +send_doc_efficiently(Req, #doc{atts=Atts}=Doc, Headers, Options) -> + case lists:member(attachments, Options) of + true -> + AcceptedTypes = case couch_httpd:header_value(Req, "Accept") of + undefined -> []; + AcceptHeader -> string:tokens(AcceptHeader, ", ") + end, + case lists:member("multipart/related", AcceptedTypes) of + false -> + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)); + true -> + Boundary = couch_uuids:random(), + JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, + [attachments, follows|Options])), + {ContentType, Len} = couch_doc:len_doc_to_multi_part_stream( + Boundary,JsonBytes, Atts,false), + CType = {<<"Content-Type">>, ContentType}, + {ok, Resp} = start_response_length(Req, 200, [CType|Headers], Len), + couch_doc:doc_to_multi_part_stream(Boundary,JsonBytes,Atts, + fun(Data) -> couch_httpd:send(Resp, Data) end, false) + end; + false -> + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)) + end. 
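
A sketch of the Accept-header test used by send_doc_efficiently above: attachments are only streamed as multipart/related when the client advertises that media type; otherwise they are inlined as base64 inside the JSON document.

    wants_multipart(undefined) ->
        false;
    wants_multipart(AcceptHeader) ->
        lists:member("multipart/related", string:tokens(AcceptHeader, ", ")).
    %% wants_multipart("multipart/related, application/json") =:= true.
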
+ +send_docs_multipart(Req, Results, Options) -> + OuterBoundary = couch_uuids:random(), + InnerBoundary = couch_uuids:random(), + CType = {"Content-Type", + "multipart/mixed; boundary=\"" ++ ?b2l(OuterBoundary) ++ "\""}, + {ok, Resp} = start_chunked_response(Req, 200, [CType]), + couch_httpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), + lists:foreach( + fun({ok, #doc{atts=Atts}=Doc}) -> + JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, + [attachments,follows|Options])), + {ContentType, _Len} = couch_doc:len_doc_to_multi_part_stream( + InnerBoundary, JsonBytes, Atts, false), + couch_httpd:send_chunk(Resp, <<"\r\nContent-Type: ", + ContentType/binary, "\r\n\r\n">>), + couch_doc:doc_to_multi_part_stream(InnerBoundary, JsonBytes, Atts, + fun(Data) -> couch_httpd:send_chunk(Resp, Data) + end, false), + couch_httpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>); + ({{not_found, missing}, RevId}) -> + RevStr = couch_doc:rev_to_str(RevId), + Json = ?JSON_ENCODE({[{"missing", RevStr}]}), + couch_httpd:send_chunk(Resp, + [<<"\r\nContent-Type: application/json; error=\"true\"\r\n\r\n">>, + Json, + <<"\r\n--", OuterBoundary/binary>>]) + end, Results), + couch_httpd:send_chunk(Resp, <<"--">>), + couch_httpd:last_chunk(Resp). + +receive_request_data(Req) -> + {couch_httpd:recv(Req, 0), fun() -> receive_request_data(Req) end}. + +update_doc_result_to_json({{Id, Rev}, Error}) -> + {_Code, Err, Msg} = couch_httpd:error_info(Error), + {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)}, + {error, Err}, {reason, Msg}]}. + +update_doc_result_to_json(#doc{id=DocId}, Result) -> + update_doc_result_to_json(DocId, Result); +update_doc_result_to_json(DocId, {ok, NewRev}) -> + {[{id, DocId}, {rev, couch_doc:rev_to_str(NewRev)}]}; +update_doc_result_to_json(DocId, Error) -> + {_Code, ErrorStr, Reason} = couch_httpd:error_info(Error), + {[{id, DocId}, {error, ErrorStr}, {reason, Reason}]}. + + +update_doc(Req, Db, DocId, Doc) -> + update_doc(Req, Db, DocId, Doc, []). + +update_doc(Req, Db, DocId, Doc, Headers) -> + update_doc(Req, Db, DocId, Doc, Headers, interactive_edit). + +update_doc(Req, Db, DocId, #doc{deleted=Deleted}=Doc, Headers, UpdateType) -> + case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of + "true" -> + Options = [full_commit]; + "false" -> + Options = [delay_commit]; + _ -> + Options = [] + end, + {ok, NewRev} = couch_db:update_doc(Db, Doc, Options, UpdateType), + NewRevStr = couch_doc:rev_to_str(NewRev), + ResponseHeaders = [{"Etag", <<"\"", NewRevStr/binary, "\"">>}] ++ Headers, + send_json(Req, if Deleted -> 200; true -> 201 end, + ResponseHeaders, {[ + {ok, true}, + {id, DocId}, + {rev, NewRevStr}]}). + +couch_doc_from_req(Req, DocId, #doc{revs=Revs}=Doc) -> + validate_attachment_names(Doc), + ExplicitDocRev = + case Revs of + {Start,[RevId|_]} -> {Start, RevId}; + _ -> undefined + end, + case extract_header_rev(Req, ExplicitDocRev) of + missing_rev -> + Revs2 = {0, []}; + ExplicitDocRev -> + Revs2 = Revs; + {Pos, Rev} -> + Revs2 = {Pos, [Rev]} + end, + Doc#doc{id=DocId, revs=Revs2}; +couch_doc_from_req(Req, DocId, Json) -> + couch_doc_from_req(Req, DocId, couch_doc:from_json_obj(Json)). + + +% Useful for debugging +% couch_doc_open(Db, DocId) -> +% couch_doc_open(Db, DocId, nil, []). 
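
A sketch of the X-Couch-Full-Commit header-to-options mapping shared by update_doc/6 above and the _bulk_docs handler earlier in this file, pulled out as a standalone helper.

    commit_options("true")  -> [full_commit];
    commit_options("false") -> [delay_commit];
    commit_options(_)       -> [].
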
+ +couch_doc_open(Db, DocId, Rev, Options) -> + case Rev of + nil -> % open most recent rev + case couch_db:open_doc(Db, DocId, Options) of + {ok, Doc} -> + Doc; + Error -> + throw(Error) + end; + _ -> % open a specific rev (deletions come back as stubs) + case couch_db:open_doc_revs(Db, DocId, [Rev], Options) of + {ok, [{ok, Doc}]} -> + Doc; + {ok, [{{not_found, missing}, Rev}]} -> + throw(not_found); + {ok, [Else]} -> + throw(Else) + end + end. + +% Attachment request handlers + +db_attachment_req(#httpd{method='GET'}=Req, Db, DocId, FileNameParts) -> + FileName = list_to_binary(mochiweb_util:join(lists:map(fun binary_to_list/1, FileNameParts),"/")), + #doc_query_args{ + rev=Rev, + options=Options + } = parse_doc_query(Req), + #doc{ + atts=Atts + } = Doc = couch_doc_open(Db, DocId, Rev, Options), + case [A || A <- Atts, A#att.name == FileName] of + [] -> + throw({not_found, "Document is missing attachment"}); + [#att{type=Type, encoding=Enc, disk_len=DiskLen, att_len=AttLen}=Att] -> + Etag = couch_httpd:doc_etag(Doc), + ReqAcceptsAttEnc = lists:member( + atom_to_list(Enc), + couch_httpd:accepted_encodings(Req) + ), + Headers = [ + {"ETag", Etag}, + {"Cache-Control", "must-revalidate"}, + {"Content-Type", binary_to_list(Type)} + ] ++ case ReqAcceptsAttEnc of + true -> + [{"Content-Encoding", atom_to_list(Enc)}]; + _ -> + [] + end, + Len = case {Enc, ReqAcceptsAttEnc} of + {identity, _} -> + % stored and served in identity form + DiskLen; + {_, false} when DiskLen =/= AttLen -> + % Stored encoded, but client doesn't accept the encoding we used, + % so we need to decode on the fly. DiskLen is the identity length + % of the attachment. + DiskLen; + {_, true} -> + % Stored and served encoded. AttLen is the encoded length. + AttLen; + _ -> + % We received an encoded attachment and stored it as such, so we + % don't know the identity length. The client doesn't accept the + % encoding, and since we cannot serve a correct Content-Length + % header we'll fall back to a chunked response. + undefined + end, + AttFun = case ReqAcceptsAttEnc of + false -> + fun couch_doc:att_foldl_decode/3; + true -> + fun couch_doc:att_foldl/3 + end, + couch_httpd:etag_respond( + Req, + Etag, + fun() -> + case Len of + undefined -> + {ok, Resp} = start_chunked_response(Req, 200, Headers), + AttFun(Att, fun(Seg, _) -> send_chunk(Resp, Seg) end, ok), + last_chunk(Resp); + _ -> + {ok, Resp} = start_response_length(Req, 200, Headers, Len), + AttFun(Att, fun(Seg, _) -> send(Resp, Seg) end, ok) + end + end + ) + end; + + +db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileNameParts) + when (Method == 'PUT') or (Method == 'DELETE') -> + FileName = validate_attachment_name( + mochiweb_util:join( + lists:map(fun binary_to_list/1, + FileNameParts),"/")), + + NewAtt = case Method of + 'DELETE' -> + []; + _ -> + [#att{ + name = FileName, + type = case couch_httpd:header_value(Req,"Content-Type") of + undefined -> + % We could throw an error here or guess by the FileName. + % Currently, just giving it a default. 
+ <<"application/octet-stream">>; + CType -> + list_to_binary(CType) + end, + data = case couch_httpd:body_length(Req) of + undefined -> + <<"">>; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + fun(MaxChunkSize, ChunkFun, InitState) -> + couch_httpd:recv_chunked(Req, MaxChunkSize, + ChunkFun, InitState) + end; + 0 -> + <<"">>; + Length when is_integer(Length) -> + Expect = case couch_httpd:header_value(Req, "expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + MochiReq:start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end, + + + fun() -> couch_httpd:recv(Req, 0) end; + Length -> + exit({length_not_integer, Length}) + end, + att_len = case couch_httpd:header_value(Req,"Content-Length") of + undefined -> + undefined; + Length -> + list_to_integer(Length) + end, + md5 = get_md5_header(Req), + encoding = case string:to_lower(string:strip( + couch_httpd:header_value(Req,"Content-Encoding","identity") + )) of + "identity" -> + identity; + "gzip" -> + gzip; + _ -> + throw({ + bad_ctype, + "Only gzip and identity content-encodings are supported" + }) + end + }] + end, + + Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of + missing_rev -> % make the new doc + couch_doc:validate_docid(DocId), + #doc{id=DocId}; + Rev -> + case couch_db:open_doc_revs(Db, DocId, [Rev], []) of + {ok, [{ok, Doc0}]} -> Doc0; + {ok, [Error]} -> throw(Error) + end + end, + + #doc{atts=Atts} = Doc, + DocEdited = Doc#doc{ + atts = NewAtt ++ [A || A <- Atts, A#att.name /= FileName] + }, + {ok, UpdatedRev} = couch_db:update_doc(Db, DocEdited, []), + #db{name=DbName} = Db, + + {Status, Headers} = case Method of + 'DELETE' -> + {200, []}; + _ -> + {201, [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(UpdatedRev)) ++ "\""}, + {"Location", absolute_uri(Req, "/" ++ + binary_to_list(DbName) ++ "/" ++ + binary_to_list(DocId) ++ "/" ++ + binary_to_list(FileName) + )}]} + end, + send_json(Req,Status, Headers, {[ + {ok, true}, + {id, DocId}, + {rev, couch_doc:rev_to_str(UpdatedRev)} + ]}); + +db_attachment_req(Req, _Db, _DocId, _FileNameParts) -> + send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT"). + + +get_md5_header(Req) -> + ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"), + Length = couch_httpd:body_length(Req), + Trailer = couch_httpd:header_value(Req, "Trailer"), + case {ContentMD5, Length, Trailer} of + _ when is_list(ContentMD5) orelse is_binary(ContentMD5) -> + base64:decode(ContentMD5); + {_, chunked, undefined} -> + <<>>; + {_, chunked, _} -> + case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of + {match, _} -> + md5_in_footer; + _ -> + <<>> + end; + _ -> + <<>> + end. 
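
A sketch of the Content-Encoding handling in the attachment PUT clause above: only identity and gzip are accepted for uploads, anything else is rejected with bad_ctype.

    parse_att_encoding(HeaderValue) ->
        case string:to_lower(string:strip(HeaderValue)) of
            "identity" -> identity;
            "gzip" -> gzip;
            _ -> throw({bad_ctype,
                    "Only gzip and identity content-encodings are supported"})
        end.
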
+ +parse_doc_query(Req) -> + lists:foldl(fun({Key,Value}, Args) -> + case {Key, Value} of + {"attachments", "true"} -> + Options = [attachments | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"meta", "true"} -> + Options = [revs_info, conflicts, deleted_conflicts | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"revs", "true"} -> + Options = [revs | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"local_seq", "true"} -> + Options = [local_seq | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"revs_info", "true"} -> + Options = [revs_info | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"conflicts", "true"} -> + Options = [conflicts | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"deleted_conflicts", "true"} -> + Options = [deleted_conflicts | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"rev", Rev} -> + Args#doc_query_args{rev=couch_doc:parse_rev(Rev)}; + {"open_revs", "all"} -> + Args#doc_query_args{open_revs=all}; + {"open_revs", RevsJsonStr} -> + JsonArray = ?JSON_DECODE(RevsJsonStr), + Args#doc_query_args{open_revs=couch_doc:parse_revs(JsonArray)}; + {"atts_since", RevsJsonStr} -> + JsonArray = ?JSON_DECODE(RevsJsonStr), + Args#doc_query_args{atts_since = couch_doc:parse_revs(JsonArray)}; + {"new_edits", "false"} -> + Args#doc_query_args{update_type=replicated_changes}; + {"new_edits", "true"} -> + Args#doc_query_args{update_type=interactive_edit}; + {"att_encoding_info", "true"} -> + Options = [att_encoding_info | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + _Else -> % unknown key value pair, ignore. + Args + end + end, #doc_query_args{}, couch_httpd:qs(Req)). + +parse_changes_query(Req) -> + lists:foldl(fun({Key, Value}, Args) -> + case {Key, Value} of + {"feed", _} -> + Args#changes_args{feed=Value}; + {"descending", "true"} -> + Args#changes_args{dir=rev}; + {"since", _} -> + Args#changes_args{since=list_to_integer(Value)}; + {"limit", _} -> + Args#changes_args{limit=list_to_integer(Value)}; + {"style", _} -> + Args#changes_args{style=list_to_existing_atom(Value)}; + {"heartbeat", "true"} -> + Args#changes_args{heartbeat=true}; + {"heartbeat", _} -> + Args#changes_args{heartbeat=list_to_integer(Value)}; + {"timeout", _} -> + Args#changes_args{timeout=list_to_integer(Value)}; + {"include_docs", "true"} -> + Args#changes_args{include_docs=true}; + {"filter", _} -> + Args#changes_args{filter=Value}; + _Else -> % unknown key value pair, ignore. + Args + end + end, #changes_args{}, couch_httpd:qs(Req)). + +extract_header_rev(Req, ExplicitRev) when is_binary(ExplicitRev) or is_list(ExplicitRev)-> + extract_header_rev(Req, couch_doc:parse_rev(ExplicitRev)); +extract_header_rev(Req, ExplicitRev) -> + Etag = case couch_httpd:header_value(Req, "If-Match") of + undefined -> undefined; + Value -> couch_doc:parse_rev(string:strip(Value, both, $")) + end, + case {ExplicitRev, Etag} of + {undefined, undefined} -> missing_rev; + {_, undefined} -> ExplicitRev; + {undefined, _} -> Etag; + _ when ExplicitRev == Etag -> Etag; + _ -> + throw({bad_request, "Document rev and etag have different values"}) + end. 
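
A sketch (query pairs assumed) of the accumulator pattern parse_doc_query/1 above uses, reduced to two of the boolean flags; the full fold also handles rev, open_revs, atts_since and new_edits as shown.

    fold_doc_query_options(QsPairs) ->
        lists:foldl(fun({"attachments", "true"}, Opts) -> [attachments | Opts];
                       ({"conflicts", "true"}, Opts) -> [conflicts | Opts];
                       (_Other, Opts) -> Opts
                    end, [], QsPairs).
    %% fold_doc_query_options([{"attachments","true"},{"conflicts","true"}])
    %%     =:= [conflicts, attachments].
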
+ + +parse_copy_destination_header(Req) -> + Destination = couch_httpd:header_value(Req, "Destination"), + case re:run(Destination, "\\?", [{capture, none}]) of + nomatch -> + {list_to_binary(Destination), {0, []}}; + match -> + [DocId, RevQs] = re:split(Destination, "\\?", [{return, list}]), + [_RevQueryKey, Rev] = re:split(RevQs, "=", [{return, list}]), + {Pos, RevId} = couch_doc:parse_rev(Rev), + {list_to_binary(DocId), {Pos, [RevId]}} + end. + +validate_attachment_names(Doc) -> + lists:foreach(fun(#att{name=Name}) -> + validate_attachment_name(Name) + end, Doc#doc.atts). + +validate_attachment_name(Name) when is_list(Name) -> + validate_attachment_name(list_to_binary(Name)); +validate_attachment_name(<<"_",_/binary>>) -> + throw({bad_request, <<"Attachment name can't start with '_'">>}); +validate_attachment_name(Name) -> + case is_valid_utf8(Name) of + true -> Name; + false -> throw({bad_request, <<"Attachment name is not UTF-8 encoded">>}) + end. + +%% borrowed from mochijson2:json_bin_is_safe() +is_valid_utf8(<<>>) -> + true; +is_valid_utf8(<<C, Rest/binary>>) -> + case C of + $\" -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + is_valid_utf8(Rest); + _ -> + false + end. diff --git a/apps/couch/src/couch_httpd_external.erl b/apps/couch/src/couch_httpd_external.erl new file mode 100644 index 00000000..07202934 --- /dev/null +++ b/apps/couch/src/couch_httpd_external.erl @@ -0,0 +1,162 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_external). + +-export([handle_external_req/2, handle_external_req/3]). +-export([send_external_response/2, json_req_obj/2, json_req_obj/3]). +-export([default_or_content_type/2, parse_external_response/1]). + +-import(couch_httpd,[send_error/4]). + +-include("couch_db.hrl"). + +% handle_external_req/2 +% for the old type of config usage: +% _external = {couch_httpd_external, handle_external_req} +% with urls like +% /db/_external/action/design/name +handle_external_req(#httpd{ + path_parts=[_DbName, _External, UrlName | _Path] + }=HttpReq, Db) -> + process_external_req(HttpReq, Db, UrlName); +handle_external_req(#httpd{path_parts=[_, _]}=Req, _Db) -> + send_error(Req, 404, <<"external_server_error">>, <<"No server name specified.">>); +handle_external_req(Req, _) -> + send_error(Req, 404, <<"external_server_error">>, <<"Broken assumption">>). + +% handle_external_req/3 +% for this type of config usage: +% _action = {couch_httpd_external, handle_external_req, <<"action">>} +% with urls like +% /db/_action/design/name +handle_external_req(HttpReq, Db, Name) -> + process_external_req(HttpReq, Db, Name). 
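
Illustration (names hypothetical) of which clause an external request reaches and what server Name it resolves to, matching the path patterns above:
GET /db/_external/myserver/a/b reaches handle_external_req/2 with Name = <<"myserver">>, while GET /db/_myaction/a/b reaches handle_external_req/3 when _myaction is configured as {couch_httpd_external, handle_external_req, <<"myaction">>}.

    external_name([_DbName, <<"_external">>, UrlName | _Path]) -> UrlName;
    external_name(_Other) -> no_external_name.
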
+ +process_external_req(HttpReq, Db, Name) -> + + Response = couch_external_manager:execute(binary_to_list(Name), + json_req_obj(HttpReq, Db)), + + case Response of + {unknown_external_server, Msg} -> + send_error(HttpReq, 404, <<"external_server_error">>, Msg); + _ -> + send_external_response(HttpReq, Response) + end. +json_req_obj(Req, Db) -> json_req_obj(Req, Db, null). +json_req_obj(#httpd{mochi_req=Req, + method=Method, + path_parts=Path, + req_body=ReqBody + }, Db, DocId) -> + Body = case ReqBody of + undefined -> Req:recv_body(); + Else -> Else + end, + ParsedForm = case Req:get_primary_header_value("content-type") of + "application/x-www-form-urlencoded" ++ _ -> + mochiweb_util:parse_qs(Body); + _ -> + [] + end, + Headers = Req:get(headers), + Hlist = mochiweb_headers:to_list(Headers), + {ok, Info} = couch_db:get_db_info(Db), + % add headers... + {[{<<"info">>, {Info}}, + {<<"id">>, DocId}, + {<<"uuid">>, couch_uuids:new()}, + {<<"method">>, Method}, + {<<"path">>, Path}, + {<<"query">>, json_query_keys(to_json_terms(Req:parse_qs()))}, + {<<"headers">>, to_json_terms(Hlist)}, + {<<"body">>, Body}, + {<<"peer">>, ?l2b(Req:get(peer))}, + {<<"form">>, to_json_terms(ParsedForm)}, + {<<"cookie">>, to_json_terms(Req:parse_cookie())}, + {<<"userCtx">>, couch_util:json_user_ctx(Db)}]}. + +to_json_terms(Data) -> + to_json_terms(Data, []). + +to_json_terms([], Acc) -> + {lists:reverse(Acc)}; +to_json_terms([{Key, Value} | Rest], Acc) when is_atom(Key) -> + to_json_terms(Rest, [{list_to_binary(atom_to_list(Key)), list_to_binary(Value)} | Acc]); +to_json_terms([{Key, Value} | Rest], Acc) -> + to_json_terms(Rest, [{list_to_binary(Key), list_to_binary(Value)} | Acc]). + +json_query_keys({Json}) -> + json_query_keys(Json, []). +json_query_keys([], Acc) -> + {lists:reverse(Acc)}; +json_query_keys([{<<"startkey">>, Value} | Rest], Acc) -> + json_query_keys(Rest, [{<<"startkey">>, couch_util:json_decode(Value)}|Acc]); +json_query_keys([{<<"endkey">>, Value} | Rest], Acc) -> + json_query_keys(Rest, [{<<"endkey">>, couch_util:json_decode(Value)}|Acc]); +json_query_keys([{<<"key">>, Value} | Rest], Acc) -> + json_query_keys(Rest, [{<<"key">>, couch_util:json_decode(Value)}|Acc]); +json_query_keys([Term | Rest], Acc) -> + json_query_keys(Rest, [Term|Acc]). + +send_external_response(#httpd{mochi_req=MochiReq}=Req, Response) -> + #extern_resp_args{ + code = Code, + data = Data, + ctype = CType, + headers = Headers + } = parse_external_response(Response), + couch_httpd:log_request(Req, Code), + Resp = MochiReq:respond({Code, + default_or_content_type(CType, Headers ++ couch_httpd:server_header()), Data}), + {ok, Resp}. 
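
An abbreviated sketch (all values hypothetical) of the EJSON object json_req_obj/3 above hands to couch_external_manager:execute/2; the real object also carries the info, uuid, headers, form and cookie members built from the live request.

    example_json_req() ->
        {[{<<"id">>, null},
          {<<"method">>, 'GET'},
          {<<"path">>, [<<"mydb">>, <<"_external">>, <<"myserver">>]},
          {<<"query">>, {[{<<"q">>, <<"x">>}]}},
          {<<"body">>, undefined},
          {<<"peer">>, <<"127.0.0.1">>},
          {<<"userCtx">>, {[{<<"db">>, <<"mydb">>}, {<<"name">>, null}, {<<"roles">>, []}]}}]}.
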
+ +parse_external_response({Response}) -> + lists:foldl(fun({Key,Value}, Args) -> + case {Key, Value} of + {"", _} -> + Args; + {<<"code">>, Value} -> + Args#extern_resp_args{code=Value}; + {<<"stop">>, true} -> + Args#extern_resp_args{stop=true}; + {<<"json">>, Value} -> + Args#extern_resp_args{ + data=?JSON_ENCODE(Value), + ctype="application/json"}; + {<<"body">>, Value} -> + Args#extern_resp_args{data=Value, ctype="text/html; charset=utf-8"}; + {<<"base64">>, Value} -> + Args#extern_resp_args{ + data=base64:decode(Value), + ctype="application/binary" + }; + {<<"headers">>, {Headers}} -> + NewHeaders = lists:map(fun({Header, HVal}) -> + {binary_to_list(Header), binary_to_list(HVal)} + end, Headers), + Args#extern_resp_args{headers=NewHeaders}; + _ -> % unknown key + Msg = lists:flatten(io_lib:format("Invalid data from external server: ~p", [{Key, Value}])), + throw({external_response_error, Msg}) + end + end, #extern_resp_args{}, Response). + +default_or_content_type(DefaultContentType, Headers) -> + IsContentType = fun({X, _}) -> string:to_lower(X) == "content-type" end, + case lists:any(IsContentType, Headers) of + false -> + [{"Content-Type", DefaultContentType} | Headers]; + true -> + Headers + end. diff --git a/apps/couch/src/couch_httpd_misc_handlers.erl b/apps/couch/src/couch_httpd_misc_handlers.erl new file mode 100644 index 00000000..0a6f4a42 --- /dev/null +++ b/apps/couch/src/couch_httpd_misc_handlers.erl @@ -0,0 +1,219 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_misc_handlers). + +-export([handle_welcome_req/2,handle_favicon_req/2,handle_utils_dir_req/2, + handle_all_dbs_req/1,handle_replicate_req/1,handle_restart_req/1, + handle_uuids_req/1,handle_config_req/1,handle_log_req/1, + handle_task_status_req/1]). + +-export([increment_update_seq_req/2]). + + +-include("couch_db.hrl"). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, + start_json_response/2,send_chunk/2,last_chunk/1,end_json_response/1, + start_chunked_response/3, send_error/4]). + +% httpd global handlers + +handle_welcome_req(#httpd{method='GET'}=Req, WelcomeMessage) -> + send_json(Req, {[ + {couchdb, WelcomeMessage}, + {version, list_to_binary(couch_server:get_version())} + ]}); +handle_welcome_req(Req, _) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_favicon_req(#httpd{method='GET'}=Req, DocumentRoot) -> + {{Year,Month,Day},Time} = erlang:localtime(), + OneYearFromNow = {{Year+1,Month,Day},Time}, + CachingHeaders = [ + %favicon should expire a year from now + {"Cache-Control", "public, max-age=31536000"}, + {"Expires", httpd_util:rfc1123_date(OneYearFromNow)} + ], + couch_httpd:serve_file(Req, "favicon.ico", DocumentRoot, CachingHeaders); + +handle_favicon_req(Req, _) -> + send_method_not_allowed(Req, "GET,HEAD"). 
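
A sketch of the year-ahead caching pair handle_favicon_req/2 above attaches, written as a standalone helper reusable for any long-lived static resource.

    year_long_cache_headers() ->
        {{Year, Month, Day}, Time} = erlang:localtime(),
        [{"Cache-Control", "public, max-age=31536000"},
         {"Expires", httpd_util:rfc1123_date({{Year + 1, Month, Day}, Time})}].
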
+ +handle_utils_dir_req(#httpd{method='GET'}=Req, DocumentRoot) -> + "/" ++ UrlPath = couch_httpd:path(Req), + case couch_httpd:partition(UrlPath) of + {_ActionKey, "/", RelativePath} -> + % GET /_utils/path or GET /_utils/ + couch_httpd:serve_file(Req, RelativePath, DocumentRoot); + {_ActionKey, "", _RelativePath} -> + % GET /_utils + RedirectPath = couch_httpd:path(Req) ++ "/", + couch_httpd:send_redirect(Req, RedirectPath) + end; +handle_utils_dir_req(Req, _) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_all_dbs_req(#httpd{method='GET'}=Req) -> + {ok, DbNames} = couch_server:all_databases(), + send_json(Req, DbNames); +handle_all_dbs_req(Req) -> + send_method_not_allowed(Req, "GET,HEAD"). + + +handle_task_status_req(#httpd{method='GET'}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + % convert the list of prop lists to a list of json objects + send_json(Req, [{Props} || Props <- couch_task_status:all()]); +handle_task_status_req(Req) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_replicate_req(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + PostBody = couch_httpd:json_body_obj(Req), + try couch_rep:replicate(PostBody, Req#httpd.user_ctx) of + {ok, {continuous, RepId}} -> + send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); + {ok, {cancelled, RepId}} -> + send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); + {ok, {JsonResults}} -> + send_json(Req, {[{ok, true} | JsonResults]}); + {error, {Type, Details}} -> + send_json(Req, 500, {[{error, Type}, {reason, Details}]}); + {error, not_found} -> + send_json(Req, 404, {[{error, not_found}]}); + {error, Reason} -> + send_json(Req, 500, {[{error, Reason}]}) + catch + throw:{db_not_found, Msg} -> + send_json(Req, 404, {[{error, db_not_found}, {reason, Msg}]}) + end; +handle_replicate_req(Req) -> + send_method_not_allowed(Req, "POST"). + + +handle_restart_req(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_httpd:verify_is_server_admin(Req), + couch_server_sup:restart_core_server(), + send_json(Req, 200, {[{ok, true}]}); +handle_restart_req(Req) -> + send_method_not_allowed(Req, "POST"). + + +handle_uuids_req(#httpd{method='GET'}=Req) -> + Count = list_to_integer(couch_httpd:qs_value(Req, "count", "1")), + UUIDs = [couch_uuids:new() || _ <- lists:seq(1, Count)], + Etag = couch_httpd:make_etag(UUIDs), + couch_httpd:etag_respond(Req, Etag, fun() -> + CacheBustingHeaders = [ + {"Date", httpd_util:rfc1123_date()}, + {"Cache-Control", "no-cache"}, + % Past date, ON PURPOSE! + {"Expires", "Fri, 01 Jan 1990 00:00:00 GMT"}, + {"Pragma", "no-cache"}, + {"ETag", Etag} + ], + send_json(Req, 200, CacheBustingHeaders, {[{<<"uuids">>, UUIDs}]}) + end); +handle_uuids_req(Req) -> + send_method_not_allowed(Req, "GET"). 
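
A sketch of the body shape returned by GET /_uuids?count=N in the handler above; the Etag is computed over the same generated list, so each response carries its own Etag alongside the deliberately cache-busting headers.

    example_uuids_body(Count) ->
        {[{<<"uuids">>, [couch_uuids:new() || _ <- lists:seq(1, Count)]}]}.
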
+ + +% Config request handler + + +% GET /_config/ +% GET /_config +handle_config_req(#httpd{method='GET', path_parts=[_]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + Grouped = lists:foldl(fun({{Section, Key}, Value}, Acc) -> + case dict:is_key(Section, Acc) of + true -> + dict:append(Section, {list_to_binary(Key), list_to_binary(Value)}, Acc); + false -> + dict:store(Section, [{list_to_binary(Key), list_to_binary(Value)}], Acc) + end + end, dict:new(), couch_config:all()), + KVs = dict:fold(fun(Section, Values, Acc) -> + [{list_to_binary(Section), {Values}} | Acc] + end, [], Grouped), + send_json(Req, 200, {KVs}); +% GET /_config/Section +handle_config_req(#httpd{method='GET', path_parts=[_,Section]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + KVs = [{list_to_binary(Key), list_to_binary(Value)} + || {Key, Value} <- couch_config:get(Section)], + send_json(Req, 200, {KVs}); +% PUT /_config/Section/Key +% "value" +handle_config_req(#httpd{method='PUT', path_parts=[_, Section, Key]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + Value = couch_httpd:json_body(Req), + Persist = couch_httpd:header_value(Req, "X-Couch-Persist") /= "false", + OldValue = couch_config:get(Section, Key, ""), + ok = couch_config:set(Section, Key, ?b2l(Value), Persist), + send_json(Req, 200, list_to_binary(OldValue)); +% GET /_config/Section/Key +handle_config_req(#httpd{method='GET', path_parts=[_, Section, Key]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + case couch_config:get(Section, Key, null) of + null -> + throw({not_found, unknown_config_value}); + Value -> + send_json(Req, 200, list_to_binary(Value)) + end; +% DELETE /_config/Section/Key +handle_config_req(#httpd{method='DELETE',path_parts=[_,Section,Key]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + Persist = couch_httpd:header_value(Req, "X-Couch-Persist") /= "false", + case couch_config:get(Section, Key, null) of + null -> + throw({not_found, unknown_config_value}); + OldValue -> + couch_config:delete(Section, Key, Persist), + send_json(Req, 200, list_to_binary(OldValue)) + end; +handle_config_req(Req) -> + send_method_not_allowed(Req, "GET,PUT,DELETE"). + + +% httpd db handlers + +increment_update_seq_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + {ok, NewSeq} = couch_db:increment_update_seq(Db), + send_json(Req, {[{ok, true}, + {update_seq, NewSeq} + ]}); +increment_update_seq_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + +% httpd log handlers + +handle_log_req(#httpd{method='GET'}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + Bytes = list_to_integer(couch_httpd:qs_value(Req, "bytes", "1000")), + Offset = list_to_integer(couch_httpd:qs_value(Req, "offset", "0")), + Chunk = couch_log:read(Bytes, Offset), + {ok, Resp} = start_chunked_response(Req, 200, [ + % send a plaintext response + {"Content-Type", "text/plain; charset=utf-8"}, + {"Content-Length", integer_to_list(length(Chunk))} + ]), + send_chunk(Resp, Chunk), + last_chunk(Resp); +handle_log_req(Req) -> + send_method_not_allowed(Req, "GET"). + + diff --git a/apps/couch/src/couch_httpd_oauth.erl b/apps/couch/src/couch_httpd_oauth.erl new file mode 100644 index 00000000..05ee10e2 --- /dev/null +++ b/apps/couch/src/couch_httpd_oauth.erl @@ -0,0 +1,176 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_oauth). +-include("couch_db.hrl"). + +-export([oauth_authentication_handler/1, handle_oauth_req/1, consumer_lookup/2]). + +% OAuth auth handler using per-node user db +oauth_authentication_handler(#httpd{mochi_req=MochiReq}=Req) -> + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + case couch_config:get("oauth_token_secrets", AccessToken) of + undefined -> + couch_httpd:send_error(Req, 400, <<"invalid_token">>, + <<"Invalid OAuth token.">>); + TokenSecret -> + ?LOG_DEBUG("OAuth URL is: ~p", [URL]), + case oauth:verify(Signature, atom_to_list(MochiReq:get(method)), URL, Params, Consumer, TokenSecret) of + true -> + set_user_ctx(Req, AccessToken); + false -> + Req + end + end + end, true). + +% Look up the consumer key and get the roles to give the consumer +set_user_ctx(Req, AccessToken) -> + % TODO move to db storage + Name = case couch_config:get("oauth_token_users", AccessToken) of + undefined -> throw({bad_request, unknown_oauth_token}); + Value -> ?l2b(Value) + end, + case couch_auth_cache:get_user_creds(Name) of + nil -> Req; + User -> + Roles = couch_util:get_value(<<"roles">>, User, []), + Req#httpd{user_ctx=#user_ctx{name=Name, roles=Roles}} + end. + +% OAuth request_token +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"request_token">>], method=Method}=Req) -> + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), + case oauth:verify(Signature, atom_to_list(Method), URL, Params, Consumer, TokenSecret) of + true -> + ok(Req, <<"oauth_token=requestkey&oauth_token_secret=requestsecret">>); + false -> + invalid_signature(Req) + end + end, false); +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"authorize">>]}=Req) -> + {ok, serve_oauth_authorize(Req)}; +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"access_token">>], method='GET'}=Req) -> + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + case oauth:token(Params) of + "requestkey" -> + case oauth:verify(Signature, "GET", URL, Params, Consumer, "requestsecret") of + true -> + ok(Req, <<"oauth_token=accesskey&oauth_token_secret=accesssecret">>); + false -> + invalid_signature(Req) + end; + _ -> + couch_httpd:send_error(Req, 400, <<"invalid_token">>, <<"Invalid OAuth token.">>) + end + end, false); +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"access_token">>]}=Req) -> + couch_httpd:send_method_not_allowed(Req, "GET"). + +invalid_signature(Req) -> + couch_httpd:send_error(Req, 400, <<"invalid_signature">>, <<"Invalid signature value.">>). + +% This needs to be protected i.e. force user to login using HTTP Basic Auth or form-based login. 
+serve_oauth_authorize(#httpd{method=Method}=Req) -> + case Method of + 'GET' -> + % Confirm with the User that they want to authenticate the Consumer + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), + case oauth:verify(Signature, "GET", URL, Params, Consumer, TokenSecret) of + true -> + ok(Req, <<"oauth_token=requestkey&oauth_token_secret=requestsecret">>); + false -> + invalid_signature(Req) + end + end, false); + 'POST' -> + % If the User has confirmed, we direct the User back to the Consumer with a verification code + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), + case oauth:verify(Signature, "POST", URL, Params, Consumer, TokenSecret) of + true -> + %redirect(oauth_callback, oauth_token, oauth_verifier), + ok(Req, <<"oauth_token=requestkey&oauth_token_secret=requestsecret">>); + false -> + invalid_signature(Req) + end + end, false); + _ -> + couch_httpd:send_method_not_allowed(Req, "GET,POST") + end. + +serve_oauth(#httpd{mochi_req=MochiReq}=Req, Fun, FailSilently) -> + % 1. In the HTTP Authorization header as defined in OAuth HTTP Authorization Scheme. + % 2. As the HTTP POST request body with a content-type of application/x-www-form-urlencoded. + % 3. Added to the URLs in the query part (as defined by [RFC3986] section 3). + AuthHeader = case MochiReq:get_header_value("authorization") of + undefined -> + ""; + Else -> + [Head | Tail] = re:split(Else, "\\s", [{parts, 2}, {return, list}]), + case [string:to_lower(Head) | Tail] of + ["oauth", Rest] -> Rest; + _ -> "" + end + end, + HeaderParams = oauth_uri:params_from_header_string(AuthHeader), + %Realm = couch_util:get_value("realm", HeaderParams), + Params = proplists:delete("realm", HeaderParams) ++ MochiReq:parse_qs(), + ?LOG_DEBUG("OAuth Params: ~p", [Params]), + case couch_util:get_value("oauth_version", Params, "1.0") of + "1.0" -> + case couch_util:get_value("oauth_consumer_key", Params, undefined) of + undefined -> + case FailSilently of + true -> Req; + false -> couch_httpd:send_error(Req, 400, <<"invalid_consumer">>, <<"Invalid consumer.">>) + end; + ConsumerKey -> + SigMethod = couch_util:get_value("oauth_signature_method", Params), + case consumer_lookup(ConsumerKey, SigMethod) of + none -> + couch_httpd:send_error(Req, 400, <<"invalid_consumer">>, <<"Invalid consumer (key or signature method).">>); + Consumer -> + Signature = couch_util:get_value("oauth_signature", Params), + URL = couch_httpd:absolute_uri(Req, MochiReq:get(raw_path)), + Fun(URL, proplists:delete("oauth_signature", Params), + Consumer, Signature) + end + end; + _ -> + couch_httpd:send_error(Req, 400, <<"invalid_oauth_version">>, <<"Invalid OAuth version.">>) + end. + +consumer_lookup(Key, MethodStr) -> + SignatureMethod = case MethodStr of + "PLAINTEXT" -> plaintext; + "HMAC-SHA1" -> hmac_sha1; + %"RSA-SHA1" -> rsa_sha1; + _Else -> undefined + end, + case SignatureMethod of + undefined -> none; + _SupportedMethod -> + case couch_config:get("oauth_consumer_secrets", Key, undefined) of + undefined -> none; + Secret -> {Key, Secret, SignatureMethod} + end + end. + +ok(#httpd{mochi_req=MochiReq}, Body) -> + {ok, MochiReq:respond({200, [], Body})}. 
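
A sketch (keys and secrets hypothetical) of the three ini sections this OAuth handler consults, and the tuple consumer_lookup/2 above returns when the consumer secret is configured and the signature method is supported:
[oauth_consumer_secrets] example_key = example_secret; [oauth_token_secrets] example_token = example_token_secret; [oauth_token_users] example_token = jdoe.

    example_consumer_lookup() ->
        %% what consumer_lookup("example_key", "HMAC-SHA1") would yield with the
        %% configuration sketched above
        {"example_key", "example_secret", hmac_sha1}.
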
diff --git a/apps/couch/src/couch_httpd_rewrite.erl b/apps/couch/src/couch_httpd_rewrite.erl new file mode 100644 index 00000000..ca4ac1f0 --- /dev/null +++ b/apps/couch/src/couch_httpd_rewrite.erl @@ -0,0 +1,425 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +% +% bind_path is based on bind method from Webmachine + + +%% @doc Module for URL rewriting by pattern matching. + +-module(couch_httpd_rewrite). +-export([handle_rewrite_req/3]). +-include("couch_db.hrl"). + +-define(SEPARATOR, $\/). +-define(MATCH_ALL, {bind, <<"*">>}). + + +%% doc The http rewrite handler. All rewriting is done from +%% /dbname/_design/ddocname/_rewrite by default. +%% +%% each rules should be in rewrites member of the design doc. +%% Ex of a complete rule : +%% +%% { +%% .... +%% "rewrites": [ +%% { +%% "from": "", +%% "to": "index.html", +%% "method": "GET", +%% "query": {} +%% } +%% ] +%% } +%% +%% from: is the path rule used to bind current uri to the rule. It +%% use pattern matching for that. +%% +%% to: rule to rewrite an url. It can contain variables depending on binding +%% variables discovered during pattern matching and query args (url args and from +%% the query member.) +%% +%% method: method to bind the request method to the rule. by default "*" +%% query: query args you want to define they can contain dynamic variable +%% by binding the key to the bindings +%% +%% +%% to and from are path with patterns. pattern can be string starting with ":" or +%% "*". ex: +%% /somepath/:var/* +%% +%% This path is converted in erlang list by splitting "/". Each var are +%% converted in atom. "*" is converted to '*' atom. The pattern matching is done +%% by splitting "/" in request url in a list of token. A string pattern will +%% match equal token. The star atom ('*' in single quotes) will match any number +%% of tokens, but may only be present as the last pathtern in a pathspec. If all +%% tokens are matched and all pathterms are used, then the pathspec matches. It works +%% like webmachine. Each identified token will be reused in to rule and in query +%% +%% The pattern matching is done by first matching the request method to a rule. by +%% default all methods match a rule. (method is equal to "*" by default). Then +%% It will try to match the path to one rule. If no rule match, then a 404 error +%% is displayed. +%% +%% Once a rule is found we rewrite the request url using the "to" and +%% "query" members. The identified token are matched to the rule and +%% will replace var. if '*' is found in the rule it will contain the remaining +%% part if it exists. 
+%% +%% Examples: +%% +%% Dispatch rule URL TO Tokens +%% +%% {"from": "/a/b", /a/b?k=v /some/b?k=v var =:= b +%% "to": "/some/"} k = v +%% +%% {"from": "/a/b", /a/b /some/b?var=b var =:= b +%% "to": "/some/:var"} +%% +%% {"from": "/a", /a /some +%% "to": "/some/*"} +%% +%% {"from": "/a/*", /a/b/c /some/b/c +%% "to": "/some/*"} +%% +%% {"from": "/a", /a /some +%% "to": "/some/*"} +%% +%% {"from": "/a/:foo/*", /a/b/c /some/b/c?foo=b foo =:= b +%% "to": "/some/:foo/*"} +%% +%% {"from": "/a/:foo", /a/b /some/?k=b&foo=b foo =:= b +%% "to": "/some", +%% "query": { +%% "k": ":foo" +%% }} +%% +%% {"from": "/a", /a?foo=b /some/b foo =:= b +%% "to": "/some/:foo", +%% }} + + + +handle_rewrite_req(#httpd{ + path_parts=[DbName, <<"_design">>, DesignName, _Rewrite|PathParts], + method=Method, + mochi_req=MochiReq}=Req, _Db, DDoc) -> + + % we are in a design handler + DesignId = <<"_design/", DesignName/binary>>, + Prefix = <<"/", DbName/binary, "/", DesignId/binary>>, + QueryList = couch_httpd:qs(Req), + QueryList1 = [{to_binding(K), V} || {K, V} <- QueryList], + + #doc{body={Props}} = DDoc, + + % get rules from ddoc + case couch_util:get_value(<<"rewrites">>, Props) of + undefined -> + couch_httpd:send_error(Req, 404, <<"rewrite_error">>, + <<"Invalid path.">>); + Rules -> + % create dispatch list from rules + DispatchList = [make_rule(Rule) || {Rule} <- Rules], + + %% get raw path by matching url to a rule. + RawPath = case try_bind_path(DispatchList, couch_util:to_binary(Method), PathParts, + QueryList1) of + no_dispatch_path -> + throw(not_found); + {NewPathParts, Bindings} -> + Parts = [quote_plus(X) || X <- NewPathParts], + + % build new path, reencode query args, eventually convert + % them to json + Path = lists:append( + string:join(Parts, [?SEPARATOR]), + case Bindings of + [] -> []; + _ -> [$?, encode_query(Bindings)] + end), + + % if path is relative detect it and rewrite path + case mochiweb_util:safe_relative_path(Path) of + undefined -> + ?b2l(Prefix) ++ "/" ++ Path; + P1 -> + ?b2l(Prefix) ++ "/" ++ P1 + end + + end, + + % normalize final path (fix levels "." and "..") + RawPath1 = ?b2l(iolist_to_binary(normalize_path(RawPath))), + + ?LOG_DEBUG("rewrite to ~p ~n", [RawPath1]), + + % build a new mochiweb request + MochiReq1 = mochiweb_request:new(MochiReq:get(socket), + MochiReq:get(method), + RawPath1, + MochiReq:get(version), + MochiReq:get(headers)), + + % cleanup, It force mochiweb to reparse raw uri. + MochiReq1:cleanup(), + + #httpd{ + db_url_handlers = DbUrlHandlers, + design_url_handlers = DesignUrlHandlers, + default_fun = DefaultFun, + url_handlers = UrlHandlers + } = Req, + couch_httpd:handle_request_int(MochiReq1, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers) + end. + +quote_plus({bind, X}) -> + mochiweb_util:quote_plus(X); +quote_plus(X) -> + mochiweb_util:quote_plus(X). + +%% @doc Try to find a rule matching current url. 
If none is found +%% 404 error not_found is raised +try_bind_path([], _Method, _PathParts, _QueryList) -> + no_dispatch_path; +try_bind_path([Dispatch|Rest], Method, PathParts, QueryList) -> + [{PathParts1, Method1}, RedirectPath, QueryArgs] = Dispatch, + case bind_method(Method1, Method) of + true -> + case bind_path(PathParts1, PathParts, []) of + {ok, Remaining, Bindings} -> + Bindings1 = Bindings ++ QueryList, + % we parse query args from the rule and fill + % it eventually with bindings vars + QueryArgs1 = make_query_list(QueryArgs, Bindings1, []), + % remove params in QueryLists1 that are already in + % QueryArgs1 + Bindings2 = lists:foldl(fun({K, V}, Acc) -> + K1 = to_binding(K), + KV = case couch_util:get_value(K1, QueryArgs1) of + undefined -> [{K1, V}]; + _V1 -> [] + end, + Acc ++ KV + end, [], Bindings1), + + FinalBindings = Bindings2 ++ QueryArgs1, + NewPathParts = make_new_path(RedirectPath, FinalBindings, + Remaining, []), + {NewPathParts, FinalBindings}; + fail -> + try_bind_path(Rest, Method, PathParts, QueryList) + end; + false -> + try_bind_path(Rest, Method, PathParts, QueryList) + end. + +%% rewriting dynamically the quey list given as query member in +%% rewrites. Each value is replaced by one binding or an argument +%% passed in url. +make_query_list([], _Bindings, Acc) -> + Acc; +make_query_list([{Key, {Value}}|Rest], Bindings, Acc) -> + Value1 = to_json({Value}), + make_query_list(Rest, Bindings, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Acc) when is_binary(Value) -> + Value1 = replace_var(Key, Value, Bindings), + make_query_list(Rest, Bindings, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Acc) when is_list(Value) -> + Value1 = replace_var(Key, Value, Bindings), + make_query_list(Rest, Bindings, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Acc) -> + make_query_list(Rest, Bindings, [{to_binding(Key), Value}|Acc]). + +replace_var(Key, Value, Bindings) -> + case Value of + <<":", Var/binary>> -> + get_var(Var, Bindings, Value); + _ when is_list(Value) -> + Value1 = lists:foldr(fun(V, Acc) -> + V1 = case V of + <<":", VName/binary>> -> + case get_var(VName, Bindings, V) of + V2 when is_list(V2) -> + iolist_to_binary(V2); + V2 -> V2 + end; + _ -> + + V + end, + [V1|Acc] + end, [], Value), + to_json(Value1); + _ when is_binary(Value) -> + Value; + _ -> + case Key of + <<"key">> -> to_json(Value); + <<"startkey">> -> to_json(Value); + <<"endkey">> -> to_json(Value); + _ -> + lists:flatten(?JSON_ENCODE(Value)) + end + end. + + +get_var(VarName, Props, Default) -> + VarName1 = to_binding(VarName), + couch_util:get_value(VarName1, Props, Default). + +%% doc: build new patch from bindings. bindings are query args +%% (+ dynamic query rewritten if needed) and bindings found in +%% bind_path step. +make_new_path([], _Bindings, _Remaining, Acc) -> + lists:reverse(Acc); +make_new_path([?MATCH_ALL], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_new_path([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_new_path([{bind, P}|Rest], Bindings, Remaining, Acc) -> + P2 = case couch_util:get_value({bind, P}, Bindings) of + undefined -> << "undefined">>; + P1 -> P1 + end, + make_new_path(Rest, Bindings, Remaining, [P2|Acc]); +make_new_path([P|Rest], Bindings, Remaining, Acc) -> + make_new_path(Rest, Bindings, Remaining, [P|Acc]). 
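% A small sketch of the dynamic query rewriting above, assuming a hypothetical
% binding for :foo captured earlier by bind_path:
%
%   Bindings = [{{bind, <<"foo">>}, <<"b">>}],
%   make_query_list([{<<"k">>, <<":foo">>}], Bindings, []).
%   %=> [{{bind, <<"k">>}, <<"b">>}]
%
% Plain binary values pass through unchanged; key, startkey and endkey values
% (and other non-binary terms) are JSON-encoded.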
+ + +%% @doc If method of the query fith the rule method. If the +%% method rule is '*', which is the default, all +%% request method will bind. It allows us to make rules +%% depending on HTTP method. +bind_method(?MATCH_ALL, _Method) -> + true; +bind_method({bind, Method}, Method) -> + true; +bind_method(_, _) -> + false. + + +%% @doc bind path. Using the rule from we try to bind variables given +%% to the current url by pattern matching +bind_path([], [], Bindings) -> + {ok, [], Bindings}; +bind_path([?MATCH_ALL], Rest, Bindings) when is_list(Rest) -> + {ok, Rest, Bindings}; +bind_path(_, [], _) -> + fail; +bind_path([{bind, Token}|RestToken],[Match|RestMatch],Bindings) -> + bind_path(RestToken, RestMatch, [{{bind, Token}, Match}|Bindings]); +bind_path([Token|RestToken], [Token|RestMatch], Bindings) -> + bind_path(RestToken, RestMatch, Bindings); +bind_path(_, _, _) -> + fail. + + +%% normalize path. +normalize_path(Path) -> + "/" ++ string:join(normalize_path1(string:tokens(Path, + "/"), []), [?SEPARATOR]). + + +normalize_path1([], Acc) -> + lists:reverse(Acc); +normalize_path1([".."|Rest], Acc) -> + Acc1 = case Acc of + [] -> [".."|Acc]; + [T|_] when T =:= ".." -> [".."|Acc]; + [_|R] -> R + end, + normalize_path1(Rest, Acc1); +normalize_path1(["."|Rest], Acc) -> + normalize_path1(Rest, Acc); +normalize_path1([Path|Rest], Acc) -> + normalize_path1(Rest, [Path|Acc]). + + +%% @doc transform json rule in erlang for pattern matching +make_rule(Rule) -> + Method = case couch_util:get_value(<<"method">>, Rule) of + undefined -> ?MATCH_ALL; + M -> to_binding(M) + end, + QueryArgs = case couch_util:get_value(<<"query">>, Rule) of + undefined -> []; + {Args} -> Args + end, + FromParts = case couch_util:get_value(<<"from">>, Rule) of + undefined -> [?MATCH_ALL]; + From -> + parse_path(From) + end, + ToParts = case couch_util:get_value(<<"to">>, Rule) of + undefined -> + throw({error, invalid_rewrite_target}); + To -> + parse_path(To) + end, + [{FromParts, Method}, ToParts, QueryArgs]. + +parse_path(Path) -> + {ok, SlashRE} = re:compile(<<"\\/">>), + path_to_list(re:split(Path, SlashRE), [], 0). + +%% @doc convert a path rule (from or to) to an erlang list +%% * and path variable starting by ":" are converted +%% in erlang atom. +path_to_list([], Acc, _DotDotCount) -> + lists:reverse(Acc); +path_to_list([<<>>|R], Acc, DotDotCount) -> + path_to_list(R, Acc, DotDotCount); +path_to_list([<<"*">>|R], Acc, DotDotCount) -> + path_to_list(R, [?MATCH_ALL|Acc], DotDotCount); +path_to_list([<<"..">>|R], Acc, DotDotCount) when DotDotCount == 2 -> + case couch_config:get("httpd", "secure_rewrites", "true") of + "false" -> + path_to_list(R, [<<"..">>|Acc], DotDotCount+1); + _Else -> + ?LOG_INFO("insecure_rewrite_rule ~p blocked", [lists:reverse(Acc) ++ [<<"..">>] ++ R]), + throw({insecure_rewrite_rule, "too many ../.. segments"}) + end; +path_to_list([<<"..">>|R], Acc, DotDotCount) -> + path_to_list(R, [<<"..">>|Acc], DotDotCount+1); +path_to_list([P|R], Acc, DotDotCount) -> + P1 = case P of + <<":", Var/binary>> -> + to_binding(Var); + _ -> P + end, + path_to_list(R, [P1|Acc], DotDotCount). + +encode_query(Props) -> + Props1 = lists:foldl(fun ({{bind, K}, V}, Acc) -> + V1 = case is_list(V) orelse is_binary(V) of + true -> V; + false -> + % probably it's a number + quote_plus(V) + end, + [{K, V1} | Acc] + end, [], Props), + lists:flatten(mochiweb_util:urlencode(Props1)). + +to_binding({bind, V}) -> + {bind, V}; +to_binding(V) when is_list(V) -> + to_binding(?l2b(V)); +to_binding(V) -> + {bind, V}. 
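% A worked sketch of the dispatch pipeline, reusing the "/a/:foo/*" example
% from the module documentation above:
%
%   make_rule([{<<"from">>, <<"/a/:foo/*">>}, {<<"to">>, <<"/some/:foo/*">>}]).
%   %=> [{[<<"a">>, {bind,<<"foo">>}, {bind,<<"*">>}], {bind,<<"*">>}},
%   %    [<<"some">>, {bind,<<"foo">>}, {bind,<<"*">>}],
%   %    []]
%
%   bind_path([<<"a">>, {bind,<<"foo">>}, {bind,<<"*">>}],
%             [<<"a">>, <<"b">>, <<"c">>], []).
%   %=> {ok, [<<"c">>], [{{bind,<<"foo">>}, <<"b">>}]}
%
%   make_new_path([<<"some">>, {bind,<<"foo">>}, {bind,<<"*">>}],
%                 [{{bind,<<"foo">>}, <<"b">>}], [<<"c">>], []).
%   %=> [<<"some">>, <<"b">>, <<"c">>]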
+ +to_json(V) -> + iolist_to_binary(?JSON_ENCODE(V)). diff --git a/apps/couch/src/couch_httpd_show.erl b/apps/couch/src/couch_httpd_show.erl new file mode 100644 index 00000000..d50ca83a --- /dev/null +++ b/apps/couch/src/couch_httpd_show.erl @@ -0,0 +1,399 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_show). + +-export([handle_doc_show_req/3, handle_doc_update_req/3, handle_view_list_req/3, + handle_view_list/6, get_fun_key/3]). + +-include("couch_db.hrl"). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, + start_json_response/2,send_chunk/2,last_chunk/1,send_chunked_error/2, + start_chunked_response/3, send_error/4]). + + +% /db/_design/foo/_show/bar/docid +% show converts a json doc to a response of any content-type. +% it looks up the doc an then passes it to the query server. +% then it sends the response from the query server to the http client. + +maybe_open_doc(Db, DocId) -> + case catch couch_httpd_db:couch_doc_open(Db, DocId, nil, [conflicts]) of + {not_found, missing} -> nil; + {not_found,deleted} -> nil; + Doc -> Doc + end. +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName, DocId] + }=Req, Db, DDoc) -> + + % open the doc + Doc = maybe_open_doc(Db, DocId), + + % we don't handle revs here b/c they are an internal api + % returns 404 if there is no doc with DocId + handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId); + +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName, DocId|Rest] + }=Req, Db, DDoc) -> + + DocParts = [DocId|Rest], + DocId1 = ?l2b(string:join([?b2l(P)|| P <- DocParts], "/")), + + % open the doc + Doc = maybe_open_doc(Db, DocId1), + + % we don't handle revs here b/c they are an internal api + % pass 404 docs to the show function + handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId1); + +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName] + }=Req, Db, DDoc) -> + % with no docid the doc is nil + handle_doc_show(Req, Db, DDoc, ShowName, nil); + +handle_doc_show_req(Req, _Db, _DDoc) -> + send_error(Req, 404, <<"show_error">>, <<"Invalid path.">>). + +handle_doc_show(Req, Db, DDoc, ShowName, Doc) -> + handle_doc_show(Req, Db, DDoc, ShowName, Doc, null). + +handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId) -> + % get responder for ddoc/showname + CurrentEtag = show_etag(Req, Doc, DDoc, []), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + JsonReq = couch_httpd_external:json_req_obj(Req, Db, DocId), + JsonDoc = couch_query_servers:json_doc(Doc), + [<<"resp">>, ExternalResp] = + couch_query_servers:ddoc_prompt(DDoc, [<<"shows">>, ShowName], [JsonDoc, JsonReq]), + JsonResp = apply_etag(ExternalResp, CurrentEtag), + couch_httpd_external:send_external_response(Req, JsonResp) + end). 
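% When extra path segments follow the document id, they are re-joined with "/"
% before the doc is opened, so a request such as
% /db/_design/examples/_show/myshow/a/b (names illustrative) looks up the doc
% whose id is <<"a/b">>:
%
%   ?l2b(string:join([?b2l(P) || P <- [<<"a">>, <<"b">>]], "/")).
%   %=> <<"a/b">>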
+ + + +show_etag(#httpd{user_ctx=UserCtx}=Req, Doc, DDoc, More) -> + Accept = couch_httpd:header_value(Req, "Accept"), + DocPart = case Doc of + nil -> nil; + Doc -> couch_httpd:doc_etag(Doc) + end, + couch_httpd:make_etag({couch_httpd:doc_etag(DDoc), DocPart, Accept, UserCtx#user_ctx.roles, More}). + +get_fun_key(DDoc, Type, Name) -> + #doc{body={Props}} = DDoc, + Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + Src = couch_util:get_nested_json_value({Props}, [Type, Name]), + {Lang, Src}. + +% /db/_design/foo/update/bar/docid +% updates a doc based on a request +% handle_doc_update_req(#httpd{method = 'GET'}=Req, _Db, _DDoc) -> +% % anything but GET +% send_method_not_allowed(Req, "POST,PUT,DELETE,ETC"); + +handle_doc_update_req(#httpd{ + path_parts=[_, _, _, _, UpdateName, DocId] + }=Req, Db, DDoc) -> + Doc = try couch_httpd_db:couch_doc_open(Db, DocId, nil, [conflicts]) + catch + _ -> nil + end, + send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId); + +handle_doc_update_req(#httpd{ + path_parts=[_, _, _, _, UpdateName] + }=Req, Db, DDoc) -> + send_doc_update_response(Req, Db, DDoc, UpdateName, nil, null); + +handle_doc_update_req(Req, _Db, _DDoc) -> + send_error(Req, 404, <<"update_error">>, <<"Invalid path.">>). + +send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> + JsonReq = couch_httpd_external:json_req_obj(Req, Db, DocId), + JsonDoc = couch_query_servers:json_doc(Doc), + {Code, JsonResp1} = case couch_query_servers:ddoc_prompt(DDoc, + [<<"updates">>, UpdateName], [JsonDoc, JsonReq]) of + [<<"up">>, {NewJsonDoc}, {JsonResp}] -> + Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit", + "false") of + "true" -> + [full_commit]; + _ -> + [] + end, + NewDoc = couch_doc:from_json_obj({NewJsonDoc}), + {ok, NewRev} = couch_db:update_doc(Db, NewDoc, Options), + NewRevStr = couch_doc:rev_to_str(NewRev), + JsonRespWithRev = {[{<<"headers">>, + {[{<<"X-Couch-Update-NewRev">>, NewRevStr}]}} | JsonResp]}, + {201, JsonRespWithRev}; + [<<"up">>, _Other, JsonResp] -> + {200, JsonResp} + end, + + JsonResp2 = couch_util:json_apply_field({<<"code">>, Code}, JsonResp1), + % todo set location field + couch_httpd_external:send_external_response(Req, JsonResp2). + + +% view-list request with view and list from same design doc. +handle_view_list_req(#httpd{method='GET', + path_parts=[_, _, DesignName, _, ListName, ViewName]}=Req, Db, DDoc) -> + handle_view_list(Req, Db, DDoc, ListName, {DesignName, ViewName}, nil); + +% view-list request with view and list from different design docs. 
+handle_view_list_req(#httpd{method='GET', + path_parts=[_, _, _, _, ListName, ViewDesignName, ViewName]}=Req, Db, DDoc) -> + handle_view_list(Req, Db, DDoc, ListName, {ViewDesignName, ViewName}, nil); + +handle_view_list_req(#httpd{method='GET'}=Req, _Db, _DDoc) -> + send_error(Req, 404, <<"list_error">>, <<"Invalid path.">>); + +handle_view_list_req(#httpd{method='POST', + path_parts=[_, _, DesignName, _, ListName, ViewName]}=Req, Db, DDoc) -> + % {Props2} = couch_httpd:json_body(Req), + ReqBody = couch_httpd:body(Req), + {Props2} = ?JSON_DECODE(ReqBody), + Keys = couch_util:get_value(<<"keys">>, Props2, nil), + handle_view_list(Req#httpd{req_body=ReqBody}, Db, DDoc, ListName, {DesignName, ViewName}, Keys); + +handle_view_list_req(#httpd{method='POST', + path_parts=[_, _, _, _, ListName, ViewDesignName, ViewName]}=Req, Db, DDoc) -> + % {Props2} = couch_httpd:json_body(Req), + ReqBody = couch_httpd:body(Req), + {Props2} = ?JSON_DECODE(ReqBody), + Keys = couch_util:get_value(<<"keys">>, Props2, nil), + handle_view_list(Req#httpd{req_body=ReqBody}, Db, DDoc, ListName, {ViewDesignName, ViewName}, Keys); + +handle_view_list_req(#httpd{method='POST'}=Req, _Db, _DDoc) -> + send_error(Req, 404, <<"list_error">>, <<"Invalid path.">>); + +handle_view_list_req(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET,POST,HEAD"). + +handle_view_list(Req, Db, DDoc, LName, {ViewDesignName, ViewName}, Keys) -> + ViewDesignId = <<"_design/", ViewDesignName/binary>>, + {ViewType, View, Group, QueryArgs} = couch_httpd_view:load_view(Req, Db, {ViewDesignId, ViewName}, Keys), + Etag = list_etag(Req, Db, Group, {couch_httpd:doc_etag(DDoc), Keys}), + couch_httpd:etag_respond(Req, Etag, fun() -> + output_list(ViewType, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) + end). + +list_etag(#httpd{user_ctx=UserCtx}=Req, Db, Group, More) -> + Accept = couch_httpd:header_value(Req, "Accept"), + couch_httpd_view:view_group_etag(Group, Db, {More, Accept, UserCtx#user_ctx.roles}). + +output_list(map, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group); +output_list(reduce, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group). 
+ +% next step: +% use with_ddoc_proc/2 to make this simpler +output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + #view_query_args{ + limit = Limit, + skip = SkipCount + } = QueryArgs, + + FoldAccInit = {Limit, SkipCount, undefined, []}, + {ok, RowCount} = couch_view:get_row_count(View), + + + couch_query_servers:with_ddoc_proc(DDoc, fun(QServer) -> + + ListFoldHelpers = #view_fold_helper_funs{ + reduce_count = fun couch_view:reduce_to_count/1, + start_response = StartListRespFun = make_map_start_resp_fun(QServer, Db, LName), + send_row = make_map_send_row_fun(QServer) + }, + CurrentSeq = Group#group.current_seq, + + {ok, _, FoldResult} = case Keys of + nil -> + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, Etag, Db, CurrentSeq, RowCount, ListFoldHelpers), + couch_view:fold(View, FoldlFun, FoldAccInit, + couch_httpd_view:make_key_options(QueryArgs)); + Keys -> + lists:foldl( + fun(Key, {ok, _, FoldAcc}) -> + QueryArgs2 = QueryArgs#view_query_args{ + start_key = Key, + end_key = Key + }, + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs2, Etag, Db, CurrentSeq, RowCount, ListFoldHelpers), + couch_view:fold(View, FoldlFun, FoldAcc, + couch_httpd_view:make_key_options(QueryArgs2)) + end, {ok, nil, FoldAccInit}, Keys) + end, + finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, CurrentSeq, RowCount) + end). + + +output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + #view_query_args{ + limit = Limit, + skip = SkipCount, + group_level = GroupLevel + } = QueryArgs, + + CurrentSeq = Group#group.current_seq, + + couch_query_servers:with_ddoc_proc(DDoc, fun(QServer) -> + StartListRespFun = make_reduce_start_resp_fun(QServer, Db, LName), + SendListRowFun = make_reduce_send_row_fun(QServer, Db), + {ok, GroupRowsFun, RespFun} = couch_httpd_view:make_reduce_fold_funs(Req, + GroupLevel, QueryArgs, Etag, CurrentSeq, + #reduce_fold_helper_funs{ + start_response = StartListRespFun, + send_row = SendListRowFun + }), + FoldAccInit = {Limit, SkipCount, undefined, []}, + {ok, FoldResult} = case Keys of + nil -> + couch_view:fold_reduce(View, RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + couch_httpd_view:make_key_options(QueryArgs)]); + Keys -> + lists:foldl( + fun(Key, {ok, FoldAcc}) -> + couch_view:fold_reduce(View, RespFun, FoldAcc, + [{key_group_fun, GroupRowsFun} | + couch_httpd_view:make_key_options( + QueryArgs#view_query_args{start_key=Key, end_key=Key})] + ) + end, {ok, FoldAccInit}, Keys) + end, + finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, CurrentSeq, null) + end). + + +make_map_start_resp_fun(QueryServer, Db, LName) -> + fun(Req, Etag, TotalRows, Offset, _Acc, UpdateSeq) -> + Head = {[{<<"total_rows">>, TotalRows}, {<<"offset">>, Offset}, {<<"update_seq">>, UpdateSeq}]}, + start_list_resp(QueryServer, LName, Req, Db, Head, Etag) + end. + +make_reduce_start_resp_fun(QueryServer, Db, LName) -> + fun(Req2, Etag, _Acc, UpdateSeq) -> + start_list_resp(QueryServer, LName, Req2, Db, {[{<<"update_seq">>, UpdateSeq}]}, Etag) + end. 
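% The Head object handed to the list function's first prompt is built by the
% start_resp funs above; for a map view it is shaped like
%
%   {[{<<"total_rows">>, 100}, {<<"offset">>, 0}, {<<"update_seq">>, 42}]}
%
% while the reduce variant carries only the update_seq member (numbers here
% are illustrative).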
+ +start_list_resp(QServer, LName, Req, Db, Head, Etag) -> + JsonReq = couch_httpd_external:json_req_obj(Req, Db), + [<<"start">>,Chunks,JsonResp] = couch_query_servers:ddoc_proc_prompt(QServer, + [<<"lists">>, LName], [Head, JsonReq]), + JsonResp2 = apply_etag(JsonResp, Etag), + #extern_resp_args{ + code = Code, + ctype = CType, + headers = ExtHeaders + } = couch_httpd_external:parse_external_response(JsonResp2), + JsonHeaders = couch_httpd_external:default_or_content_type(CType, ExtHeaders), + {ok, Resp} = start_chunked_response(Req, Code, JsonHeaders), + {ok, Resp, ?b2l(?l2b(Chunks))}. + +make_map_send_row_fun(QueryServer) -> + fun(Resp, Db, Row, IncludeDocs, RowFront) -> + send_list_row(Resp, QueryServer, Db, Row, RowFront, IncludeDocs) + end. + +make_reduce_send_row_fun(QueryServer, Db) -> + fun(Resp, Row, RowFront) -> + send_list_row(Resp, QueryServer, Db, Row, RowFront, false) + end. + +send_list_row(Resp, QueryServer, Db, Row, RowFront, IncludeDoc) -> + try + [Go,Chunks] = prompt_list_row(QueryServer, Db, Row, IncludeDoc), + Chunk = RowFront ++ ?b2l(?l2b(Chunks)), + send_non_empty_chunk(Resp, Chunk), + case Go of + <<"chunks">> -> + {ok, ""}; + <<"end">> -> + {stop, stop} + end + catch + throw:Error -> + send_chunked_error(Resp, Error), + throw({already_sent, Resp, Error}) + end. + + +prompt_list_row({Proc, _DDocId}, Db, {{Key, DocId}, Value}, IncludeDoc) -> + JsonRow = couch_httpd_view:view_row_obj(Db, {{Key, DocId}, Value}, IncludeDoc), + couch_query_servers:proc_prompt(Proc, [<<"list_row">>, JsonRow]); + +prompt_list_row({Proc, _DDocId}, _, {Key, Value}, _IncludeDoc) -> + JsonRow = {[{key, Key}, {value, Value}]}, + couch_query_servers:proc_prompt(Proc, [<<"list_row">>, JsonRow]). + +send_non_empty_chunk(Resp, Chunk) -> + case Chunk of + [] -> ok; + _ -> send_chunk(Resp, Chunk) + end. + +finish_list(Req, {Proc, _DDocId}, Etag, FoldResult, StartFun, CurrentSeq, TotalRows) -> + FoldResult2 = case FoldResult of + {Limit, SkipCount, Response, RowAcc} -> + {Limit, SkipCount, Response, RowAcc, nil}; + Else -> + Else + end, + case FoldResult2 of + {_, _, undefined, _, _} -> + {ok, Resp, BeginBody} = + render_head_for_empty_list(StartFun, Req, Etag, CurrentSeq, TotalRows), + [<<"end">>, Chunks] = couch_query_servers:proc_prompt(Proc, [<<"list_end">>]), + Chunk = BeginBody ++ ?b2l(?l2b(Chunks)), + send_non_empty_chunk(Resp, Chunk); + {_, _, Resp, stop, _} -> + ok; + {_, _, Resp, _, _} -> + [<<"end">>, Chunks] = couch_query_servers:proc_prompt(Proc, [<<"list_end">>]), + send_non_empty_chunk(Resp, ?b2l(?l2b(Chunks))) + end, + last_chunk(Resp). + + +render_head_for_empty_list(StartListRespFun, Req, Etag, CurrentSeq, null) -> + StartListRespFun(Req, Etag, [], CurrentSeq); % for reduce +render_head_for_empty_list(StartListRespFun, Req, Etag, CurrentSeq, TotalRows) -> + StartListRespFun(Req, Etag, TotalRows, null, [], CurrentSeq). + +apply_etag({ExternalResponse}, CurrentEtag) -> + % Here we embark on the delicate task of replacing or creating the + % headers on the JsonResponse object. We need to control the Etag and + % Vary headers. If the external function controls the Etag, we'd have to + % run it to check for a match, which sort of defeats the purpose. 
+ case couch_util:get_value(<<"headers">>, ExternalResponse, nil) of + nil -> + % no JSON headers + % add our Etag and Vary headers to the response + {[{<<"headers">>, {[{<<"Etag">>, CurrentEtag}, {<<"Vary">>, <<"Accept">>}]}} | ExternalResponse]}; + JsonHeaders -> + {[case Field of + {<<"headers">>, JsonHeaders} -> % add our headers + JsonHeadersEtagged = couch_util:json_apply_field({<<"Etag">>, CurrentEtag}, JsonHeaders), + JsonHeadersVaried = couch_util:json_apply_field({<<"Vary">>, <<"Accept">>}, JsonHeadersEtagged), + {<<"headers">>, JsonHeadersVaried}; + _ -> % skip non-header fields + Field + end || Field <- ExternalResponse]} + end. + diff --git a/apps/couch/src/couch_httpd_stats_handlers.erl b/apps/couch/src/couch_httpd_stats_handlers.erl new file mode 100644 index 00000000..41aeaed0 --- /dev/null +++ b/apps/couch/src/couch_httpd_stats_handlers.erl @@ -0,0 +1,56 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_stats_handlers). +-include("couch_db.hrl"). + +-export([handle_stats_req/1]). +-import(couch_httpd, [ + send_json/2, send_json/3, send_json/4, send_method_not_allowed/2, + start_json_response/2, send_chunk/2, end_json_response/1, + start_chunked_response/3, send_error/4 +]). + +handle_stats_req(#httpd{method='GET', path_parts=[_]}=Req) -> + flush(Req), + send_json(Req, couch_stats_aggregator:all(range(Req))); + +handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod]}) -> + throw({bad_request, <<"Stat names must have exactly to parts.">>}); + +handle_stats_req(#httpd{method='GET', path_parts=[_, Mod, Key]}=Req) -> + flush(Req), + Stats = couch_stats_aggregator:get_json({list_to_atom(binary_to_list(Mod)), + list_to_atom(binary_to_list(Key))}, range(Req)), + send_json(Req, {[{Mod, {[{Key, Stats}]}}]}); + +handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod, _Key | _Extra]}) -> + throw({bad_request, <<"Stat names must have exactly two parts.">>}); + +handle_stats_req(Req) -> + send_method_not_allowed(Req, "GET"). + +range(Req) -> + case couch_util:get_value("range", couch_httpd:qs(Req)) of + undefined -> + 0; + Value -> + list_to_integer(Value) + end. + +flush(Req) -> + case couch_util:get_value("flush", couch_httpd:qs(Req)) of + "true" -> + couch_stats_aggregator:collect_sample(); + _Else -> + ok + end. diff --git a/apps/couch/src/couch_httpd_view.erl b/apps/couch/src/couch_httpd_view.erl new file mode 100644 index 00000000..e1a0dfad --- /dev/null +++ b/apps/couch/src/couch_httpd_view.erl @@ -0,0 +1,692 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. 
+ +-module(couch_httpd_view). +-include("couch_db.hrl"). + +-export([handle_view_req/3,handle_temp_view_req/2]). + +-export([get_stale_type/1, get_reduce_type/1, parse_view_params/3]). +-export([make_view_fold_fun/7, finish_view_fold/4, finish_view_fold/5, view_row_obj/3]). +-export([view_group_etag/2, view_group_etag/3, make_reduce_fold_funs/6]). +-export([design_doc_view/5, parse_bool_param/1, doc_member/2]). +-export([make_key_options/1, load_view/4]). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2,send_chunk/2, + start_json_response/2, start_json_response/3, end_json_response/1, + send_chunked_error/2]). + +-import(couch_db,[get_update_seq/1]). + +design_doc_view(Req, Db, DName, ViewName, Keys) -> + DesignId = <<"_design/", DName/binary>>, + Stale = get_stale_type(Req), + Reduce = get_reduce_type(Req), + Result = case couch_view:get_map_view(Db, DesignId, ViewName, Stale) of + {ok, View, Group} -> + QueryArgs = parse_view_params(Req, Keys, map), + output_map_view(Req, View, Group, Db, QueryArgs, Keys); + {not_found, Reason} -> + case couch_view:get_reduce_view(Db, DesignId, ViewName, Stale) of + {ok, ReduceView, Group} -> + case Reduce of + false -> + QueryArgs = parse_view_params(Req, Keys, red_map), + MapView = couch_view:extract_map_view(ReduceView), + output_map_view(Req, MapView, Group, Db, QueryArgs, Keys); + _ -> + QueryArgs = parse_view_params(Req, Keys, reduce), + output_reduce_view(Req, Db, ReduceView, Group, QueryArgs, Keys) + end; + _ -> + throw({not_found, Reason}) + end + end, + couch_stats_collector:increment({httpd, view_reads}), + Result. + +handle_view_req(#httpd{method='GET', + path_parts=[_, _, DName, _, ViewName]}=Req, Db, _DDoc) -> + design_doc_view(Req, Db, DName, ViewName, nil); + +handle_view_req(#httpd{method='POST', + path_parts=[_, _, DName, _, ViewName]}=Req, Db, _DDoc) -> + couch_httpd:validate_ctype(Req, "application/json"), + {Fields} = couch_httpd:json_body_obj(Req), + case couch_util:get_value(<<"keys">>, Fields, nil) of + nil -> + Fmt = "POST to view ~p/~p in database ~p with no keys member.", + ?LOG_DEBUG(Fmt, [DName, ViewName, Db]), + design_doc_view(Req, Db, DName, ViewName, nil); + Keys when is_list(Keys) -> + design_doc_view(Req, Db, DName, ViewName, Keys); + _ -> + throw({bad_request, "`keys` member must be a array."}) + end; + +handle_view_req(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET,POST,HEAD"). 
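% A sketch of the multi-key POST accepted above; a body of
% {"keys": ["k1", "k2"]} decodes to an EJSON object whose keys member is
% handed straight to design_doc_view/5:
%
%   {Fields} = ?JSON_DECODE(<<"{\"keys\": [\"k1\", \"k2\"]}">>),
%   couch_util:get_value(<<"keys">>, Fields, nil).
%   %=> [<<"k1">>, <<"k2">>]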
+ +handle_temp_view_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_db:check_is_admin(Db), + couch_stats_collector:increment({httpd, temporary_view_reads}), + {Props} = couch_httpd:json_body_obj(Req), + Language = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + {DesignOptions} = couch_util:get_value(<<"options">>, Props, {[]}), + MapSrc = couch_util:get_value(<<"map">>, Props), + Keys = couch_util:get_value(<<"keys">>, Props, nil), + Reduce = get_reduce_type(Req), + case couch_util:get_value(<<"reduce">>, Props, null) of + null -> + QueryArgs = parse_view_params(Req, Keys, map), + {ok, View, Group} = couch_view:get_temp_map_view(Db, Language, + DesignOptions, MapSrc), + output_map_view(Req, View, Group, Db, QueryArgs, Keys); + _ when Reduce =:= false -> + QueryArgs = parse_view_params(Req, Keys, red_map), + {ok, View, Group} = couch_view:get_temp_map_view(Db, Language, + DesignOptions, MapSrc), + output_map_view(Req, View, Group, Db, QueryArgs, Keys); + RedSrc -> + QueryArgs = parse_view_params(Req, Keys, reduce), + {ok, View, Group} = couch_view:get_temp_reduce_view(Db, Language, + DesignOptions, MapSrc, RedSrc), + output_reduce_view(Req, Db, View, Group, QueryArgs, Keys) + end; + +handle_temp_view_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + +output_map_view(Req, View, Group, Db, QueryArgs, nil) -> + #view_query_args{ + limit = Limit, + skip = SkipCount + } = QueryArgs, + CurrentEtag = view_group_etag(Group, Db), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, RowCount} = couch_view:get_row_count(View), + FoldlFun = make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, Group#group.current_seq, RowCount, #view_fold_helper_funs{reduce_count=fun couch_view:reduce_to_count/1}), + FoldAccInit = {Limit, SkipCount, undefined, []}, + {ok, LastReduce, FoldResult} = couch_view:fold(View, + FoldlFun, FoldAccInit, make_key_options(QueryArgs)), + finish_view_fold(Req, RowCount, + couch_view:reduce_to_count(LastReduce), FoldResult) + end); + +output_map_view(Req, View, Group, Db, QueryArgs, Keys) -> + #view_query_args{ + limit = Limit, + skip = SkipCount + } = QueryArgs, + CurrentEtag = view_group_etag(Group, Db, Keys), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, RowCount} = couch_view:get_row_count(View), + FoldAccInit = {Limit, SkipCount, undefined, []}, + {LastReduce, FoldResult} = lists:foldl(fun(Key, {_, FoldAcc}) -> + FoldlFun = make_view_fold_fun(Req, QueryArgs#view_query_args{}, + CurrentEtag, Db, Group#group.current_seq, RowCount, + #view_fold_helper_funs{ + reduce_count = fun couch_view:reduce_to_count/1 + }), + {ok, LastReduce, FoldResult} = couch_view:fold(View, FoldlFun, + FoldAcc, make_key_options( + QueryArgs#view_query_args{start_key=Key, end_key=Key})), + {LastReduce, FoldResult} + end, {{[],[]}, FoldAccInit}, Keys), + finish_view_fold(Req, RowCount, couch_view:reduce_to_count(LastReduce), + FoldResult, [{update_seq,Group#group.current_seq}]) + end). 
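% The accumulator threaded through couch_view:fold/4 above is the 4-tuple
% {Limit, SkipCount, Resp, RowFunAcc}; e.g. with ?limit=2&skip=1 the initial
% value is
%
%   FoldAccInit = {2, 1, undefined, []}
%
% and the fold fun (defined further below) skips one row, starts the response,
% emits two rows and then stops once the limit counter reaches 0.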
+ +output_reduce_view(Req, Db, View, Group, QueryArgs, nil) -> + #view_query_args{ + limit = Limit, + skip = Skip, + group_level = GroupLevel + } = QueryArgs, + CurrentEtag = view_group_etag(Group, Db), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, + QueryArgs, CurrentEtag, Group#group.current_seq, + #reduce_fold_helper_funs{}), + FoldAccInit = {Limit, Skip, undefined, []}, + {ok, {_, _, Resp, _}} = couch_view:fold_reduce(View, + RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + make_key_options(QueryArgs)]), + finish_reduce_fold(Req, Resp) + end); + +output_reduce_view(Req, Db, View, Group, QueryArgs, Keys) -> + #view_query_args{ + limit = Limit, + skip = Skip, + group_level = GroupLevel + } = QueryArgs, + CurrentEtag = view_group_etag(Group, Db, Keys), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, + QueryArgs, CurrentEtag, Group#group.current_seq, + #reduce_fold_helper_funs{}), + {Resp, _RedAcc3} = lists:foldl( + fun(Key, {Resp, RedAcc}) -> + % run the reduce once for each key in keys, with limit etc + % reapplied for each key + FoldAccInit = {Limit, Skip, Resp, RedAcc}, + {_, {_, _, Resp2, RedAcc2}} = couch_view:fold_reduce(View, + RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + make_key_options(QueryArgs#view_query_args{ + start_key=Key, end_key=Key})]), + % Switch to comma + {Resp2, RedAcc2} + end, + {undefined, []}, Keys), % Start with no comma + finish_reduce_fold(Req, Resp, [{update_seq,Group#group.current_seq}]) + end). + +reverse_key_default(?MIN_STR) -> ?MAX_STR; +reverse_key_default(?MAX_STR) -> ?MIN_STR; +reverse_key_default(Key) -> Key. + +get_stale_type(Req) -> + list_to_existing_atom(couch_httpd:qs_value(Req, "stale", "nil")). + +get_reduce_type(Req) -> + list_to_existing_atom(couch_httpd:qs_value(Req, "reduce", "true")). + +load_view(Req, Db, {ViewDesignId, ViewName}, Keys) -> + Stale = get_stale_type(Req), + Reduce = get_reduce_type(Req), + case couch_view:get_map_view(Db, ViewDesignId, ViewName, Stale) of + {ok, View, Group} -> + QueryArgs = parse_view_params(Req, Keys, map), + {map, View, Group, QueryArgs}; + {not_found, _Reason} -> + case couch_view:get_reduce_view(Db, ViewDesignId, ViewName, Stale) of + {ok, ReduceView, Group} -> + case Reduce of + false -> + QueryArgs = parse_view_params(Req, Keys, map_red), + MapView = couch_view:extract_map_view(ReduceView), + {map, MapView, Group, QueryArgs}; + _ -> + QueryArgs = parse_view_params(Req, Keys, reduce), + {reduce, ReduceView, Group, QueryArgs} + end; + {not_found, Reason} -> + throw({not_found, Reason}) + end + end. + +% query_parse_error could be removed +% we wouldn't need to pass the view type, it'd just parse params. +% I'm not sure what to do about the error handling, but +% it might simplify things to have a parse_view_params function +% that doesn't throw(). +parse_view_params(Req, Keys, ViewType) -> + QueryList = couch_httpd:qs(Req), + QueryParams = + lists:foldl(fun({K, V}, Acc) -> + parse_view_param(K, V) ++ Acc + end, [], QueryList), + IsMultiGet = (Keys =/= nil), + Args = #view_query_args{ + view_type=ViewType, + multi_get=IsMultiGet + }, + QueryArgs = lists:foldl(fun({K, V}, Args2) -> + validate_view_query(K, V, Args2) + end, Args, lists:reverse(QueryParams)), % Reverse to match QS order. 
+ + GroupLevel = QueryArgs#view_query_args.group_level, + case {ViewType, GroupLevel, IsMultiGet} of + {reduce, exact, true} -> + QueryArgs; + {reduce, _, false} -> + QueryArgs; + {reduce, _, _} -> + % we can simplify code if we just drop this error message. + Msg = <<"Multi-key fetchs for reduce " + "view must include `group=true`">>, + throw({query_parse_error, Msg}); + _ -> + QueryArgs + end, + QueryArgs. + +parse_view_param("", _) -> + []; +parse_view_param("key", Value) -> + JsonKey = ?JSON_DECODE(Value), + [{start_key, JsonKey}, {end_key, JsonKey}]; +parse_view_param("startkey_docid", Value) -> + [{start_docid, ?l2b(Value)}]; +parse_view_param("endkey_docid", Value) -> + [{end_docid, ?l2b(Value)}]; +parse_view_param("startkey", Value) -> + [{start_key, ?JSON_DECODE(Value)}]; +parse_view_param("endkey", Value) -> + [{end_key, ?JSON_DECODE(Value)}]; +parse_view_param("limit", Value) -> + [{limit, parse_positive_int_param(Value)}]; +parse_view_param("count", _Value) -> + throw({query_parse_error, <<"Query parameter 'count' is now 'limit'.">>}); +parse_view_param("stale", "ok") -> + [{stale, ok}]; +parse_view_param("stale", _Value) -> + throw({query_parse_error, <<"stale only available as stale=ok">>}); +parse_view_param("update", _Value) -> + throw({query_parse_error, <<"update=false is now stale=ok">>}); +parse_view_param("descending", Value) -> + [{descending, parse_bool_param(Value)}]; +parse_view_param("skip", Value) -> + [{skip, parse_int_param(Value)}]; +parse_view_param("group", Value) -> + case parse_bool_param(Value) of + true -> [{group_level, exact}]; + false -> [{group_level, 0}] + end; +parse_view_param("group_level", Value) -> + [{group_level, parse_positive_int_param(Value)}]; +parse_view_param("inclusive_end", Value) -> + [{inclusive_end, parse_bool_param(Value)}]; +parse_view_param("reduce", Value) -> + [{reduce, parse_bool_param(Value)}]; +parse_view_param("include_docs", Value) -> + [{include_docs, parse_bool_param(Value)}]; +parse_view_param("list", Value) -> + [{list, ?l2b(Value)}]; +parse_view_param("callback", _) -> + []; % Verified in the JSON response functions +parse_view_param(Key, Value) -> + [{extra, {Key, Value}}]. 
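% A few illustrative parses (values arrive as raw query-string strings):
%
%   parse_view_param("key", "\"foo\"").     %=> [{start_key, <<"foo">>}, {end_key, <<"foo">>}]
%   parse_view_param("limit", "10").        %=> [{limit, 10}]
%   parse_view_param("group", "true").      %=> [{group_level, exact}]
%   parse_view_param("descending", "true"). %=> [{descending, true}]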
+ +validate_view_query(start_key, Value, Args) -> + case Args#view_query_args.multi_get of + true -> + Msg = <<"Query parameter `start_key` is " + "not compatible with multi-get">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{start_key=Value} + end; +validate_view_query(start_docid, Value, Args) -> + Args#view_query_args{start_docid=Value}; +validate_view_query(end_key, Value, Args) -> + case Args#view_query_args.multi_get of + true-> + Msg = <<"Query parameter `end_key` is " + "not compatible with multi-get">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{end_key=Value} + end; +validate_view_query(end_docid, Value, Args) -> + Args#view_query_args{end_docid=Value}; +validate_view_query(limit, Value, Args) -> + Args#view_query_args{limit=Value}; +validate_view_query(list, Value, Args) -> + Args#view_query_args{list=Value}; +validate_view_query(stale, _, Args) -> + Args; +validate_view_query(descending, true, Args) -> + case Args#view_query_args.direction of + rev -> Args; % Already reversed + fwd -> + Args#view_query_args{ + direction = rev, + start_docid = + reverse_key_default(Args#view_query_args.start_docid), + end_docid = + reverse_key_default(Args#view_query_args.end_docid) + } + end; +validate_view_query(descending, false, Args) -> + Args; % Ignore default condition +validate_view_query(skip, Value, Args) -> + Args#view_query_args{skip=Value}; +validate_view_query(group_level, Value, Args) -> + case Args#view_query_args.view_type of + reduce -> + Args#view_query_args{group_level=Value}; + _ -> + Msg = <<"Invalid URL parameter 'group' or " + " 'group_level' for non-reduce view.">>, + throw({query_parse_error, Msg}) + end; +validate_view_query(inclusive_end, Value, Args) -> + Args#view_query_args{inclusive_end=Value}; +validate_view_query(reduce, _, Args) -> + case Args#view_query_args.view_type of + map -> + Msg = <<"Invalid URL parameter `reduce` for map view.">>, + throw({query_parse_error, Msg}); + _ -> + Args + end; +validate_view_query(include_docs, true, Args) -> + case Args#view_query_args.view_type of + reduce -> + Msg = <<"Query parameter `include_docs` " + "is invalid for reduce views.">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{include_docs=true} + end; +% Use the view_query_args record's default value +validate_view_query(include_docs, _Value, Args) -> + Args; +validate_view_query(extra, _Value, Args) -> + Args. 
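% Note that descending=true (handled above) flips the fold direction to rev and
% passes the start/end doc ids through reverse_key_default/1, which exchanges
% the ?MIN_STR and ?MAX_STR sentinels so the defaults still bracket the
% reversed range, e.g.
%
%   reverse_key_default(?MIN_STR).  %=> ?MAX_STR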
+ +make_view_fold_fun(Req, QueryArgs, Etag, Db, UpdateSeq, TotalViewCount, HelperFuns) -> + #view_fold_helper_funs{ + start_response = StartRespFun, + send_row = SendRowFun, + reduce_count = ReduceCountFun + } = apply_default_helper_funs(HelperFuns), + + #view_query_args{ + include_docs = IncludeDocs + } = QueryArgs, + + fun({{Key, DocId}, Value}, OffsetReds, + {AccLimit, AccSkip, Resp, RowFunAcc}) -> + case {AccLimit, AccSkip, Resp} of + {0, _, _} -> + % we've done "limit" rows, stop foldling + {stop, {0, 0, Resp, RowFunAcc}}; + {_, AccSkip, _} when AccSkip > 0 -> + % just keep skipping + {ok, {AccLimit, AccSkip - 1, Resp, RowFunAcc}}; + {_, _, undefined} -> + % rendering the first row, first we start the response + Offset = ReduceCountFun(OffsetReds), + {ok, Resp2, RowFunAcc0} = StartRespFun(Req, Etag, + TotalViewCount, Offset, RowFunAcc, UpdateSeq), + {Go, RowFunAcc2} = SendRowFun(Resp2, Db, {{Key, DocId}, Value}, + IncludeDocs, RowFunAcc0), + {Go, {AccLimit - 1, 0, Resp2, RowFunAcc2}}; + {AccLimit, _, Resp} when (AccLimit > 0) -> + % rendering all other rows + {Go, RowFunAcc2} = SendRowFun(Resp, Db, {{Key, DocId}, Value}, + IncludeDocs, RowFunAcc), + {Go, {AccLimit - 1, 0, Resp, RowFunAcc2}} + end + end. + +make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, UpdateSeq, HelperFuns) -> + #reduce_fold_helper_funs{ + start_response = StartRespFun, + send_row = SendRowFun + } = apply_default_helper_funs(HelperFuns), + + GroupRowsFun = + fun({_Key1,_}, {_Key2,_}) when GroupLevel == 0 -> + true; + ({Key1,_}, {Key2,_}) + when is_integer(GroupLevel) and is_list(Key1) and is_list(Key2) -> + lists:sublist(Key1, GroupLevel) == lists:sublist(Key2, GroupLevel); + ({Key1,_}, {Key2,_}) -> + Key1 == Key2 + end, + + RespFun = fun + (_Key, _Red, {AccLimit, AccSkip, Resp, RowAcc}) when AccSkip > 0 -> + % keep skipping + {ok, {AccLimit, AccSkip - 1, Resp, RowAcc}}; + (_Key, _Red, {0, _AccSkip, Resp, RowAcc}) -> + % we've exhausted limit rows, stop + {stop, {0, _AccSkip, Resp, RowAcc}}; + + (_Key, Red, {AccLimit, 0, undefined, RowAcc0}) when GroupLevel == 0 -> + % we haven't started responding yet and group=false + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, {null, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; + (_Key, Red, {AccLimit, 0, Resp, RowAcc}) when GroupLevel == 0 -> + % group=false but we've already started the response + {Go, RowAcc2} = SendRowFun(Resp, {null, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp, RowAcc2}}; + + (Key, Red, {AccLimit, 0, undefined, RowAcc0}) + when is_integer(GroupLevel), is_list(Key) -> + % group_level and we haven't responded yet + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, + {lists:sublist(Key, GroupLevel), Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; + (Key, Red, {AccLimit, 0, Resp, RowAcc}) + when is_integer(GroupLevel), is_list(Key) -> + % group_level and we've already started the response + {Go, RowAcc2} = SendRowFun(Resp, + {lists:sublist(Key, GroupLevel), Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp, RowAcc2}}; + + (Key, Red, {AccLimit, 0, undefined, RowAcc0}) -> + % group=true and we haven't responded yet + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, {Key, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; + (Key, Red, {AccLimit, 0, Resp, RowAcc}) -> + % group=true and we've already started the response + {Go, RowAcc2} = SendRowFun(Resp, {Key, Red}, 
RowAcc), + {Go, {AccLimit - 1, 0, Resp, RowAcc2}} + end, + {ok, GroupRowsFun, RespFun}. + +apply_default_helper_funs( + #view_fold_helper_funs{ + start_response = StartResp, + send_row = SendRow + }=Helpers) -> + StartResp2 = case StartResp of + undefined -> fun json_view_start_resp/6; + _ -> StartResp + end, + + SendRow2 = case SendRow of + undefined -> fun send_json_view_row/5; + _ -> SendRow + end, + + Helpers#view_fold_helper_funs{ + start_response = StartResp2, + send_row = SendRow2 + }; + + +apply_default_helper_funs( + #reduce_fold_helper_funs{ + start_response = StartResp, + send_row = SendRow + }=Helpers) -> + StartResp2 = case StartResp of + undefined -> fun json_reduce_start_resp/4; + _ -> StartResp + end, + + SendRow2 = case SendRow of + undefined -> fun send_json_reduce_row/3; + _ -> SendRow + end, + + Helpers#reduce_fold_helper_funs{ + start_response = StartResp2, + send_row = SendRow2 + }. + +make_key_options(#view_query_args{direction = Dir}=QueryArgs) -> + [{dir,Dir} | make_start_key_option(QueryArgs) ++ + make_end_key_option(QueryArgs)]. + +make_start_key_option( + #view_query_args{ + start_key = StartKey, + start_docid = StartDocId}) -> + if StartKey == undefined -> + []; + true -> + [{start_key, {StartKey, StartDocId}}] + end. + +make_end_key_option(#view_query_args{end_key = undefined}) -> + []; +make_end_key_option( + #view_query_args{end_key = EndKey, + end_docid = EndDocId, + inclusive_end = true}) -> + [{end_key, {EndKey, EndDocId}}]; +make_end_key_option( + #view_query_args{ + end_key = EndKey, + end_docid = EndDocId, + inclusive_end = false}) -> + [{end_key_gt, {EndKey,reverse_key_default(EndDocId)}}]. + +json_view_start_resp(Req, Etag, TotalViewCount, Offset, _Acc, UpdateSeq) -> + {ok, Resp} = start_json_response(Req, 200, [{"Etag", Etag}]), + BeginBody = case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + io_lib:format( + "{\"total_rows\":~w,\"update_seq\":~w," + "\"offset\":~w,\"rows\":[\r\n", + [TotalViewCount, UpdateSeq, Offset]); + _Else -> + io_lib:format( + "{\"total_rows\":~w,\"offset\":~w,\"rows\":[\r\n", + [TotalViewCount, Offset]) + end, + {ok, Resp, BeginBody}. + +send_json_view_row(Resp, Db, {{Key, DocId}, Value}, IncludeDocs, RowFront) -> + JsonObj = view_row_obj(Db, {{Key, DocId}, Value}, IncludeDocs), + send_chunk(Resp, RowFront ++ ?JSON_ENCODE(JsonObj)), + {ok, ",\r\n"}. + +json_reduce_start_resp(Req, Etag, _Acc0, UpdateSeq) -> + {ok, Resp} = start_json_response(Req, 200, [{"Etag", Etag}]), + case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + {ok, Resp, io_lib:format("{\"update_seq\":~w,\"rows\":[\r\n",[UpdateSeq])}; + _Else -> + {ok, Resp, "{\"rows\":[\r\n"} + end. + +send_json_reduce_row(Resp, {Key, Value}, RowFront) -> + send_chunk(Resp, RowFront ++ ?JSON_ENCODE({[{key, Key}, {value, Value}]})), + {ok, ",\r\n"}. + +view_group_etag(Group, Db) -> + view_group_etag(Group, Db, nil). + +view_group_etag(#group{sig=Sig,current_seq=CurrentSeq}, _Db, Extra) -> + % ?LOG_ERROR("Group ~p",[Group]), + % This is not as granular as it could be. + % If there are updates to the db that do not effect the view index, + % they will change the Etag. For more granular Etags we'd need to keep + % track of the last Db seq that caused an index change. + couch_httpd:make_etag({Sig, CurrentSeq, Extra}). 
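% A sketch of what make_key_options/1 above produces, assuming query args with
% direction fwd, start_key <<"a">>, end_key <<"m">> and inclusive_end true
% (doc ids abbreviated to StartDocId/EndDocId):
%
%   [{dir, fwd},
%    {start_key, {<<"a">>, StartDocId}},
%    {end_key, {<<"m">>, EndDocId}}]
%
% With inclusive_end=false the last option becomes
%   {end_key_gt, {<<"m">>, reverse_key_default(EndDocId)}}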
+ +% the view row has an error +view_row_obj(_Db, {{Key, error}, Value}, _IncludeDocs) -> + {[{key, Key}, {error, Value}]}; +% include docs in the view output +view_row_obj(Db, {{Key, DocId}, {Props}}, true) -> + Rev = case couch_util:get_value(<<"_rev">>, Props) of + undefined -> + nil; + Rev0 -> + couch_doc:parse_rev(Rev0) + end, + IncludeId = couch_util:get_value(<<"_id">>, Props, DocId), + view_row_with_doc(Db, {{Key, DocId}, {Props}}, {IncludeId, Rev}); +view_row_obj(Db, {{Key, DocId}, Value}, true) -> + view_row_with_doc(Db, {{Key, DocId}, Value}, {DocId, nil}); +% the normal case for rendering a view row +view_row_obj(_Db, {{Key, DocId}, Value}, _IncludeDocs) -> + {[{id, DocId}, {key, Key}, {value, Value}]}. + +view_row_with_doc(Db, {{Key, DocId}, Value}, IdRev) -> + {[{id, DocId}, {key, Key}, {value, Value}] ++ doc_member(Db, IdRev)}. + +doc_member(Db, {DocId, Rev}) -> + ?LOG_DEBUG("Include Doc: ~p ~p", [DocId, Rev]), + case (catch couch_httpd_db:couch_doc_open(Db, DocId, Rev, [])) of + #doc{} = Doc -> + JsonDoc = couch_doc:to_json_obj(Doc, []), + [{doc, JsonDoc}]; + _Else -> + [{doc, null}] + end. + +finish_view_fold(Req, TotalRows, Offset, FoldResult) -> + finish_view_fold(Req, TotalRows, Offset, FoldResult, []). + +finish_view_fold(Req, TotalRows, Offset, FoldResult, Fields) -> + case FoldResult of + {_, _, undefined, _} -> + % nothing found in the view or keys, nothing has been returned + % send empty view + send_json(Req, 200, {[ + {total_rows, TotalRows}, + {offset, Offset}, + {rows, []} + ] ++ Fields}); + {_, _, Resp, _} -> + % end the view + send_chunk(Resp, "\r\n]}"), + end_json_response(Resp) + end. + +finish_reduce_fold(Req, Resp) -> + finish_reduce_fold(Req, Resp, []). + +finish_reduce_fold(Req, Resp, Fields) -> + case Resp of + undefined -> + send_json(Req, 200, {[ + {rows, []} + ] ++ Fields}); + Resp -> + send_chunk(Resp, "\r\n]}"), + end_json_response(Resp) + end. + +parse_bool_param(Val) -> + case string:to_lower(Val) of + "true" -> true; + "false" -> false; + _ -> + Msg = io_lib:format("Invalid boolean parameter: ~p", [Val]), + throw({query_parse_error, ?l2b(Msg)}) + end. + +parse_int_param(Val) -> + case (catch list_to_integer(Val)) of + IntVal when is_integer(IntVal) -> + IntVal; + _ -> + Msg = io_lib:format("Invalid value for integer parameter: ~p", [Val]), + throw({query_parse_error, ?l2b(Msg)}) + end. + +parse_positive_int_param(Val) -> + case parse_int_param(Val) of + IntVal when IntVal >= 0 -> + IntVal; + _ -> + Fmt = "Invalid value for positive integer parameter: ~p", + Msg = io_lib:format(Fmt, [Val]), + throw({query_parse_error, ?l2b(Msg)}) + end. + diff --git a/apps/couch/src/couch_js_functions.hrl b/apps/couch/src/couch_js_functions.hrl new file mode 100644 index 00000000..1f314f6e --- /dev/null +++ b/apps/couch/src/couch_js_functions.hrl @@ -0,0 +1,97 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. 
+ +-define(AUTH_DB_DOC_VALIDATE_FUNCTION, <<" + function(newDoc, oldDoc, userCtx) { + if (newDoc._deleted === true) { + // allow deletes by admins and matching users + // without checking the other fields + if ((userCtx.roles.indexOf('_admin') !== -1) || + (userCtx.name == oldDoc.name)) { + return; + } else { + throw({forbidden: 'Only admins may delete other user docs.'}); + } + } + + if ((oldDoc && oldDoc.type !== 'user') || newDoc.type !== 'user') { + throw({forbidden : 'doc.type must be user'}); + } // we only allow user docs for now + + if (!newDoc.name) { + throw({forbidden: 'doc.name is required'}); + } + + if (!(newDoc.roles && (typeof newDoc.roles.length !== 'undefined'))) { + throw({forbidden: 'doc.roles must be an array'}); + } + + if (newDoc._id !== ('org.couchdb.user:' + newDoc.name)) { + throw({ + forbidden: 'Doc ID must be of the form org.couchdb.user:name' + }); + } + + if (oldDoc) { // validate all updates + if (oldDoc.name !== newDoc.name) { + throw({forbidden: 'Usernames can not be changed.'}); + } + } + + if (newDoc.password_sha && !newDoc.salt) { + throw({ + forbidden: 'Users with password_sha must have a salt.' + + 'See /_utils/script/couch.js for example code.' + }); + } + + if (userCtx.roles.indexOf('_admin') === -1) { + if (oldDoc) { // validate non-admin updates + if (userCtx.name !== newDoc.name) { + throw({ + forbidden: 'You may only update your own user document.' + }); + } + // validate role updates + var oldRoles = oldDoc.roles.sort(); + var newRoles = newDoc.roles.sort(); + + if (oldRoles.length !== newRoles.length) { + throw({forbidden: 'Only _admin may edit roles'}); + } + + for (var i = 0; i < oldRoles.length; i++) { + if (oldRoles[i] !== newRoles[i]) { + throw({forbidden: 'Only _admin may edit roles'}); + } + } + } else if (newDoc.roles.length > 0) { + throw({forbidden: 'Only _admin may set roles'}); + } + } + + // no system roles in users db + for (var i = 0; i < newDoc.roles.length; i++) { + if (newDoc.roles[i][0] === '_') { + throw({ + forbidden: + 'No system roles (starting with underscore) in users db.' + }); + } + } + + // no system names as names + if (newDoc.name[0] === '_') { + throw({forbidden: 'Username may not start with underscore.'}); + } + } +">>). diff --git a/apps/couch/src/couch_key_tree.erl b/apps/couch/src/couch_key_tree.erl new file mode 100644 index 00000000..4fe09bf3 --- /dev/null +++ b/apps/couch/src/couch_key_tree.erl @@ -0,0 +1,329 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_key_tree). + +-export([merge/2, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]). +-export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2, + get_all_leafs_full/1,stem/2,map_leafs/2]). + +% a key tree looks like this: +% Tree -> [] or [{Key, Value, ChildTree} | SiblingTree] +% ChildTree -> Tree +% SiblingTree -> [] or [{SiblingKey, Value, Tree} | Tree] +% And each Key < SiblingKey + + +% partial trees arranged by how much they are cut off. 
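% For example, a stemmed tree whose surviving root sits at position 2 and which
% has one branch ending in two sibling leafs could be represented as (keys and
% values are placeholders):
%
%   [{2, {"2a", Val2a, [{"3a", Val3a, []},
%                       {"3b", Val3b, []}]}}]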
+ +merge(A, B) -> + {Merged, HasConflicts} = + lists:foldl( + fun(InsertTree, {AccTrees, AccConflicts}) -> + {ok, Merged, Conflicts} = merge_one(AccTrees, InsertTree, [], false), + {Merged, Conflicts or AccConflicts} + end, + {A, false}, B), + if HasConflicts or + ((length(Merged) =/= length(A)) and (length(Merged) =/= length(B))) -> + Conflicts = conflicts; + true -> + Conflicts = no_conflicts + end, + {lists:sort(Merged), Conflicts}. + +merge_one([], Insert, OutAcc, ConflictsAcc) -> + {ok, [Insert | OutAcc], ConflictsAcc}; +merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, OutAcc, ConflictsAcc) -> + if Start =< StartInsert -> + StartA = Start, + StartB = StartInsert, + TreeA = Tree, + TreeB = TreeInsert; + true -> + StartB = Start, + StartA = StartInsert, + TreeB = Tree, + TreeA = TreeInsert + end, + case merge_at([TreeA], StartB - StartA, TreeB) of + {ok, [CombinedTrees], Conflicts} -> + merge_one(Rest, {StartA, CombinedTrees}, OutAcc, Conflicts or ConflictsAcc); + no -> + merge_one(Rest, {StartB, TreeB}, [{StartA, TreeA} | OutAcc], ConflictsAcc) + end. + +merge_at([], _Place, _Insert) -> + no; +merge_at([{Key, Value, SubTree}|Sibs], 0, {InsertKey, InsertValue, InsertSubTree}) -> + if Key == InsertKey -> + {Merge, Conflicts} = merge_simple(SubTree, InsertSubTree), + {ok, [{Key, Value, Merge} | Sibs], Conflicts}; + true -> + case merge_at(Sibs, 0, {InsertKey, InsertValue, InsertSubTree}) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, SubTree} | Merged], Conflicts}; + no -> + no + end + end; +merge_at([{Key, Value, SubTree}|Sibs], Place, Insert) -> + case merge_at(SubTree, Place - 1,Insert) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, Merged} | Sibs], Conflicts}; + no -> + case merge_at(Sibs, Place, Insert) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, SubTree} | Merged], Conflicts}; + no -> + no + end + end. + +% key tree functions +merge_simple([], B) -> + {B, false}; +merge_simple(A, []) -> + {A, false}; +merge_simple([ATree | ANextTree], [BTree | BNextTree]) -> + {AKey, AValue, ASubTree} = ATree, + {BKey, _BValue, BSubTree} = BTree, + if + AKey == BKey -> + %same key + {MergedSubTree, Conflict1} = merge_simple(ASubTree, BSubTree), + {MergedNextTree, Conflict2} = merge_simple(ANextTree, BNextTree), + {[{AKey, AValue, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; + AKey < BKey -> + {MTree, _} = merge_simple(ANextTree, [BTree | BNextTree]), + {[ATree | MTree], true}; + true -> + {MTree, _} = merge_simple([ATree | ANextTree], BNextTree), + {[BTree | MTree], true} + end. + +find_missing(_Tree, []) -> + []; +find_missing([], SeachKeys) -> + SeachKeys; +find_missing([{Start, {Key, Value, SubTree}} | RestTree], SeachKeys) -> + PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Start], + ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Start], + Missing = find_missing_simple(Start, [{Key, Value, SubTree}], PossibleKeys), + find_missing(RestTree, ImpossibleKeys ++ Missing). 
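A rough sketch of merge/2 on such stubs (atoms standing in for revision ids): grafting a longer path onto an existing branch merges cleanly, while an unrelated sibling at the same position is reported as a conflict.

    couch_key_tree:merge([{1, {a, x, [{b, y, []}]}}],
                         [{2, {b, y, [{c, z, []}]}}]).
    % => {[{1, {a, x, [{b, y, [{c, z, []}]}]}}], no_conflicts}

    couch_key_tree:merge([{1, {a, x, []}}], [{1, {d, x, []}}]).
    % => {[{1, {a, x, []}}, {1, {d, x, []}}], conflicts}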
+ +find_missing_simple(_Pos, _Tree, []) -> + []; +find_missing_simple(_Pos, [], SeachKeys) -> + SeachKeys; +find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SeachKeys) -> + PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Pos], + ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Pos], + + SrcKeys2 = PossibleKeys -- [{Pos, Key}], + SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2), + ImpossibleKeys ++ find_missing_simple(Pos, RestTree, SrcKeys3). + + +filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) -> + {FilteredAcc, RemovedKeysAcc}; +filter_leafs([{Pos, [{LeafKey, _}|_]} = Path |Rest], Keys, FilteredAcc, RemovedKeysAcc) -> + FilteredKeys = lists:delete({Pos, LeafKey}, Keys), + if FilteredKeys == Keys -> + % this leaf is not a key we are looking to remove + filter_leafs(Rest, Keys, [Path | FilteredAcc], RemovedKeysAcc); + true -> + % this did match a key, remove both the node and the input key + filter_leafs(Rest, FilteredKeys, FilteredAcc, [{Pos, LeafKey} | RemovedKeysAcc]) + end. + +% Removes any branches from the tree whose leaf node(s) are in the Keys +remove_leafs(Trees, Keys) -> + % flatten each branch in a tree into a tree path + Paths = get_all_leafs_full(Trees), + + % filter out any that are in the keys list. + {FilteredPaths, RemovedKeys} = filter_leafs(Paths, Keys, [], []), + + % convert paths back to trees + NewTree = lists:foldl( + fun({PathPos, Path},TreeAcc) -> + [SingleTree] = lists:foldl( + fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), + {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]), + NewTrees + end, [], FilteredPaths), + {NewTree, RemovedKeys}. + + +% get the leafs in the tree matching the keys. The matching key nodes can be +% leafs or an inner nodes. If an inner node, then the leafs for that node +% are returned. +get_key_leafs(Tree, Keys) -> + get_key_leafs(Tree, Keys, []). + +get_key_leafs(_, [], Acc) -> + {Acc, []}; +get_key_leafs([], Keys, Acc) -> + {Acc, Keys}; +get_key_leafs([{Pos, Tree}|Rest], Keys, Acc) -> + {Gotten, RemainingKeys} = get_key_leafs_simple(Pos, [Tree], Keys, []), + get_key_leafs(Rest, RemainingKeys, Gotten ++ Acc). + +get_key_leafs_simple(_Pos, _Tree, [], _KeyPathAcc) -> + {[], []}; +get_key_leafs_simple(_Pos, [], KeysToGet, _KeyPathAcc) -> + {[], KeysToGet}; +get_key_leafs_simple(Pos, [{Key, _Value, SubTree}=Tree | RestTree], KeysToGet, KeyPathAcc) -> + case lists:delete({Pos, Key}, KeysToGet) of + KeysToGet -> % same list, key not found + {LeafsFound, KeysToGet2} = get_key_leafs_simple(Pos + 1, SubTree, KeysToGet, [Key | KeyPathAcc]), + {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc), + {LeafsFound ++ RestLeafsFound, KeysRemaining}; + KeysToGet2 -> + LeafsFound = get_all_leafs_simple(Pos, [Tree], KeyPathAcc), + LeafKeysFound = [LeafKeyFound || {LeafKeyFound, _} <- LeafsFound], + KeysToGet2 = KeysToGet2 -- LeafKeysFound, + {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc), + {LeafsFound ++ RestLeafsFound, KeysRemaining} + end. + +get(Tree, KeysToGet) -> + {KeyPaths, KeysNotFound} = get_full_key_paths(Tree, KeysToGet), + FixedResults = [ {Value, {Pos, [Key0 || {Key0, _} <- Path]}} || {Pos, [{_Key, Value}|_]=Path} <- KeyPaths], + {FixedResults, KeysNotFound}. + +get_full_key_paths(Tree, Keys) -> + get_full_key_paths(Tree, Keys, []). 
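Two small sketches of the lookup and removal helpers above, on the same kind of toy tree (atoms in place of revision ids):

    couch_key_tree:find_missing([{1, {a, x, [{b, y, []}]}}], [{2, b}, {2, c}]).
    % => [{2, c}]        ({2, b} is already in the tree)

    couch_key_tree:remove_leafs([{1, {a, x, [{b, y, []}, {c, z, []}]}}], [{2, b}]).
    % => {[{1, {a, x, [{c, z, []}]}}], [{2, b}]}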
+ +get_full_key_paths(_, [], Acc) -> + {Acc, []}; +get_full_key_paths([], Keys, Acc) -> + {Acc, Keys}; +get_full_key_paths([{Pos, Tree}|Rest], Keys, Acc) -> + {Gotten, RemainingKeys} = get_full_key_paths(Pos, [Tree], Keys, []), + get_full_key_paths(Rest, RemainingKeys, Gotten ++ Acc). + + +get_full_key_paths(_Pos, _Tree, [], _KeyPathAcc) -> + {[], []}; +get_full_key_paths(_Pos, [], KeysToGet, _KeyPathAcc) -> + {[], KeysToGet}; +get_full_key_paths(Pos, [{KeyId, Value, SubTree} | RestTree], KeysToGet, KeyPathAcc) -> + KeysToGet2 = KeysToGet -- [{Pos, KeyId}], + CurrentNodeResult = + case length(KeysToGet2) =:= length(KeysToGet) of + true -> % not in the key list. + []; + false -> % this node is the key list. return it + [{Pos, [{KeyId, Value} | KeyPathAcc]}] + end, + {KeysGotten, KeysRemaining} = get_full_key_paths(Pos + 1, SubTree, KeysToGet2, [{KeyId, Value} | KeyPathAcc]), + {KeysGotten2, KeysRemaining2} = get_full_key_paths(Pos, RestTree, KeysRemaining, KeyPathAcc), + {CurrentNodeResult ++ KeysGotten ++ KeysGotten2, KeysRemaining2}. + +get_all_leafs_full(Tree) -> + get_all_leafs_full(Tree, []). + +get_all_leafs_full([], Acc) -> + Acc; +get_all_leafs_full([{Pos, Tree} | Rest], Acc) -> + get_all_leafs_full(Rest, get_all_leafs_full_simple(Pos, [Tree], []) ++ Acc). + +get_all_leafs_full_simple(_Pos, [], _KeyPathAcc) -> + []; +get_all_leafs_full_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) -> + [{Pos, [{KeyId, Value} | KeyPathAcc]} | get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc)]; +get_all_leafs_full_simple(Pos, [{KeyId, Value, SubTree} | RestTree], KeyPathAcc) -> + get_all_leafs_full_simple(Pos + 1, SubTree, [{KeyId, Value} | KeyPathAcc]) ++ get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc). + +get_all_leafs(Trees) -> + get_all_leafs(Trees, []). + +get_all_leafs([], Acc) -> + Acc; +get_all_leafs([{Pos, Tree}|Rest], Acc) -> + get_all_leafs(Rest, get_all_leafs_simple(Pos, [Tree], []) ++ Acc). + +get_all_leafs_simple(_Pos, [], _KeyPathAcc) -> + []; +get_all_leafs_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) -> + [{Value, {Pos, [KeyId | KeyPathAcc]}} | get_all_leafs_simple(Pos, RestTree, KeyPathAcc)]; +get_all_leafs_simple(Pos, [{KeyId, _Value, SubTree} | RestTree], KeyPathAcc) -> + get_all_leafs_simple(Pos + 1, SubTree, [KeyId | KeyPathAcc]) ++ get_all_leafs_simple(Pos, RestTree, KeyPathAcc). + + +count_leafs([]) -> + 0; +count_leafs([{_Pos,Tree}|Rest]) -> + count_leafs_simple([Tree]) + count_leafs(Rest). + +count_leafs_simple([]) -> + 0; +count_leafs_simple([{_Key, _Value, []} | RestTree]) -> + 1 + count_leafs_simple(RestTree); +count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) -> + count_leafs_simple(SubTree) + count_leafs_simple(RestTree). + + +map(_Fun, []) -> + []; +map(Fun, [{Pos, Tree}|Rest]) -> + case erlang:fun_info(Fun, arity) of + {arity, 2} -> + [NewTree] = map_simple(fun(A,B,_C) -> Fun(A,B) end, Pos, [Tree]), + [{Pos, NewTree} | map(Fun, Rest)]; + {arity, 3} -> + [NewTree] = map_simple(Fun, Pos, [Tree]), + [{Pos, NewTree} | map(Fun, Rest)] + end. + +map_simple(_Fun, _Pos, []) -> + []; +map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> + Value2 = Fun({Pos, Key}, Value, + if SubTree == [] -> leaf; true -> branch end), + [{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)]. + + +map_leafs(_Fun, []) -> + []; +map_leafs(Fun, [{Pos, Tree}|Rest]) -> + [NewTree] = map_leafs_simple(Fun, Pos, [Tree]), + [{Pos, NewTree} | map_leafs(Fun, Rest)]. 
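And the leaf accessors, again on a toy tree; each returned path is leaf-first, and map/2 above additionally accepts an arity-3 fun that is told whether a node is a leaf or a branch:

    Tree = [{1, {a, x, [{b, y, []}, {c, z, []}]}}].
    couch_key_tree:get_all_leafs(Tree).
    % => [{y, {2, [b, a]}}, {z, {2, [c, a]}}]
    couch_key_tree:count_leafs(Tree).
    % => 2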
+ +map_leafs_simple(_Fun, _Pos, []) -> + []; +map_leafs_simple(Fun, Pos, [{Key, Value, []} | RestTree]) -> + Value2 = Fun({Pos, Key}, Value), + [{Key, Value2, []} | map_leafs_simple(Fun, Pos, RestTree)]; +map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> + [{Key, Value, map_leafs_simple(Fun, Pos + 1, SubTree)} | map_leafs_simple(Fun, Pos, RestTree)]. + + +stem(Trees, Limit) -> + % flatten each branch in a tree into a tree path + Paths = get_all_leafs_full(Trees), + + Paths2 = [{Pos, lists:sublist(Path, Limit)} || {Pos, Path} <- Paths], + + % convert paths back to trees + lists:foldl( + fun({PathPos, Path},TreeAcc) -> + [SingleTree] = lists:foldl( + fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), + {NewTrees, _} = merge(TreeAcc, [{PathPos + 1 - length(Path), SingleTree}]), + NewTrees + end, [], Paths2). + +% Tests moved to test/etap/06?-*.t + diff --git a/apps/couch/src/couch_log.erl b/apps/couch/src/couch_log.erl new file mode 100644 index 00000000..2d62cbb5 --- /dev/null +++ b/apps/couch/src/couch_log.erl @@ -0,0 +1,151 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_log). +-behaviour(gen_event). + +-export([start_link/0,stop/0]). +-export([debug_on/0,info_on/0,get_level/0,get_level_integer/0, set_level/1]). +-export([init/1, handle_event/2, terminate/2, code_change/3, handle_info/2, handle_call/2]). +-export([read/2]). + +-define(LEVEL_ERROR, 3). +-define(LEVEL_INFO, 2). +-define(LEVEL_DEBUG, 1). +-define(LEVEL_TMI, 0). + +level_integer(error) -> ?LEVEL_ERROR; +level_integer(info) -> ?LEVEL_INFO; +level_integer(debug) -> ?LEVEL_DEBUG; +level_integer(tmi) -> ?LEVEL_TMI; +level_integer(_Else) -> ?LEVEL_ERROR. % anything else default to ERROR level + +level_atom(?LEVEL_ERROR) -> error; +level_atom(?LEVEL_INFO) -> info; +level_atom(?LEVEL_DEBUG) -> debug; +level_atom(?LEVEL_TMI) -> tmi. + + +start_link() -> + couch_event_sup:start_link({local, couch_log}, error_logger, couch_log, []). + +stop() -> + couch_event_sup:stop(couch_log). + +init([]) -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. + ok = couch_config:register( + fun("log", "file") -> + ?MODULE:stop(); + ("log", "level") -> + ?MODULE:stop(); + ("log", "include_sasl") -> + ?MODULE:stop() + end), + + Filename = couch_config:get("log", "file", "couchdb.log"), + Level = level_integer(list_to_atom(couch_config:get("log", "level", "info"))), + Sasl = list_to_atom(couch_config:get("log", "include_sasl", "true")), + + case ets:info(?MODULE) of + undefined -> ets:new(?MODULE, [named_table]); + _ -> ok + end, + ets:insert(?MODULE, {level, Level}), + + {ok, Fd} = file:open(Filename, [append]), + {ok, {Fd, Level, Sasl}}. + +debug_on() -> + get_level_integer() =< ?LEVEL_DEBUG. + +info_on() -> + get_level_integer() =< ?LEVEL_INFO. + +set_level(LevelAtom) -> + set_level_integer(level_integer(LevelAtom)). + +get_level() -> + level_atom(get_level_integer()). 
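init/1 above pulls its settings from the [log] section of the ini files and deliberately stops itself whenever one of them changes, relying on couch_server_sup to restart it with the new values. A minimal section using the same defaults the code falls back to:

    [log]
    file = couchdb.log
    level = info
    include_sasl = true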
+ +get_level_integer() -> + try + ets:lookup_element(?MODULE, level, 2) + catch error:badarg -> + ?LEVEL_ERROR + end. + +set_level_integer(Int) -> + gen_event:call(error_logger, couch_log, {set_level_integer, Int}). + +handle_event({Pid, couch_error, {Format, Args}}, {Fd, _LogLevel, _Sasl}=State) -> + log(Fd, Pid, error, Format, Args), + {ok, State}; +handle_event({Pid, couch_info, {Format, Args}}, {Fd, LogLevel, _Sasl}=State) +when LogLevel =< ?LEVEL_INFO -> + log(Fd, Pid, info, Format, Args), + {ok, State}; +handle_event({Pid, couch_debug, {Format, Args}}, {Fd, LogLevel, _Sasl}=State) +when LogLevel =< ?LEVEL_DEBUG -> + log(Fd, Pid, debug, Format, Args), + {ok, State}; +handle_event({error_report, _, {Pid, _, _}}=Event, {Fd, _LogLevel, Sasl}=State) +when Sasl =/= false -> + log(Fd, Pid, error, "~p", [Event]), + {ok, State}; +handle_event({error, _, {Pid, Format, Args}}, {Fd, _LogLevel, Sasl}=State) +when Sasl =/= false -> + log(Fd, Pid, error, Format, Args), + {ok, State}; +handle_event({_, _, {Pid, _, _}}=Event, {Fd, LogLevel, _Sasl}=State) +when LogLevel =< ?LEVEL_TMI -> + % log every remaining event if tmi! + log(Fd, Pid, tmi, "~p", [Event]), + {ok, State}; +handle_event(_Event, State) -> + {ok, State}. + +handle_call({set_level_integer, NewLevel}, {Fd, _LogLevel, Sasl}) -> + ets:insert(?MODULE, {level, NewLevel}), + {ok, ok, {Fd, NewLevel, Sasl}}. + +handle_info(_Info, State) -> + {ok, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Arg, {Fd, _LoggingLevel, _Sasl}) -> + file:close(Fd). + +log(Fd, Pid, Level, Format, Args) -> + Msg = io_lib:format(Format, Args), + ok = io:format("[~s] [~p] ~s~n", [Level, Pid, Msg]), % dump to console too + Msg2 = re:replace(lists:flatten(Msg),"\\r\\n|\\r|\\n", "\r\n", + [global, {return, list}]), + ok = io:format(Fd, "[~s] [~s] [~p] ~s\r~n\r~n", [httpd_util:rfc1123_date(), Level, Pid, Msg2]). + +read(Bytes, Offset) -> + LogFileName = couch_config:get("log", "file"), + LogFileSize = couch_util:file_read_size(LogFileName), + + {ok, Fd} = file:open(LogFileName, [read]), + Start = lists:max([LogFileSize - Bytes, 0]) + Offset, + + % TODO: truncate chopped first line + % TODO: make streaming + + {ok, Chunk} = file:pread(Fd, Start, LogFileSize), + Chunk. diff --git a/apps/couch/src/couch_native_process.erl b/apps/couch/src/couch_native_process.erl new file mode 100644 index 00000000..b512f712 --- /dev/null +++ b/apps/couch/src/couch_native_process.erl @@ -0,0 +1,402 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% +% You may obtain a copy of the License at +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +% either express or implied. +% +% See the License for the specific language governing permissions +% and limitations under the License. +% +% This file drew much inspiration from erlview, which was written by and +% copyright Michael McDaniel [http://autosys.us], and is also under APL 2.0 +% +% +% This module provides the smallest possible native view-server. 
+% With this module in-place, you can add the following to your couch INI files: +% [native_query_servers] +% erlang={couch_native_process, start_link, []} +% +% Which will then allow following example map function to be used: +% +% fun({Doc}) -> +% % Below, we emit a single record - the _id as key, null as value +% DocId = couch_util:get_value(Doc, <<"_id">>, null), +% Emit(DocId, null) +% end. +% +% which should be roughly the same as the javascript: +% emit(doc._id, null); +% +% This module exposes enough functions such that a native erlang server can +% act as a fully-fleged view server, but no 'helper' functions specifically +% for simplifying your erlang view code. It is expected other third-party +% extensions will evolve which offer useful layers on top of this view server +% to help simplify your view code. +-module(couch_native_process). +-behaviour(gen_server). + +-export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, + handle_info/2]). +-export([set_timeout/2, prompt/2]). + +-define(STATE, native_proc_state). +-record(evstate, {ddocs, funs=[], query_config=[], list_pid=nil, timeout=5000}). + +-include("couch_db.hrl"). + +start_link() -> + gen_server:start_link(?MODULE, [], []). + +% this is a bit messy, see also couch_query_servers handle_info +% stop(_Pid) -> +% ok. + +set_timeout(Pid, TimeOut) -> + gen_server:call(Pid, {set_timeout, TimeOut}). + +prompt(Pid, Data) when is_list(Data) -> + gen_server:call(Pid, {prompt, Data}). + +% gen_server callbacks +init([]) -> + {ok, #evstate{ddocs=dict:new()}}. + +handle_call({set_timeout, TimeOut}, _From, State) -> + {reply, ok, State#evstate{timeout=TimeOut}}; + +handle_call({prompt, Data}, _From, State) -> + ?LOG_DEBUG("Prompt native qs: ~s",[?JSON_ENCODE(Data)]), + {NewState, Resp} = try run(State, to_binary(Data)) of + {S, R} -> {S, R} + catch + throw:{error, Why} -> + {State, [<<"error">>, Why, Why]} + end, + + case Resp of + {error, Reason} -> + Msg = io_lib:format("couch native server error: ~p", [Reason]), + {reply, [<<"error">>, <<"native_query_server">>, list_to_binary(Msg)], NewState}; + [<<"error">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {reply, [<<"error">> | Rest], NewState}; + [<<"fatal">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {stop, fatal, [<<"error">> | Rest], NewState}; + Resp -> + {reply, Resp, NewState} + end. + +handle_cast(foo, State) -> {noreply, State}. +handle_info({'EXIT',_,normal}, State) -> {noreply, State}; +handle_info({'EXIT',_,Reason}, State) -> + {stop, Reason, State}. +terminate(_Reason, _State) -> ok. +code_change(_OldVersion, State, _Extra) -> {ok, State}. + +run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> + Pid ! {self(), list_row, Row}, + receive + {Pid, chunks, Data} -> + {State, [<<"chunks">>, Data]}; + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, [<<"end">>, Data]} + after State#evstate.timeout -> + throw({timeout, list_row}) + end; +run(#evstate{list_pid=Pid}=State, [<<"list_end">>]) when is_pid(Pid) -> + Pid ! 
{self(), list_end}, + Resp = + receive + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + [<<"end">>, Data] + after State#evstate.timeout -> + throw({timeout, list_end}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, Resp}; +run(#evstate{list_pid=Pid}=State, _Command) when is_pid(Pid) -> + {State, [<<"error">>, list_error, list_error]}; +run(#evstate{ddocs=DDocs}, [<<"reset">>]) -> + {#evstate{ddocs=DDocs}, true}; +run(#evstate{ddocs=DDocs}, [<<"reset">>, QueryConfig]) -> + {#evstate{ddocs=DDocs, query_config=QueryConfig}, true}; +run(#evstate{funs=Funs}=State, [<<"add_fun">> , BinFunc]) -> + FunInfo = makefun(State, BinFunc), + {State#evstate{funs=Funs ++ [FunInfo]}, true}; +run(State, [<<"map_doc">> , Doc]) -> + Resp = lists:map(fun({Sig, Fun}) -> + erlang:put(Sig, []), + Fun(Doc), + lists:reverse(erlang:get(Sig)) + end, State#evstate.funs), + {State, Resp}; +run(State, [<<"reduce">>, Funs, KVs]) -> + {Keys, Vals} = + lists:foldl(fun([K, V], {KAcc, VAcc}) -> + {[K | KAcc], [V | VAcc]} + end, {[], []}, KVs), + Keys2 = lists:reverse(Keys), + Vals2 = lists:reverse(Vals), + {State, catch reduce(State, Funs, Keys2, Vals2, false)}; +run(State, [<<"rereduce">>, Funs, Vals]) -> + {State, catch reduce(State, Funs, null, Vals, true)}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, <<"new">>, DDocId, DDoc]) -> + DDocs2 = store_ddoc(DDocs, DDocId, DDoc), + {State#evstate{ddocs=DDocs2}, true}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, DDocId | Rest]) -> + DDoc = load_ddoc(DDocs, DDocId), + ddoc(State, DDoc, Rest); +run(_, Unknown) -> + ?LOG_ERROR("Native Process: Unknown command: ~p~n", [Unknown]), + throw({error, unknown_command}). + +ddoc(State, {DDoc}, [FunPath, Args]) -> + % load fun from the FunPath + BFun = lists:foldl(fun + (Key, {Props}) when is_list(Props) -> + couch_util:get_value(Key, Props, nil); + (_Key, Fun) when is_binary(Fun) -> + Fun; + (_Key, nil) -> + throw({error, not_found}); + (_Key, _Fun) -> + throw({error, malformed_ddoc}) + end, {DDoc}, FunPath), + ddoc(State, makefun(State, BFun, {DDoc}), FunPath, Args). + +ddoc(State, {_, Fun}, [<<"validate_doc_update">>], Args) -> + {State, (catch apply(Fun, Args))}; +ddoc(State, {_, Fun}, [<<"filters">>|_], [Docs, Req]) -> + Resp = lists:map(fun(Doc) -> (catch Fun(Doc, Req)) =:= true end, Docs), + {State, [true, Resp]}; +ddoc(State, {_, Fun}, [<<"shows">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + FunResp when is_list(FunResp) -> + FunResp; + {FunResp} -> + [<<"resp">>, {FunResp}]; + FunResp -> + FunResp + end, + {State, Resp}; +ddoc(State, {_, Fun}, [<<"updates">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + [JsonDoc, JsonResp] -> + [<<"up">>, JsonDoc, JsonResp] + end, + {State, Resp}; +ddoc(State, {Sig, Fun}, [<<"lists">>|_], Args) -> + Self = self(), + SpawnFun = fun() -> + LastChunk = (catch apply(Fun, Args)), + case start_list_resp(Self, Sig) of + started -> + receive + {Self, list_row, _Row} -> ignore; + {Self, list_end} -> ignore + after State#evstate.timeout -> + throw({timeout, list_cleanup_pid}) + end; + _ -> + ok + end, + LastChunks = + case erlang:get(Sig) of + undefined -> [LastChunk]; + OtherChunks -> [LastChunk | OtherChunks] + end, + Self ! 
{self(), list_end, lists:reverse(LastChunks)} + end, + erlang:put(do_trap, process_flag(trap_exit, true)), + Pid = spawn_link(SpawnFun), + Resp = + receive + {Pid, start, Chunks, JsonResp} -> + [<<"start">>, Chunks, JsonResp] + after State#evstate.timeout -> + throw({timeout, list_start}) + end, + {State#evstate{list_pid=Pid}, Resp}. + +store_ddoc(DDocs, DDocId, DDoc) -> + dict:store(DDocId, DDoc, DDocs). +load_ddoc(DDocs, DDocId) -> + try dict:fetch(DDocId, DDocs) of + {DDoc} -> {DDoc} + catch + _:_Else -> throw({error, ?l2b(io_lib:format("Native Query Server missing DDoc with Id: ~s",[DDocId]))}) + end. + +bindings(State, Sig) -> + bindings(State, Sig, nil). +bindings(State, Sig, DDoc) -> + Self = self(), + + Log = fun(Msg) -> + ?LOG_INFO(Msg, []) + end, + + Emit = fun(Id, Value) -> + Curr = erlang:get(Sig), + erlang:put(Sig, [[Id, Value] | Curr]) + end, + + Start = fun(Headers) -> + erlang:put(list_headers, Headers) + end, + + Send = fun(Chunk) -> + Curr = + case erlang:get(Sig) of + undefined -> []; + Else -> Else + end, + erlang:put(Sig, [Chunk | Curr]) + end, + + GetRow = fun() -> + case start_list_resp(Self, Sig) of + started -> + ok; + _ -> + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), chunks, lists:reverse(Chunks)} + end, + erlang:put(Sig, []), + receive + {Self, list_row, Row} -> Row; + {Self, list_end} -> nil + after State#evstate.timeout -> + throw({timeout, list_pid_getrow}) + end + end, + + FoldRows = fun(Fun, Acc) -> foldrows(GetRow, Fun, Acc) end, + + Bindings = [ + {'Log', Log}, + {'Emit', Emit}, + {'Start', Start}, + {'Send', Send}, + {'GetRow', GetRow}, + {'FoldRows', FoldRows} + ], + case DDoc of + {_Props} -> + Bindings ++ [{'DDoc', DDoc}]; + _Else -> Bindings + end. + +% thanks to erlview, via: +% http://erlang.org/pipermail/erlang-questions/2003-November/010544.html +makefun(State, Source) -> + Sig = couch_util:md5(Source), + BindFuns = bindings(State, Sig), + {Sig, makefun(State, Source, BindFuns)}. +makefun(State, Source, {DDoc}) -> + Sig = couch_util:md5(lists:flatten([Source, term_to_binary(DDoc)])), + BindFuns = bindings(State, Sig, {DDoc}), + {Sig, makefun(State, Source, BindFuns)}; +makefun(_State, Source, BindFuns) when is_list(BindFuns) -> + FunStr = binary_to_list(Source), + {ok, Tokens, _} = erl_scan:string(FunStr), + Form = case (catch erl_parse:parse_exprs(Tokens)) of + {ok, [ParsedForm]} -> + ParsedForm; + {error, {LineNum, _Mod, [Mesg, Params]}}=Error -> + io:format(standard_error, "Syntax error on line: ~p~n", [LineNum]), + io:format(standard_error, "~s~p~n", [Mesg, Params]), + throw(Error) + end, + Bindings = lists:foldl(fun({Name, Fun}, Acc) -> + erl_eval:add_binding(Name, Fun, Acc) + end, erl_eval:new_bindings(), BindFuns), + {value, Fun, _} = erl_eval:expr(Form, Bindings), + Fun. + +reduce(State, BinFuns, Keys, Vals, ReReduce) -> + Funs = case is_list(BinFuns) of + true -> + lists:map(fun(BF) -> makefun(State, BF) end, BinFuns); + _ -> + [makefun(State, BinFuns)] + end, + Reds = lists:map(fun({_Sig, Fun}) -> + Fun(Keys, Vals, ReReduce) + end, Funs), + [true, Reds]. + +foldrows(GetRow, ProcRow, Acc) -> + case GetRow() of + nil -> + {ok, Acc}; + Row -> + case (catch ProcRow(Row, Acc)) of + {ok, Acc2} -> + foldrows(GetRow, ProcRow, Acc2); + {stop, Acc2} -> + {ok, Acc2} + end + end. 
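The module header only shows a map function; since reduce/5 above invokes each compiled fun as Fun(Keys, Values, ReReduce), a native reduce source could, as a sketch, look like the following. It assumes the map emits numeric values, which is why the same body also works for the rereduce pass:

    fun(_Keys, Values, _ReReduce) ->
        lists:sum(Values)
    end.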
+ +start_list_resp(Self, Sig) -> + case erlang:get(list_started) of + undefined -> + Headers = + case erlang:get(list_headers) of + undefined -> {[{<<"headers">>, {[]}}]}; + CurrHdrs -> CurrHdrs + end, + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), start, lists:reverse(Chunks), Headers}, + erlang:put(list_started, true), + erlang:put(Sig, []), + started; + _ -> + ok + end. + +to_binary({Data}) -> + Pred = fun({Key, Value}) -> + {to_binary(Key), to_binary(Value)} + end, + {lists:map(Pred, Data)}; +to_binary(Data) when is_list(Data) -> + [to_binary(D) || D <- Data]; +to_binary(null) -> + null; +to_binary(true) -> + true; +to_binary(false) -> + false; +to_binary(Data) when is_atom(Data) -> + list_to_binary(atom_to_list(Data)); +to_binary(Data) -> + Data. diff --git a/apps/couch/src/couch_os_process.erl b/apps/couch/src/couch_os_process.erl new file mode 100644 index 00000000..5776776b --- /dev/null +++ b/apps/couch/src/couch_os_process.erl @@ -0,0 +1,185 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_os_process). +-behaviour(gen_server). + +-export([start_link/1, start_link/2, start_link/3, stop/1]). +-export([set_timeout/2, prompt/2]). +-export([send/2, writeline/2, readline/1, writejson/2, readjson/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]). + +-include("couch_db.hrl"). + +-define(PORT_OPTIONS, [stream, {line, 1024}, binary, exit_status, hide]). + +-record(os_proc, + {command, + port, + writer, + reader, + timeout=5000 + }). + +start_link(Command) -> + start_link(Command, []). +start_link(Command, Options) -> + start_link(Command, Options, ?PORT_OPTIONS). +start_link(Command, Options, PortOptions) -> + gen_server:start_link(couch_os_process, [Command, Options, PortOptions], []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +% Read/Write API +set_timeout(Pid, TimeOut) when is_integer(TimeOut) -> + ok = gen_server:call(Pid, {set_timeout, TimeOut}). + +% Used by couch_db_update_notifier.erl +send(Pid, Data) -> + gen_server:cast(Pid, {send, Data}). + +prompt(Pid, Data) -> + case gen_server:call(Pid, {prompt, Data}, infinity) of + {ok, Result} -> + Result; + Error -> + ?LOG_ERROR("OS Process Error ~p :: ~p",[Pid,Error]), + throw(Error) + end. + +% Utility functions for reading and writing +% in custom functions +writeline(OsProc, Data) when is_record(OsProc, os_proc) -> + port_command(OsProc#os_proc.port, Data ++ "\n"). + +readline(#os_proc{} = OsProc) -> + readline(OsProc, []). +readline(#os_proc{port = Port} = OsProc, Acc) -> + receive + {Port, {data, {noeol, Data}}} -> + readline(OsProc, [Data|Acc]); + {Port, {data, {eol, Data}}} -> + lists:reverse(Acc, Data); + {Port, Err} -> + catch port_close(Port), + throw({os_process_error, Err}) + after OsProc#os_proc.timeout -> + catch port_close(Port), + throw({os_process_error, "OS process timed out."}) + end. 
+ +% Standard JSON functions +writejson(OsProc, Data) when is_record(OsProc, os_proc) -> + JsonData = ?JSON_ENCODE(Data), + ?LOG_DEBUG("OS Process ~p Input :: ~s", [OsProc#os_proc.port, JsonData]), + true = writeline(OsProc, JsonData). + +readjson(OsProc) when is_record(OsProc, os_proc) -> + Line = readline(OsProc), + ?LOG_DEBUG("OS Process ~p Output :: ~s", [OsProc#os_proc.port, Line]), + case ?JSON_DECODE(Line) of + [<<"log">>, Msg] when is_binary(Msg) -> + % we got a message to log. Log it and continue + ?LOG_INFO("OS Process ~p Log :: ~s", [OsProc#os_proc.port, Msg]), + readjson(OsProc); + [<<"error">>, Id, Reason] -> + throw({couch_util:to_existing_atom(Id),Reason}); + [<<"fatal">>, Id, Reason] -> + ?LOG_INFO("OS Process ~p Fatal Error :: ~s ~p",[OsProc#os_proc.port, Id, Reason]), + throw({couch_util:to_existing_atom(Id),Reason}); + Result -> + Result + end. + + +% gen_server API +init([Command, Options, PortOptions]) -> + process_flag(trap_exit, true), + PrivDir = couch_util:priv_dir(), + Spawnkiller = filename:join(PrivDir, "couchspawnkillable"), + BaseProc = #os_proc{ + command=Command, + port=open_port({spawn, Spawnkiller ++ " " ++ Command}, PortOptions), + writer=fun writejson/2, + reader=fun readjson/1 + }, + KillCmd = readline(BaseProc), + Pid = self(), + ?LOG_DEBUG("OS Process Start :: ~p", [BaseProc#os_proc.port]), + spawn(fun() -> + % this ensure the real os process is killed when this process dies. + erlang:monitor(process, Pid), + receive _ -> ok end, + os:cmd(?b2l(KillCmd)) + end), + OsProc = + lists:foldl(fun(Opt, Proc) -> + case Opt of + {writer, Writer} when is_function(Writer) -> + Proc#os_proc{writer=Writer}; + {reader, Reader} when is_function(Reader) -> + Proc#os_proc{reader=Reader}; + {timeout, TimeOut} when is_integer(TimeOut) -> + Proc#os_proc{timeout=TimeOut} + end + end, BaseProc, Options), + {ok, OsProc}. + +terminate(_Reason, #os_proc{port=Port}) -> + catch port_close(Port), + ok. + +handle_call({set_timeout, TimeOut}, _From, OsProc) -> + {reply, ok, OsProc#os_proc{timeout=TimeOut}}; +handle_call({prompt, Data}, _From, OsProc) -> + #os_proc{writer=Writer, reader=Reader} = OsProc, + try + Writer(OsProc, Data), + {reply, {ok, Reader(OsProc)}, OsProc} + catch + throw:{error, OsError} -> + {reply, OsError, OsProc}; + throw:{fatal, OsError} -> + {stop, normal, OsError, OsProc}; + throw:OtherError -> + {stop, normal, OtherError, OsProc} + end. + +handle_cast({send, Data}, #os_proc{writer=Writer}=OsProc) -> + try + Writer(OsProc, Data), + {noreply, OsProc} + catch + throw:OsError -> + ?LOG_ERROR("Failed sending data: ~p -> ~p", [Data, OsError]), + {stop, normal, OsProc} + end; +handle_cast(stop, OsProc) -> + {stop, normal, OsProc}; +handle_cast(Msg, OsProc) -> + ?LOG_DEBUG("OS Proc: Unknown cast: ~p", [Msg]), + {noreply, OsProc}. + +handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) -> + ?LOG_INFO("OS Process terminated normally", []), + {stop, normal, OsProc}; +handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) -> + ?LOG_ERROR("OS Process died with status: ~p", [Status]), + {stop, {exit_status, Status}, OsProc}; +handle_info(Msg, OsProc) -> + ?LOG_DEBUG("OS Proc: Unknown info: ~p", [Msg]), + {noreply, OsProc}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
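Taken together, the writer/reader pair above implements a newline-delimited JSON protocol: prompt/2 writes one JSON line to the external process and waits for one line back, consuming any ["log", Msg] lines on the way and converting ["error", ...] and ["fatal", ...] replies into throws. A rough sketch of one exchange as couch_query_servers drives it (payloads illustrative):

    % couch        -> query server:  ["reset", {"reduce_limit": true}]
    % query server -> couch:         true
    % couch        -> query server:  ["add_fun", "function(doc) { emit(doc._id, null); }"]
    % query server -> couch:         true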
+ diff --git a/apps/couch/src/couch_query_servers.erl b/apps/couch/src/couch_query_servers.erl new file mode 100644 index 00000000..c4f1bf0b --- /dev/null +++ b/apps/couch/src/couch_query_servers.erl @@ -0,0 +1,485 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_query_servers). +-behaviour(gen_server). + +-export([start_link/0]). + +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2,code_change/3]). +-export([start_doc_map/2, map_docs/2, stop_doc_map/1]). +-export([reduce/3, rereduce/3,validate_doc_update/5]). +-export([filter_docs/5]). + +-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]). + +% -export([test/0]). + +-include("couch_db.hrl"). + +-record(proc, { + pid, + lang, + ddoc_keys = [], + prompt_fun, + set_timeout_fun, + stop_fun +}). + +start_link() -> + gen_server:start_link({local, couch_query_servers}, couch_query_servers, [], []). + +start_doc_map(Lang, Functions) -> + Proc = get_os_process(Lang), + lists:foreach(fun(FunctionSource) -> + true = proc_prompt(Proc, [<<"add_fun">>, FunctionSource]) + end, Functions), + {ok, Proc}. + +map_docs(Proc, Docs) -> + % send the documents + Results = lists:map( + fun(Doc) -> + Json = couch_doc:to_json_obj(Doc, []), + + FunsResults = proc_prompt(Proc, [<<"map_doc">>, Json]), + % the results are a json array of function map yields like this: + % [FunResults1, FunResults2 ...] + % where funresults is are json arrays of key value pairs: + % [[Key1, Value1], [Key2, Value2]] + % Convert the key, value pairs to tuples like + % [{Key1, Value1}, {Key2, Value2}] + lists:map( + fun(FunRs) -> + [list_to_tuple(FunResult) || FunResult <- FunRs] + end, + FunsResults) + end, + Docs), + {ok, Results}. + + +stop_doc_map(nil) -> + ok; +stop_doc_map(Proc) -> + ok = ret_os_process(Proc). + +group_reductions_results([]) -> + []; +group_reductions_results(List) -> + {Heads, Tails} = lists:foldl( + fun([H|T], {HAcc,TAcc}) -> + {[H|HAcc], [T|TAcc]} + end, {[], []}, List), + case Tails of + [[]|_] -> % no tails left + [Heads]; + _ -> + [Heads | group_reductions_results(Tails)] + end. + +rereduce(_Lang, [], _ReducedValues) -> + {ok, []}; +rereduce(Lang, RedSrcs, ReducedValues) -> + Grouped = group_reductions_results(ReducedValues), + Results = lists:zipwith( + fun + (<<"_", _/binary>> = FunSrc, Values) -> + {ok, [Result]} = builtin_reduce(rereduce, [FunSrc], [[[], V] || V <- Values], []), + Result; + (FunSrc, Values) -> + os_rereduce(Lang, [FunSrc], Values) + end, RedSrcs, Grouped), + {ok, Results}. + +reduce(_Lang, [], _KVs) -> + {ok, []}; +reduce(Lang, RedSrcs, KVs) -> + {OsRedSrcs, BuiltinReds} = lists:partition(fun + (<<"_", _/binary>>) -> false; + (_OsFun) -> true + end, RedSrcs), + {ok, OsResults} = os_reduce(Lang, OsRedSrcs, KVs), + {ok, BuiltinResults} = builtin_reduce(reduce, BuiltinReds, KVs, []), + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, []). 
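A sketch of how a view build uses the pool: start_doc_map/2 checks a process out and teaches it the map sources, and map_docs/2 returns, for every document, one list of {Key, Value} tuples per map function. Doc is assumed to be an already opened #doc{} record and the emitted values are illustrative:

    {ok, Proc} = couch_query_servers:start_doc_map(<<"javascript">>,
        [<<"function(doc) { emit(doc._id, 1); }">>]),
    {ok, Results} = couch_query_servers:map_docs(Proc, [Doc]),
    % Results holds one entry per document, one list per map fun, e.g.
    % [[ [{<<"mydoc">>, 1}] ]]
    ok = couch_query_servers:stop_doc_map(Proc).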
+ +recombine_reduce_results([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +recombine_reduce_results([<<"_", _/binary>>|RedSrcs], OsResults, [BRes|BuiltinResults], Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [BRes|Acc]); +recombine_reduce_results([_OsFun|RedSrcs], [OsR|OsResults], BuiltinResults, Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [OsR|Acc]). + +os_reduce(_Lang, [], _KVs) -> + {ok, []}; +os_reduce(Lang, OsRedSrcs, KVs) -> + Proc = get_os_process(Lang), + OsResults = try proc_prompt(Proc, [<<"reduce">>, OsRedSrcs, KVs]) of + [true, Reductions] -> Reductions + after + ok = ret_os_process(Proc) + end, + {ok, OsResults}. + +os_rereduce(_Lang, [], _KVs) -> + {ok, []}; +os_rereduce(Lang, OsRedSrcs, KVs) -> + Proc = get_os_process(Lang), + try proc_prompt(Proc, [<<"rereduce">>, OsRedSrcs, KVs]) of + [true, [Reduction]] -> Reduction + after + ok = ret_os_process(Proc) + end. + + +builtin_reduce(_Re, [], _KVs, Acc) -> + {ok, lists:reverse(Acc)}; +builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> + Sum = builtin_sum_rows(KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]); +builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = length(KVs), + builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = builtin_sum_rows(KVs), + builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) -> + Stats = builtin_stats(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]). + +builtin_sum_rows(KVs) -> + lists:foldl(fun + ([_Key, Value], Acc) when is_number(Value) -> + Acc + Value; + (_Else, _Acc) -> + throw({invalid_value, <<"builtin _sum function requires map values to be numbers">>}) + end, 0, KVs). + +builtin_stats(reduce, [[_,First]|Rest]) when is_number(First) -> + Stats = lists:foldl(fun([_K,V], {S,C,Mi,Ma,Sq}) when is_number(V) -> + {S+V, C+1, erlang:min(Mi,V), erlang:max(Ma,V), Sq+(V*V)}; + (_, _) -> + throw({invalid_value, + <<"builtin _stats function requires map values to be numbers">>}) + end, {First,1,First,First,First*First}, Rest), + {Sum, Cnt, Min, Max, Sqr} = Stats, + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]}; + +builtin_stats(rereduce, [[_,First]|Rest]) -> + {[{sum,Sum0}, {count,Cnt0}, {min,Min0}, {max,Max0}, {sumsqr,Sqr0}]} = First, + Stats = lists:foldl(fun([_K,Red], {S,C,Mi,Ma,Sq}) -> + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]} = Red, + {Sum+S, Cnt+C, erlang:min(Min,Mi), erlang:max(Max,Ma), Sqr+Sq} + end, {Sum0,Cnt0,Min0,Max0,Sqr0}, Rest), + {Sum, Cnt, Min, Max, Sqr} = Stats, + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]}. + +% use the function stored in ddoc.validate_doc_update to test an update. +validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> + JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]), + JsonDiskDoc = json_doc(DiskDoc), + case ddoc_prompt(DDoc, [<<"validate_doc_update">>], [JsonEditDoc, JsonDiskDoc, Ctx, SecObj]) of + 1 -> + ok; + {[{<<"forbidden">>, Message}]} -> + throw({forbidden, Message}); + {[{<<"unauthorized">>, Message}]} -> + throw({unauthorized, Message}) + end. + +json_doc(nil) -> null; +json_doc(Doc) -> + couch_doc:to_json_obj(Doc, [revs]). 
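A quick sketch of the builtin reducers above; reduce/3 routes any source starting with an underscore to them, so no external process is involved (keys and values illustrative):

    couch_query_servers:reduce(<<"javascript">>, [<<"_sum">>, <<"_count">>],
                               [[<<"a">>, 1], [<<"b">>, 2], [<<"c">>, 3]]).
    % => {ok, [6, 3]}
    % a <<"_stats">> source over the same rows would reduce to
    % {[{sum, 6}, {count, 3}, {min, 1}, {max, 3}, {sumsqr, 14}]}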
+ +filter_docs(Req, Db, DDoc, FName, Docs) -> + JsonReq = case Req of + {json_req, JsonObj} -> + JsonObj; + #httpd{} = HttpReq -> + couch_httpd_external:json_req_obj(HttpReq, Db) + end, + JsonDocs = [couch_doc:to_json_obj(Doc, [revs]) || Doc <- Docs], + [true, Passes] = ddoc_prompt(DDoc, [<<"filters">>, FName], [JsonDocs, JsonReq]), + {ok, Passes}. + +ddoc_proc_prompt({Proc, DDocId}, FunPath, Args) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]). + +ddoc_prompt(DDoc, FunPath, Args) -> + with_ddoc_proc(DDoc, fun({Proc, DDocId}) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]) + end). + +with_ddoc_proc(#doc{id=DDocId,revs={Start, [DiskRev|_]}}=DDoc, Fun) -> + Rev = couch_doc:rev_to_str({Start, DiskRev}), + DDocKey = {DDocId, Rev}, + Proc = get_ddoc_process(DDoc, DDocKey), + try Fun({Proc, DDocId}) + after + ok = ret_os_process(Proc) + end. + +init([]) -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. + + ok = couch_config:register( + fun("query_servers" ++ _, _) -> + supervisor:terminate_child(couch_secondary_services, query_servers), + supervisor:restart_child(couch_secondary_services, query_servers) + end), + ok = couch_config:register( + fun("native_query_servers" ++ _, _) -> + supervisor:terminate_child(couch_secondary_services, query_servers), + [supervisor:restart_child(couch_secondary_services, query_servers)] + end), + + Langs = ets:new(couch_query_server_langs, [set, private]), + PidProcs = ets:new(couch_query_server_pid_langs, [set, private]), + LangProcs = ets:new(couch_query_server_procs, [set, private]), + % 'query_servers' specifies an OS command-line to execute. + lists:foreach(fun({Lang, Command}) -> + true = ets:insert(Langs, {?l2b(Lang), + couch_os_process, start_link, [Command]}) + end, couch_config:get("query_servers")), + % 'native_query_servers' specifies a {Module, Func, Arg} tuple. + lists:foreach(fun({Lang, SpecStr}) -> + {ok, {Mod, Fun, SpecArg}} = couch_util:parse_term(SpecStr), + true = ets:insert(Langs, {?l2b(Lang), + Mod, Fun, SpecArg}) + end, couch_config:get("native_query_servers")), + process_flag(trap_exit, true), + {ok, {Langs, % Keyed by language name, value is {Mod,Func,Arg} + PidProcs, % Keyed by PID, valus is a #proc record. + LangProcs % Keyed by language name, value is a #proc record + }}. + +terminate(_Reason, {_Langs, PidProcs, _LangProcs}) -> + [couch_util:shutdown_sync(P) || {P,_} <- ets:tab2list(PidProcs)], + ok. + +handle_call({get_proc, #doc{body={Props}}=DDoc, DDocKey}, _From, {Langs, PidProcs, LangProcs}=Server) -> + % Note to future self. Add max process limit. + Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + case ets:lookup(LangProcs, Lang) of + [{Lang, [P|Rest]}] -> + % find a proc in the set that has the DDoc + case proc_with_ddoc(DDoc, DDocKey, [P|Rest]) of + {ok, Proc} -> + rem_from_list(LangProcs, Lang, Proc), + {reply, {ok, Proc, get_query_server_config()}, Server}; + Error -> + {reply, Error, Server} + end; + _ -> + case (catch new_process(Langs, Lang)) of + {ok, Proc} -> + add_value(PidProcs, Proc#proc.pid, Proc), + case proc_with_ddoc(DDoc, DDocKey, [Proc]) of + {ok, Proc2} -> + {reply, {ok, Proc2, get_query_server_config()}, Server}; + Error -> + {reply, Error, Server} + end; + Error -> + {reply, Error, Server} + end + end; +handle_call({get_proc, Lang}, _From, {Langs, PidProcs, LangProcs}=Server) -> + % Note to future self. 
Add max process limit. + case ets:lookup(LangProcs, Lang) of + [{Lang, [Proc|_]}] -> + rem_from_list(LangProcs, Lang, Proc), + {reply, {ok, Proc, get_query_server_config()}, Server}; + _ -> + case (catch new_process(Langs, Lang)) of + {ok, Proc} -> + add_value(PidProcs, Proc#proc.pid, Proc), + {reply, {ok, Proc, get_query_server_config()}, Server}; + Error -> + {reply, Error, Server} + end + end; +handle_call({unlink_proc, Pid}, _From, {_, PidProcs, _}=Server) -> + rem_value(PidProcs, Pid), + unlink(Pid), + {reply, ok, Server}; +handle_call({ret_proc, Proc}, _From, {_, PidProcs, LangProcs}=Server) -> + % Along with max process limit, here we should check + % if we're over the limit and discard when we are. + add_value(PidProcs, Proc#proc.pid, Proc), + add_to_list(LangProcs, Proc#proc.lang, Proc), + link(Proc#proc.pid), + {reply, true, Server}. + +handle_cast(_Whatever, Server) -> + {noreply, Server}. + +handle_info({'EXIT', Pid, Status}, {_, PidProcs, LangProcs}=Server) -> + case ets:lookup(PidProcs, Pid) of + [{Pid, Proc}] -> + case Status of + normal -> ok; + _ -> ?LOG_DEBUG("Linked process died abnormally: ~p (reason: ~p)", [Pid, Status]) + end, + rem_value(PidProcs, Pid), + catch rem_from_list(LangProcs, Proc#proc.lang, Proc), + {noreply, Server}; + [] -> + case Status of + normal -> + {noreply, Server}; + _ -> + {stop, Status, Server} + end + end. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +% Private API + +get_query_server_config() -> + ReduceLimit = list_to_atom( + couch_config:get("query_server_config","reduce_limit","true")), + {[{<<"reduce_limit">>, ReduceLimit}]}. + +new_process(Langs, Lang) -> + case ets:lookup(Langs, Lang) of + [{Lang, Mod, Func, Arg}] -> + {ok, Pid} = apply(Mod, Func, Arg), + {ok, #proc{lang=Lang, + pid=Pid, + % Called via proc_prompt, proc_set_timeout, and proc_stop + prompt_fun={Mod, prompt}, + set_timeout_fun={Mod, set_timeout}, + stop_fun={Mod, stop}}}; + _ -> + {unknown_query_language, Lang} + end. + +proc_with_ddoc(DDoc, DDocKey, LangProcs) -> + DDocProcs = lists:filter(fun(#proc{ddoc_keys=Keys}) -> + lists:any(fun(Key) -> + Key == DDocKey + end, Keys) + end, LangProcs), + case DDocProcs of + [DDocProc|_] -> + ?LOG_DEBUG("DDocProc found for DDocKey: ~p",[DDocKey]), + {ok, DDocProc}; + [] -> + [TeachProc|_] = LangProcs, + ?LOG_DEBUG("Teach ddoc to new proc ~p with DDocKey: ~p",[TeachProc, DDocKey]), + {ok, SmartProc} = teach_ddoc(DDoc, DDocKey, TeachProc), + {ok, SmartProc} + end. + +proc_prompt(Proc, Args) -> + {Mod, Func} = Proc#proc.prompt_fun, + apply(Mod, Func, [Proc#proc.pid, Args]). + +proc_stop(Proc) -> + {Mod, Func} = Proc#proc.stop_fun, + apply(Mod, Func, [Proc#proc.pid]). + +proc_set_timeout(Proc, Timeout) -> + {Mod, Func} = Proc#proc.set_timeout_fun, + apply(Mod, Func, [Proc#proc.pid, Timeout]). + +teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc{ddoc_keys=Keys}=Proc) -> + % send ddoc over the wire + % we only share the rev with the client we know to update code + % but it only keeps the latest copy, per each ddoc, around. + true = proc_prompt(Proc, [<<"ddoc">>, <<"new">>, DDocId, couch_doc:to_json_obj(DDoc, [])]), + % we should remove any other ddocs keys for this docid + % because the query server overwrites without the rev + Keys2 = [{D,R} || {D,R} <- Keys, D /= DDocId], + % add ddoc to the proc + {ok, Proc#proc{ddoc_keys=[DDocKey|Keys2]}}. 
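For reference, init/1 above builds its language table from two ini sections: [query_servers] entries are OS command lines handed to couch_os_process, while [native_query_servers] entries are {Module, Function, Args} terms read with couch_util:parse_term/1. A sketch of such a config; the couchjs path is only a placeholder, and the erlang entry is the one suggested in the couch_native_process header:

    [query_servers]
    javascript = /usr/local/bin/couchjs /usr/local/share/couchdb/server/main.js

    [native_query_servers]
    erlang = {couch_native_process, start_link, []}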
+ +get_ddoc_process(#doc{} = DDoc, DDocKey) -> + % remove this case statement + case gen_server:call(couch_query_servers, {get_proc, DDoc, DDocKey}) of + {ok, Proc, QueryConfig} -> + % process knows the ddoc + case (catch proc_prompt(Proc, [<<"reset">>, QueryConfig])) of + true -> + proc_set_timeout(Proc, list_to_integer(couch_config:get( + "couchdb", "os_process_timeout", "5000"))), + link(Proc#proc.pid), + gen_server:call(couch_query_servers, {unlink_proc, Proc#proc.pid}), + Proc; + _ -> + catch proc_stop(Proc), + get_ddoc_process(DDoc, DDocKey) + end; + Error -> + throw(Error) + end. + +get_os_process(Lang) -> + case gen_server:call(couch_query_servers, {get_proc, Lang}) of + {ok, Proc, QueryConfig} -> + case (catch proc_prompt(Proc, [<<"reset">>, QueryConfig])) of + true -> + proc_set_timeout(Proc, list_to_integer(couch_config:get( + "couchdb", "os_process_timeout", "5000"))), + link(Proc#proc.pid), + gen_server:call(couch_query_servers, {unlink_proc, Proc#proc.pid}), + Proc; + _ -> + catch proc_stop(Proc), + get_os_process(Lang) + end; + Error -> + throw(Error) + end. + +ret_os_process(Proc) -> + true = gen_server:call(couch_query_servers, {ret_proc, Proc}), + catch unlink(Proc#proc.pid), + ok. + +add_value(Tid, Key, Value) -> + true = ets:insert(Tid, {Key, Value}). + +rem_value(Tid, Key) -> + true = ets:delete(Tid, Key). + +add_to_list(Tid, Key, Value) -> + case ets:lookup(Tid, Key) of + [{Key, Vals}] -> + true = ets:insert(Tid, {Key, [Value|Vals]}); + [] -> + true = ets:insert(Tid, {Key, [Value]}) + end. + +rem_from_list(Tid, Key, Value) when is_record(Value, proc)-> + Pid = Value#proc.pid, + case ets:lookup(Tid, Key) of + [{Key, Vals}] -> + % make a new values list that doesn't include the Value arg + NewValues = [Val || #proc{pid=P}=Val <- Vals, P /= Pid], + ets:insert(Tid, {Key, NewValues}); + [] -> ok + end; +rem_from_list(Tid, Key, Value) -> + case ets:lookup(Tid, Key) of + [{Key, Vals}] -> + % make a new values list that doesn't include the Value arg + NewValues = [Val || Val <- Vals, Val /= Value], + ets:insert(Tid, {Key, NewValues}); + [] -> ok + end. diff --git a/apps/couch/src/couch_ref_counter.erl b/apps/couch/src/couch_ref_counter.erl new file mode 100644 index 00000000..5a111ab6 --- /dev/null +++ b/apps/couch/src/couch_ref_counter.erl @@ -0,0 +1,111 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_ref_counter). +-behaviour(gen_server). + +-export([start/1, init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([drop/1,drop/2,add/1,add/2,count/1]). + +start(ChildProcs) -> + gen_server:start(couch_ref_counter, {self(), ChildProcs}, []). + + +drop(RefCounterPid) -> + drop(RefCounterPid, self()). + +drop(RefCounterPid, Pid) -> + gen_server:call(RefCounterPid, {drop, Pid}). + + +add(RefCounterPid) -> + add(RefCounterPid, self()). + +add(RefCounterPid, Pid) -> + gen_server:call(RefCounterPid, {add, Pid}). + +count(RefCounterPid) -> + gen_server:call(RefCounterPid, count). 
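A small shell sketch of this API; every referrer is monitored, repeated add calls from the same pid only bump its count, and (per should_close/0 below) the server exits once the last monitor is gone:

    {ok, RC} = couch_ref_counter:start([]).                 % no child procs in this sketch
    OtherPid = spawn(fun() -> receive stop -> ok end end).  % a hypothetical referrer
    ok = couch_ref_counter:add(RC, OtherPid).
    couch_ref_counter:count(RC).
    % => 2   (the starting shell process and OtherPid are each monitored once)
    ok = couch_ref_counter:drop(RC, OtherPid).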
+ +% server functions + +-record(srv, + { + referrers=dict:new(), % a dict of each ref counting proc. + child_procs=[] + }). + +init({Pid, ChildProcs}) -> + [link(ChildProc) || ChildProc <- ChildProcs], + Referrers = dict:from_list([{Pid, {erlang:monitor(process, Pid), 1}}]), + {ok, #srv{referrers=Referrers, child_procs=ChildProcs}}. + + +terminate(_Reason, #srv{child_procs=ChildProcs}) -> + [couch_util:shutdown_sync(Pid) || Pid <- ChildProcs], + ok. + + +handle_call({add, Pid},_From, #srv{referrers=Referrers}=Srv) -> + Referrers2 = + case dict:find(Pid, Referrers) of + error -> + dict:store(Pid, {erlang:monitor(process, Pid), 1}, Referrers); + {ok, {MonRef, RefCnt}} -> + dict:store(Pid, {MonRef, RefCnt + 1}, Referrers) + end, + {reply, ok, Srv#srv{referrers=Referrers2}}; +handle_call(count, _From, Srv) -> + {monitors, Monitors} = process_info(self(), monitors), + {reply, length(Monitors), Srv}; +handle_call({drop, Pid}, _From, #srv{referrers=Referrers}=Srv) -> + Referrers2 = + case dict:find(Pid, Referrers) of + {ok, {MonRef, 1}} -> + erlang:demonitor(MonRef, [flush]), + dict:erase(Pid, Referrers); + {ok, {MonRef, Num}} -> + dict:store(Pid, {MonRef, Num-1}, Referrers); + error -> + Referrers + end, + Srv2 = Srv#srv{referrers=Referrers2}, + case should_close() of + true -> + {stop,normal,ok,Srv2}; + false -> + {reply, ok, Srv2} + end. + +handle_cast(Msg, _Srv)-> + exit({unknown_msg,Msg}). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info({'DOWN', MonRef, _, Pid, _}, #srv{referrers=Referrers}=Srv) -> + {ok, {MonRef, _RefCount}} = dict:find(Pid, Referrers), + Srv2 = Srv#srv{referrers=dict:erase(Pid, Referrers)}, + case should_close() of + true -> + {stop,normal,Srv2}; + false -> + {noreply,Srv2} + end. + + +should_close() -> + case process_info(self(), monitors) of + {monitors, []} -> true; + _ -> false + end. diff --git a/apps/couch/src/couch_rep.erl b/apps/couch/src/couch_rep.erl new file mode 100644 index 00000000..65573e8c --- /dev/null +++ b/apps/couch/src/couch_rep.erl @@ -0,0 +1,748 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([replicate/2, checkpoint/1]). + +-include("couch_db.hrl"). + +-record(state, { + changes_feed, + missing_revs, + reader, + writer, + + source, + target, + continuous, + create_target, + init_args, + checkpoint_scheduled = nil, + + start_seq, + history, + source_log, + target_log, + rep_starttime, + src_starttime, + tgt_starttime, + checkpoint_history = nil, + + listeners = [], + complete = false, + committed_seq = 0, + + stats = nil, + doc_ids = nil +}). 
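For reference, the shape of the EJSON body that replicate/2 below receives from a POST to /_replicate; only source and target are required, and the optional fields (continuous, create_target, doc_ids, proxy, cancel) map onto the record above. Hosts and database names are illustrative:

    {[
        {<<"source">>, <<"exampledb">>},
        {<<"target">>, <<"http://127.0.0.1:5984/exampledb-copy">>},
        {<<"continuous">>, true},
        {<<"create_target">>, true}
    ]}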
+ +%% convenience function to do a simple replication from the shell +replicate(Source, Target) when is_list(Source) -> + replicate(?l2b(Source), Target); +replicate(Source, Target) when is_binary(Source), is_list(Target) -> + replicate(Source, ?l2b(Target)); +replicate(Source, Target) when is_binary(Source), is_binary(Target) -> + replicate({[{<<"source">>, Source}, {<<"target">>, Target}]}, #user_ctx{}); + +%% function handling POST to _replicate +replicate({Props}=PostBody, UserCtx) -> + {BaseId, Extension} = make_replication_id(PostBody, UserCtx), + Replicator = {BaseId ++ Extension, + {gen_server, start_link, [?MODULE, [BaseId, PostBody, UserCtx], []]}, + temporary, + 1, + worker, + [?MODULE] + }, + + case couch_util:get_value(<<"cancel">>, Props, false) of + true -> + case supervisor:terminate_child(couch_rep_sup, BaseId ++ Extension) of + {error, not_found} -> + {error, not_found}; + ok -> + ok = supervisor:delete_child(couch_rep_sup, BaseId ++ Extension), + {ok, {cancelled, ?l2b(BaseId)}} + end; + false -> + Server = start_replication_server(Replicator), + + case couch_util:get_value(<<"continuous">>, Props, false) of + true -> + {ok, {continuous, ?l2b(BaseId)}}; + false -> + get_result(Server, PostBody, UserCtx) + end + end. + +checkpoint(Server) -> + gen_server:cast(Server, do_checkpoint). + +get_result(Server, PostBody, UserCtx) -> + try gen_server:call(Server, get_result, infinity) of + retry -> replicate(PostBody, UserCtx); + Else -> Else + catch + exit:{noproc, {gen_server, call, [Server, get_result , infinity]}} -> + %% oops, this replication just finished -- restart it. + replicate(PostBody, UserCtx); + exit:{normal, {gen_server, call, [Server, get_result , infinity]}} -> + %% we made the call during terminate + replicate(PostBody, UserCtx) + end. + +init(InitArgs) -> + try do_init(InitArgs) + catch throw:{db_not_found, DbUrl} -> {stop, {db_not_found, DbUrl}} end. + +do_init([RepId, {PostProps}, UserCtx] = InitArgs) -> + process_flag(trap_exit, true), + + SourceProps = couch_util:get_value(<<"source">>, PostProps), + TargetProps = couch_util:get_value(<<"target">>, PostProps), + + DocIds = couch_util:get_value(<<"doc_ids">>, PostProps, nil), + Continuous = couch_util:get_value(<<"continuous">>, PostProps, false), + CreateTarget = couch_util:get_value(<<"create_target">>, PostProps, false), + + ProxyParams = parse_proxy_params( + couch_util:get_value(<<"proxy">>, PostProps, [])), + Source = open_db(SourceProps, UserCtx, ProxyParams), + Target = open_db(TargetProps, UserCtx, ProxyParams, CreateTarget), + + SourceInfo = dbinfo(Source), + TargetInfo = dbinfo(Target), + + case DocIds of + List when is_list(List) -> + % Fast replication using only a list of doc IDs to replicate. + % Replication sessions, checkpoints and logs are not created + % since the update sequence number of the source DB is not used + % for determining which documents are copied into the target DB. + SourceLog = nil, + TargetLog = nil, + + StartSeq = nil, + History = nil, + + ChangesFeed = nil, + MissingRevs = nil, + + {ok, Reader} = + couch_rep_reader:start_link(self(), Source, DocIds, PostProps); + + _ -> + % Replication using the _changes API (DB sequence update numbers). 
+ SourceLog = open_replication_log(Source, RepId), + TargetLog = open_replication_log(Target, RepId), + + {StartSeq, History} = compare_replication_logs(SourceLog, TargetLog), + + {ok, ChangesFeed} = + couch_rep_changes_feed:start_link(self(), Source, StartSeq, PostProps), + {ok, MissingRevs} = + couch_rep_missing_revs:start_link(self(), Target, ChangesFeed, PostProps), + {ok, Reader} = + couch_rep_reader:start_link(self(), Source, MissingRevs, PostProps) + end, + + {ok, Writer} = + couch_rep_writer:start_link(self(), Target, Reader, PostProps), + + Stats = ets:new(replication_stats, [set, private]), + ets:insert(Stats, {total_revs,0}), + ets:insert(Stats, {missing_revs, 0}), + ets:insert(Stats, {docs_read, 0}), + ets:insert(Stats, {docs_written, 0}), + ets:insert(Stats, {doc_write_failures, 0}), + + {ShortId, _} = lists:split(6, RepId), + couch_task_status:add_task("Replication", io_lib:format("~s: ~s -> ~s", + [ShortId, dbname(Source), dbname(Target)]), "Starting"), + + State = #state{ + changes_feed = ChangesFeed, + missing_revs = MissingRevs, + reader = Reader, + writer = Writer, + + source = Source, + target = Target, + continuous = Continuous, + create_target = CreateTarget, + init_args = InitArgs, + stats = Stats, + checkpoint_scheduled = nil, + + start_seq = StartSeq, + history = History, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = httpd_util:rfc1123_date(), + src_starttime = couch_util:get_value(instance_start_time, SourceInfo), + tgt_starttime = couch_util:get_value(instance_start_time, TargetInfo), + doc_ids = DocIds + }, + {ok, State}. + +handle_call(get_result, From, #state{complete=true, listeners=[]} = State) -> + {stop, normal, State#state{listeners=[From]}}; +handle_call(get_result, From, State) -> + Listeners = State#state.listeners, + {noreply, State#state{listeners=[From|Listeners]}}. + +handle_cast(do_checkpoint, State) -> + {noreply, do_checkpoint(State)}; + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({missing_revs_checkpoint, SourceSeq}, State) -> + couch_task_status:update("MR Processed source update #~p", [SourceSeq]), + {noreply, schedule_checkpoint(State#state{committed_seq = SourceSeq})}; + +handle_info({writer_checkpoint, SourceSeq}, #state{committed_seq=N} = State) + when SourceSeq > N -> + MissingRevs = State#state.missing_revs, + ok = gen_server:cast(MissingRevs, {update_committed_seq, SourceSeq}), + couch_task_status:update("W Processed source update #~p", [SourceSeq]), + {noreply, schedule_checkpoint(State#state{committed_seq = SourceSeq})}; +handle_info({writer_checkpoint, _}, State) -> + {noreply, State}; + +handle_info({update_stats, Key, N}, State) -> + ets:update_counter(State#state.stats, Key, N), + {noreply, State}; + +handle_info({'DOWN', _, _, _, _}, State) -> + ?LOG_INFO("replication terminating because local DB is shutting down", []), + timer:cancel(State#state.checkpoint_scheduled), + {stop, shutdown, State}; + +handle_info({'EXIT', Writer, normal}, #state{writer=Writer} = State) -> + case State#state.listeners of + [] -> + {noreply, State#state{complete = true}}; + _Else -> + {stop, normal, State} + end; + +handle_info({'EXIT', _, normal}, State) -> + {noreply, State}; +handle_info({'EXIT', _Pid, {Err, Reason}}, State) when Err == source_error; + Err == target_error -> + ?LOG_INFO("replication terminating due to ~p: ~p", [Err, Reason]), + timer:cancel(State#state.checkpoint_scheduled), + {stop, shutdown, State}; +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}. 
+ +terminate(normal, #state{checkpoint_scheduled=nil} = State) -> + do_terminate(State); + +terminate(normal, State) -> + timer:cancel(State#state.checkpoint_scheduled), + do_terminate(do_checkpoint(State)); + +terminate(Reason, State) -> + #state{ + listeners = Listeners, + source = Source, + target = Target, + stats = Stats + } = State, + [gen_server:reply(L, {error, Reason}) || L <- Listeners], + ets:delete(Stats), + close_db(Target), + close_db(Source). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +% internal funs + +start_replication_server(Replicator) -> + RepId = element(1, Replicator), + case supervisor:start_child(couch_rep_sup, Replicator) of + {ok, Pid} -> + ?LOG_INFO("starting new replication ~p at ~p", [RepId, Pid]), + Pid; + {error, already_present} -> + case supervisor:restart_child(couch_rep_sup, RepId) of + {ok, Pid} -> + ?LOG_INFO("starting replication ~p at ~p", [RepId, Pid]), + Pid; + {error, running} -> + %% this error occurs if multiple replicators are racing + %% each other to start and somebody else won. Just grab + %% the Pid by calling start_child again. + {error, {already_started, Pid}} = + supervisor:start_child(couch_rep_sup, Replicator), + ?LOG_DEBUG("replication ~p already running at ~p", [RepId, Pid]), + Pid; + {error, {db_not_found, DbUrl}} -> + throw({db_not_found, <<"could not open ", DbUrl/binary>>}) + end; + {error, {already_started, Pid}} -> + ?LOG_DEBUG("replication ~p already running at ~p", [RepId, Pid]), + Pid; + {error, {{db_not_found, DbUrl}, _}} -> + throw({db_not_found, <<"could not open ", DbUrl/binary>>}) + end. + +compare_replication_logs(SrcDoc, TgtDoc) -> + #doc{body={RepRecProps}} = SrcDoc, + #doc{body={RepRecPropsTgt}} = TgtDoc, + case couch_util:get_value(<<"session_id">>, RepRecProps) == + couch_util:get_value(<<"session_id">>, RepRecPropsTgt) of + true -> + % if the records have the same session id, + % then we have a valid replication history + OldSeqNum = couch_util:get_value(<<"source_last_seq">>, RepRecProps, 0), + OldHistory = couch_util:get_value(<<"history">>, RepRecProps, []), + {OldSeqNum, OldHistory}; + false -> + SourceHistory = couch_util:get_value(<<"history">>, RepRecProps, []), + TargetHistory = couch_util:get_value(<<"history">>, RepRecPropsTgt, []), + ?LOG_INFO("Replication records differ. " + "Scanning histories to find a common ancestor.", []), + ?LOG_DEBUG("Record on source:~p~nRecord on target:~p~n", + [RepRecProps, RepRecPropsTgt]), + compare_rep_history(SourceHistory, TargetHistory) + end. + +compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> + ?LOG_INFO("no common ancestry -- performing full replication", []), + {0, []}; +compare_rep_history([{S}|SourceRest], [{T}|TargetRest]=Target) -> + SourceId = couch_util:get_value(<<"session_id">>, S), + case has_session_id(SourceId, Target) of + true -> + RecordSeqNum = couch_util:get_value(<<"recorded_seq">>, S, 0), + ?LOG_INFO("found a common replication record with source_seq ~p", + [RecordSeqNum]), + {RecordSeqNum, SourceRest}; + false -> + TargetId = couch_util:get_value(<<"session_id">>, T), + case has_session_id(TargetId, SourceRest) of + true -> + RecordSeqNum = couch_util:get_value(<<"recorded_seq">>, T, 0), + ?LOG_INFO("found a common replication record with source_seq ~p", + [RecordSeqNum]), + {RecordSeqNum, TargetRest}; + false -> + compare_rep_history(SourceRest, TargetRest) + end + end. + +close_db(#http_db{}) -> + ok; +close_db(Db) -> + couch_db:close(Db). 
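% Illustrative sketch, not part of the original module: a worked example of
% the history comparison above. The session ids and sequence numbers are
% assumptions. Here the newest source entry (<<"c">>) is unknown to the
% target, but the newest target entry (<<"b">>) still appears in the older
% source history, so replication can resume from that session's recorded_seq.
example_compare_rep_history() ->
    SourceHistory = [
        {[{<<"session_id">>, <<"c">>}, {<<"recorded_seq">>, 30}]},
        {[{<<"session_id">>, <<"b">>}, {<<"recorded_seq">>, 20}]}
    ],
    TargetHistory = [
        {[{<<"session_id">>, <<"b">>}, {<<"recorded_seq">>, 20}]},
        {[{<<"session_id">>, <<"a">>}, {<<"recorded_seq">>, 10}]}
    ],
    % returns {20, OlderTargetHistory}; a full replication ({0, []}) only
    % happens when the two histories share no session id at all.
    compare_rep_history(SourceHistory, TargetHistory).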
+ +dbname(#http_db{url = Url}) -> + strip_password(Url); +dbname(#db{name = Name}) -> + Name. + +strip_password(Url) -> + re:replace(Url, + "http(s)?://([^:]+):[^@]+@(.*)$", + "http\\1://\\2:*****@\\3", + [{return, list}]). + +dbinfo(#http_db{} = Db) -> + {DbProps} = couch_rep_httpc:request(Db), + [{list_to_existing_atom(?b2l(K)), V} || {K,V} <- DbProps]; +dbinfo(Db) -> + {ok, Info} = couch_db:get_db_info(Db), + Info. + +do_terminate(#state{doc_ids=DocIds} = State) when is_list(DocIds) -> + #state{ + listeners = Listeners, + rep_starttime = ReplicationStartTime, + stats = Stats + } = State, + + RepByDocsJson = {[ + {<<"start_time">>, ?l2b(ReplicationStartTime)}, + {<<"end_time">>, ?l2b(httpd_util:rfc1123_date())}, + {<<"docs_read">>, ets:lookup_element(Stats, docs_read, 2)}, + {<<"docs_written">>, ets:lookup_element(Stats, docs_written, 2)}, + {<<"doc_write_failures">>, + ets:lookup_element(Stats, doc_write_failures, 2)} + ]}, + + terminate_cleanup(State), + [gen_server:reply(L, {ok, RepByDocsJson}) || L <- lists:reverse(Listeners)]; + +do_terminate(State) -> + #state{ + checkpoint_history = CheckpointHistory, + committed_seq = NewSeq, + listeners = Listeners, + source = Source, + continuous = Continuous, + source_log = #doc{body={OldHistory}} + } = State, + + NewRepHistory = case CheckpointHistory of + nil -> + {[{<<"no_changes">>, true} | OldHistory]}; + _Else -> + CheckpointHistory + end, + + %% reply to original requester + OtherListeners = case Continuous of + true -> + []; % continuous replications have no listeners + _ -> + [Original|Rest] = lists:reverse(Listeners), + gen_server:reply(Original, {ok, NewRepHistory}), + Rest + end, + + %% maybe trigger another replication. If this replicator uses a local + %% source Db, changes to that Db since we started will not be included in + %% this pass. + case up_to_date(Source, NewSeq) of + true -> + [gen_server:reply(R, {ok, NewRepHistory}) || R <- OtherListeners]; + false -> + [gen_server:reply(R, retry) || R <- OtherListeners] + end, + terminate_cleanup(State). + +terminate_cleanup(#state{source=Source, target=Target, stats=Stats}) -> + couch_task_status:update("Finishing"), + close_db(Target), + close_db(Source), + ets:delete(Stats). + +has_session_id(_SessionId, []) -> + false; +has_session_id(SessionId, [{Props} | Rest]) -> + case couch_util:get_value(<<"session_id">>, Props, nil) of + SessionId -> + true; + _Else -> + has_session_id(SessionId, Rest) + end. + +maybe_append_options(Options, Props) -> + lists:foldl(fun(Option, Acc) -> + Acc ++ + case couch_util:get_value(Option, Props, false) of + true -> + "+" ++ ?b2l(Option); + false -> + "" + end + end, [], Options). + +make_replication_id({Props}, UserCtx) -> + %% funky algorithm to preserve backwards compatibility + {ok, HostName} = inet:gethostname(), + % Port = mochiweb_socket_server:get(couch_httpd, port), + Src = get_rep_endpoint(UserCtx, couch_util:get_value(<<"source">>, Props)), + Tgt = get_rep_endpoint(UserCtx, couch_util:get_value(<<"target">>, Props)), + Base = [HostName, Src, Tgt] ++ + case couch_util:get_value(<<"filter">>, Props) of + undefined -> + case couch_util:get_value(<<"doc_ids">>, Props) of + undefined -> + []; + DocIds -> + [DocIds] + end; + Filter -> + [Filter, couch_util:get_value(<<"query_params">>, Props, {[]})] + end, + Extension = maybe_append_options( + [<<"continuous">>, <<"create_target">>], Props), + {couch_util:to_hex(couch_util:md5(term_to_binary(Base))), Extension}. 
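% Illustrative sketch, not part of the original module: how the replication id
% above looks for a hypothetical local-to-remote pair. The db names and the
% "continuous" option are assumptions; the first element is the md5 hex digest
% of the hostname plus the two endpoints, the second is the options suffix.
example_replication_id() ->
    PostBody = {[
        {<<"source">>, <<"sourcedb">>},
        {<<"target">>, <<"http://remote.example.com:5984/targetdb/">>},
        {<<"continuous">>, true}
    ]},
    % returns something like {"d0f1...", "+continuous"}
    make_replication_id(PostBody, #user_ctx{}).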
+ +maybe_add_trailing_slash(Url) -> + re:replace(Url, "[^/]$", "&/", [{return, list}]). + +get_rep_endpoint(_UserCtx, {Props}) -> + Url = maybe_add_trailing_slash(couch_util:get_value(<<"url">>, Props)), + {BinHeaders} = couch_util:get_value(<<"headers">>, Props, {[]}), + {Auth} = couch_util:get_value(<<"auth">>, Props, {[]}), + case couch_util:get_value(<<"oauth">>, Auth) of + undefined -> + {remote, Url, [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders]}; + {OAuth} -> + {remote, Url, [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders], OAuth} + end; +get_rep_endpoint(_UserCtx, <<"http://",_/binary>>=Url) -> + {remote, maybe_add_trailing_slash(Url), []}; +get_rep_endpoint(_UserCtx, <<"https://",_/binary>>=Url) -> + {remote, maybe_add_trailing_slash(Url), []}; +get_rep_endpoint(UserCtx, <<DbName/binary>>) -> + {local, DbName, UserCtx}. + +open_replication_log(#http_db{}=Db, RepId) -> + DocId = ?LOCAL_DOC_PREFIX ++ RepId, + Req = Db#http_db{resource=couch_util:url_encode(DocId)}, + case couch_rep_httpc:request(Req) of + {[{<<"error">>, _}, {<<"reason">>, _}]} -> + ?LOG_DEBUG("didn't find a replication log for ~s", [Db#http_db.url]), + #doc{id=?l2b(DocId)}; + Doc -> + ?LOG_DEBUG("found a replication log for ~s", [Db#http_db.url]), + couch_doc:from_json_obj(Doc) + end; +open_replication_log(Db, RepId) -> + DocId = ?l2b(?LOCAL_DOC_PREFIX ++ RepId), + case couch_db:open_doc(Db, DocId, []) of + {ok, Doc} -> + ?LOG_DEBUG("found a replication log for ~s", [Db#db.name]), + Doc; + _ -> + ?LOG_DEBUG("didn't find a replication log for ~s", [Db#db.name]), + #doc{id=DocId} + end. + +open_db(Props, UserCtx, ProxyParams) -> + open_db(Props, UserCtx, ProxyParams, false). + +open_db({Props}, _UserCtx, ProxyParams, CreateTarget) -> + Url = maybe_add_trailing_slash(couch_util:get_value(<<"url">>, Props)), + {AuthProps} = couch_util:get_value(<<"auth">>, Props, {[]}), + {BinHeaders} = couch_util:get_value(<<"headers">>, Props, {[]}), + Headers = [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders], + DefaultHeaders = (#http_db{})#http_db.headers, + Db1 = #http_db{ + url = Url, + auth = AuthProps, + headers = lists:ukeymerge(1, Headers, DefaultHeaders) + }, + Db = Db1#http_db{options = Db1#http_db.options ++ ProxyParams}, + couch_rep_httpc:db_exists(Db, CreateTarget); +open_db(<<"http://",_/binary>>=Url, _, ProxyParams, CreateTarget) -> + open_db({[{<<"url">>,Url}]}, [], ProxyParams, CreateTarget); +open_db(<<"https://",_/binary>>=Url, _, ProxyParams, CreateTarget) -> + open_db({[{<<"url">>,Url}]}, [], ProxyParams, CreateTarget); +open_db(<<DbName/binary>>, UserCtx, _ProxyParams, CreateTarget) -> + case CreateTarget of + true -> + ok = couch_httpd:verify_is_server_admin(UserCtx), + couch_server:create(DbName, [{user_ctx, UserCtx}]); + false -> ok + end, + + case couch_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + couch_db:monitor(Db), + Db; + {not_found, no_db_file} -> throw({db_not_found, DbName}) + end. + +schedule_checkpoint(#state{checkpoint_scheduled = nil} = State) -> + Server = self(), + case timer:apply_after(5000, couch_rep, checkpoint, [Server]) of + {ok, TRef} -> + State#state{checkpoint_scheduled = TRef}; + Error -> + ?LOG_ERROR("tried to schedule a checkpoint but got ~p", [Error]), + State + end; +schedule_checkpoint(State) -> + State. 
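% Illustrative sketch, not part of the original module: the endpoint shapes
% produced by get_rep_endpoint/2 above. The url and db name are assumptions;
% note that maybe_add_trailing_slash/1 normalises the remote url.
example_endpoints(UserCtx) ->
    {remote, "http://remote.example.com:5984/db/", []} =
        get_rep_endpoint(UserCtx, <<"http://remote.example.com:5984/db">>),
    {local, <<"localdb">>, UserCtx} =
        get_rep_endpoint(UserCtx, <<"localdb">>),
    ok.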
+ +do_checkpoint(State) -> + #state{ + source = Source, + target = Target, + committed_seq = NewSeqNum, + start_seq = StartSeqNum, + history = OldHistory, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = ReplicationStartTime, + src_starttime = SrcInstanceStartTime, + tgt_starttime = TgtInstanceStartTime, + stats = Stats + } = State, + case commit_to_both(Source, Target, NewSeqNum) of + {SrcInstanceStartTime, TgtInstanceStartTime} -> + ?LOG_INFO("recording a checkpoint for ~s -> ~s at source update_seq ~p", + [dbname(Source), dbname(Target), NewSeqNum]), + SessionId = couch_uuids:random(), + NewHistoryEntry = {[ + {<<"session_id">>, SessionId}, + {<<"start_time">>, list_to_binary(ReplicationStartTime)}, + {<<"end_time">>, list_to_binary(httpd_util:rfc1123_date())}, + {<<"start_last_seq">>, StartSeqNum}, + {<<"end_last_seq">>, NewSeqNum}, + {<<"recorded_seq">>, NewSeqNum}, + {<<"missing_checked">>, ets:lookup_element(Stats, total_revs, 2)}, + {<<"missing_found">>, ets:lookup_element(Stats, missing_revs, 2)}, + {<<"docs_read">>, ets:lookup_element(Stats, docs_read, 2)}, + {<<"docs_written">>, ets:lookup_element(Stats, docs_written, 2)}, + {<<"doc_write_failures">>, + ets:lookup_element(Stats, doc_write_failures, 2)} + ]}, + % limit history to 50 entries + NewRepHistory = {[ + {<<"session_id">>, SessionId}, + {<<"source_last_seq">>, NewSeqNum}, + {<<"history">>, lists:sublist([NewHistoryEntry | OldHistory], 50)} + ]}, + + try + {SrcRevPos,SrcRevId} = + update_local_doc(Source, SourceLog#doc{body=NewRepHistory}), + {TgtRevPos,TgtRevId} = + update_local_doc(Target, TargetLog#doc{body=NewRepHistory}), + State#state{ + checkpoint_scheduled = nil, + checkpoint_history = NewRepHistory, + source_log = SourceLog#doc{revs={SrcRevPos, [SrcRevId]}}, + target_log = TargetLog#doc{revs={TgtRevPos, [TgtRevId]}} + } + catch throw:conflict -> + ?LOG_ERROR("checkpoint failure: conflict (are you replicating to " + "yourself?)", []), + State + end; + _Else -> + ?LOG_INFO("rebooting ~s -> ~s from last known replication checkpoint", + [dbname(Source), dbname(Target)]), + #state{ + changes_feed = CF, + missing_revs = MR, + reader = Reader, + writer = Writer + } = State, + Pids = [Writer, Reader, MR, CF], + [unlink(Pid) || Pid <- Pids], + [exit(Pid, shutdown) || Pid <- Pids], + close_db(Target), + close_db(Source), + {ok, NewState} = init(State#state.init_args), + NewState#state{listeners=State#state.listeners} + end. + +commit_to_both(Source, Target, RequiredSeq) -> + % commit the src async + ParentPid = self(), + SrcCommitPid = spawn_link(fun() -> + ParentPid ! {self(), ensure_full_commit(Source, RequiredSeq)} end), + + % commit tgt sync + TargetStartTime = ensure_full_commit(Target), + + SourceStartTime = + receive + {SrcCommitPid, Timestamp} -> + Timestamp; + {'EXIT', SrcCommitPid, {http_request_failed, _}} -> + exit(replication_link_failure) + end, + {SourceStartTime, TargetStartTime}. 
+ +ensure_full_commit(#http_db{headers = Headers} = Target) -> + Req = Target#http_db{ + resource = "_ensure_full_commit", + method = post, + headers = couch_util:proplist_apply_field({"Content-Type", "application/json"}, Headers) + }, + {ResultProps} = couch_rep_httpc:request(Req), + true = couch_util:get_value(<<"ok">>, ResultProps), + couch_util:get_value(<<"instance_start_time">>, ResultProps); +ensure_full_commit(Target) -> + {ok, NewDb} = couch_db:open_int(Target#db.name, []), + UpdateSeq = couch_db:get_update_seq(Target), + CommitSeq = couch_db:get_committed_update_seq(NewDb), + InstanceStartTime = NewDb#db.instance_start_time, + couch_db:close(NewDb), + if UpdateSeq > CommitSeq -> + ?LOG_DEBUG("target needs a full commit: update ~p commit ~p", + [UpdateSeq, CommitSeq]), + {ok, DbStartTime} = couch_db:ensure_full_commit(Target), + DbStartTime; + true -> + ?LOG_DEBUG("target doesn't need a full commit", []), + InstanceStartTime + end. + +ensure_full_commit(#http_db{headers = Headers} = Source, RequiredSeq) -> + Req = Source#http_db{ + resource = "_ensure_full_commit", + method = post, + qs = [{seq, RequiredSeq}], + headers = couch_util:proplist_apply_field({"Content-Type", "application/json"}, Headers) + }, + {ResultProps} = couch_rep_httpc:request(Req), + case couch_util:get_value(<<"ok">>, ResultProps) of + true -> + couch_util:get_value(<<"instance_start_time">>, ResultProps); + undefined -> nil end; +ensure_full_commit(Source, RequiredSeq) -> + {ok, NewDb} = couch_db:open_int(Source#db.name, []), + CommitSeq = couch_db:get_committed_update_seq(NewDb), + InstanceStartTime = NewDb#db.instance_start_time, + couch_db:close(NewDb), + if RequiredSeq > CommitSeq -> + ?LOG_DEBUG("source needs a full commit: required ~p committed ~p", + [RequiredSeq, CommitSeq]), + {ok, DbStartTime} = couch_db:ensure_full_commit(Source), + DbStartTime; + true -> + ?LOG_DEBUG("source doesn't need a full commit", []), + InstanceStartTime + end. + +update_local_doc(#http_db{} = Db, #doc{id=DocId} = Doc) -> + Req = Db#http_db{ + resource = couch_util:url_encode(DocId), + method = put, + body = couch_doc:to_json_obj(Doc, [attachments]), + headers = [{"x-couch-full-commit", "false"} | Db#http_db.headers] + }, + {ResponseMembers} = couch_rep_httpc:request(Req), + Rev = couch_util:get_value(<<"rev">>, ResponseMembers), + couch_doc:parse_rev(Rev); +update_local_doc(Db, Doc) -> + {ok, Result} = couch_db:update_doc(Db, Doc, [delay_commit]), + Result. + +up_to_date(#http_db{}, _Seq) -> + true; +up_to_date(Source, Seq) -> + {ok, NewDb} = couch_db:open_int(Source#db.name, []), + T = NewDb#db.update_seq == Seq, + couch_db:close(NewDb), + T. + +parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl) -> + parse_proxy_params(?b2l(ProxyUrl)); +parse_proxy_params([]) -> + []; +parse_proxy_params(ProxyUrl) -> + {url, _, Base, Port, User, Passwd, _Path, _Proto} = + ibrowse_lib:parse_url(ProxyUrl), + [{proxy_host, Base}, {proxy_port, Port}] ++ + case is_list(User) andalso is_list(Passwd) of + false -> + []; + true -> + [{proxy_user, User}, {proxy_password, Passwd}] + end. diff --git a/apps/couch/src/couch_rep_att.erl b/apps/couch/src/couch_rep_att.erl new file mode 100644 index 00000000..28b8945c --- /dev/null +++ b/apps/couch/src/couch_rep_att.erl @@ -0,0 +1,120 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_att). + +-export([convert_stub/2, cleanup/0]). + +-include("couch_db.hrl"). + +convert_stub(#att{data=stub, name=Name} = Attachment, + {#http_db{} = Db, Id, Rev}) -> + {Pos, [RevId|_]} = Rev, + Request = Db#http_db{ + resource = lists:flatten([couch_util:url_encode(Id), "/", + couch_util:url_encode(Name)]), + qs = [{rev, couch_doc:rev_to_str({Pos,RevId})}] + }, + Ref = make_ref(), + RcvFun = fun() -> attachment_receiver(Ref, Request) end, + Attachment#att{data=RcvFun}. + +cleanup() -> + receive + {ibrowse_async_response, _, _} -> + %% TODO maybe log, didn't expect to have data here + cleanup(); + {ibrowse_async_response_end, _} -> + cleanup(); + {ibrowse_async_headers, _, _, _} -> + cleanup() + after 0 -> + erase(), + ok + end. + +% internal funs + +attachment_receiver(Ref, Request) -> + try case get(Ref) of + undefined -> + {ReqId, ContentEncoding} = start_http_request(Request), + put(Ref, {ReqId, ContentEncoding}), + receive_data(Ref, ReqId, ContentEncoding); + {ReqId, ContentEncoding} -> + receive_data(Ref, ReqId, ContentEncoding) + end + catch + throw:{attachment_request_failed, _} -> + case {Request#http_db.retries, Request#http_db.pause} of + {0, _} -> + ?LOG_INFO("request for ~p failed", [Request#http_db.resource]), + throw({attachment_request_failed, max_retries_reached}); + {N, Pause} when N > 0 -> + ?LOG_INFO("request for ~p timed out, retrying in ~p seconds", + [Request#http_db.resource, Pause/1000]), + timer:sleep(Pause), + cleanup(), + attachment_receiver(Ref, Request#http_db{retries = N-1}) + end + end. + +receive_data(Ref, ReqId, ContentEncoding) -> + receive + {ibrowse_async_response, ReqId, {chunk_start,_}} -> + receive_data(Ref, ReqId, ContentEncoding); + {ibrowse_async_response, ReqId, chunk_end} -> + receive_data(Ref, ReqId, ContentEncoding); + {ibrowse_async_response, ReqId, {error, Err}} -> + ?LOG_ERROR("streaming attachment ~p failed with ~p", [ReqId, Err]), + throw({attachment_request_failed, Err}); + {ibrowse_async_response, ReqId, Data} -> + % ?LOG_DEBUG("got ~p bytes for ~p", [size(Data), ReqId]), + Data; + {ibrowse_async_response_end, ReqId} -> + ?LOG_ERROR("streaming att. ended but more data requested ~p", [ReqId]), + throw({attachment_request_failed, premature_end}) + after 31000 -> + throw({attachment_request_failed, timeout}) + end. + +start_http_request(Req) -> + %% set stream_to here because self() has changed + Req2 = Req#http_db{options = [{stream_to,self()} | Req#http_db.options]}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req2), + receive {ibrowse_async_headers, ReqId, Code, Headers} -> + case validate_headers(Req2, list_to_integer(Code), Headers) of + {ok, ContentEncoding} -> + {ReqId, ContentEncoding}; + {ok, ContentEncoding, NewReqId} -> + {NewReqId, ContentEncoding} + end + after 10000 -> + throw({attachment_request_failed, timeout}) + end. 
+ +validate_headers(_Req, 200, Headers) -> + MochiHeaders = mochiweb_headers:make(Headers), + {ok, mochiweb_headers:get_value("Content-Encoding", MochiHeaders)}; +validate_headers(Req, Code, Headers) when Code > 299, Code < 400 -> + Url = mochiweb_headers:get_value("Location",mochiweb_headers:make(Headers)), + NewReq = couch_rep_httpc:redirected_request(Req, Url), + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(NewReq), + receive {ibrowse_async_headers, ReqId, NewCode, NewHeaders} -> + {ok, Encoding} = validate_headers(NewReq, list_to_integer(NewCode), + NewHeaders) + end, + {ok, Encoding, ReqId}; +validate_headers(Req, Code, _Headers) -> + #http_db{url=Url, resource=Resource} = Req, + ?LOG_ERROR("got ~p for ~s~s", [Code, Url, Resource]), + throw({attachment_request_failed, {bad_code, Code}}). diff --git a/apps/couch/src/couch_rep_changes_feed.erl b/apps/couch/src/couch_rep_changes_feed.erl new file mode 100644 index 00000000..66696912 --- /dev/null +++ b/apps/couch/src/couch_rep_changes_feed.erl @@ -0,0 +1,386 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_changes_feed). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1, stop/1]). + +-define(BUFFER_SIZE, 1000). + +-include("couch_db.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-record (state, { + changes_from = nil, + changes_loop = nil, + init_args, + last_seq, + conn = nil, + reqid = nil, + complete = false, + count = 0, + partial_chunk = <<>>, + reply_to = nil, + rows = queue:new() +}). + +start_link(Parent, Source, StartSeq, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Source, StartSeq, PostProps], []). + +next(Server) -> + gen_server:call(Server, next_changes, infinity). + +stop(Server) -> + gen_server:call(Server, stop). 
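% Illustrative sketch, not part of the original module: a minimal consumer of
% this feed, assuming Server came from start_link/4 above. next/1 blocks until
% rows are buffered (up to ?BUFFER_SIZE of them) and returns the atom
% 'complete' once the feed has ended; couch_rep_missing_revs uses exactly this
% pattern in its changes_loop.
example_drain_feed(Server) ->
    case next(Server) of
        complete ->
            ok;
        Rows when is_list(Rows) ->
            % each Row is a decoded changes row, e.g. {[{<<"seq">>, ...}, ...]}
            example_drain_feed(Server)
    end.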
+ +init([_Parent, #http_db{}=Source, Since, PostProps] = Args) -> + process_flag(trap_exit, true), + Feed = case couch_util:get_value(<<"continuous">>, PostProps, false) of + false -> + normal; + true -> + continuous + end, + BaseQS = [ + {"style", all_docs}, + {"heartbeat", 10000}, + {"since", Since}, + {"feed", Feed} + ], + QS = case couch_util:get_value(<<"filter">>, PostProps) of + undefined -> + BaseQS; + FilterName -> + {Params} = couch_util:get_value(<<"query_params">>, PostProps, {[]}), + lists:foldr( + fun({K, V}, QSAcc) -> + Ks = couch_util:to_list(K), + case proplists:is_defined(Ks, QSAcc) of + true -> + QSAcc; + false -> + [{Ks, V} | QSAcc] + end + end, + [{"filter", FilterName} | BaseQS], + Params + ) + end, + Pid = couch_rep_httpc:spawn_link_worker_process(Source), + Req = Source#http_db{ + resource = "_changes", + qs = QS, + conn = Pid, + options = [{stream_to, {self(), once}}, {response_format, binary}], + headers = Source#http_db.headers -- [{"Accept-Encoding", "gzip"}] + }, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req), + + receive + {ibrowse_async_headers, ReqId, "200", _} -> + ibrowse:stream_next(ReqId), + {ok, #state{conn=Pid, last_seq=Since, reqid=ReqId, init_args=Args}}; + {ibrowse_async_headers, ReqId, Code, Hdrs} when Code=="301"; Code=="302" -> + catch ibrowse:stop_worker_process(Pid), + Url2 = mochiweb_headers:get_value("Location", mochiweb_headers:make(Hdrs)), + %% TODO use couch_httpc:request instead of start_http_request + {Pid2, ReqId2} = start_http_request(Url2), + receive {ibrowse_async_headers, ReqId2, "200", _} -> + {ok, #state{conn=Pid2, last_seq=Since, reqid=ReqId2, init_args=Args}} + after 30000 -> + {stop, changes_timeout} + end; + {ibrowse_async_headers, ReqId, "404", _} -> + catch ibrowse:stop_worker_process(Pid), + ?LOG_INFO("source doesn't have _changes, trying _all_docs_by_seq", []), + Self = self(), + BySeqPid = spawn_link(fun() -> by_seq_loop(Self, Source, Since) end), + {ok, #state{last_seq=Since, changes_loop=BySeqPid, init_args=Args}}; + {ibrowse_async_headers, ReqId, Code, _} -> + {stop, {changes_error_code, list_to_integer(Code)}} + after 10000 -> + {stop, changes_timeout} + end; + +init([_Parent, Source, Since, PostProps] = InitArgs) -> + process_flag(trap_exit, true), + Server = self(), + ChangesArgs = #changes_args{ + style = all_docs, + since = Since, + filter = ?b2l(couch_util:get_value(<<"filter">>, PostProps, <<>>)), + feed = case couch_util:get_value(<<"continuous">>, PostProps, false) of + true -> + "continuous"; + false -> + "normal" + end, + timeout = infinity + }, + ChangesPid = spawn_link(fun() -> + ChangesFeedFun = couch_changes:handle_changes( + ChangesArgs, + {json_req, filter_json_req(Source, PostProps)}, + Source + ), + ChangesFeedFun(fun({change, Change, _}, _) -> + gen_server:call(Server, {add_change, Change}, infinity); + (_, _) -> + ok + end) + end), + {ok, #state{changes_loop=ChangesPid, init_args=InitArgs}}. 
+ +filter_json_req(Db, PostProps) -> + case couch_util:get_value(<<"filter">>, PostProps) of + undefined -> + {[]}; + FilterName -> + {Query} = couch_util:get_value(<<"query_params">>, PostProps, {[]}), + {ok, Info} = couch_db:get_db_info(Db), + % simulate a request to db_name/_changes + {[ + {<<"info">>, {Info}}, + {<<"id">>, null}, + {<<"method">>, 'GET'}, + {<<"path">>, [couch_db:name(Db), <<"_changes">>]}, + {<<"query">>, {[{<<"filter">>, FilterName} | Query]}}, + {<<"headers">>, []}, + {<<"body">>, []}, + {<<"peer">>, <<"replicator">>}, + {<<"form">>, []}, + {<<"cookie">>, []}, + {<<"userCtx">>, couch_util:json_user_ctx(Db)} + ]} + end. + +handle_call({add_change, Row}, From, State) -> + handle_add_change(Row, From, State); + +handle_call(next_changes, From, State) -> + handle_next_changes(From, State); + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({ibrowse_async_headers, Id, Code, Hdrs}, #state{reqid=Id}=State) -> + handle_headers(list_to_integer(Code), Hdrs, State); + +handle_info({ibrowse_async_response, Id, {error,connection_closed}}, + #state{reqid=Id}=State) -> + handle_retry(State); + +handle_info({ibrowse_async_response, Id, {error,E}}, #state{reqid=Id}=State) -> + {stop, {error, E}, State}; + +handle_info({ibrowse_async_response, Id, Chunk}, #state{reqid=Id}=State) -> + Messages = [M || M <- re:split(Chunk, ",?\n", [trim]), M =/= <<>>], + handle_messages(Messages, State); + +handle_info({ibrowse_async_response_end, Id}, #state{reqid=Id} = State) -> + handle_feed_completion(State); + +handle_info({'EXIT', From, normal}, #state{changes_loop=From} = State) -> + handle_feed_completion(State); + +handle_info({'EXIT', From, Reason}, #state{changes_loop=From} = State) -> + ?LOG_ERROR("changes_loop died with reason ~p", [Reason]), + {stop, changes_loop_died, State}; + +handle_info({'EXIT', _From, normal}, State) -> + {noreply, State}; + +handle_info(Msg, State) -> + ?LOG_DEBUG("unexpected message at changes_feed ~p", [Msg]), + {noreply, State}. + +terminate(_Reason, State) -> + #state{ + changes_loop = ChangesPid, + conn = Conn + } = State, + if is_pid(ChangesPid) -> exit(ChangesPid, stop); true -> ok end, + if is_pid(Conn) -> catch ibrowse:stop_worker_process(Conn); true -> ok end, + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_change(Row, From, #state{reply_to=nil} = State) -> + #state{ + count = Count, + rows = Rows + } = State, + NewState = State#state{count=Count+1, rows=queue:in(Row,Rows)}, + if Count < ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{changes_from=From}} + end; +handle_add_change(Row, _From, #state{count=0} = State) -> + gen_server:reply(State#state.reply_to, [Row]), + {reply, ok, State#state{reply_to=nil}}. + +handle_next_changes(From, #state{count=0}=State) -> + if State#state.complete -> + {stop, normal, complete, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_changes(_From, State) -> + #state{ + changes_from = ChangesFrom, + rows = Rows + } = State, + NewState = State#state{count=0, changes_from=nil, rows=queue:new()}, + maybe_stream_next(NewState), + if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end, + {reply, queue:to_list(Rows), NewState}. 
+ +handle_headers(200, _, State) -> + maybe_stream_next(State), + {noreply, State}; +handle_headers(301, Hdrs, State) -> + catch ibrowse:stop_worker_process(State#state.conn), + Url = mochiweb_headers:get_value("Location", mochiweb_headers:make(Hdrs)), + %% TODO use couch_httpc:request instead of start_http_request + {Pid, ReqId} = start_http_request(Url), + {noreply, State#state{conn=Pid, reqid=ReqId}}; +handle_headers(Code, Hdrs, State) -> + ?LOG_ERROR("replicator changes feed failed with code ~s and Headers ~n~p", + [Code,Hdrs]), + {stop, {error, Code}, State}. + +handle_messages([], State) -> + maybe_stream_next(State), + {noreply, State}; +handle_messages([<<"{\"results\":[">>|Rest], State) -> + handle_messages(Rest, State); +handle_messages([<<"]">>, <<"\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([<<"{\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([Chunk|Rest], State) -> + #state{ + count = Count, + partial_chunk = Partial, + rows = Rows + } = State, + NewState = try + Row = {Props} = decode_row(<<Partial/binary, Chunk/binary>>), + case State of + #state{reply_to=nil} -> + State#state{ + count = Count+1, + last_seq = couch_util:get_value(<<"seq">>, Props), + partial_chunk = <<>>, + rows=queue:in(Row,Rows) + }; + #state{count=0, reply_to=From}-> + gen_server:reply(From, [Row]), + State#state{reply_to = nil, partial_chunk = <<>>} + end + catch + throw:{invalid_json, Bad} -> + State#state{partial_chunk = Bad} + end, + handle_messages(Rest, NewState). + +handle_feed_completion(#state{reply_to=nil} = State)-> + {noreply, State#state{complete=true}}; +handle_feed_completion(#state{count=0} = State) -> + gen_server:reply(State#state.reply_to, complete), + {stop, normal, State}. + +handle_retry(State) -> + ?LOG_DEBUG("retrying changes feed because our connection closed", []), + #state{ + count = Count, + init_args = [_, Source, _, PostProps], + last_seq = Since, + reply_to = ReplyTo, + rows = Rows + } = State, + case init([nil, Source, Since, PostProps]) of + {ok, State1} -> + MergedState = State1#state{ + count = Count, + reply_to = ReplyTo, + rows = Rows + }, + {noreply, MergedState}; + _ -> + {stop, {error, connection_closed}, State} + end. + +by_seq_loop(Server, Source, StartSeq) -> + Req = Source#http_db{ + resource = "_all_docs_by_seq", + qs = [{limit, 1000}, {startkey, StartSeq}] + }, + {Results} = couch_rep_httpc:request(Req), + Rows = couch_util:get_value(<<"rows">>, Results), + if Rows =:= [] -> exit(normal); true -> ok end, + EndSeq = lists:foldl(fun({RowInfoList}, _) -> + Id = couch_util:get_value(<<"id">>, RowInfoList), + Seq = couch_util:get_value(<<"key">>, RowInfoList), + {RowProps} = couch_util:get_value(<<"value">>, RowInfoList), + RawRevs = [ + couch_util:get_value(<<"rev">>, RowProps), + couch_util:get_value(<<"conflicts">>, RowProps, []), + couch_util:get_value(<<"deleted_conflicts">>, RowProps, []) + ], + ParsedRevs = couch_doc:parse_revs(lists:flatten(RawRevs)), + Change = {[ + {<<"seq">>, Seq}, + {<<"id">>, Id}, + {<<"changes">>, [{[{<<"rev">>,R}]} || R <- ParsedRevs]} + ]}, + gen_server:call(Server, {add_change, Change}, infinity), + Seq + end, 0, Rows), + by_seq_loop(Server, Source, EndSeq). + +decode_row(<<",", Rest/binary>>) -> + decode_row(Rest); +decode_row(Row) -> + ?JSON_DECODE(Row). 
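% Illustrative sketch, not part of the original module: decode_row/1 strips the
% leading comma that separates rows inside a streamed "results" array, so a raw
% chunk boundary like the one below (the JSON itself is an assumption) still
% decodes to the usual EJSON change row.
example_decode_row() ->
    Chunk = <<",{\"seq\":5,\"id\":\"mydoc\",\"changes\":[{\"rev\":\"1-abc\"}]}">>,
    % returns {[{<<"seq">>,5},{<<"id">>,<<"mydoc">>},{<<"changes">>, ...}]}
    decode_row(Chunk).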
+ +maybe_stream_next(#state{reqid=nil}) -> + ok; +maybe_stream_next(#state{complete=false, count=N} = S) when N < ?BUFFER_SIZE -> + timer:cancel(get(timeout)), + {ok, Timeout} = timer:exit_after(31000, changes_timeout), + put(timeout, Timeout), + ibrowse:stream_next(S#state.reqid); +maybe_stream_next(_) -> + timer:cancel(get(timeout)). + +start_http_request(RawUrl) -> + Url = ibrowse_lib:parse_url(RawUrl), + {ok, Pid} = ibrowse:spawn_link_worker_process(Url#url.host, Url#url.port), + Opts = [ + {stream_to, {self(), once}}, + {inactivity_timeout, 31000}, + {response_format, binary} + ], + {ibrowse_req_id, Id} = + ibrowse:send_req_direct(Pid, RawUrl, [], get, [], Opts, infinity), + {Pid, Id}. diff --git a/apps/couch/src/couch_rep_httpc.erl b/apps/couch/src/couch_rep_httpc.erl new file mode 100644 index 00000000..aaa38106 --- /dev/null +++ b/apps/couch/src/couch_rep_httpc.erl @@ -0,0 +1,245 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_httpc). +-include("couch_db.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-export([db_exists/1, db_exists/2, full_url/1, request/1, redirected_request/2, + spawn_worker_process/1, spawn_link_worker_process/1]). + +request(#http_db{} = Req) -> + do_request(Req). + +do_request(#http_db{url=Url} = Req) when is_binary(Url) -> + do_request(Req#http_db{url = ?b2l(Url)}); + +do_request(Req) -> + #http_db{ + auth = Auth, + body = B, + conn = Conn, + headers = Headers0, + method = Method, + options = Opts, + qs = QS + } = Req, + Url = full_url(Req), + Headers = case couch_util:get_value(<<"oauth">>, Auth) of + undefined -> + Headers0; + {OAuthProps} -> + [oauth_header(Url, QS, Method, OAuthProps) | Headers0] + end, + Body = case B of + {Fun, InitialState} when is_function(Fun) -> + {Fun, InitialState}; + nil -> + []; + _Else -> + iolist_to_binary(?JSON_ENCODE(B)) + end, + Resp = case Conn of + nil -> + ibrowse:send_req(Url, Headers, Method, Body, Opts, infinity); + _ -> + ibrowse:send_req_direct(Conn, Url, Headers, Method, Body, Opts, infinity) + end, + process_response(Resp, Req). + +db_exists(Req) -> + db_exists(Req, Req#http_db.url). + +db_exists(Req, true) -> + db_exists(Req, Req#http_db.url, true); + +db_exists(Req, false) -> + db_exists(Req, Req#http_db.url, false); + +db_exists(Req, CanonicalUrl) -> + db_exists(Req, CanonicalUrl, false). 
+ +db_exists(Req, CanonicalUrl, CreateDB) -> + #http_db{ + auth = Auth, + headers = Headers0, + url = Url + } = Req, + HeadersFun = fun(Method) -> + case couch_util:get_value(<<"oauth">>, Auth) of + undefined -> + Headers0; + {OAuthProps} -> + [oauth_header(Url, [], Method, OAuthProps) | Headers0] + end + end, + case CreateDB of + true -> + catch ibrowse:send_req(Url, HeadersFun(put), put); + _Else -> ok + end, + case catch ibrowse:send_req(Url, HeadersFun(head), head) of + {ok, "200", _, _} -> + Req#http_db{url = CanonicalUrl}; + {ok, "301", RespHeaders, _} -> + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), + db_exists(Req#http_db{url = RedirectUrl}, RedirectUrl); + {ok, "302", RespHeaders, _} -> + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), + db_exists(Req#http_db{url = RedirectUrl}, CanonicalUrl); + Error -> + ?LOG_DEBUG("DB at ~s could not be found because ~p", [Url, Error]), + throw({db_not_found, ?l2b(Url)}) + end. + +redirect_url(RespHeaders, OrigUrl) -> + MochiHeaders = mochiweb_headers:make(RespHeaders), + RedUrl = mochiweb_headers:get_value("Location", MochiHeaders), + {url, _, Base, Port, _, _, Path, Proto} = ibrowse_lib:parse_url(RedUrl), + {url, _, _, _, User, Passwd, _, _} = ibrowse_lib:parse_url(OrigUrl), + Creds = case is_list(User) andalso is_list(Passwd) of + true -> + User ++ ":" ++ Passwd ++ "@"; + false -> + [] + end, + atom_to_list(Proto) ++ "://" ++ Creds ++ Base ++ ":" ++ + integer_to_list(Port) ++ Path. + +full_url(#http_db{url=Url} = Req) when is_binary(Url) -> + full_url(Req#http_db{url = ?b2l(Url)}); + +full_url(#http_db{qs=[]} = Req) -> + Req#http_db.url ++ Req#http_db.resource; + +full_url(Req) -> + #http_db{ + url = Url, + resource = Resource, + qs = QS + } = Req, + QStr = lists:map(fun({K,V}) -> io_lib:format("~s=~s", + [couch_util:to_list(K), couch_util:to_list(V)]) end, QS), + lists:flatten([Url, Resource, "?", string:join(QStr, "&")]). 
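% Illustrative sketch, not part of the original module: what full_url/1 builds
% for a hypothetical request record; the url, resource and query values are
% assumptions.
example_full_url() ->
    Req = #http_db{
        url = "http://remote.example.com:5984/db/",
        resource = "_changes",
        qs = [{"since", 42}, {"feed", "normal"}]
    },
    % returns "http://remote.example.com:5984/db/_changes?since=42&feed=normal"
    full_url(Req).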
+ +process_response({ok, Status, Headers, Body}, Req) -> + Code = list_to_integer(Status), + if Code =:= 200; Code =:= 201 -> + ?JSON_DECODE(maybe_decompress(Headers, Body)); + Code =:= 301; Code =:= 302 -> + RedirectUrl = redirect_url(Headers, Req#http_db.url), + do_request(redirected_request(Req, RedirectUrl)); + Code =:= 409 -> + throw(conflict); + Code >= 400, Code < 500 -> + ?JSON_DECODE(maybe_decompress(Headers, Body)); + Code =:= 500; Code =:= 502; Code =:= 503 -> + #http_db{pause = Pause, retries = Retries} = Req, + ?LOG_INFO("retrying couch_rep_httpc request in ~p seconds " ++ + % "due to remote server error: ~s~s", [Pause/1000, Req#http_db.url, + "due to remote server error: ~p Body ~s", [Pause/1000, Code, + Body]), + timer:sleep(Pause), + do_request(Req#http_db{retries = Retries-1, pause = 2*Pause}); + true -> + exit({http_request_failed, ?l2b(["unhandled response code ", Status])}) + end; + +process_response({ibrowse_req_id, Id}, _Req) -> + {ibrowse_req_id, Id}; + +process_response({error, _Reason}, #http_db{url=Url, retries=0}) -> + ?LOG_ERROR("couch_rep_httpc request failed after 10 retries: ~s", [Url]), + exit({http_request_failed, ?l2b(["failed to replicate ", Url])}); +process_response({error, Reason}, Req) -> + #http_db{ + method = Method, + retries = Retries, + pause = Pause + } = Req, + ShortReason = case Reason of + connection_closed -> + connection_closed; + {'EXIT', {noproc, _}} -> + noproc; + {'EXIT', {normal, _}} -> + normal; + Else -> + Else + end, + ?LOG_DEBUG("retrying couch_rep_httpc ~p request in ~p seconds due to " ++ + "{error, ~p}", [Method, Pause/1000, ShortReason]), + timer:sleep(Pause), + if Reason == worker_is_dead -> + C = spawn_link_worker_process(Req), + do_request(Req#http_db{retries = Retries-1, pause = 2*Pause, conn=C}); + true -> + do_request(Req#http_db{retries = Retries-1, pause = 2*Pause}) + end. + +redirected_request(Req, RedirectUrl) -> + {Base, QStr, _} = mochiweb_util:urlsplit_path(RedirectUrl), + QS = mochiweb_util:parse_qs(QStr), + Hdrs = case couch_util:get_value(<<"oauth">>, Req#http_db.auth) of + undefined -> + Req#http_db.headers; + _Else -> + lists:keydelete("Authorization", 1, Req#http_db.headers) + end, + Req#http_db{url=Base, resource="", qs=QS, headers=Hdrs}. + +spawn_worker_process(Req) -> + Url = ibrowse_lib:parse_url(Req#http_db.url), + {ok, Pid} = ibrowse_http_client:start(Url), + Pid. + +spawn_link_worker_process(Req) -> + Url = ibrowse_lib:parse_url(Req#http_db.url), + {ok, Pid} = ibrowse_http_client:start_link(Url), + Pid. + +maybe_decompress(Headers, Body) -> + MochiHeaders = mochiweb_headers:make(Headers), + case mochiweb_headers:get_value("Content-Encoding", MochiHeaders) of + "gzip" -> + zlib:gunzip(Body); + _ -> + Body + end. 
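% Illustrative sketch, not part of the original module: redirected_request/2
% above re-bases a request onto the Location url returned with a 301/302,
% dropping the resource and re-parsing the query string. The urls below are
% assumptions.
example_redirect() ->
    Req = #http_db{url = "http://old.example.com:5984/db/",
                   resource = "_changes",
                   qs = [{"since", 0}]},
    #http_db{url = "http://new.example.com:5984/db/_changes",
             resource = "",
             qs = [{"since", "0"}]} =
        redirected_request(Req, "http://new.example.com:5984/db/_changes?since=0"),
    ok.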
+ +oauth_header(Url, QS, Action, Props) -> + % erlang-oauth doesn't like iolists + QSL = [{couch_util:to_list(K), ?b2l(?l2b(couch_util:to_list(V)))} || + {K,V} <- QS], + ConsumerKey = ?b2l(couch_util:get_value(<<"consumer_key">>, Props)), + Token = ?b2l(couch_util:get_value(<<"token">>, Props)), + TokenSecret = ?b2l(couch_util:get_value(<<"token_secret">>, Props)), + ConsumerSecret = ?b2l(couch_util:get_value(<<"consumer_secret">>, Props)), + SignatureMethodStr = ?b2l(couch_util:get_value(<<"signature_method">>, Props, <<"HMAC-SHA1">>)), + SignatureMethodAtom = case SignatureMethodStr of + "PLAINTEXT" -> + plaintext; + "HMAC-SHA1" -> + hmac_sha1; + "RSA-SHA1" -> + rsa_sha1 + end, + Consumer = {ConsumerKey, ConsumerSecret, SignatureMethodAtom}, + Method = case Action of + get -> "GET"; + post -> "POST"; + put -> "PUT"; + head -> "HEAD" + end, + Params = oauth:signed_params(Method, Url, QSL, Consumer, Token, TokenSecret) + -- QSL, + {"Authorization", "OAuth " ++ oauth_uri:params_to_header_string(Params)}. diff --git a/apps/couch/src/couch_rep_missing_revs.erl b/apps/couch/src/couch_rep_missing_revs.erl new file mode 100644 index 00000000..1eff6774 --- /dev/null +++ b/apps/couch/src/couch_rep_missing_revs.erl @@ -0,0 +1,198 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_missing_revs). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1, stop/1]). + +-define(BUFFER_SIZE, 1000). + +-include("couch_db.hrl"). + +-record (state, { + changes_loop, + changes_from = nil, + target, + parent, + complete = false, + count = 0, + reply_to = nil, + rows = queue:new(), + high_source_seq = 0, + high_missing_seq = 0, + high_committed_seq = 0 +}). + +start_link(Parent, Target, ChangesFeed, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Target, ChangesFeed, PostProps], []). + +next(Server) -> + gen_server:call(Server, next_missing_revs, infinity). + +stop(Server) -> + gen_server:call(Server, stop). + +init([Parent, Target, ChangesFeed, _PostProps]) -> + process_flag(trap_exit, true), + Self = self(), + Pid = spawn_link(fun() -> changes_loop(Self, ChangesFeed, Target) end), + {ok, #state{changes_loop=Pid, target=Target, parent=Parent}}. + +handle_call({add_missing_revs, {HighSeq, Revs}}, From, State) -> + State#state.parent ! {update_stats, missing_revs, length(Revs)}, + handle_add_missing_revs(HighSeq, Revs, From, State); + +handle_call(next_missing_revs, From, State) -> + handle_next_missing_revs(From, State). + +handle_cast({update_committed_seq, N}, State) -> + if State#state.high_committed_seq < N -> + ?LOG_DEBUG("missing_revs updating committed seq to ~p", [N]); + true -> ok end, + {noreply, State#state{high_committed_seq=N}}. + +handle_info({'EXIT', Pid, Reason}, #state{changes_loop=Pid} = State) -> + handle_changes_loop_exit(Reason, State); + +handle_info(Msg, State) -> + ?LOG_INFO("unexpected message ~p", [Msg]), + {noreply, State}. 
+ +terminate(_Reason, #state{changes_loop=Pid}) when is_pid(Pid) -> + exit(Pid, shutdown), + ok; +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_missing_revs(HighSeq, [], _From, State) -> + NewState = State#state{high_source_seq=HighSeq}, + maybe_checkpoint(NewState), + {reply, ok, NewState}; +handle_add_missing_revs(HighSeq, Revs, From, #state{reply_to=nil} = State) -> + #state{rows=Rows, count=Count} = State, + NewState = State#state{ + rows = queue:join(Rows, queue:from_list(Revs)), + count = Count + length(Revs), + high_source_seq = HighSeq, + high_missing_seq = HighSeq + }, + if NewState#state.count < ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{changes_from=From}} + end; +handle_add_missing_revs(HighSeq, Revs, _From, #state{count=0} = State) -> + gen_server:reply(State#state.reply_to, {HighSeq, Revs}), + NewState = State#state{ + high_source_seq = HighSeq, + high_missing_seq = HighSeq, + reply_to = nil + }, + {reply, ok, NewState}. + +handle_next_missing_revs(From, #state{count=0} = State) -> + if State#state.complete -> + {stop, normal, complete, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_missing_revs(_From, State) -> + #state{ + changes_from = ChangesFrom, + high_missing_seq = HighSeq, + rows = Rows + } = State, + if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end, + NewState = State#state{count=0, changes_from=nil, rows=queue:new()}, + {reply, {HighSeq, queue:to_list(Rows)}, NewState}. + +handle_changes_loop_exit(normal, State) -> + if State#state.reply_to =/= nil -> + gen_server:reply(State#state.reply_to, complete), + {stop, normal, State}; + true -> + {noreply, State#state{complete=true, changes_loop=nil}} + end; +handle_changes_loop_exit(Reason, State) -> + {stop, Reason, State#state{changes_loop=nil}}. + +changes_loop(OurServer, SourceChangesServer, Target) -> + case couch_rep_changes_feed:next(SourceChangesServer) of + complete -> + exit(normal); + Changes -> + MissingRevs = get_missing_revs(Target, Changes), + gen_server:call(OurServer, {add_missing_revs, MissingRevs}, infinity) + end, + changes_loop(OurServer, SourceChangesServer, Target). 
+ +get_missing_revs(#http_db{}=Target, Changes) -> + Transform = fun({Props}) -> + C = couch_util:get_value(<<"changes">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + {Id, [R || {[{<<"rev">>, R}]} <- C]} + end, + IdRevsList = [Transform(Change) || Change <- Changes], + SeqDict = changes_dictionary(Changes), + {LastProps} = lists:last(Changes), + HighSeq = couch_util:get_value(<<"seq">>, LastProps), + Request = Target#http_db{ + resource = "_missing_revs", + method = post, + body = {IdRevsList} + }, + {Resp} = couch_rep_httpc:request(Request), + case couch_util:get_value(<<"missing_revs">>, Resp) of + {MissingRevs} -> + X = [{Id, dict:fetch(Id, SeqDict), couch_doc:parse_revs(RevStrs)} || + {Id,RevStrs} <- MissingRevs], + {HighSeq, X}; + _ -> + exit({target_error, couch_util:get_value(<<"error">>, Resp)}) + end; + +get_missing_revs(Target, Changes) -> + Transform = fun({Props}) -> + C = couch_util:get_value(<<"changes">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + {Id, [couch_doc:parse_rev(R) || {[{<<"rev">>, R}]} <- C]} + end, + IdRevsList = [Transform(Change) || Change <- Changes], + SeqDict = changes_dictionary(Changes), + {LastProps} = lists:last(Changes), + HighSeq = couch_util:get_value(<<"seq">>, LastProps), + {ok, Results} = couch_db:get_missing_revs(Target, IdRevsList), + {HighSeq, [{Id, dict:fetch(Id, SeqDict), Revs} || {Id, Revs, _} <- Results]}. + +changes_dictionary(ChangeList) -> + KVs = [{couch_util:get_value(<<"id">>,C), couch_util:get_value(<<"seq">>,C)} + || {C} <- ChangeList], + dict:from_list(KVs). + +%% save a checkpoint if no revs are missing on target so we don't +%% rescan metadata unnecessarily +maybe_checkpoint(#state{high_missing_seq=N, high_committed_seq=N} = State) -> + #state{ + parent = Parent, + high_source_seq = SourceSeq + } = State, + Parent ! {missing_revs_checkpoint, SourceSeq}; +maybe_checkpoint(_State) -> + ok. diff --git a/apps/couch/src/couch_rep_reader.erl b/apps/couch/src/couch_rep_reader.erl new file mode 100644 index 00000000..8722f3f5 --- /dev/null +++ b/apps/couch/src/couch_rep_reader.erl @@ -0,0 +1,340 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_reader). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1]). + +-import(couch_util, [url_encode/1]). + +-define (BUFFER_SIZE, 1000). +-define (MAX_CONCURRENT_REQUESTS, 100). +-define (MAX_CONNECTIONS, 20). +-define (MAX_PIPELINE_SIZE, 50). + +-include("couch_db.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-record (state, { + parent, + source, + missing_revs, + reader_loop, + reader_from = [], + count = 0, + docs = queue:new(), + reply_to = nil, + complete = false, + monitor_count = 0, + pending_doc_request = nil, + requested_seqs = [], + opened_seqs = [] +}). + +start_link(Parent, Source, MissingRevs_or_DocIds, PostProps) -> + gen_server:start_link( + ?MODULE, [Parent, Source, MissingRevs_or_DocIds, PostProps], [] + ). 
+ +next(Pid) -> + gen_server:call(Pid, next_docs, infinity). + +init([Parent, Source, MissingRevs_or_DocIds, _PostProps]) -> + process_flag(trap_exit, true), + if is_record(Source, http_db) -> + #url{host=Host, port=Port} = ibrowse_lib:parse_url(Source#http_db.url), + ibrowse:set_max_sessions(Host, Port, ?MAX_CONNECTIONS), + ibrowse:set_max_pipeline_size(Host, Port, ?MAX_PIPELINE_SIZE); + true -> ok end, + Self = self(), + ReaderLoop = spawn_link( + fun() -> reader_loop(Self, Source, MissingRevs_or_DocIds) end + ), + MissingRevs = case MissingRevs_or_DocIds of + Pid when is_pid(Pid) -> + Pid; + _ListDocIds -> + nil + end, + State = #state{ + parent = Parent, + source = Source, + missing_revs = MissingRevs, + reader_loop = ReaderLoop + }, + {ok, State}. + +handle_call({add_docs, Seq, Docs}, From, State) -> + State#state.parent ! {update_stats, docs_read, length(Docs)}, + handle_add_docs(Seq, lists:flatten(Docs), From, State); + +handle_call({add_request_seqs, Seqs}, _From, State) -> + SeqList = State#state.requested_seqs, + {reply, ok, State#state{requested_seqs = lists:merge(Seqs, SeqList)}}; + +handle_call(next_docs, From, State) -> + handle_next_docs(From, State); + +handle_call({open_remote_doc, Id, Seq, Revs}, From, State) -> + handle_open_remote_doc(Id, Seq, Revs, From, State). + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', _, _, _, Reason}, State) -> + handle_monitor_down(Reason, State); + +handle_info({'EXIT', Loop, complete}, #state{reader_loop=Loop} = State) -> + handle_reader_loop_complete(State). + +terminate(_Reason, _State) -> + % ?LOG_INFO("rep reader terminating with reason ~p", [_Reason]), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_docs(_Seq, [], _From, State) -> + {reply, ok, State}; +handle_add_docs(Seq, DocsToAdd, From, #state{reply_to=nil} = State) -> + State1 = update_sequence_lists(Seq, State), + NewState = State1#state{ + docs = queue:join(State1#state.docs, queue:from_list(DocsToAdd)), + count = State1#state.count + length(DocsToAdd) + }, + if NewState#state.count < ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{reader_from=[From|State#state.reader_from]}} + end; +handle_add_docs(Seq, DocsToAdd, _From, #state{count=0} = State) -> + NewState = update_sequence_lists(Seq, State), + HighSeq = calculate_new_high_seq(NewState), + gen_server:reply(State#state.reply_to, {HighSeq, DocsToAdd}), + {reply, ok, NewState#state{reply_to=nil}}. + +handle_next_docs(From, #state{count=0} = State) -> + if State#state.complete -> + {stop, normal, {complete, calculate_new_high_seq(State)}, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_docs(_From, State) -> + #state{ + reader_from = ReaderFrom, + docs = Docs + } = State, + [gen_server:reply(F, ok) || F <- ReaderFrom], + NewState = State#state{count=0, reader_from=[], docs=queue:new()}, + {reply, {calculate_new_high_seq(State), queue:to_list(Docs)}, NewState}. + +handle_open_remote_doc(Id, Seq, Revs, From, #state{monitor_count=N} = State) + when N > ?MAX_CONCURRENT_REQUESTS -> + {noreply, State#state{pending_doc_request={From,Id,Seq,Revs}}}; +handle_open_remote_doc(Id, Seq, Revs, _, #state{source=#http_db{}} = State) -> + #state{ + monitor_count = Count, + source = Source + } = State, + {_, _Ref} = spawn_document_request(Source, Id, Seq, Revs), + {reply, ok, State#state{monitor_count = Count+1}}. 
+ +handle_monitor_down(normal, #state{pending_doc_request=nil, reply_to=nil, + monitor_count=1, complete=waiting_on_monitors} = State) -> + {noreply, State#state{complete=true, monitor_count=0}}; +handle_monitor_down(normal, #state{pending_doc_request=nil, reply_to=From, + monitor_count=1, complete=waiting_on_monitors} = State) -> + gen_server:reply(From, {complete, calculate_new_high_seq(State)}), + {stop, normal, State#state{complete=true, monitor_count=0}}; +handle_monitor_down(normal, #state{pending_doc_request=nil} = State) -> + #state{monitor_count = Count} = State, + {noreply, State#state{monitor_count = Count-1}}; +handle_monitor_down(normal, State) -> + #state{ + source = Source, + pending_doc_request = {From, Id, Seq, Revs} + } = State, + gen_server:reply(From, ok), + {_, _NewRef} = spawn_document_request(Source, Id, Seq, Revs), + {noreply, State#state{pending_doc_request=nil}}; +handle_monitor_down(Reason, State) -> + {stop, Reason, State}. + +handle_reader_loop_complete(#state{reply_to=nil, monitor_count=0} = State) -> + {noreply, State#state{complete = true}}; +handle_reader_loop_complete(#state{monitor_count=0} = State) -> + HighSeq = calculate_new_high_seq(State), + gen_server:reply(State#state.reply_to, {complete, HighSeq}), + {stop, normal, State}; +handle_reader_loop_complete(State) -> + {noreply, State#state{complete = waiting_on_monitors}}. + +calculate_new_high_seq(#state{missing_revs=nil}) -> + nil; +calculate_new_high_seq(#state{requested_seqs=[], opened_seqs=[Open|_]}) -> + Open; +calculate_new_high_seq(#state{requested_seqs=[Req|_], opened_seqs=[Open|_]}) + when Req < Open -> + 0; +calculate_new_high_seq(#state{opened_seqs=[]}) -> + 0; +calculate_new_high_seq(State) -> + hd(State#state.opened_seqs). + +split_revlist(Rev, {[CurrentAcc|Rest], BaseLength, Length}) -> + case Length+size(Rev) > 8192 of + false -> + {[[Rev|CurrentAcc] | Rest], BaseLength, Length+size(Rev)}; + true -> + {[[Rev],CurrentAcc|Rest], BaseLength, BaseLength} + end. + +% We store outstanding requested sequences and a subset of already opened +% sequences in 2 ordered lists. The subset of opened seqs is a) the largest +% opened seq smaller than the smallest outstanding request seq plus b) all the +% opened seqs greater than the smallest outstanding request. I believe its the +% minimal set of info needed to correctly calculate which seqs have been +% replicated (because remote docs can be opened out-of-order) -- APK +update_sequence_lists(_Seq, #state{missing_revs=nil} = State) -> + State; +update_sequence_lists(Seq, State) -> + Requested = lists:delete(Seq, State#state.requested_seqs), + AllOpened = lists:merge([Seq], State#state.opened_seqs), + Opened = case Requested of + [] -> + [lists:last(AllOpened)]; + [EarliestReq|_] -> + case lists:splitwith(fun(X) -> X < EarliestReq end, AllOpened) of + {[], Greater} -> + Greater; + {Less, Greater} -> + [lists:last(Less) | Greater] + end + end, + State#state{ + requested_seqs = Requested, + opened_seqs = Opened + }. 
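% Illustrative sketch, not part of the original module: a worked example of the
% bookkeeping described above. The sequence numbers are assumptions. With
% requests outstanding for seqs 3, 5 and 7, opening seq 5 out of order leaves
% 3 still pending, so the high replicated seq reported by
% calculate_new_high_seq/1 cannot yet advance past it.
example_update_sequence_lists() ->
    State0 = #state{
        missing_revs = self(),   % any non-nil value enables seq tracking
        requested_seqs = [3, 5, 7],
        opened_seqs = []
    },
    State1 = update_sequence_lists(5, State0),
    % returns {[3,7], [5]}
    {State1#state.requested_seqs, State1#state.opened_seqs}.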
+ +open_doc_revs(#http_db{} = DbS, DocId, Revs) -> + %% all this logic just splits up revision lists that are too long for + %% MochiWeb into multiple requests + BaseQS = [{revs,true}, {latest,true}, {att_encoding_info,true}], + BaseReq = DbS#http_db{resource=url_encode(DocId), qs=BaseQS}, + BaseLength = length(couch_rep_httpc:full_url(BaseReq)) + 11, % &open_revs= + + {RevLists, _, _} = lists:foldl(fun split_revlist/2, + {[[]], BaseLength, BaseLength}, couch_doc:revs_to_strs(Revs)), + + Requests = [BaseReq#http_db{ + qs = [{open_revs, ?JSON_ENCODE(RevList)} | BaseQS] + } || RevList <- RevLists], + JsonResults = lists:flatten([couch_rep_httpc:request(R) || R <- Requests]), + + Transform = + fun({[{<<"missing">>, Rev}]}) -> + {{not_found, missing}, couch_doc:parse_rev(Rev)}; + ({[{<<"ok">>, Json}]}) -> + #doc{id=Id, revs=Rev, atts=Atts} = Doc = couch_doc:from_json_obj(Json), + Doc#doc{atts=[couch_rep_att:convert_stub(A, {DbS,Id,Rev}) || A <- Atts]} + end, + [Transform(Result) || Result <- JsonResults]. + +open_doc(#http_db{} = DbS, DocId) -> + % get latest rev of the doc + Req = DbS#http_db{ + resource=url_encode(DocId), + qs=[{att_encoding_info, true}] + }, + case couch_rep_httpc:request(Req) of + {[{<<"error">>,<<"not_found">>}, {<<"reason">>,<<"missing">>}]} -> + []; + Json -> + #doc{id=Id, revs=Rev, atts=Atts} = Doc = couch_doc:from_json_obj(Json), + [Doc#doc{ + atts=[couch_rep_att:convert_stub(A, {DbS,Id,Rev}) || A <- Atts] + }] + end. + +reader_loop(ReaderServer, Source, DocIds) when is_list(DocIds) -> + case Source of + #http_db{} -> + [gen_server:call(ReaderServer, {open_remote_doc, Id, nil, nil}, + infinity) || Id <- DocIds]; + _LocalDb -> + Docs = lists:foldr(fun(Id, Acc) -> + case couch_db:open_doc(Source, Id) of + {ok, Doc} -> + [Doc | Acc]; + _ -> + Acc + end + end, [], DocIds), + gen_server:call(ReaderServer, {add_docs, nil, Docs}, infinity) + end, + exit(complete); + +reader_loop(ReaderServer, Source, MissingRevsServer) -> + case couch_rep_missing_revs:next(MissingRevsServer) of + complete -> + exit(complete); + {HighSeq, IdsRevs} -> + % to be safe, make sure Results are sorted by source_seq + SortedIdsRevs = lists:keysort(2, IdsRevs), + RequestSeqs = [S || {_,S,_} <- SortedIdsRevs], + gen_server:call(ReaderServer, {add_request_seqs, RequestSeqs}, infinity), + case Source of + #http_db{} -> + [gen_server:call(ReaderServer, {open_remote_doc, Id, Seq, Revs}, + infinity) || {Id,Seq,Revs} <- SortedIdsRevs], + reader_loop(ReaderServer, Source, MissingRevsServer); + _Local -> + Source2 = maybe_reopen_db(Source, HighSeq), + lists:foreach(fun({Id,Seq,Revs}) -> + {ok, Docs} = couch_db:open_doc_revs(Source2, Id, Revs, [latest]), + JustTheDocs = [Doc || {ok, Doc} <- Docs], + gen_server:call(ReaderServer, {add_docs, Seq, JustTheDocs}, + infinity) + end, SortedIdsRevs), + reader_loop(ReaderServer, Source2, MissingRevsServer) + end + end. + +maybe_reopen_db(#db{update_seq=OldSeq} = Db, HighSeq) when HighSeq > OldSeq -> + {ok, NewDb} = couch_db:open(Db#db.name, [{user_ctx, Db#db.user_ctx}]), + couch_db:close(Db), + NewDb; +maybe_reopen_db(Db, _HighSeq) -> + Db. + +spawn_document_request(Source, Id, nil, nil) -> + spawn_document_request(Source, Id); +spawn_document_request(Source, Id, Seq, Revs) -> + Server = self(), + SpawnFun = fun() -> + Results = open_doc_revs(Source, Id, Revs), + gen_server:call(Server, {add_docs, Seq, Results}, infinity) + end, + spawn_monitor(SpawnFun). 
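+
+%% Aside (illustrative, not part of this module): split_revlist/2 above packs
+%% revision strings into sublists so that each generated open_revs=[...] query
+%% string stays under roughly 8192 bytes, the MochiWeb URL limit the comment
+%% above refers to. For instance, with a base URL length of 8100 two short
+%% revs still share one request:
+%%
+%%   lists:foldl(fun split_revlist/2, {[[]], 8100, 8100},
+%%               [<<"1-aaa">>, <<"1-bbb">>])
+%%
+%% returns {[[<<"1-bbb">>, <<"1-aaa">>]], 8100, 8110}, i.e. one sublist, while
+%% a base length of 8190 would put each rev into its own sublist and so issue
+%% one request per rev.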
+ +spawn_document_request(Source, Id) -> + Server = self(), + SpawnFun = fun() -> + Results = open_doc(Source, Id), + gen_server:call(Server, {add_docs, nil, Results}, infinity) + end, + spawn_monitor(SpawnFun). diff --git a/apps/couch/src/couch_rep_sup.erl b/apps/couch/src/couch_rep_sup.erl new file mode 100644 index 00000000..1318c598 --- /dev/null +++ b/apps/couch/src/couch_rep_sup.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_sup). +-behaviour(supervisor). +-export([init/1, start_link/0]). + +-include("couch_db.hrl"). + +start_link() -> + supervisor:start_link({local,?MODULE}, ?MODULE, []). + +%%============================================================================= +%% supervisor callbacks +%%============================================================================= + +init([]) -> + {ok, {{one_for_one, 3, 10}, []}}. + +%%============================================================================= +%% internal functions +%%============================================================================= diff --git a/apps/couch/src/couch_rep_writer.erl b/apps/couch/src/couch_rep_writer.erl new file mode 100644 index 00000000..dd6396fd --- /dev/null +++ b/apps/couch/src/couch_rep_writer.erl @@ -0,0 +1,170 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_writer). + +-export([start_link/4]). + +-include("couch_db.hrl"). + +start_link(Parent, Target, Reader, _PostProps) -> + {ok, spawn_link(fun() -> writer_loop(Parent, Reader, Target) end)}. + +writer_loop(Parent, Reader, Target) -> + case couch_rep_reader:next(Reader) of + {complete, nil} -> + ok; + {complete, FinalSeq} -> + Parent ! {writer_checkpoint, FinalSeq}, + ok; + {HighSeq, Docs} -> + DocCount = length(Docs), + try write_docs(Target, Docs) of + {ok, []} -> + Parent ! {update_stats, docs_written, DocCount}; + {ok, Errors} -> + ErrorCount = length(Errors), + Parent ! {update_stats, doc_write_failures, ErrorCount}, + Parent ! {update_stats, docs_written, DocCount - ErrorCount} + catch + {attachment_request_failed, Err} -> + ?LOG_DEBUG("writer failed to write an attachment ~p", [Err]), + exit({attachment_request_failed, Err, Docs}) + end, + case HighSeq of + nil -> + ok; + _SeqNumber -> + Parent ! {writer_checkpoint, HighSeq} + end, + couch_rep_att:cleanup(), + couch_util:should_flush(), + writer_loop(Parent, Reader, Target) + end. 
+ +write_docs(#http_db{} = Db, Docs) -> + {DocsAtts, DocsNoAtts} = lists:partition( + fun(#doc{atts=[]}) -> false; (_) -> true end, + Docs + ), + ErrorsJson0 = write_bulk_docs(Db, DocsNoAtts), + ErrorsJson = lists:foldl( + fun(Doc, Acc) -> write_multi_part_doc(Db, Doc) ++ Acc end, + ErrorsJson0, + DocsAtts + ), + {ok, ErrorsJson}; +write_docs(Db, Docs) -> + couch_db:update_docs(Db, Docs, [delay_commit], replicated_changes). + +write_bulk_docs(_Db, []) -> + []; +write_bulk_docs(#http_db{headers = Headers} = Db, Docs) -> + JsonDocs = [ + couch_doc:to_json_obj(Doc, [revs, att_gzip_length]) || Doc <- Docs + ], + Request = Db#http_db{ + resource = "_bulk_docs", + method = post, + body = {[{new_edits, false}, {docs, JsonDocs}]}, + headers = couch_util:proplist_apply_field({"Content-Type", "application/json"}, [{"X-Couch-Full-Commit", "false"} | Headers]) + }, + ErrorsJson = case couch_rep_httpc:request(Request) of + {FailProps} -> + exit({target_error, couch_util:get_value(<<"error">>, FailProps)}); + List when is_list(List) -> + List + end, + [write_docs_1(V) || V <- ErrorsJson]. + +write_multi_part_doc(#http_db{headers=Headers} = Db, #doc{atts=Atts} = Doc) -> + JsonBytes = ?JSON_ENCODE( + couch_doc:to_json_obj( + Doc, + [follows, att_encoding_info, attachments] + ) + ), + Boundary = couch_uuids:random(), + {ContentType, Len} = couch_doc:len_doc_to_multi_part_stream( + Boundary, JsonBytes, Atts, true + ), + StreamerPid = spawn_link( + fun() -> streamer_fun(Boundary, JsonBytes, Atts) end + ), + BodyFun = fun(Acc) -> + DataQueue = case Acc of + nil -> + StreamerPid ! {start, self()}, + receive + {queue, Q} -> + Q + end; + Queue -> + Queue + end, + case couch_work_queue:dequeue(DataQueue) of + closed -> + eof; + {ok, Data} -> + {ok, iolist_to_binary(Data), DataQueue} + end + end, + Request = Db#http_db{ + resource = couch_util:url_encode(Doc#doc.id), + method = put, + qs = [{new_edits, false}], + body = {BodyFun, nil}, + headers = [ + {"x-couch-full-commit", "false"}, + {"Content-Type", ?b2l(ContentType)}, + {"Content-Length", Len} | Headers + ] + }, + Result = case couch_rep_httpc:request(Request) of + {[{<<"error">>, Error}, {<<"reason">>, Reason}]} -> + {Pos, [RevId | _]} = Doc#doc.revs, + ErrId = couch_util:to_existing_atom(Error), + [{Doc#doc.id, couch_doc:rev_to_str({Pos, RevId})}, {ErrId, Reason}]; + _ -> + [] + end, + StreamerPid ! stop, + Result. + +streamer_fun(Boundary, JsonBytes, Atts) -> + receive + stop -> + ok; + {start, From} -> + % better use a brand new queue, to ensure there's no garbage from + % a previous (failed) iteration + {ok, DataQueue} = couch_work_queue:new(1024 * 1024, 1000), + From ! {queue, DataQueue}, + couch_doc:doc_to_multi_part_stream( + Boundary, + JsonBytes, + Atts, + fun(Data) -> + couch_work_queue:queue(DataQueue, Data) + end, + true + ), + couch_work_queue:close(DataQueue), + streamer_fun(Boundary, JsonBytes, Atts) + end. + +write_docs_1({Props}) -> + Id = couch_util:get_value(<<"id">>, Props), + Rev = couch_doc:parse_rev(couch_util:get_value(<<"rev">>, Props)), + ErrId = couch_util:to_existing_atom(couch_util:get_value(<<"error">>, Props)), + Reason = couch_util:get_value(<<"reason">>, Props), + {{Id, Rev}, {ErrId, Reason}}. diff --git a/apps/couch/src/couch_server.erl b/apps/couch/src/couch_server.erl new file mode 100644 index 00000000..43fd9044 --- /dev/null +++ b/apps/couch/src/couch_server.erl @@ -0,0 +1,399 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_server). +-behaviour(gen_server). + +-export([open/2,create/2,delete/2,all_databases/0,get_version/0]). +-export([init/1, handle_call/3,sup_start_link/0]). +-export([handle_cast/2,code_change/3,handle_info/2,terminate/2]). +-export([dev_start/0,is_admin/2,has_admins/0,get_stats/0]). + +-include("couch_db.hrl"). + +-record(server,{ + root_dir = [], + dbname_regexp, + max_dbs_open=100, + dbs_open=0, + start_time="" + }). + +dev_start() -> + couch:stop(), + up_to_date = make:all([load, debug_info]), + couch:start(). + +get_version() -> + Apps = application:loaded_applications(), + case lists:keysearch(couch, 1, Apps) of + {value, {_, _, Vsn}} -> + Vsn; + false -> + "0.0.0" + end. + +get_stats() -> + {ok, #server{start_time=Time,dbs_open=Open}} = + gen_server:call(couch_server, get_server), + [{start_time, ?l2b(Time)}, {dbs_open, Open}]. + +sup_start_link() -> + gen_server:start_link({local, couch_server}, couch_server, [], []). + +open(DbName, Options) -> + case gen_server:call(couch_server, {open, DbName, Options}, infinity) of + {ok, Db} -> + Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + {ok, Db#db{user_ctx=Ctx}}; + Error -> + Error + end. + +create(DbName, Options) -> + case gen_server:call(couch_server, {create, DbName, Options}, infinity) of + {ok, Db} -> + Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + {ok, Db#db{user_ctx=Ctx}}; + Error -> + Error + end. + +delete(DbName, Options) -> + gen_server:call(couch_server, {delete, DbName, Options}, infinity). + +check_dbname(#server{dbname_regexp=RegExp}, DbName) -> + case re:run(DbName, RegExp, [{capture, none}]) of + nomatch -> + case DbName of + "_users" -> ok; + _Else -> + {error, illegal_database_name} + end; + match -> + ok + end. + +is_admin(User, ClearPwd) -> + case couch_config:get("admins", User) of + "-hashed-" ++ HashedPwdAndSalt -> + [HashedPwd, Salt] = string:tokens(HashedPwdAndSalt, ","), + couch_util:to_hex(crypto:sha(ClearPwd ++ Salt)) == HashedPwd; + _Else -> + false + end. + +has_admins() -> + couch_config:get("admins") /= []. + +get_full_filename(Server, DbName) -> + filename:join([Server#server.root_dir, "./" ++ DbName ++ ".couch"]). + +hash_admin_passwords() -> + hash_admin_passwords(true). + +hash_admin_passwords(Persist) -> + lists:foreach( + fun({_User, "-hashed-" ++ _}) -> + ok; % already hashed + ({User, ClearPassword}) -> + Salt = ?b2l(couch_uuids:random()), + Hashed = couch_util:to_hex(crypto:sha(ClearPassword ++ Salt)), + couch_config:set("admins", + User, "-hashed-" ++ Hashed ++ "," ++ Salt, Persist) + end, couch_config:get("admins")). + +init([]) -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. 
+ + RootDir = couch_config:get("couchdb", "database_dir", "."), + MaxDbsOpen = list_to_integer( + couch_config:get("couchdb", "max_dbs_open")), + Self = self(), + ok = couch_config:register( + fun("couchdb", "database_dir") -> + exit(Self, config_change) + end), + ok = couch_config:register( + fun("couchdb", "max_dbs_open", Max) -> + gen_server:call(couch_server, + {set_max_dbs_open, list_to_integer(Max)}) + end), + ok = couch_file:init_delete_dir(RootDir), + hash_admin_passwords(), + ok = couch_config:register( + fun("admins", _Key, _Value, Persist) -> + % spawn here so couch_config doesn't try to call itself + spawn(fun() -> hash_admin_passwords(Persist) end) + end, false), + {ok, RegExp} = re:compile("^[a-z][a-z0-9\\_\\$()\\+\\-\\/]*$"), + ets:new(couch_dbs_by_name, [set, private, named_table]), + ets:new(couch_dbs_by_pid, [set, private, named_table]), + ets:new(couch_dbs_by_lru, [ordered_set, private, named_table]), + ets:new(couch_sys_dbs, [set, private, named_table]), + process_flag(trap_exit, true), + {ok, #server{root_dir=RootDir, + dbname_regexp=RegExp, + max_dbs_open=MaxDbsOpen, + start_time=httpd_util:rfc1123_date()}}. + +terminate(_Reason, _Srv) -> + [couch_util:shutdown_sync(Pid) || {_, {Pid, _LruTime}} <- + ets:tab2list(couch_dbs_by_name)], + ok. + +all_databases() -> + {ok, #server{root_dir=Root}} = gen_server:call(couch_server, get_server), + NormRoot = couch_util:normpath(Root), + Filenames = + filelib:fold_files(Root, "^[a-z0-9\\_\\$()\\+\\-]*[\\.]couch$", true, + fun(Filename, AccIn) -> + NormFilename = couch_util:normpath(Filename), + case NormFilename -- NormRoot of + [$/ | RelativeFilename] -> ok; + RelativeFilename -> ok + end, + [list_to_binary(filename:rootname(RelativeFilename, ".couch")) | AccIn] + end, []), + {ok, Filenames}. + + +maybe_close_lru_db(#server{dbs_open=NumOpen, max_dbs_open=MaxOpen}=Server) + when NumOpen < MaxOpen -> + {ok, Server}; +maybe_close_lru_db(#server{dbs_open=NumOpen}=Server) -> + % must free up the lru db. + case try_close_lru(now()) of + ok -> + {ok, Server#server{dbs_open=NumOpen - 1}}; + Error -> Error + end. + +try_close_lru(StartTime) -> + LruTime = get_lru(), + if LruTime > StartTime -> + % this means we've looped through all our opened dbs and found them + % all in use. + {error, all_dbs_active}; + true -> + [{_, DbName}] = ets:lookup(couch_dbs_by_lru, LruTime), + [{_, {opened, MainPid, LruTime}}] = ets:lookup(couch_dbs_by_name, DbName), + case couch_db:is_idle(MainPid) of + true -> + couch_util:shutdown_sync(MainPid), + true = ets:delete(couch_dbs_by_lru, LruTime), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, MainPid), + true = ets:delete(couch_sys_dbs, DbName), + ok; + false -> + % this still has referrers. Go ahead and give it a current lru time + % and try the next one in the table. + NewLruTime = now(), + true = ets:insert(couch_dbs_by_name, {DbName, {opened, MainPid, NewLruTime}}), + true = ets:insert(couch_dbs_by_pid, {MainPid, DbName}), + true = ets:delete(couch_dbs_by_lru, LruTime), + true = ets:insert(couch_dbs_by_lru, {NewLruTime, DbName}), + try_close_lru(StartTime) + end + end. + +get_lru() -> + get_lru(ets:first(couch_dbs_by_lru)). 
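+
+%% Aside (illustrative walkthrough, not part of this module): suppose
+%% couch_dbs_by_lru holds {T1, "a"}, {T2, "b"}, {T3, "c"} with T1 < T2 < T3,
+%% none of them sys dbs, and the "a" db still has active referrers. Then
+%% try_close_lru(now()) above:
+%%
+%%   1. looks up T1 -> "a", finds couch_db:is_idle/1 false, re-stamps "a" with
+%%      a fresh now() (moving it to the end of the ordered_set) and recurses;
+%%   2. looks up T2 -> "b", finds it idle, shuts it down, deletes it from
+%%      couch_dbs_by_name, couch_dbs_by_pid and couch_dbs_by_lru, and returns ok.
+%%
+%% If every db is busy the scan eventually reaches a timestamp newer than
+%% StartTime and gives up with {error, all_dbs_active}.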
+ +get_lru(LruTime) -> + [{LruTime, DbName}] = ets:lookup(couch_dbs_by_lru, LruTime), + case ets:member(couch_sys_dbs, DbName) of + false -> + LruTime; + true -> + [{_, {opened, MainPid, _}}] = ets:lookup(couch_dbs_by_name, DbName), + case couch_db:is_idle(MainPid) of + true -> + LruTime; + false -> + get_lru(ets:next(couch_dbs_by_lru, LruTime)) + end + end. + +open_async(Server, From, DbName, Filepath, Options) -> + Parent = self(), + Opener = spawn_link(fun() -> + Res = couch_db:start_link(DbName, Filepath, Options), + gen_server:call( + Parent, {open_result, DbName, Res, Options}, infinity + ), + unlink(Parent), + case Res of + {ok, DbReader} -> + unlink(DbReader); + _ -> + ok + end + end), + true = ets:insert(couch_dbs_by_name, {DbName, {opening, Opener, [From]}}), + true = ets:insert(couch_dbs_by_pid, {Opener, DbName}), + DbsOpen = case lists:member(sys_db, Options) of + true -> + true = ets:insert(couch_sys_dbs, {DbName, true}), + Server#server.dbs_open; + false -> + Server#server.dbs_open + 1 + end, + Server#server{dbs_open = DbsOpen}. + +handle_call({set_max_dbs_open, Max}, _From, Server) -> + {reply, ok, Server#server{max_dbs_open=Max}}; +handle_call(get_server, _From, Server) -> + {reply, {ok, Server}, Server}; +handle_call({open_result, DbName, {ok, OpenedDbPid}, Options}, _From, Server) -> + link(OpenedDbPid), + [{DbName, {opening,Opener,Froms}}] = ets:lookup(couch_dbs_by_name, DbName), + lists:foreach(fun({FromPid,_}=From) -> + gen_server:reply(From, + catch couch_db:open_ref_counted(OpenedDbPid, FromPid)) + end, Froms), + LruTime = now(), + true = ets:insert(couch_dbs_by_name, + {DbName, {opened, OpenedDbPid, LruTime}}), + true = ets:delete(couch_dbs_by_pid, Opener), + true = ets:insert(couch_dbs_by_pid, {OpenedDbPid, DbName}), + true = ets:insert(couch_dbs_by_lru, {LruTime, DbName}), + case lists:member(create, Options) of + true -> + couch_db_update_notifier:notify({created, DbName}); + false -> + ok + end, + {reply, ok, Server}; +handle_call({open_result, DbName, Error, Options}, _From, Server) -> + [{DbName, {opening,Opener,Froms}}] = ets:lookup(couch_dbs_by_name, DbName), + lists:foreach(fun(From) -> + gen_server:reply(From, Error) + end, Froms), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, Opener), + DbsOpen = case lists:member(sys_db, Options) of + true -> + true = ets:delete(couch_sys_dbs, DbName), + Server#server.dbs_open; + false -> + Server#server.dbs_open - 1 + end, + {reply, ok, Server#server{dbs_open = DbsOpen}}; +handle_call({open, DbName, Options}, {FromPid,_}=From, Server) -> + LruTime = now(), + case ets:lookup(couch_dbs_by_name, DbName) of + [] -> + open_db(DbName, Server, Options, From); + [{_, {opening, Opener, Froms}}] -> + true = ets:insert(couch_dbs_by_name, {DbName, {opening, Opener, [From|Froms]}}), + {noreply, Server}; + [{_, {opened, MainPid, PrevLruTime}}] -> + true = ets:insert(couch_dbs_by_name, {DbName, {opened, MainPid, LruTime}}), + true = ets:delete(couch_dbs_by_lru, PrevLruTime), + true = ets:insert(couch_dbs_by_lru, {LruTime, DbName}), + {reply, couch_db:open_ref_counted(MainPid, FromPid), Server} + end; +handle_call({create, DbName, Options}, From, Server) -> + case ets:lookup(couch_dbs_by_name, DbName) of + [] -> + open_db(DbName, Server, [create | Options], From); + [_AlreadyRunningDb] -> + {reply, file_exists, Server} + end; +handle_call({delete, DbName, _Options}, _From, Server) -> + DbNameList = binary_to_list(DbName), + case check_dbname(Server, DbNameList) of + ok -> + FullFilepath = 
get_full_filename(Server, DbNameList), + UpdateState = + case ets:lookup(couch_dbs_by_name, DbName) of + [] -> false; + [{_, {opening, Pid, Froms}}] -> + couch_util:shutdown_sync(Pid), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, Pid), + [gen_server:send_result(F, not_found) || F <- Froms], + true; + [{_, {opened, Pid, LruTime}}] -> + couch_util:shutdown_sync(Pid), + true = ets:delete(couch_dbs_by_name, DbName), + true = ets:delete(couch_dbs_by_pid, Pid), + true = ets:delete(couch_dbs_by_lru, LruTime), + true + end, + Server2 = case UpdateState of + true -> + DbsOpen = case ets:member(couch_sys_dbs, DbName) of + true -> + true = ets:delete(couch_sys_dbs, DbName), + Server#server.dbs_open; + false -> + Server#server.dbs_open - 1 + end, + Server#server{dbs_open = DbsOpen}; + false -> + Server + end, + + %% Delete any leftover .compact files. If we don't do this a subsequent + %% request for this DB will try to open the .compact file and use it. + couch_file:delete(Server#server.root_dir, FullFilepath ++ ".compact"), + + case couch_file:delete(Server#server.root_dir, FullFilepath) of + ok -> + couch_db_update_notifier:notify({deleted, DbName}), + {reply, ok, Server2}; + {error, enoent} -> + {reply, not_found, Server2}; + Else -> + {reply, Else, Server2} + end; + Error -> + {reply, Error, Server} + end. + +handle_cast(Msg, _Server) -> + exit({unknown_cast_message, Msg}). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info({'EXIT', _Pid, config_change}, Server) -> + {noreply, shutdown, Server}; +handle_info(Error, _Server) -> + ?LOG_ERROR("Unexpected message, restarting couch_server: ~p", [Error]), + exit(kill). + +open_db(DbName, Server, Options, From) -> + DbNameList = binary_to_list(DbName), + case check_dbname(Server, DbNameList) of + ok -> + Filepath = get_full_filename(Server, DbNameList), + case lists:member(sys_db, Options) of + true -> + {noreply, open_async(Server, From, DbName, Filepath, Options)}; + false -> + case maybe_close_lru_db(Server) of + {ok, Server2} -> + {noreply, open_async(Server2, From, DbName, Filepath, Options)}; + CloseError -> + {reply, CloseError, Server} + end + end; + Error -> + {reply, Error, Server} + end. diff --git a/apps/couch/src/couch_server_sup.erl b/apps/couch/src/couch_server_sup.erl new file mode 100644 index 00000000..4f0445da --- /dev/null +++ b/apps/couch/src/couch_server_sup.erl @@ -0,0 +1,193 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_server_sup). +-behaviour(supervisor). + + +-export([start_link/1,stop/0, couch_config_start_link_wrapper/2, + start_primary_services/0,start_secondary_services/0, + restart_core_server/0]). + +-include("couch_db.hrl"). + +%% supervisor callbacks +-export([init/1]). + +start_link(IniFiles) -> + case whereis(couch_server_sup) of + undefined -> + start_server(IniFiles); + _Else -> + {error, already_started} + end. + +restart_core_server() -> + init:restart(). 
+ +couch_config_start_link_wrapper(IniFiles, FirstConfigPid) -> + case is_process_alive(FirstConfigPid) of + true -> + link(FirstConfigPid), + {ok, FirstConfigPid}; + false -> couch_config:start_link(IniFiles) + end. + +start_server(IniFiles) -> + case init:get_argument(pidfile) of + {ok, [PidFile]} -> + case file:write_file(PidFile, os:getpid()) of + ok -> ok; + Error -> io:format("Failed to write PID file ~s, error: ~p", [PidFile, Error]) + end; + _ -> ok + end, + + {ok, ConfigPid} = couch_config:start_link(IniFiles), + + LogLevel = couch_config:get("log", "level", "info"), + % announce startup + io:format("Apache CouchDB ~s (LogLevel=~s) is starting.~n", [ + couch_server:get_version(), + LogLevel + ]), + case LogLevel of + "debug" -> + io:format("Configuration Settings ~p:~n", [IniFiles]), + [io:format(" [~s] ~s=~p~n", [Module, Variable, Value]) + || {{Module, Variable}, Value} <- couch_config:all()]; + _ -> ok + end, + + LibDir = + case couch_config:get("couchdb", "util_driver_dir", null) of + null -> + filename:join(couch_util:priv_dir(), "lib"); + LibDir0 -> LibDir0 + end, + + ok = couch_util:start_driver(LibDir), + + BaseChildSpecs = + {{one_for_all, 10, 3600}, + [{couch_config, + {couch_server_sup, couch_config_start_link_wrapper, [IniFiles, ConfigPid]}, + permanent, + brutal_kill, + worker, + [couch_config]}, + {couch_primary_services, + {couch_server_sup, start_primary_services, []}, + permanent, + infinity, + supervisor, + [couch_server_sup]}, + {couch_secondary_services, + {couch_server_sup, start_secondary_services, []}, + permanent, + infinity, + supervisor, + [couch_server_sup]} + ]}, + + % ensure these applications are running + application:start(ibrowse), + application:start(crypto), + + {ok, Pid} = supervisor:start_link( + {local, couch_server_sup}, couch_server_sup, BaseChildSpecs), + + % launch the icu bridge + % just restart if one of the config settings change. + + couch_config:register( + fun("couchdb", "util_driver_dir") -> + ?MODULE:stop(); + ("daemons", _) -> + ?MODULE:stop() + end, Pid), + + unlink(ConfigPid), + + Ip = couch_config:get("httpd", "bind_address"), + Port = mochiweb_socket_server:get(couch_httpd, port), + io:format("Apache CouchDB has started. Time to relax.~n"), + ?LOG_INFO("Apache CouchDB has started on http://~s:~w/", [Ip, Port]), + + case couch_config:get("couchdb", "uri_file", null) of + null -> ok; + UriFile -> + Line = io_lib:format("http://~s:~w/~n", [Ip, Port]), + file:write_file(UriFile, Line) + end, + + {ok, Pid}. + +start_primary_services() -> + supervisor:start_link({local, couch_primary_services}, couch_server_sup, + {{one_for_one, 10, 3600}, + [{couch_log, + {couch_log, start_link, []}, + permanent, + brutal_kill, + worker, + [couch_log]}, + {couch_replication_supervisor, + {couch_rep_sup, start_link, []}, + permanent, + infinity, + supervisor, + [couch_rep_sup]}, + {couch_task_status, + {couch_task_status, start_link, []}, + permanent, + brutal_kill, + worker, + [couch_task_status]}, + {couch_server, + {couch_server, sup_start_link, []}, + permanent, + 1000, + worker, + [couch_server]}, + {couch_db_update_event, + {gen_event, start_link, [{local, couch_db_update}]}, + permanent, + brutal_kill, + worker, + dynamic} + ] + }). 
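+
+%% Aside (illustrative, not part of this module): start_secondary_services/0
+%% below builds one child spec per non-empty entry of the [daemons] ini
+%% section, parsed with couch_util:parse_term/1. A typical entry such as
+%%
+%%   [daemons]
+%%   httpd={couch_httpd, start_link, []}
+%%
+%% becomes the child spec
+%%
+%%   {httpd, {couch_httpd, start_link, []}, permanent, 1000, worker, [couch_httpd]}.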
+ +start_secondary_services() -> + DaemonChildSpecs = [ + begin + {ok, {Module, Fun, Args}} = couch_util:parse_term(SpecStr), + + {list_to_atom(Name), + {Module, Fun, Args}, + permanent, + 1000, + worker, + [Module]} + end + || {Name, SpecStr} + <- couch_config:get("daemons"), SpecStr /= ""], + + supervisor:start_link({local, couch_secondary_services}, couch_server_sup, + {{one_for_one, 10, 3600}, DaemonChildSpecs}). + +stop() -> + catch exit(whereis(couch_server_sup), normal). + +init(ChildSpecs) -> + {ok, ChildSpecs}. diff --git a/apps/couch/src/couch_stats_aggregator.erl b/apps/couch/src/couch_stats_aggregator.erl new file mode 100644 index 00000000..6090355d --- /dev/null +++ b/apps/couch/src/couch_stats_aggregator.erl @@ -0,0 +1,297 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_stats_aggregator). +-behaviour(gen_server). + +-export([start/0, start/1, stop/0]). +-export([all/0, all/1, get/1, get/2, get_json/1, get_json/2, collect_sample/0]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-record(aggregate, { + description = <<"">>, + seconds = 0, + count = 0, + current = null, + sum = null, + mean = null, + variance = null, + stddev = null, + min = null, + max = null, + samples = [] +}). + + +start() -> + PrivDir = couch_util:priv_dir(), + start(filename:join(PrivDir, "stat_descriptions.cfg")). + +start(FileName) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [FileName], []). + +stop() -> + gen_server:cast(?MODULE, stop). + +all() -> + ?MODULE:all(0). +all(Time) when is_binary(Time) -> + ?MODULE:all(list_to_integer(binary_to_list(Time))); +all(Time) when is_atom(Time) -> + ?MODULE:all(list_to_integer(atom_to_list(Time))); +all(Time) when is_integer(Time) -> + Aggs = ets:match(?MODULE, {{'$1', Time}, '$2'}), + Stats = lists:map(fun([Key, Agg]) -> {Key, Agg} end, Aggs), + case Stats of + [] -> + {[]}; + _ -> + Ret = lists:foldl(fun({{Mod, Key}, Agg}, Acc) -> + CurrKeys = case proplists:lookup(Mod, Acc) of + none -> []; + {Mod, {Keys}} -> Keys + end, + NewMod = {[{Key, to_json_term(Agg)} | CurrKeys]}, + [{Mod, NewMod} | proplists:delete(Mod, Acc)] + end, [], Stats), + {Ret} + end. + +get(Key) -> + ?MODULE:get(Key, 0). +get(Key, Time) when is_binary(Time) -> + ?MODULE:get(Key, list_to_integer(binary_to_list(Time))); +get(Key, Time) when is_atom(Time) -> + ?MODULE:get(Key, list_to_integer(atom_to_list(Time))); +get(Key, Time) when is_integer(Time) -> + case ets:lookup(?MODULE, {make_key(Key), Time}) of + [] -> #aggregate{seconds=Time}; + [{_, Agg}] -> Agg + end. + +get_json(Key) -> + get_json(Key, 0). +get_json(Key, Time) -> + to_json_term(?MODULE:get(Key, Time)). + +collect_sample() -> + gen_server:call(?MODULE, collect_sample, infinity). + + +init(StatDescsFileName) -> + % Create an aggregate entry for each {description, rate} pair. 
+ ets:new(?MODULE, [named_table, set, protected]), + SampleStr = couch_config:get("stats", "samples", "[0]"), + {ok, Samples} = couch_util:parse_term(SampleStr), + {ok, Descs} = file:consult(StatDescsFileName), + lists:foreach(fun({Sect, Key, Value}) -> + lists:foreach(fun(Secs) -> + Agg = #aggregate{ + description=list_to_binary(Value), + seconds=Secs + }, + ets:insert(?MODULE, {{{Sect, Key}, Secs}, Agg}) + end, Samples) + end, Descs), + + Self = self(), + ok = couch_config:register( + fun("stats", _) -> exit(Self, config_change) end + ), + + Rate = list_to_integer(couch_config:get("stats", "rate", "1000")), + % TODO: Add timer_start to kernel start options. + {ok, TRef} = timer:apply_after(Rate, ?MODULE, collect_sample, []), + {ok, {TRef, Rate}}. + +terminate(_Reason, {TRef, _Rate}) -> + timer:cancel(TRef), + ok. + +handle_call(collect_sample, _, {OldTRef, SampleInterval}) -> + timer:cancel(OldTRef), + {ok, TRef} = timer:apply_after(SampleInterval, ?MODULE, collect_sample, []), + % Gather new stats values to add. + Incs = lists:map(fun({Key, Value}) -> + {Key, {incremental, Value}} + end, couch_stats_collector:all(incremental)), + Abs = lists:map(fun({Key, Values}) -> + couch_stats_collector:clear(Key), + Values2 = case Values of + X when is_list(X) -> X; + Else -> [Else] + end, + {_, Mean} = lists:foldl(fun(Val, {Count, Curr}) -> + {Count+1, Curr + (Val - Curr) / (Count+1)} + end, {0, 0}, Values2), + {Key, {absolute, Mean}} + end, couch_stats_collector:all(absolute)), + + Values = Incs ++ Abs, + Now = erlang:now(), + lists:foreach(fun({{Key, Rate}, Agg}) -> + NewAgg = case proplists:lookup(Key, Values) of + none -> + rem_values(Now, Agg); + {Key, {Type, Value}} -> + NewValue = new_value(Type, Value, Agg#aggregate.current), + Agg2 = add_value(Now, NewValue, Agg), + rem_values(Now, Agg2) + end, + ets:insert(?MODULE, {{Key, Rate}, NewAgg}) + end, ets:tab2list(?MODULE)), + {reply, ok, {TRef, SampleInterval}}. + +handle_cast(stop, State) -> + {stop, normal, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +code_change(_OldVersion, State, _Extra) -> + {ok, State}. + + +new_value(incremental, Value, null) -> + Value; +new_value(incremental, Value, Current) -> + Value - Current; +new_value(absolute, Value, _Current) -> + Value. + +add_value(Time, Value, #aggregate{count=Count, seconds=Secs}=Agg) when Count < 1 -> + Samples = case Secs of + 0 -> []; + _ -> [{Time, Value}] + end, + Agg#aggregate{ + count=1, + current=Value, + sum=Value, + mean=Value, + variance=0.0, + stddev=null, + min=Value, + max=Value, + samples=Samples + }; +add_value(Time, Value, Agg) -> + #aggregate{ + count=Count, + current=Current, + sum=Sum, + mean=Mean, + variance=Variance, + samples=Samples + } = Agg, + + NewCount = Count + 1, + NewMean = Mean + (Value - Mean) / NewCount, + NewVariance = Variance + (Value - Mean) * (Value - NewMean), + StdDev = case NewCount > 1 of + false -> null; + _ -> math:sqrt(NewVariance / (NewCount - 1)) + end, + Agg2 = Agg#aggregate{ + count=NewCount, + current=Current + Value, + sum=Sum + Value, + mean=NewMean, + variance=NewVariance, + stddev=StdDev, + min=lists:min([Agg#aggregate.min, Value]), + max=lists:max([Agg#aggregate.max, Value]) + }, + case Agg2#aggregate.seconds of + 0 -> Agg2; + _ -> Agg2#aggregate{samples=[{Time, Value} | Samples]} + end. 
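+
+%% Aside: add_value/3 above maintains the running mean and variance
+%% incrementally (Welford's method), so no extra pass over the samples is
+%% needed. A standalone sketch (not part of this module) over a plain list,
+%% returning {Count, Mean, SumOfSquaredDeviations}:
+%%
+%%   running_stats(Values) ->
+%%       lists:foldl(fun(X, {N, Mean, M2}) ->
+%%           N1 = N + 1,
+%%           Mean1 = Mean + (X - Mean) / N1,
+%%           {N1, Mean1, M2 + (X - Mean) * (X - Mean1)}
+%%       end, {0, 0, 0.0}, Values).
+%%
+%% e.g. running_stats([2,4,4,4,5,5,7,9]) gives, up to rounding, {8, 5.0, 32.0},
+%% and stddev = math:sqrt(32.0 / (8 - 1)), matching the NewVariance/StdDev
+%% computation above.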
+ +rem_values(Time, Agg) -> + Seconds = Agg#aggregate.seconds, + Samples = Agg#aggregate.samples, + Pred = fun({When, _Value}) -> + timer:now_diff(Time, When) =< (Seconds * 1000000) + end, + {Keep, Remove} = lists:splitwith(Pred, Samples), + Agg2 = lists:foldl(fun({_, Value}, Acc) -> + rem_value(Value, Acc) + end, Agg, Remove), + Agg2#aggregate{samples=Keep}. + +rem_value(_Value, #aggregate{count=Count, seconds=Secs}) when Count =< 1 -> + #aggregate{seconds=Secs}; +rem_value(Value, Agg) -> + #aggregate{ + count=Count, + sum=Sum, + mean=Mean, + variance=Variance + } = Agg, + + OldMean = (Mean * Count - Value) / (Count - 1), + OldVariance = Variance - (Value - OldMean) * (Value - Mean), + OldCount = Count - 1, + StdDev = case OldCount > 1 of + false -> null; + _ -> math:sqrt(clamp_value(OldVariance / (OldCount - 1))) + end, + Agg#aggregate{ + count=OldCount, + sum=Sum-Value, + mean=clamp_value(OldMean), + variance=clamp_value(OldVariance), + stddev=StdDev + }. + +to_json_term(Agg) -> + {Min, Max} = case Agg#aggregate.seconds > 0 of + false -> + {Agg#aggregate.min, Agg#aggregate.max}; + _ -> + case length(Agg#aggregate.samples) > 0 of + true -> + Extract = fun({_Time, Value}) -> Value end, + Samples = lists:map(Extract, Agg#aggregate.samples), + {lists:min(Samples), lists:max(Samples)}; + _ -> + {null, null} + end + end, + {[ + {description, Agg#aggregate.description}, + {current, round_value(Agg#aggregate.sum)}, + {sum, round_value(Agg#aggregate.sum)}, + {mean, round_value(Agg#aggregate.mean)}, + {stddev, round_value(Agg#aggregate.stddev)}, + {min, Min}, + {max, Max} + ]}. + +make_key({Mod, Val}) when is_integer(Val) -> + {Mod, list_to_atom(integer_to_list(Val))}; +make_key(Key) -> + Key. + +round_value(Val) when not is_number(Val) -> + Val; +round_value(Val) when Val == 0 -> + Val; +round_value(Val) -> + erlang:round(Val * 1000.0) / 1000.0. + +clamp_value(Val) when Val > 0.00000000000001 -> + Val; +clamp_value(_) -> + 0.0. diff --git a/apps/couch/src/couch_stats_collector.erl b/apps/couch/src/couch_stats_collector.erl new file mode 100644 index 00000000..f7b9bb48 --- /dev/null +++ b/apps/couch/src/couch_stats_collector.erl @@ -0,0 +1,136 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% todo +% - remove existance check on increment(), decrement() and record(). have +% modules initialize counters on startup. + +-module(couch_stats_collector). + +-behaviour(gen_server). + +-export([start/0, stop/0]). +-export([all/0, all/1, get/1, increment/1, decrement/1, record/2, clear/1]). +-export([track_process_count/1, track_process_count/2]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-define(HIT_TABLE, stats_hit_table). +-define(ABS_TABLE, stats_abs_table). + +start() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +stop() -> + gen_server:call(?MODULE, stop). + +all() -> + ets:tab2list(?HIT_TABLE) ++ abs_to_list(). 
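+
+%% Aside (illustrative, not part of this module): counters live in the
+%% ?HIT_TABLE set and absolute readings in the ?ABS_TABLE duplicate_bag (see
+%% init/1 below). Assuming the collector is running and only these keys have
+%% been touched, a typical session looks like
+%%
+%%   couch_stats_collector:increment({httpd, requests}),
+%%   couch_stats_collector:increment({httpd, requests}),
+%%   couch_stats_collector:record({couchdb, request_time}, 42),
+%%   couch_stats_collector:all(incremental),  %  [{{httpd,requests}, 2}]
+%%   couch_stats_collector:all(absolute),     %  [{{couchdb,request_time}, [42]}]
+%%
+%% and clear/1 drops the recorded values for one absolute key.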
+ +all(Type) -> + case Type of + incremental -> ets:tab2list(?HIT_TABLE); + absolute -> abs_to_list() + end. + +get(Key) -> + case ets:lookup(?HIT_TABLE, Key) of + [] -> + case ets:lookup(?ABS_TABLE, Key) of + [] -> + nil; + AbsVals -> + lists:map(fun({_, Value}) -> Value end, AbsVals) + end; + [{_, Counter}] -> + Counter + end. + +increment(Key) -> + Key2 = make_key(Key), + case catch ets:update_counter(?HIT_TABLE, Key2, 1) of + {'EXIT', {badarg, _}} -> + catch ets:insert(?HIT_TABLE, {Key2, 1}), + ok; + _ -> + ok + end. + +decrement(Key) -> + Key2 = make_key(Key), + case catch ets:update_counter(?HIT_TABLE, Key2, -1) of + {'EXIT', {badarg, _}} -> + catch ets:insert(?HIT_TABLE, {Key2, -1}), + ok; + _ -> ok + end. + +record(Key, Value) -> + catch ets:insert(?ABS_TABLE, {make_key(Key), Value}). + +clear(Key) -> + catch ets:delete(?ABS_TABLE, make_key(Key)). + +track_process_count(Stat) -> + track_process_count(self(), Stat). + +track_process_count(Pid, Stat) -> + MonitorFun = fun() -> + Ref = erlang:monitor(process, Pid), + receive {'DOWN', Ref, _, _, _} -> ok end, + couch_stats_collector:decrement(Stat) + end, + case (catch couch_stats_collector:increment(Stat)) of + ok -> spawn(MonitorFun); + _ -> ok + end. + + +init(_) -> + ets:new(?HIT_TABLE, [named_table, set, public]), + ets:new(?ABS_TABLE, [named_table, duplicate_bag, public]), + {ok, nil}. + +terminate(_Reason, _State) -> + ok. + +handle_call(stop, _, State) -> + {stop, normal, stopped, State}. + +handle_cast(foo, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +code_change(_OldVersion, State, _Extra) -> + {ok, State}. + + +make_key({Module, Key}) when is_integer(Key) -> + {Module, list_to_atom(integer_to_list(Key))}; +make_key(Key) -> + Key. + +abs_to_list() -> + SortedKVs = lists:sort(ets:tab2list(?ABS_TABLE)), + lists:foldl(fun({Key, Val}, Acc) -> + case Acc of + [] -> + [{Key, [Val]}]; + [{Key, Prev} | Rest] -> + [{Key, [Val | Prev]} | Rest]; + Others -> + [{Key, [Val]} | Others] + end + end, [], SortedKVs).
\ No newline at end of file diff --git a/apps/couch/src/couch_stream.erl b/apps/couch/src/couch_stream.erl new file mode 100644 index 00000000..04c17770 --- /dev/null +++ b/apps/couch/src/couch_stream.erl @@ -0,0 +1,319 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_stream). +-behaviour(gen_server). + + +-define(FILE_POINTER_BYTES, 8). +-define(FILE_POINTER_BITS, 8*(?FILE_POINTER_BYTES)). + +-define(STREAM_OFFSET_BYTES, 4). +-define(STREAM_OFFSET_BITS, 8*(?STREAM_OFFSET_BYTES)). + +-define(HUGE_CHUNK, 1000000000). % Huge chuck size when reading all in one go + +-define(DEFAULT_STREAM_CHUNK, 16#00100000). % 1 meg chunks when streaming data + +-export([open/1, open/3, close/1, write/2, foldl/4, foldl/5, foldl_decode/6, + old_foldl/5,old_copy_to_new_stream/4]). +-export([copy_to_new_stream/3,old_read_term/2]). +-export([init/1, terminate/2, handle_call/3]). +-export([handle_cast/2,code_change/3,handle_info/2]). + +-include("couch_db.hrl"). + +-record(stream, + {fd = 0, + written_pointers=[], + buffer_list = [], + buffer_len = 0, + max_buffer = 4096, + written_len = 0, + md5, + % md5 of the content without any transformation applied (e.g. compression) + % needed for the attachment upload integrity check (ticket 558) + identity_md5, + identity_len = 0, + encoding_fun, + end_encoding_fun + }). + + +%%% Interface functions %%% + +open(Fd) -> + open(Fd, identity, []). + +open(Fd, Encoding, Options) -> + gen_server:start_link(couch_stream, {Fd, Encoding, Options}, []). + +close(Pid) -> + gen_server:call(Pid, close, infinity). + +copy_to_new_stream(Fd, PosList, DestFd) -> + {ok, Dest} = open(DestFd), + foldl(Fd, PosList, + fun(Bin, _) -> + ok = write(Dest, Bin) + end, ok), + close(Dest). + + +% 09 UPGRADE CODE +old_copy_to_new_stream(Fd, Pos, Len, DestFd) -> + {ok, Dest} = open(DestFd), + old_foldl(Fd, Pos, Len, + fun(Bin, _) -> + ok = write(Dest, Bin) + end, ok), + close(Dest). + +% 09 UPGRADE CODE +old_foldl(_Fd, null, 0, _Fun, Acc) -> + Acc; +old_foldl(Fd, OldPointer, Len, Fun, Acc) when is_tuple(OldPointer)-> + {ok, Acc2, _} = old_stream_data(Fd, OldPointer, Len, ?DEFAULT_STREAM_CHUNK, Fun, Acc), + Acc2. + +foldl(_Fd, [], _Fun, Acc) -> + Acc; +foldl(Fd, [Pos|Rest], Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + foldl(Fd, Rest, Fun, Fun(Bin, Acc)). + +foldl(Fd, PosList, <<>>, Fun, Acc) -> + foldl(Fd, PosList, Fun, Acc); +foldl(Fd, PosList, Md5, Fun, Acc) -> + foldl(Fd, PosList, Md5, couch_util:md5_init(), Fun, Acc). + +foldl_decode(Fd, PosList, Md5, Enc, Fun, Acc) -> + {DecDataFun, DecEndFun} = case Enc of + gzip -> + ungzip_init(); + identity -> + identity_enc_dec_funs() + end, + Result = foldl_decode( + DecDataFun, Fd, PosList, Md5, couch_util:md5_init(), Fun, Acc + ), + DecEndFun(), + Result. 
+ +foldl(_Fd, [], Md5, Md5Acc, _Fun, Acc) -> + Md5 = couch_util:md5_final(Md5Acc), + Acc; +foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + Md5 = couch_util:md5_final(couch_util:md5_update(Md5Acc, Bin)), + Fun(Bin, Acc); +foldl(Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + foldl(Fd, Rest, Md5, couch_util:md5_update(Md5Acc, Bin), Fun, Fun(Bin, Acc)). + +foldl_decode(_DecFun, _Fd, [], Md5, Md5Acc, _Fun, Acc) -> + Md5 = couch_util:md5_final(Md5Acc), + Acc; +foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc) -> + {ok, EncBin} = couch_file:pread_iolist(Fd, Pos), + Md5 = couch_util:md5_final(couch_util:md5_update(Md5Acc, EncBin)), + Bin = DecFun(EncBin), + Fun(Bin, Acc); +foldl_decode(DecFun, Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc) -> + {ok, EncBin} = couch_file:pread_iolist(Fd, Pos), + Bin = DecFun(EncBin), + Md5Acc2 = couch_util:md5_update(Md5Acc, EncBin), + foldl_decode(DecFun, Fd, Rest, Md5, Md5Acc2, Fun, Fun(Bin, Acc)). + +gzip_init(Options) -> + case couch_util:get_value(compression_level, Options, 0) of + Lvl when Lvl >= 1 andalso Lvl =< 9 -> + Z = zlib:open(), + % 15 = ?MAX_WBITS (defined in the zlib module) + % the 16 + ?MAX_WBITS formula was obtained by inspecting zlib:gzip/1 + ok = zlib:deflateInit(Z, Lvl, deflated, 16 + 15, 8, default), + { + fun(Data) -> + zlib:deflate(Z, Data) + end, + fun() -> + Last = zlib:deflate(Z, [], finish), + ok = zlib:deflateEnd(Z), + ok = zlib:close(Z), + Last + end + }; + _ -> + identity_enc_dec_funs() + end. + +ungzip_init() -> + Z = zlib:open(), + zlib:inflateInit(Z, 16 + 15), + { + fun(Data) -> + zlib:inflate(Z, Data) + end, + fun() -> + ok = zlib:inflateEnd(Z), + ok = zlib:close(Z) + end + }. + +identity_enc_dec_funs() -> + { + fun(Data) -> Data end, + fun() -> [] end + }. + +write(_Pid, <<>>) -> + ok; +write(Pid, Bin) -> + gen_server:call(Pid, {write, Bin}, infinity). + + +init({Fd, Encoding, Options}) -> + {EncodingFun, EndEncodingFun} = case Encoding of + identity -> + identity_enc_dec_funs(); + gzip -> + gzip_init(Options) + end, + {ok, #stream{ + fd=Fd, + md5=couch_util:md5_init(), + identity_md5=couch_util:md5_init(), + encoding_fun=EncodingFun, + end_encoding_fun=EndEncodingFun + } + }. + +terminate(_Reason, _Stream) -> + ok. 
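+
+%% Aside (illustrative, not part of this module): typical use against an
+%% already opened couch_file Fd, with Chunk1/Chunk2 standing for any iodata.
+%% With gzip encoding, Md5 below is of the compressed data as written, while
+%% IdentityMd5/IdentityLen describe the original bytes (see the record comment
+%% above):
+%%
+%%   {ok, Stream} = couch_stream:open(Fd, gzip, [{compression_level, 8}]),
+%%   ok = couch_stream:write(Stream, Chunk1),
+%%   ok = couch_stream:write(Stream, Chunk2),
+%%   {Ptrs, Len, IdentityLen, Md5, IdentityMd5} = couch_stream:close(Stream),
+%%   Bins = couch_stream:foldl_decode(Fd, Ptrs, Md5, gzip,
+%%       fun(B, Acc) -> [B | Acc] end, []),
+%%   iolist_to_binary(lists:reverse(Bins)).   % the original, uncompressed bytes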
+ +handle_call({write, Bin}, _From, Stream) -> + BinSize = iolist_size(Bin), + #stream{ + fd = Fd, + written_len = WrittenLen, + written_pointers = Written, + buffer_len = BufferLen, + buffer_list = Buffer, + max_buffer = Max, + md5 = Md5, + identity_md5 = IdenMd5, + identity_len = IdenLen, + encoding_fun = EncodingFun} = Stream, + if BinSize + BufferLen > Max -> + WriteBin = lists:reverse(Buffer, [Bin]), + IdenMd5_2 = couch_util:md5_update(IdenMd5, WriteBin), + case EncodingFun(WriteBin) of + [] -> + % case where the encoder did some internal buffering + % (zlib does it for example) + WrittenLen2 = WrittenLen, + Md5_2 = Md5, + Written2 = Written; + WriteBin2 -> + {ok, Pos} = couch_file:append_binary(Fd, WriteBin2), + WrittenLen2 = WrittenLen + iolist_size(WriteBin2), + Md5_2 = couch_util:md5_update(Md5, WriteBin2), + Written2 = [Pos|Written] + end, + + {reply, ok, Stream#stream{ + written_len=WrittenLen2, + written_pointers=Written2, + buffer_list=[], + buffer_len=0, + md5=Md5_2, + identity_md5=IdenMd5_2, + identity_len=IdenLen + BinSize}}; + true -> + {reply, ok, Stream#stream{ + buffer_list=[Bin|Buffer], + buffer_len=BufferLen + BinSize, + identity_len=IdenLen + BinSize}} + end; +handle_call(close, _From, Stream) -> + #stream{ + fd = Fd, + written_len = WrittenLen, + written_pointers = Written, + buffer_list = Buffer, + md5 = Md5, + identity_md5 = IdenMd5, + identity_len = IdenLen, + encoding_fun = EncodingFun, + end_encoding_fun = EndEncodingFun} = Stream, + + WriteBin = lists:reverse(Buffer), + IdenMd5Final = couch_util:md5_final(couch_util:md5_update(IdenMd5, WriteBin)), + WriteBin2 = EncodingFun(WriteBin) ++ EndEncodingFun(), + Md5Final = couch_util:md5_final(couch_util:md5_update(Md5, WriteBin2)), + Result = case WriteBin2 of + [] -> + {lists:reverse(Written), WrittenLen, IdenLen, Md5Final, IdenMd5Final}; + _ -> + {ok, Pos} = couch_file:append_binary(Fd, WriteBin2), + StreamInfo = lists:reverse(Written, [Pos]), + StreamLen = WrittenLen + iolist_size(WriteBin2), + {StreamInfo, StreamLen, IdenLen, Md5Final, IdenMd5Final} + end, + {stop, normal, Result, Stream}. + +handle_cast(_Msg, State) -> + {noreply,State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info(_Info, State) -> + {noreply, State}. + + + +% 09 UPGRADE CODE +old_read_term(Fd, Sp) -> + {ok, <<TermLen:(?STREAM_OFFSET_BITS)>>, Sp2} + = old_read(Fd, Sp, ?STREAM_OFFSET_BYTES), + {ok, Bin, _Sp3} = old_read(Fd, Sp2, TermLen), + {ok, binary_to_term(Bin)}. + +old_read(Fd, Sp, Num) -> + {ok, RevBin, Sp2} = old_stream_data(Fd, Sp, Num, ?HUGE_CHUNK, fun(Bin, Acc) -> [Bin | Acc] end, []), + Bin = list_to_binary(lists:reverse(RevBin)), + {ok, Bin, Sp2}. + +% 09 UPGRADE CODE +old_stream_data(_Fd, Sp, 0, _MaxChunk, _Fun, Acc) -> + {ok, Acc, Sp}; +old_stream_data(Fd, {Pos, 0}, Num, MaxChunk, Fun, Acc) -> + {ok, <<NextPos:(?FILE_POINTER_BITS), NextOffset:(?STREAM_OFFSET_BITS)>>} + = couch_file:old_pread(Fd, Pos, ?FILE_POINTER_BYTES + ?STREAM_OFFSET_BYTES), + Sp = {NextPos, NextOffset}, + % Check NextPos is past current Pos (this is always true in a stream) + % Guards against potential infinite loops caused by corruption. 
+ case NextPos > Pos of + true -> ok; + false -> throw({error, stream_corruption}) + end, + old_stream_data(Fd, Sp, Num, MaxChunk, Fun, Acc); +old_stream_data(Fd, {Pos, Offset}, Num, MaxChunk, Fun, Acc) -> + ReadAmount = lists:min([MaxChunk, Num, Offset]), + {ok, Bin} = couch_file:old_pread(Fd, Pos, ReadAmount), + Sp = {Pos + ReadAmount, Offset - ReadAmount}, + old_stream_data(Fd, Sp, Num - ReadAmount, MaxChunk, Fun, Fun(Bin, Acc)). + + +% Tests moved to tests/etap/050-stream.t + diff --git a/apps/couch/src/couch_task_status.erl b/apps/couch/src/couch_task_status.erl new file mode 100644 index 00000000..c4487dc4 --- /dev/null +++ b/apps/couch/src/couch_task_status.erl @@ -0,0 +1,124 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_task_status). +-behaviour(gen_server). + +% This module allows is used to track the status of long running tasks. +% Long running tasks register (add_task/3) then update their status (update/1) +% and the task and status is added to tasks list. When the tracked task dies +% it will be automatically removed the tracking. To get the tasks list, use the +% all/0 function + +-export([start_link/0, stop/0]). +-export([all/0, add_task/3, update/1, update/2, set_update_frequency/1]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-import(couch_util, [to_binary/1]). + +-include("couch_db.hrl"). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +stop() -> + gen_server:cast(?MODULE, stop). + + +all() -> + gen_server:call(?MODULE, all). + + +add_task(Type, TaskName, StatusText) -> + put(task_status_update, {{0, 0, 0}, 0}), + Msg = { + add_task, + to_binary(Type), + to_binary(TaskName), + to_binary(StatusText) + }, + gen_server:call(?MODULE, Msg). + + +set_update_frequency(Msecs) -> + put(task_status_update, {{0, 0, 0}, Msecs * 1000}). + + +update(StatusText) -> + update("~s", [StatusText]). + +update(Format, Data) -> + {LastUpdateTime, Frequency} = get(task_status_update), + case timer:now_diff(Now = now(), LastUpdateTime) >= Frequency of + true -> + put(task_status_update, {Now, Frequency}), + Msg = ?l2b(io_lib:format(Format, Data)), + gen_server:cast(?MODULE, {update_status, self(), Msg}); + false -> + ok + end. + + +init([]) -> + % read configuration settings and register for configuration changes + ets:new(?MODULE, [ordered_set, protected, named_table]), + {ok, nil}. + + +terminate(_Reason,_State) -> + ok. + + +handle_call({add_task, Type, TaskName, StatusText}, {From, _}, Server) -> + case ets:lookup(?MODULE, From) of + [] -> + true = ets:insert(?MODULE, {From, {Type, TaskName, StatusText}}), + erlang:monitor(process, From), + {reply, ok, Server}; + [_] -> + {reply, {add_task_error, already_registered}, Server} + end; +handle_call(all, _, Server) -> + All = [ + [ + {type, Type}, + {task, Task}, + {status, Status}, + {pid, ?l2b(pid_to_list(Pid))} + ] + || + {Pid, {Type, Task, Status}} <- ets:tab2list(?MODULE) + ], + {reply, All, Server}. 
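+
+%% Aside (illustrative, not part of this module): typical use from a long
+%% running process, assuming couch_task_status is running; N and Total are the
+%% caller's own counters:
+%%
+%%   couch_task_status:add_task(<<"Replication">>, <<"dba -> dbb">>, <<"starting">>),
+%%   couch_task_status:set_update_frequency(500),
+%%   couch_task_status:update("copied ~p of ~p changes", [N, Total]),
+%%
+%% The entry shows up in all/0 until the calling process exits, at which point
+%% the 'DOWN' clause below removes it automatically.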
+ + +handle_cast({update_status, Pid, StatusText}, Server) -> + [{Pid, {Type, TaskName, _StatusText}}] = ets:lookup(?MODULE, Pid), + ?LOG_DEBUG("New task status for ~s: ~s",[TaskName, StatusText]), + true = ets:insert(?MODULE, {Pid, {Type, TaskName, StatusText}}), + {noreply, Server}; +handle_cast(stop, State) -> + {stop, normal, State}. + +handle_info({'DOWN', _MonitorRef, _Type, Pid, _Info}, Server) -> + %% should we also erlang:demonitor(_MonitorRef), ? + ets:delete(?MODULE, Pid), + {noreply, Server}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + diff --git a/apps/couch/src/couch_util.erl b/apps/couch/src/couch_util.erl new file mode 100644 index 00000000..8217a268 --- /dev/null +++ b/apps/couch/src/couch_util.erl @@ -0,0 +1,454 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_util). + +-export([priv_dir/0, start_driver/1, normpath/1]). +-export([should_flush/0, should_flush/1, to_existing_atom/1]). +-export([rand32/0, implode/2, collate/2, collate/3]). +-export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1]). +-export([encodeBase64Url/1, decodeBase64Url/1]). +-export([to_hex/1, parse_term/1, dict_find/3]). +-export([file_read_size/1, get_nested_json_value/2, json_user_ctx/1]). +-export([proplist_apply_field/2, json_apply_field/2]). +-export([to_binary/1, to_integer/1, to_list/1, url_encode/1]). +-export([json_encode/1, json_decode/1]). +-export([verify/2,simple_call/2,shutdown_sync/1]). +-export([compressible_att_type/1]). +-export([get_value/2, get_value/3]). +-export([md5/1, md5_init/0, md5_update/2, md5_final/1]). +-export([reorder_results/2]). + +-include("couch_db.hrl"). +-include_lib("kernel/include/file.hrl"). + +% arbitrarily chosen amount of memory to use before flushing to disk +-define(FLUSH_MAX_MEM, 10000000). + +priv_dir() -> + case code:priv_dir(couch) of + {error, bad_name} -> + % small hack, in dev mode "app" is couchdb. Fixing requires + % renaming src/couch to src/couch. Not really worth the hassle. + % -Damien + code:priv_dir(couchdb); + Dir -> Dir + end. + +start_driver(LibDir) -> + case erl_ddll:load_driver(LibDir, "couch_icu_driver") of + ok -> + ok; + {error, already_loaded} -> + ok = erl_ddll:reload_driver(LibDir, "couch_icu_driver"); + {error, Error} -> + exit(erl_ddll:format_error(Error)) + end. + +% Normalize a pathname by removing .. and . components. +normpath(Path) -> + normparts(filename:split(Path), []). + +normparts([], Acc) -> + filename:join(lists:reverse(Acc)); +normparts([".." | RestParts], [_Drop | RestAcc]) -> + normparts(RestParts, RestAcc); +normparts(["." | RestParts], Acc) -> + normparts(RestParts, Acc); +normparts([Part | RestParts], Acc) -> + normparts(RestParts, [Part | Acc]). + +% works like list_to_existing_atom, except can be list or binary and it +% gives you the original value instead of an error if no existing atom. 
+to_existing_atom(V) when is_list(V) -> + try list_to_existing_atom(V) catch _:_ -> V end; +to_existing_atom(V) when is_binary(V) -> + try list_to_existing_atom(?b2l(V)) catch _:_ -> V end; +to_existing_atom(V) when is_atom(V) -> + V. + +shutdown_sync(Pid) when not is_pid(Pid)-> + ok; +shutdown_sync(Pid) -> + MRef = erlang:monitor(process, Pid), + try + catch unlink(Pid), + catch exit(Pid, shutdown), + receive + {'DOWN', MRef, _, _, _} -> + ok + end + after + erlang:demonitor(MRef, [flush]) + end. + + +simple_call(Pid, Message) -> + MRef = erlang:monitor(process, Pid), + try + Pid ! {self(), Message}, + receive + {Pid, Result} -> + Result; + {'DOWN', MRef, _, _, Reason} -> + exit(Reason) + end + after + erlang:demonitor(MRef, [flush]) + end. + +to_hex([]) -> + []; +to_hex(Bin) when is_binary(Bin) -> + to_hex(binary_to_list(Bin)); +to_hex([H|T]) -> + [to_digit(H div 16), to_digit(H rem 16) | to_hex(T)]. + +to_digit(N) when N < 10 -> $0 + N; +to_digit(N) -> $a + N-10. + + +parse_term(Bin) when is_binary(Bin) -> + parse_term(binary_to_list(Bin)); +parse_term(List) -> + {ok, Tokens, _} = erl_scan:string(List ++ "."), + erl_parse:parse_term(Tokens). + +get_value(Key, List) -> + get_value(Key, List, undefined). + +get_value(Key, List, Default) -> + case lists:keysearch(Key, 1, List) of + {value, {Key,Value}} -> + Value; + false -> + Default + end. + +get_nested_json_value({Props}, [Key|Keys]) -> + case couch_util:get_value(Key, Props, nil) of + nil -> throw({not_found, <<"missing json key: ", Key/binary>>}); + Value -> get_nested_json_value(Value, Keys) + end; +get_nested_json_value(Value, []) -> + Value; +get_nested_json_value(_NotJSONObj, _) -> + throw({not_found, json_mismatch}). + +proplist_apply_field(H, L) -> + {R} = json_apply_field(H, {L}), + R. + +json_apply_field(H, {L}) -> + json_apply_field(H, L, []). +json_apply_field({Key, NewValue}, [{Key, _OldVal} | Headers], Acc) -> + json_apply_field({Key, NewValue}, Headers, Acc); +json_apply_field({Key, NewValue}, [{OtherKey, OtherVal} | Headers], Acc) -> + json_apply_field({Key, NewValue}, Headers, [{OtherKey, OtherVal} | Acc]); +json_apply_field({Key, NewValue}, [], Acc) -> + {[{Key, NewValue}|Acc]}. + +json_user_ctx(#db{name=DbName, user_ctx=Ctx}) -> + {[{<<"db">>, DbName}, + {<<"name">>,Ctx#user_ctx.name}, + {<<"roles">>,Ctx#user_ctx.roles}]}. + + +% returns a random integer +rand32() -> + crypto:rand_uniform(0, 16#100000000). + +% given a pathname "../foo/bar/" it gives back the fully qualified +% absolute pathname. +abs_pathname(" " ++ Filename) -> + % strip leading whitspace + abs_pathname(Filename); +abs_pathname([$/ |_]=Filename) -> + Filename; +abs_pathname(Filename) -> + {ok, Cwd} = file:get_cwd(), + {Filename2, Args} = separate_cmd_args(Filename, ""), + abs_pathname(Filename2, Cwd) ++ Args. + +abs_pathname(Filename, Dir) -> + Name = filename:absname(Filename, Dir ++ "/"), + OutFilename = filename:join(fix_path_list(filename:split(Name), [])), + % If the filename is a dir (last char slash, put back end slash + case string:right(Filename,1) of + "/" -> + OutFilename ++ "/"; + "\\" -> + OutFilename ++ "/"; + _Else-> + OutFilename + end. 
+
+% if this is an executable with arguments, separate out the arguments
+% "./foo\ bar.sh -baz=blah" -> {"./foo\ bar.sh", " -baz=blah"}
+separate_cmd_args("", CmdAcc) ->
+    {lists:reverse(CmdAcc), ""};
+separate_cmd_args("\\ " ++ Rest, CmdAcc) -> % handle escaped space
+    separate_cmd_args(Rest, " \\" ++ CmdAcc);
+separate_cmd_args(" " ++ Rest, CmdAcc) ->
+    {lists:reverse(CmdAcc), " " ++ Rest};
+separate_cmd_args([Char|Rest], CmdAcc) ->
+    separate_cmd_args(Rest, [Char | CmdAcc]).
+
+% lowercases string bytes that are the ascii characters A-Z.
+% All other characters/bytes are passed through unchanged.
+ascii_lower(String) ->
+    ascii_lower(String, []).
+
+ascii_lower([], Acc) ->
+    lists:reverse(Acc);
+ascii_lower([Char | RestString], Acc) when Char >= $A, Char =< $Z ->
+    ascii_lower(RestString, [Char + ($a-$A) | Acc]);
+ascii_lower([Char | RestString], Acc) ->
+    ascii_lower(RestString, [Char | Acc]).
+
+% Is a character whitespace?
+is_whitespace($\s) -> true;
+is_whitespace($\t) -> true;
+is_whitespace($\n) -> true;
+is_whitespace($\r) -> true;
+is_whitespace(_Else) -> false.
+
+
+% removes leading and trailing whitespace from a string
+trim(String) ->
+    String2 = lists:dropwhile(fun is_whitespace/1, String),
+    lists:reverse(lists:dropwhile(fun is_whitespace/1, lists:reverse(String2))).
+
+% takes a hierarchical list of dirs and removes the dots ".", double dots
+% ".." and the corresponding parent dirs.
+fix_path_list([], Acc) ->
+    lists:reverse(Acc);
+fix_path_list([".."|Rest], [_PrevAcc|RestAcc]) ->
+    fix_path_list(Rest, RestAcc);
+fix_path_list(["."|Rest], Acc) ->
+    fix_path_list(Rest, Acc);
+fix_path_list([Dir | Rest], Acc) ->
+    fix_path_list(Rest, [Dir | Acc]).
+
+
+implode(List, Sep) ->
+    implode(List, Sep, []).
+
+implode([], _Sep, Acc) ->
+    lists:flatten(lists:reverse(Acc));
+implode([H], Sep, Acc) ->
+    implode([], Sep, [H|Acc]);
+implode([H|T], Sep, Acc) ->
+    implode(T, Sep, [Sep,H|Acc]).
+
+
+drv_port() ->
+    case get(couch_drv_port) of
+    undefined ->
+        Port = open_port({spawn, "couch_icu_driver"}, []),
+        put(couch_drv_port, Port),
+        Port;
+    Port ->
+        Port
+    end.
+
+collate(A, B) ->
+    collate(A, B, []).
+
+collate(A, B, Options) when is_binary(A), is_binary(B) ->
+    Operation =
+    case lists:member(nocase, Options) of
+        true -> 1; % Case insensitive
+        false -> 0 % Case sensitive
+    end,
+    SizeA = byte_size(A),
+    SizeB = byte_size(B),
+    Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>,
+    [Result] = erlang:port_control(drv_port(), Operation, Bin),
+    % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the
+    % expected typical -1, 0, 1
+    Result - 1.
+
+should_flush() ->
+    should_flush(?FLUSH_MAX_MEM).
+
+should_flush(MemThreshHold) ->
+    {memory, ProcMem} = process_info(self(), memory),
+    BinMem = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end,
+        0, element(2,process_info(self(), binary))),
+    if ProcMem+BinMem > 2*MemThreshHold ->
+        garbage_collect(),
+        {memory, ProcMem2} = process_info(self(), memory),
+        BinMem2 = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end,
+            0, element(2,process_info(self(), binary))),
+        ProcMem2+BinMem2 > MemThreshHold;
+    true -> false end.
+
+encodeBase64Url(Url) ->
+    Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")),
+    Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])),
+    iolist_to_binary(re:replace(Url2, "\\+", "-", [global])).
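
The collate/3 wire format is easiest to see with concrete keys. Assuming the ICU driver has already been loaded via start_driver/1, collate(<<"foo">>, <<"bar">>) sends operation 0 and the binary <<3:32/native, "foo", 3:32/native, "bar">> through port_control, and the driver answers with a single byte (0 lt, 1 eq, 2 gt). A hedged shell sketch:

    1> couch_util:collate(<<"foo">>, <<"bar">>).
    1
    2> couch_util:collate(<<"A">>, <<"a">>, [nocase]).
    0

The second call compares equal because the no-case collator is opened with UCOL_PRIMARY strength.
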
+ +decodeBase64Url(Url64) -> + Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]), + Url2 = iolist_to_binary( + re:replace(iolist_to_binary(Url1), "_", "/", [global]) + ), + Padding = ?l2b(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)), + base64:decode(<<Url2/binary, Padding/binary>>). + +dict_find(Key, Dict, DefaultValue) -> + case dict:find(Key, Dict) of + {ok, Value} -> + Value; + error -> + DefaultValue + end. + + +file_read_size(FileName) -> + case file:read_file_info(FileName) of + {ok, FileInfo} -> + FileInfo#file_info.size; + Error -> Error + end. + +to_binary(V) when is_binary(V) -> + V; +to_binary(V) when is_list(V) -> + try + list_to_binary(V) + catch + _ -> + list_to_binary(io_lib:format("~p", [V])) + end; +to_binary(V) when is_atom(V) -> + list_to_binary(atom_to_list(V)); +to_binary(V) -> + list_to_binary(io_lib:format("~p", [V])). + +to_integer(V) when is_integer(V) -> + V; +to_integer(V) when is_list(V) -> + erlang:list_to_integer(V); +to_integer(V) when is_binary(V) -> + erlang:list_to_integer(binary_to_list(V)). + +to_list(V) when is_list(V) -> + V; +to_list(V) when is_binary(V) -> + binary_to_list(V); +to_list(V) when is_atom(V) -> + atom_to_list(V); +to_list(V) -> + lists:flatten(io_lib:format("~p", [V])). + +url_encode(Bin) when is_binary(Bin) -> + url_encode(binary_to_list(Bin)); +url_encode([H|T]) -> + if + H >= $a, $z >= H -> + [H|url_encode(T)]; + H >= $A, $Z >= H -> + [H|url_encode(T)]; + H >= $0, $9 >= H -> + [H|url_encode(T)]; + H == $_; H == $.; H == $-; H == $: -> + [H|url_encode(T)]; + true -> + case lists:flatten(io_lib:format("~.16.0B", [H])) of + [X, Y] -> + [$%, X, Y | url_encode(T)]; + [X] -> + [$%, $0, X | url_encode(T)] + end + end; +url_encode([]) -> + []. + +json_encode(V) -> + Handler = + fun({L}) when is_list(L) -> + {struct,L}; + (Bad) -> + exit({json_encode, {bad_term, Bad}}) + end, + (mochijson2:encoder([{handler, Handler}]))(V). + +json_decode(V) -> + try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V) + catch + _Type:_Error -> + throw({invalid_json,V}) + end. + +verify([X|RestX], [Y|RestY], Result) -> + verify(RestX, RestY, (X bxor Y) bor Result); +verify([], [], Result) -> + Result == 0. + +verify(<<X/binary>>, <<Y/binary>>) -> + verify(?b2l(X), ?b2l(Y)); +verify(X, Y) when is_list(X) and is_list(Y) -> + case length(X) == length(Y) of + true -> + verify(X, Y, 0); + false -> + false + end; +verify(_X, _Y) -> false. + +compressible_att_type(MimeType) when is_binary(MimeType) -> + compressible_att_type(?b2l(MimeType)); +compressible_att_type(MimeType) -> + TypeExpList = re:split( + couch_config:get("attachments", "compressible_types", ""), + ", ?", + [{return, list}] + ), + lists:any( + fun(TypeExp) -> + Regexp = "^\\s*" ++ + re:replace(TypeExp, "\\*", ".*", [{return, list}]) ++ "\\s*$", + case re:run(MimeType, Regexp, [caseless]) of + {match, _} -> + true; + _ -> + false + end + end, + [T || T <- TypeExpList, T /= []] + ). + +-spec md5(Data::(iolist() | binary())) -> Digest::binary(). +md5(Data) -> + try crypto:md5(Data) catch error:_ -> erlang:md5(Data) end. + +-spec md5_init() -> Context::binary(). +md5_init() -> + try crypto:md5_init() catch error:_ -> erlang:md5_init() end. + +-spec md5_update(Context::binary(), Data::(iolist() | binary())) -> + NewContext::binary(). +md5_update(Ctx, D) -> + try crypto:md5_update(Ctx,D) catch error:_ -> erlang:md5_update(Ctx,D) end. + +-spec md5_final(Context::binary()) -> Digest::binary(). 
+md5_final(Ctx) -> + try crypto:md5_final(Ctx) catch error:_ -> erlang:md5_final(Ctx) end. + +% linear search is faster for small lists, length() is 0.5 ms for 100k list +reorder_results(Keys, SortedResults) when length(Keys) < 100 -> + [couch_util:get_value(Key, SortedResults) || Key <- Keys]; +reorder_results(Keys, SortedResults) -> + KeyDict = dict:from_list(SortedResults), + [dict:fetch(Key, KeyDict) || Key <- Keys]. diff --git a/apps/couch/src/couch_uuids.erl b/apps/couch/src/couch_uuids.erl new file mode 100644 index 00000000..e1851e1d --- /dev/null +++ b/apps/couch/src/couch_uuids.erl @@ -0,0 +1,95 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_uuids). +-include("couch_db.hrl"). + +-behaviour(gen_server). + +-export([start/0, stop/0]). +-export([new/0, random/0, utc_random/0]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +start() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +stop() -> + gen_server:cast(?MODULE, stop). + +new() -> + gen_server:call(?MODULE, create). + +random() -> + list_to_binary(couch_util:to_hex(crypto:rand_bytes(16))). + +utc_random() -> + Now = {_, _, Micro} = now(), + Nowish = calendar:now_to_universal_time(Now), + Nowsecs = calendar:datetime_to_gregorian_seconds(Nowish), + Then = calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}), + Prefix = io_lib:format("~14.16.0b", [(Nowsecs - Then) * 1000000 + Micro]), + list_to_binary(Prefix ++ couch_util:to_hex(crypto:rand_bytes(9))). + +init([]) -> + ok = couch_config:register( + fun("uuids", _) -> gen_server:cast(?MODULE, change) end + ), + {ok, state()}. + +terminate(_Reason, _State) -> + ok. + +handle_call(create, _From, random) -> + {reply, random(), random}; +handle_call(create, _From, utc_random) -> + {reply, utc_random(), utc_random}; +handle_call(create, _From, {sequential, Pref, Seq}) -> + Result = ?l2b(Pref ++ io_lib:format("~6.16.0b", [Seq])), + case Seq >= 16#fff000 of + true -> + {reply, Result, {sequential, new_prefix(), inc()}}; + _ -> + {reply, Result, {sequential, Pref, Seq + inc()}} + end. + +handle_cast(change, _State) -> + {noreply, state()}; +handle_cast(stop, State) -> + {stop, normal, State}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +new_prefix() -> + couch_util:to_hex((crypto:rand_bytes(13))). + +inc() -> + crypto:rand_uniform(1, 16#ffe). + +state() -> + AlgoStr = couch_config:get("uuids", "algorithm", "random"), + case couch_util:to_existing_atom(AlgoStr) of + random -> + random; + utc_random -> + utc_random; + sequential -> + {sequential, new_prefix(), inc()}; + Unknown -> + throw({unknown_uuid_algorithm, Unknown}) + end. 
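
All three UUID algorithms return 32 hex characters but differ in structure; the layout below follows the code above, while the concrete digits are made up:

    %% random:      16 random bytes as hex,
    %%              e.g. <<"8b7f6c1a9d2e4f30a1b2c3d4e5f60718">>
    %% utc_random:  14 hex chars of microseconds since the Unix epoch, then
    %%              9 random bytes as hex,
    %%              e.g. <<"0004b39c2f1d3a6c1a9d2e4f30a1b2c3">>
    %% sequential:  a 26 hex char random prefix kept until the counter nears
    %%              16#fff000, followed by a 6 hex char counter that grows by
    %%              a random step of 1..16#ffd per id,
    %%              e.g. <<"4d1f58a09bc2e37f60d41a2b9c0001a7">>
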
diff --git a/apps/couch/src/couch_view.erl b/apps/couch/src/couch_view.erl new file mode 100644 index 00000000..38c0a783 --- /dev/null +++ b/apps/couch/src/couch_view.erl @@ -0,0 +1,438 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view). +-behaviour(gen_server). + +-export([start_link/0,fold/4,less_json/2,less_json_ids/2,expand_dups/2, + detuple_kvs/2,init/1,terminate/2,handle_call/3,handle_cast/2,handle_info/2, + code_change/3,get_reduce_view/4,get_temp_reduce_view/5,get_temp_map_view/4, + get_map_view/4,get_row_count/1,reduce_to_count/1,fold_reduce/4, + extract_map_view/1,get_group_server/2,get_group_info/2,cleanup_index_files/1]). + +-include("couch_db.hrl"). + + +-record(server,{ + root_dir = []}). + +start_link() -> + gen_server:start_link({local, couch_view}, couch_view, [], []). + +get_temp_updater(DbName, Language, DesignOptions, MapSrc, RedSrc) -> + % make temp group + % do we need to close this db? + {ok, _Db, Group} = + couch_view_group:open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc), + case gen_server:call(couch_view, {get_group_server, DbName, Group}) of + {ok, Pid} -> + Pid; + Error -> + throw(Error) + end. + +get_group_server(DbName, GroupId) -> + % get signature for group + case couch_view_group:open_db_group(DbName, GroupId) of + % do we need to close this db? + {ok, _Db, Group} -> + case gen_server:call(couch_view, {get_group_server, DbName, Group}) of + {ok, Pid} -> + Pid; + Error -> + throw(Error) + end; + Error -> + throw(Error) + end. + +get_group(Db, GroupId, Stale) -> + MinUpdateSeq = case Stale of + ok -> 0; + _Else -> couch_db:get_update_seq(Db) + end, + couch_view_group:request_group( + get_group_server(couch_db:name(Db), GroupId), + MinUpdateSeq). + +get_temp_group(Db, Language, DesignOptions, MapSrc, RedSrc) -> + couch_view_group:request_group( + get_temp_updater(couch_db:name(Db), Language, DesignOptions, MapSrc, RedSrc), + couch_db:get_update_seq(Db)). + +get_group_info(Db, GroupId) -> + couch_view_group:request_group_info( + get_group_server(couch_db:name(Db), GroupId)). + +cleanup_index_files(Db) -> + % load all ddocs + {ok, DesignDocs} = couch_db:get_design_docs(Db), + + % make unique list of group sigs + Sigs = lists:map(fun(#doc{id = GroupId}) -> + {ok, Info} = get_group_info(Db, GroupId), + ?b2l(couch_util:get_value(signature, Info)) + end, [DD||DD <- DesignDocs, DD#doc.deleted == false]), + + FileList = list_index_files(Db), + + % regex that matches all ddocs + RegExp = "("++ string:join(Sigs, "|") ++")", + + % filter out the ones in use + DeleteFiles = [FilePath + || FilePath <- FileList, + re:run(FilePath, RegExp, [{capture, none}]) =:= nomatch], + % delete unused files + ?LOG_DEBUG("deleting unused view index files: ~p",[DeleteFiles]), + RootDir = couch_config:get("couchdb", "view_index_dir"), + [couch_file:delete(RootDir,File,false)||File <- DeleteFiles], + ok. 
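
cleanup_index_files/1 keeps only index files whose names contain the signature of a live design document. With hypothetical values view_index_dir = "/srv/couch/views", a database <<"db1">> and one live signature, the pieces fit together roughly like this (paths and signatures are made up):

    %% list_index_files(Db) (defined just below) returns e.g.
    %%   ["/srv/couch/views/.db1_design/00112233445566778899aabbccddeeff.view",
    %%    "/srv/couch/views/.db1_design/d41d8cd98f00b204e9800998ecf8427e.view"]
    %% Sigs = ["00112233445566778899aabbccddeeff"], so RegExp is
    %% "(00112233445566778899aabbccddeeff)" and only the second file fails the
    %% match and is handed to couch_file:delete/3.
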
+ +list_index_files(Db) -> + % call server to fetch the index files + RootDir = couch_config:get("couchdb", "view_index_dir"), + filelib:wildcard(RootDir ++ "/." ++ ?b2l(couch_db:name(Db)) ++ "_design"++"/*"). + + +get_row_count(#view{btree=Bt}) -> + {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt), + {ok, Count}. + +get_temp_reduce_view(Db, Language, DesignOptions, MapSrc, RedSrc) -> + {ok, #group{views=[View]}=Group} = + get_temp_group(Db, Language, DesignOptions, MapSrc, RedSrc), + {ok, {temp_reduce, View}, Group}. + + +get_reduce_view(Db, GroupId, Name, Update) -> + case get_group(Db, GroupId, Update) of + {ok, #group{views=Views,def_lang=Lang}=Group} -> + case get_reduce_view0(Name, Lang, Views) of + {ok, View} -> + {ok, View, Group}; + Else -> + Else + end; + Error -> + Error + end. + +get_reduce_view0(_Name, _Lang, []) -> + {not_found, missing_named_view}; +get_reduce_view0(Name, Lang, [#view{reduce_funs=RedFuns}=View|Rest]) -> + case get_key_pos(Name, RedFuns, 0) of + 0 -> get_reduce_view0(Name, Lang, Rest); + N -> {ok, {reduce, N, Lang, View}} + end. + +extract_map_view({reduce, _N, _Lang, View}) -> + View. + +detuple_kvs([], Acc) -> + lists:reverse(Acc); +detuple_kvs([KV | Rest], Acc) -> + {{Key,Id},Value} = KV, + NKV = [[Key, Id], Value], + detuple_kvs(Rest, [NKV | Acc]). + +expand_dups([], Acc) -> + lists:reverse(Acc); +expand_dups([{Key, {dups, Vals}} | Rest], Acc) -> + Expanded = [{Key, Val} || Val <- Vals], + expand_dups(Rest, Expanded ++ Acc); +expand_dups([KV | Rest], Acc) -> + expand_dups(Rest, [KV | Acc]). + +fold_reduce({temp_reduce, #view{btree=Bt}}, Fun, Acc, Options) -> + WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) -> + {_, [Red]} = couch_btree:final_reduce(Bt, PartialReds), + Fun(GroupedKey, Red, Acc0) + end, + couch_btree:fold_reduce(Bt, WrapperFun, Acc, Options); + +fold_reduce({reduce, NthRed, Lang, #view{btree=Bt, reduce_funs=RedFuns}}, Fun, Acc, Options) -> + PreResultPadding = lists:duplicate(NthRed - 1, []), + PostResultPadding = lists:duplicate(length(RedFuns) - NthRed, []), + {_Name, FunSrc} = lists:nth(NthRed,RedFuns), + ReduceFun = + fun(reduce, KVs) -> + {ok, Reduced} = couch_query_servers:reduce(Lang, [FunSrc], detuple_kvs(expand_dups(KVs, []),[])), + {0, PreResultPadding ++ Reduced ++ PostResultPadding}; + (rereduce, Reds) -> + UserReds = [[lists:nth(NthRed, UserRedsList)] || {_, UserRedsList} <- Reds], + {ok, Reduced} = couch_query_servers:rereduce(Lang, [FunSrc], UserReds), + {0, PreResultPadding ++ Reduced ++ PostResultPadding} + end, + WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) -> + {_, Reds} = couch_btree:final_reduce(ReduceFun, PartialReds), + Fun(GroupedKey, lists:nth(NthRed, Reds), Acc0) + end, + couch_btree:fold_reduce(Bt, WrapperFun, Acc, Options). + +get_key_pos(_Key, [], _N) -> + 0; +get_key_pos(Key, [{Key1,_Value}|_], N) when Key == Key1 -> + N + 1; +get_key_pos(Key, [_|Rest], N) -> + get_key_pos(Key, Rest, N+1). + + +get_temp_map_view(Db, Language, DesignOptions, Src) -> + {ok, #group{views=[View]}=Group} = get_temp_group(Db, Language, DesignOptions, Src, []), + {ok, View, Group}. + +get_map_view(Db, GroupId, Name, Stale) -> + case get_group(Db, GroupId, Stale) of + {ok, #group{views=Views}=Group} -> + case get_map_view0(Name, Views) of + {ok, View} -> + {ok, View, Group}; + Else -> + Else + end; + Error -> + Error + end. 
+ +get_map_view0(_Name, []) -> + {not_found, missing_named_view}; +get_map_view0(Name, [#view{map_names=MapNames}=View|Rest]) -> + case lists:member(Name, MapNames) of + true -> {ok, View}; + false -> get_map_view0(Name, Rest) + end. + +reduce_to_count(Reductions) -> + {Count, _} = + couch_btree:final_reduce( + fun(reduce, KVs) -> + Count = lists:sum( + [case V of {dups, Vals} -> length(Vals); _ -> 1 end + || {_,V} <- KVs]), + {Count, []}; + (rereduce, Reds) -> + {lists:sum([Count0 || {Count0, _} <- Reds]), []} + end, Reductions), + Count. + + + +fold_fun(_Fun, [], _, Acc) -> + {ok, Acc}; +fold_fun(Fun, [KV|Rest], {KVReds, Reds}, Acc) -> + case Fun(KV, {KVReds, Reds}, Acc) of + {ok, Acc2} -> + fold_fun(Fun, Rest, {[KV|KVReds], Reds}, Acc2); + {stop, Acc2} -> + {stop, Acc2} + end. + + +fold(#view{btree=Btree}, Fun, Acc, Options) -> + WrapperFun = + fun(KV, Reds, Acc2) -> + fold_fun(Fun, expand_dups([KV],[]), Reds, Acc2) + end, + {ok, _LastReduce, _AccResult} = couch_btree:fold(Btree, WrapperFun, Acc, Options). + + +init([]) -> + % read configuration settings and register for configuration changes + RootDir = couch_config:get("couchdb", "view_index_dir"), + Self = self(), + ok = couch_config:register( + fun("couchdb", "view_index_dir")-> + exit(Self, config_change) + end), + + couch_db_update_notifier:start_link( + fun({deleted, DbName}) -> + gen_server:cast(couch_view, {reset_indexes, DbName}); + ({created, DbName}) -> + gen_server:cast(couch_view, {reset_indexes, DbName}); + (_Else) -> + ok + end), + ets:new(couch_groups_by_db, [bag, private, named_table]), + ets:new(group_servers_by_sig, [set, protected, named_table]), + ets:new(couch_groups_by_updater, [set, private, named_table]), + process_flag(trap_exit, true), + ok = couch_file:init_delete_dir(RootDir), + {ok, #server{root_dir=RootDir}}. + + +terminate(_Reason, _Srv) -> + [couch_util:shutdown_sync(Pid) || {Pid, _} <- + ets:tab2list(couch_groups_by_updater)], + ok. + + +handle_call({get_group_server, DbName, + #group{name=GroupId,sig=Sig}=Group}, _From, #server{root_dir=Root}=Server) -> + case ets:lookup(group_servers_by_sig, {DbName, Sig}) of + [] -> + ?LOG_DEBUG("Spawning new group server for view group ~s in database ~s.", + [GroupId, DbName]), + case (catch couch_view_group:start_link({Root, DbName, Group})) of + {ok, NewPid} -> + add_to_ets(NewPid, DbName, Sig), + {reply, {ok, NewPid}, Server}; + {error, invalid_view_seq} -> + do_reset_indexes(DbName, Root), + case (catch couch_view_group:start_link({Root, DbName, Group})) of + {ok, NewPid} -> + add_to_ets(NewPid, DbName, Sig), + {reply, {ok, NewPid}, Server}; + Error -> + {reply, Error, Server} + end; + Error -> + {reply, Error, Server} + end; + [{_, ExistingPid}] -> + {reply, {ok, ExistingPid}, Server} + end. + +handle_cast({reset_indexes, DbName}, #server{root_dir=Root}=Server) -> + do_reset_indexes(DbName, Root), + {noreply, Server}. + +do_reset_indexes(DbName, Root) -> + % shutdown all the updaters and clear the files, the db got changed + Names = ets:lookup(couch_groups_by_db, DbName), + lists:foreach( + fun({_DbName, Sig}) -> + ?LOG_DEBUG("Killing update process for view group ~s. in database ~s.", [Sig, DbName]), + [{_, Pid}] = ets:lookup(group_servers_by_sig, {DbName, Sig}), + couch_util:shutdown_sync(Pid), + delete_from_ets(Pid, DbName, Sig) + end, Names), + delete_index_dir(Root, DbName), + RootDelDir = couch_config:get("couchdb", "view_index_dir"), + couch_file:delete(RootDelDir, Root ++ "/." ++ ?b2l(DbName) ++ "_temp"). 
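
reduce_to_count/1 above relies on the {Count, []} shape built by its anonymous reduce function; a small worked example (keys and values are made up):

    %% reduce over   [{{<<"k1">>, <<"doc1">>}, {dups, [1, 2]}},
    %%                {{<<"k2">>, <<"doc2">>}, 7}]          -> {3, []}
    %% rereduce over [{3, []}, {5, []}]                     -> {8, []}
    %% couch_btree:final_reduce/2 drives those two phases, and reduce_to_count/1
    %% returns the final count, 8 in this example.
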
+ +handle_info({'EXIT', FromPid, Reason}, Server) -> + case ets:lookup(couch_groups_by_updater, FromPid) of + [] -> + if Reason /= normal -> + % non-updater linked process died, we propagate the error + ?LOG_ERROR("Exit on non-updater process: ~p", [Reason]), + exit(Reason); + true -> ok + end; + [{_, {DbName, GroupId}}] -> + delete_from_ets(FromPid, DbName, GroupId) + end, + {noreply, Server}. + +add_to_ets(Pid, DbName, Sig) -> + true = ets:insert(couch_groups_by_updater, {Pid, {DbName, Sig}}), + true = ets:insert(group_servers_by_sig, {{DbName, Sig}, Pid}), + true = ets:insert(couch_groups_by_db, {DbName, Sig}). + +delete_from_ets(Pid, DbName, Sig) -> + true = ets:delete(couch_groups_by_updater, Pid), + true = ets:delete(group_servers_by_sig, {DbName, Sig}), + true = ets:delete_object(couch_groups_by_db, {DbName, Sig}). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +delete_index_dir(RootDir, DbName) -> + nuke_dir(RootDir, RootDir ++ "/." ++ ?b2l(DbName) ++ "_design"). + +nuke_dir(RootDelDir, Dir) -> + case file:list_dir(Dir) of + {error, enoent} -> ok; % doesn't exist + {ok, Files} -> + lists:foreach( + fun(File)-> + Full = Dir ++ "/" ++ File, + case couch_file:delete(RootDelDir, Full, false) of + ok -> ok; + {error, eperm} -> + ok = nuke_dir(RootDelDir, Full) + end + end, + Files), + ok = file:del_dir(Dir) + end. + + +% keys come back in the language of btree - tuples. +less_json_ids({JsonA, IdA}, {JsonB, IdB}) -> + case less_json0(JsonA, JsonB) of + 0 -> + IdA < IdB; + Result -> + Result < 0 + end. + +less_json(A,B) -> + less_json0(A,B) < 0. + +less_json0(A,A) -> 0; + +less_json0(A,B) when is_atom(A), is_atom(B) -> atom_sort(A) - atom_sort(B); +less_json0(A,_) when is_atom(A) -> -1; +less_json0(_,B) when is_atom(B) -> 1; + +less_json0(A,B) when is_number(A), is_number(B) -> A - B; +less_json0(A,_) when is_number(A) -> -1; +less_json0(_,B) when is_number(B) -> 1; + +less_json0(A,B) when is_binary(A), is_binary(B) -> couch_util:collate(A,B); +less_json0(A,_) when is_binary(A) -> -1; +less_json0(_,B) when is_binary(B) -> 1; + +less_json0(A,B) when is_list(A), is_list(B) -> less_list(A,B); +less_json0(A,_) when is_list(A) -> -1; +less_json0(_,B) when is_list(B) -> 1; + +less_json0({A},{B}) when is_list(A), is_list(B) -> less_props(A,B); +less_json0({A},_) when is_list(A) -> -1; +less_json0(_,{B}) when is_list(B) -> 1. + +atom_sort(null) -> 1; +atom_sort(false) -> 2; +atom_sort(true) -> 3. + +less_props([], [_|_]) -> + -1; +less_props(_, []) -> + 1; +less_props([{AKey, AValue}|RestA], [{BKey, BValue}|RestB]) -> + case couch_util:collate(AKey, BKey) of + 0 -> + case less_json0(AValue, BValue) of + 0 -> + less_props(RestA, RestB); + Result -> + Result + end; + Result -> + Result + end. + +less_list([], [_|_]) -> + -1; +less_list(_, []) -> + 1; +less_list([A|RestA], [B|RestB]) -> + case less_json0(A,B) of + 0 -> + less_list(RestA, RestB); + Result -> + Result + end. diff --git a/apps/couch/src/couch_view_compactor.erl b/apps/couch/src/couch_view_compactor.erl new file mode 100644 index 00000000..895556bf --- /dev/null +++ b/apps/couch/src/couch_view_compactor.erl @@ -0,0 +1,98 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_compactor). + +-include ("couch_db.hrl"). + +-export([start_compact/2]). + +%% @spec start_compact(DbName::binary(), GroupId:binary()) -> ok +%% @doc Compacts the views. GroupId must not include the _design/ prefix +start_compact(DbName, GroupId) -> + Pid = couch_view:get_group_server(DbName, <<"_design/",GroupId/binary>>), + gen_server:cast(Pid, {start_compact, fun compact_group/2}). + +%%============================================================================= +%% internal functions +%%============================================================================= + +%% @spec compact_group(Group, NewGroup) -> ok +compact_group(Group, EmptyGroup) -> + #group{ + current_seq = Seq, + id_btree = IdBtree, + name = GroupId, + views = Views + } = Group, + + #group{ + db = Db, + id_btree = EmptyIdBtree, + views = EmptyViews + } = EmptyGroup, + + {ok, {Count, _}} = couch_btree:full_reduce(Db#db.fulldocinfo_by_id_btree), + + <<"_design", ShortName/binary>> = GroupId, + DbName = couch_db:name(Db), + TaskName = <<DbName/binary, ShortName/binary>>, + couch_task_status:add_task(<<"View Group Compaction">>, TaskName, <<"">>), + + Fun = fun(KV, {Bt, Acc, TotalCopied}) -> + if TotalCopied rem 10000 =:= 0 -> + couch_task_status:update("Copied ~p of ~p Ids (~p%)", + [TotalCopied, Count, (TotalCopied*100) div Count]), + {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), + {ok, {Bt2, [], TotalCopied+1}}; + true -> + {ok, {Bt, [KV|Acc], TotalCopied+1}} + end + end, + {ok, _, {Bt3, Uncopied, _Total}} = couch_btree:foldl(IdBtree, Fun, + {EmptyIdBtree, [], 0}), + {ok, NewIdBtree} = couch_btree:add(Bt3, lists:reverse(Uncopied)), + + NewViews = lists:map(fun({View, EmptyView}) -> + compact_view(View, EmptyView) + end, lists:zip(Views, EmptyViews)), + + NewGroup = EmptyGroup#group{ + id_btree=NewIdBtree, + views=NewViews, + current_seq=Seq + }, + + Pid = couch_view:get_group_server(DbName, GroupId), + gen_server:cast(Pid, {compact_done, NewGroup}). + +%% @spec compact_view(View, EmptyView, Retry) -> CompactView +compact_view(View, EmptyView) -> + {ok, Count} = couch_view:get_row_count(View), + + %% Key is {Key,DocId} + Fun = fun(KV, {Bt, Acc, TotalCopied}) -> + if TotalCopied rem 10000 =:= 0 -> + couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)", + [View#view.id_num, TotalCopied, Count, (TotalCopied*100) div Count]), + {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), + {ok, {Bt2, [], TotalCopied + 1}}; + true -> + {ok, {Bt, [KV|Acc], TotalCopied + 1}} + end + end, + + {ok, _, {Bt3, Uncopied, _Total}} = couch_btree:foldl(View#view.btree, Fun, + {EmptyView#view.btree, [], 0}), + {ok, NewBt} = couch_btree:add(Bt3, lists:reverse(Uncopied)), + EmptyView#view{btree = NewBt}. + diff --git a/apps/couch/src/couch_view_group.erl b/apps/couch/src/couch_view_group.erl new file mode 100644 index 00000000..f01befdf --- /dev/null +++ b/apps/couch/src/couch_view_group.erl @@ -0,0 +1,592 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_group). +-behaviour(gen_server). + +%% API +-export([start_link/1, request_group/2, request_group_info/1]). +-export([open_db_group/2, open_temp_group/5, design_doc_to_view_group/1,design_root/2]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-include("couch_db.hrl"). + +-record(group_state, { + type, + db_name, + init_args, + group, + updater_pid=nil, + compactor_pid=nil, + waiting_commit=false, + waiting_list=[], + ref_counter=nil +}). + +% api methods +request_group(Pid, Seq) -> + ?LOG_DEBUG("request_group {Pid, Seq} ~p", [{Pid, Seq}]), + case gen_server:call(Pid, {request_group, Seq}, infinity) of + {ok, Group, RefCounter} -> + couch_ref_counter:add(RefCounter), + {ok, Group}; + Error -> + ?LOG_DEBUG("request_group Error ~p", [Error]), + throw(Error) + end. + +request_group_info(Pid) -> + case gen_server:call(Pid, request_group_info) of + {ok, GroupInfoList} -> + {ok, GroupInfoList}; + Error -> + throw(Error) + end. + +% from template +start_link(InitArgs) -> + case gen_server:start_link(couch_view_group, + {InitArgs, self(), Ref = make_ref()}, []) of + {ok, Pid} -> + {ok, Pid}; + ignore -> + receive + {Ref, Pid, Error} -> + case process_info(self(), trap_exit) of + {trap_exit, true} -> receive {'EXIT', Pid, _} -> ok end; + {trap_exit, false} -> ok + end, + Error + end; + Error -> + Error + end. + +% init creates a closure which spawns the appropriate view_updater. +init({InitArgs, ReturnPid, Ref}) -> + process_flag(trap_exit, true), + case prepare_group(InitArgs, false) of + {ok, #group{db=Db, fd=Fd, current_seq=Seq}=Group} -> + case Seq > couch_db:get_update_seq(Db) of + true -> + ReturnPid ! {Ref, self(), {error, invalid_view_seq}}, + ignore; + _ -> + couch_db:monitor(Db), + Owner = self(), + Pid = spawn_link( + fun()-> couch_view_updater:update(Owner, Group) end + ), + {ok, RefCounter} = couch_ref_counter:start([Fd]), + {ok, #group_state{ + db_name=couch_db:name(Db), + init_args=InitArgs, + updater_pid = Pid, + group=Group, + ref_counter=RefCounter}} + end; + Error -> + ReturnPid ! {Ref, self(), Error}, + ignore + end. + + + + +% There are two sources of messages: couch_view, which requests an up to date +% view group, and the couch_view_updater, which when spawned, updates the +% group and sends it back here. We employ a caching mechanism, so that between +% database writes, we don't have to spawn a couch_view_updater with every view +% request. + +% The caching mechanism: each request is submitted with a seq_id for the +% database at the time it was read. We guarantee to return a view from that +% sequence or newer. + +% If the request sequence is higher than our current high_target seq, we set +% that as the highest seqence. If the updater is not running, we launch it. 
+ +handle_call({request_group, RequestSeq}, From, + #group_state{ + db_name=DbName, + group=#group{current_seq=Seq}=Group, + updater_pid=nil, + waiting_list=WaitList + }=State) when RequestSeq > Seq -> + {ok, Db} = couch_db:open_int(DbName, []), + Group2 = Group#group{db=Db}, + Owner = self(), + Pid = spawn_link(fun()-> couch_view_updater:update(Owner, Group2) end), + + {noreply, State#group_state{ + updater_pid=Pid, + group=Group2, + waiting_list=[{From,RequestSeq}|WaitList] + }, infinity}; + + +% If the request seqence is less than or equal to the seq_id of a known Group, +% we respond with that Group. +handle_call({request_group, RequestSeq}, _From, #group_state{ + group = #group{current_seq=GroupSeq} = Group, + ref_counter = RefCounter + } = State) when RequestSeq =< GroupSeq -> + {reply, {ok, Group, RefCounter}, State}; + +% Otherwise: TargetSeq => RequestSeq > GroupSeq +% We've already initiated the appropriate action, so just hold the response until the group is up to the RequestSeq +handle_call({request_group, RequestSeq}, From, + #group_state{waiting_list=WaitList}=State) -> + {noreply, State#group_state{ + waiting_list=[{From, RequestSeq}|WaitList] + }, infinity}; + +handle_call(request_group_info, _From, State) -> + GroupInfo = get_group_info(State), + {reply, {ok, GroupInfo}, State}. + +handle_cast({start_compact, CompactFun}, #group_state{compactor_pid=nil} + = State) -> + #group_state{ + group = #group{name = GroupId, sig = GroupSig} = Group, + init_args = {RootDir, DbName, _} + } = State, + ?LOG_INFO("View index compaction starting for ~s ~s", [DbName, GroupId]), + {ok, Db} = couch_db:open_int(DbName, []), + {ok, Fd} = open_index_file(compact, RootDir, DbName, GroupSig), + NewGroup = reset_file(Db, Fd, DbName, Group), + Pid = spawn_link(fun() -> CompactFun(Group, NewGroup) end), + {noreply, State#group_state{compactor_pid = Pid}}; +handle_cast({start_compact, _}, State) -> + %% compact already running, this is a no-op + {noreply, State}; + +handle_cast({compact_done, #group{current_seq=NewSeq} = NewGroup}, + #group_state{group = #group{current_seq=OldSeq}} = State) + when NewSeq >= OldSeq -> + #group_state{ + group = #group{name=GroupId, fd=OldFd, sig=GroupSig} = Group, + init_args = {RootDir, DbName, _}, + updater_pid = UpdaterPid, + ref_counter = RefCounter + } = State, + + ?LOG_INFO("View index compaction complete for ~s ~s", [DbName, GroupId]), + FileName = index_file_name(RootDir, DbName, GroupSig), + CompactName = index_file_name(compact, RootDir, DbName, GroupSig), + ok = couch_file:delete(RootDir, FileName), + ok = file:rename(CompactName, FileName), + + %% if an updater is running, kill it and start a new one + NewUpdaterPid = + if is_pid(UpdaterPid) -> + unlink(UpdaterPid), + exit(UpdaterPid, view_compaction_complete), + Owner = self(), + spawn_link(fun()-> couch_view_updater:update(Owner, NewGroup) end); + true -> + nil + end, + + %% cleanup old group + unlink(OldFd), + couch_ref_counter:drop(RefCounter), + {ok, NewRefCounter} = couch_ref_counter:start([NewGroup#group.fd]), + case Group#group.db of + nil -> ok; + Else -> couch_db:close(Else) + end, + + self() ! 
delayed_commit, + {noreply, State#group_state{ + group=NewGroup, + ref_counter=NewRefCounter, + compactor_pid=nil, + updater_pid=NewUpdaterPid + }}; +handle_cast({compact_done, NewGroup}, State) -> + #group_state{ + group = #group{name = GroupId, current_seq = CurrentSeq}, + init_args={_RootDir, DbName, _} + } = State, + ?LOG_INFO("View index compaction still behind for ~s ~s -- current: ~p " ++ + "compact: ~p", [DbName, GroupId, CurrentSeq, NewGroup#group.current_seq]), + couch_db:close(NewGroup#group.db), + {ok, Db} = couch_db:open_int(DbName, []), + Pid = spawn_link(fun() -> + {_,Ref} = erlang:spawn_monitor(fun() -> + couch_view_updater:update(nil, NewGroup#group{db = Db}) + end), + receive + {'DOWN', Ref, _, _, {new_group, NewGroup2}} -> + #group{name=GroupId} = NewGroup2, + Pid2 = couch_view:get_group_server(DbName, GroupId), + gen_server:cast(Pid2, {compact_done, NewGroup2}) + end + end), + {noreply, State#group_state{compactor_pid = Pid}}; + +handle_cast({partial_update, Pid, NewGroup}, #group_state{updater_pid=Pid} + = State) -> + #group_state{ + db_name = DbName, + waiting_commit = WaitingCommit + } = State, + NewSeq = NewGroup#group.current_seq, + ?LOG_INFO("checkpointing view update at seq ~p for ~s ~s", [NewSeq, + DbName, NewGroup#group.name]), + if not WaitingCommit -> + erlang:send_after(1000, self(), delayed_commit); + true -> ok + end, + {noreply, State#group_state{group=NewGroup, waiting_commit=true}}; +handle_cast({partial_update, _, _}, State) -> + %% message from an old (probably pre-compaction) updater; ignore + {noreply, State}. + +handle_info(delayed_commit, #group_state{db_name=DbName,group=Group}=State) -> + {ok, Db} = couch_db:open_int(DbName, []), + CommittedSeq = couch_db:get_committed_update_seq(Db), + couch_db:close(Db), + if CommittedSeq >= Group#group.current_seq -> + % save the header + Header = {Group#group.sig, get_index_header_data(Group)}, + ok = couch_file:write_header(Group#group.fd, Header), + {noreply, State#group_state{waiting_commit=false}}; + true -> + % We can't commit the header because the database seq that's fully + % committed to disk is still behind us. If we committed now and the + % database lost those changes our view could be forever out of sync + % with the database. But a crash before we commit these changes, no big + % deal, we only lose incremental changes since last committal. 
+ erlang:send_after(1000, self(), delayed_commit), + {noreply, State#group_state{waiting_commit=true}} + end; + +handle_info({'EXIT', FromPid, {new_group, #group{db=Db}=Group}}, + #group_state{db_name=DbName, + updater_pid=UpPid, + ref_counter=RefCounter, + waiting_list=WaitList, + waiting_commit=WaitingCommit}=State) when UpPid == FromPid -> + ok = couch_db:close(Db), + if not WaitingCommit -> + erlang:send_after(1000, self(), delayed_commit); + true -> ok + end, + case reply_with_group(Group, WaitList, [], RefCounter) of + [] -> + {noreply, State#group_state{waiting_commit=true, waiting_list=[], + group=Group#group{db=nil}, updater_pid=nil}}; + StillWaiting -> + % we still have some waiters, reopen the database and reupdate the index + {ok, Db2} = couch_db:open_int(DbName, []), + Group2 = Group#group{db=Db2}, + Owner = self(), + Pid = spawn_link(fun() -> couch_view_updater:update(Owner, Group2) end), + {noreply, State#group_state{waiting_commit=true, + waiting_list=StillWaiting, group=Group2, updater_pid=Pid}} + end; +handle_info({'EXIT', _, {new_group, _}}, State) -> + %% message from an old (probably pre-compaction) updater; ignore + {noreply, State}; + +handle_info({'EXIT', FromPid, reset}, + #group_state{ + init_args=InitArgs, + updater_pid=UpPid, + group=Group}=State) when UpPid == FromPid -> + ok = couch_db:close(Group#group.db), + case prepare_group(InitArgs, true) of + {ok, ResetGroup} -> + Owner = self(), + Pid = spawn_link(fun()-> couch_view_updater:update(Owner, ResetGroup) end), + {noreply, State#group_state{ + updater_pid=Pid, + group=ResetGroup}}; + Error -> + {stop, normal, reply_all(State, Error)} + end; +handle_info({'EXIT', _, reset}, State) -> + %% message from an old (probably pre-compaction) updater; ignore + {noreply, State}; + +handle_info({'EXIT', _FromPid, normal}, State) -> + {noreply, State}; + +handle_info({'EXIT', FromPid, {{nocatch, Reason}, _Trace}}, State) -> + ?LOG_DEBUG("Uncaught throw() in linked pid: ~p", [{FromPid, Reason}]), + {stop, Reason, State}; + +handle_info({'EXIT', FromPid, Reason}, State) -> + ?LOG_DEBUG("Exit from linked pid: ~p", [{FromPid, Reason}]), + {stop, Reason, State}; + +handle_info({'DOWN',_,_,_,_}, State) -> + ?LOG_INFO("Shutting down view group server, monitored db is closing.", []), + {stop, normal, reply_all(State, shutdown)}. + + +terminate(Reason, #group_state{updater_pid=Update, compactor_pid=Compact}=S) -> + reply_all(S, Reason), + couch_util:shutdown_sync(Update), + couch_util:shutdown_sync(Compact), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% Local Functions + +% reply_with_group/3 +% for each item in the WaitingList {Pid, Seq} +% if the Seq is =< GroupSeq, reply +reply_with_group(Group=#group{current_seq=GroupSeq}, [{Pid, Seq}|WaitList], + StillWaiting, RefCounter) when Seq =< GroupSeq -> + gen_server:reply(Pid, {ok, Group, RefCounter}), + reply_with_group(Group, WaitList, StillWaiting, RefCounter); + +% else +% put it in the continuing waiting list +reply_with_group(Group, [{Pid, Seq}|WaitList], StillWaiting, RefCounter) -> + reply_with_group(Group, WaitList, [{Pid, Seq}|StillWaiting], RefCounter); + +% return the still waiting list +reply_with_group(_Group, [], StillWaiting, _RefCounter) -> + StillWaiting. + +reply_all(#group_state{waiting_list=WaitList}=State, Reply) -> + [catch gen_server:reply(Pid, Reply) || {Pid, _} <- WaitList], + State#group_state{waiting_list=[]}. 
+ +prepare_group({RootDir, DbName, #group{sig=Sig}=Group}, ForceReset)-> + case couch_db:open_int(DbName, []) of + {ok, Db} -> + case open_index_file(RootDir, DbName, Sig) of + {ok, Fd} -> + if ForceReset -> + % this can happen if we missed a purge + {ok, reset_file(Db, Fd, DbName, Group)}; + true -> + % 09 UPGRADE CODE + ok = couch_file:upgrade_old_header(Fd, <<$r, $c, $k, 0>>), + case (catch couch_file:read_header(Fd)) of + {ok, {Sig, HeaderInfo}} -> + % sigs match! + {ok, init_group(Db, Fd, Group, HeaderInfo)}; + _ -> + % this happens on a new file + {ok, reset_file(Db, Fd, DbName, Group)} + end + end; + Error -> + catch delete_index_file(RootDir, DbName, Sig), + Error + end; + Else -> + Else + end. + +get_index_header_data(#group{current_seq=Seq, purge_seq=PurgeSeq, + id_btree=IdBtree,views=Views}) -> + ViewStates = [couch_btree:get_state(Btree) || #view{btree=Btree} <- Views], + #index_header{seq=Seq, + purge_seq=PurgeSeq, + id_btree_state=couch_btree:get_state(IdBtree), + view_states=ViewStates}. + +hex_sig(GroupSig) -> + couch_util:to_hex(?b2l(GroupSig)). + +design_root(RootDir, DbName) -> + RootDir ++ "/." ++ ?b2l(DbName) ++ "_design/". + +index_file_name(RootDir, DbName, GroupSig) -> + design_root(RootDir, DbName) ++ hex_sig(GroupSig) ++".view". + +index_file_name(compact, RootDir, DbName, GroupSig) -> + design_root(RootDir, DbName) ++ hex_sig(GroupSig) ++".compact.view". + + +open_index_file(RootDir, DbName, GroupSig) -> + FileName = index_file_name(RootDir, DbName, GroupSig), + case couch_file:open(FileName) of + {ok, Fd} -> {ok, Fd}; + {error, enoent} -> couch_file:open(FileName, [create]); + Error -> Error + end. + +open_index_file(compact, RootDir, DbName, GroupSig) -> + FileName = index_file_name(compact, RootDir, DbName, GroupSig), + case couch_file:open(FileName) of + {ok, Fd} -> {ok, Fd}; + {error, enoent} -> couch_file:open(FileName, [create]); + Error -> Error + end. + +open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc) -> + case couch_db:open_int(DbName, []) of + {ok, Db} -> + View = #view{map_names=[<<"_temp">>], + id_num=0, + btree=nil, + def=MapSrc, + reduce_funs= if RedSrc==[] -> []; true -> [{<<"_temp">>, RedSrc}] end, + options=DesignOptions}, + + {ok, Db, set_view_sig(#group{name = <<"_temp">>, db=Db, views=[View], + def_lang=Language, design_options=DesignOptions})}; + Error -> + Error + end. + +set_view_sig(#group{ + views=Views, + def_lang=Language, + design_options=DesignOptions}=G) -> + G#group{sig=couch_util:md5(term_to_binary({Views, Language, DesignOptions}))}. + +open_db_group(DbName, GroupId) -> + case couch_db:open_int(DbName, []) of + {ok, Db} -> + case couch_db:open_doc(Db, GroupId) of + {ok, Doc} -> + {ok, Db, design_doc_to_view_group(Doc)}; + Else -> + couch_db:close(Db), + Else + end; + Else -> + Else + end. + +get_group_info(State) -> + #group_state{ + group=Group, + updater_pid=UpdaterPid, + compactor_pid=CompactorPid, + waiting_commit=WaitingCommit, + waiting_list=WaitersList + } = State, + #group{ + fd = Fd, + sig = GroupSig, + def_lang = Lang, + current_seq=CurrentSeq, + purge_seq=PurgeSeq + } = Group, + {ok, Size} = couch_file:bytes(Fd), + [ + {signature, ?l2b(hex_sig(GroupSig))}, + {language, Lang}, + {disk_size, Size}, + {updater_running, UpdaterPid /= nil}, + {compact_running, CompactorPid /= nil}, + {waiting_commit, WaitingCommit}, + {waiting_clients, length(WaitersList)}, + {update_seq, CurrentSeq}, + {purge_seq, PurgeSeq} + ]. 
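
Index file locations are derived purely from the root directory, the database name and the hex form of the group signature; with hypothetical inputs RootDir = "/srv/couch/views", DbName = <<"db1">> and a signature whose hex form is "00112233445566778899aabbccddeeff", the helpers above give:

    %% index_file_name(RootDir, DbName, Sig)
    %%   -> "/srv/couch/views/.db1_design/00112233445566778899aabbccddeeff.view"
    %% index_file_name(compact, RootDir, DbName, Sig)
    %%   -> "/srv/couch/views/.db1_design/00112233445566778899aabbccddeeff.compact.view"
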
+ +% maybe move to another module +design_doc_to_view_group(#doc{id=Id,body={Fields}}) -> + Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>), + {DesignOptions} = couch_util:get_value(<<"options">>, Fields, {[]}), + {RawViews} = couch_util:get_value(<<"views">>, Fields, {[]}), + % add the views to a dictionary object, with the map source as the key + DictBySrc = + lists:foldl( + fun({Name, {MRFuns}}, DictBySrcAcc) -> + MapSrc = couch_util:get_value(<<"map">>, MRFuns), + RedSrc = couch_util:get_value(<<"reduce">>, MRFuns, null), + {ViewOptions} = couch_util:get_value(<<"options">>, MRFuns, {[]}), + View = + case dict:find({MapSrc, ViewOptions}, DictBySrcAcc) of + {ok, View0} -> View0; + error -> #view{def=MapSrc, options=ViewOptions} % create new view object + end, + View2 = + if RedSrc == null -> + View#view{map_names=[Name|View#view.map_names]}; + true -> + View#view{reduce_funs=[{Name,RedSrc}|View#view.reduce_funs]} + end, + dict:store({MapSrc, ViewOptions}, View2, DictBySrcAcc) + end, dict:new(), RawViews), + % number the views + {Views, _N} = lists:mapfoldl( + fun({_Src, View}, N) -> + {View#view{id_num=N},N+1} + end, 0, lists:sort(dict:to_list(DictBySrc))), + + set_view_sig(#group{name=Id, views=Views, def_lang=Language, design_options=DesignOptions}). + +reset_group(#group{views=Views}=Group) -> + Views2 = [View#view{btree=nil} || View <- Views], + Group#group{db=nil,fd=nil,query_server=nil,current_seq=0, + id_btree=nil,views=Views2}. + +reset_file(Db, Fd, DbName, #group{sig=Sig,name=Name} = Group) -> + ?LOG_DEBUG("Resetting group index \"~s\" in db ~s", [Name, DbName]), + ok = couch_file:truncate(Fd, 0), + ok = couch_file:write_header(Fd, {Sig, nil}), + init_group(Db, Fd, reset_group(Group), nil). + +delete_index_file(RootDir, DbName, GroupSig) -> + couch_file:delete(RootDir, index_file_name(RootDir, DbName, GroupSig)). + +init_group(Db, Fd, #group{views=Views}=Group, nil) -> + init_group(Db, Fd, Group, + #index_header{seq=0, purge_seq=couch_db:get_purge_seq(Db), + id_btree_state=nil, view_states=[nil || _ <- Views]}); +init_group(Db, Fd, #group{def_lang=Lang,views=Views}= + Group, IndexHeader) -> + #index_header{seq=Seq, purge_seq=PurgeSeq, + id_btree_state=IdBtreeState, view_states=ViewStates} = IndexHeader, + {ok, IdBtree} = couch_btree:open(IdBtreeState, Fd), + Views2 = lists:zipwith( + fun(BtreeState, #view{reduce_funs=RedFuns,options=Options}=View) -> + FunSrcs = [FunSrc || {_Name, FunSrc} <- RedFuns], + ReduceFun = + fun(reduce, KVs) -> + KVs2 = couch_view:expand_dups(KVs,[]), + KVs3 = couch_view:detuple_kvs(KVs2,[]), + {ok, Reduced} = couch_query_servers:reduce(Lang, FunSrcs, + KVs3), + {length(KVs3), Reduced}; + (rereduce, Reds) -> + Count = lists:sum([Count0 || {Count0, _} <- Reds]), + UserReds = [UserRedsList || {_, UserRedsList} <- Reds], + {ok, Reduced} = couch_query_servers:rereduce(Lang, FunSrcs, + UserReds), + {Count, Reduced} + end, + + case couch_util:get_value(<<"collation">>, Options, <<"default">>) of + <<"default">> -> + Less = fun couch_view:less_json_ids/2; + <<"raw">> -> + Less = fun(A,B) -> A < B end + end, + {ok, Btree} = couch_btree:open(BtreeState, Fd, + [{less, Less}, + {reduce, ReduceFun}]), + View#view{btree=Btree} + end, + ViewStates, Views), + Group#group{db=Db, fd=Fd, current_seq=Seq, purge_seq=PurgeSeq, + id_btree=IdBtree, views=Views2}. 
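
design_doc_to_view_group/1 above groups views by identical {map source, view options} pairs, so several view names that share a map function end up in a single #view{} record (and therefore a single btree). A hedged illustration with a made-up design doc:

    %% "_design/blog" defining
    %%   "by_tag"    -> map M1, no reduce
    %%   "tag_count" -> map M1, reduce <<"_count">>
    %%   "by_date"   -> map M2, no reduce
    %% yields two #view{} records:
    %%   #view{def = M1, map_names = [<<"by_tag">>],
    %%         reduce_funs = [{<<"tag_count">>, <<"_count">>}]}
    %%   #view{def = M2, map_names = [<<"by_date">>]}
    %% with id_num assigned by position after sorting on {MapSrc, Options}.
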
+ + diff --git a/apps/couch/src/couch_view_updater.erl b/apps/couch/src/couch_view_updater.erl new file mode 100644 index 00000000..2a9c960f --- /dev/null +++ b/apps/couch/src/couch_view_updater.erl @@ -0,0 +1,252 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_updater). + +-export([update/2]). + +-include("couch_db.hrl"). + +-spec update(_, #group{}) -> no_return(). + +update(Owner, Group) -> + #group{ + db = #db{name=DbName} = Db, + name = GroupName, + current_seq = Seq, + purge_seq = PurgeSeq + } = Group, + couch_task_status:add_task(<<"View Group Indexer">>, <<DbName/binary," ",GroupName/binary>>, <<"Starting index update">>), + + DbPurgeSeq = couch_db:get_purge_seq(Db), + Group2 = + if DbPurgeSeq == PurgeSeq -> + Group; + DbPurgeSeq == PurgeSeq + 1 -> + couch_task_status:update(<<"Removing purged entries from view index.">>), + purge_index(Group); + true -> + couch_task_status:update(<<"Resetting view index due to lost purge entries.">>), + exit(reset) + end, + {ok, MapQueue} = couch_work_queue:new(100000, 500), + {ok, WriteQueue} = couch_work_queue:new(100000, 500), + Self = self(), + ViewEmptyKVs = [{View, []} || View <- Group2#group.views], + spawn_link(fun() -> do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) end), + spawn_link(fun() -> do_writes(Self, Owner, Group2, WriteQueue, Seq == 0) end), + % compute on all docs modified since we last computed. + TotalChanges = couch_db:count_changes_since(Db, Seq), + % update status every half second + couch_task_status:set_update_frequency(500), + #group{ design_options = DesignOptions } = Group, + IncludeDesign = couch_util:get_value(<<"include_design">>, + DesignOptions, false), + LocalSeq = couch_util:get_value(<<"local_seq">>, DesignOptions, false), + DocOpts = + case LocalSeq of + true -> [conflicts, deleted_conflicts, local_seq]; + _ -> [conflicts, deleted_conflicts] + end, + {ok, _, _} + = couch_db:enum_docs_since( + Db, + Seq, + fun(DocInfo, _, ChangesProcessed) -> + couch_task_status:update("Processed ~p of ~p changes (~p%)", + [ChangesProcessed, TotalChanges, (ChangesProcessed*100) div TotalChanges]), + load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign), + {ok, ChangesProcessed+1} + end, + 0, []), + couch_task_status:set_update_frequency(0), + couch_task_status:update("Finishing."), + couch_work_queue:close(MapQueue), + receive {new_group, NewGroup} -> + exit({new_group, + NewGroup#group{current_seq=couch_db:get_update_seq(Db)}}) + end. 
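
The update run above is a two-stage pipeline built on couch_work_queue; summarising the code, nothing new added:

    %% couch_db:enum_docs_since/5  --{Seq, Doc}-->  MapQueue
    %% do_maps/4:   dequeue docs, run couch_query_servers:map_docs/2, queue
    %%              {LastSeq, ViewKVs, DocIdViewIdKeys}  -->  WriteQueue
    %% do_writes/5: merge queued batches, apply write_changes/5 to the btrees,
    %%              cast {partial_update, ...} to the group server, and once
    %%              the queue closes send {new_group, Group} back to update/2.
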
+ + +purge_index(#group{db=Db, views=Views, id_btree=IdBtree}=Group) -> + {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db), + Ids = [Id || {Id, _Revs} <- PurgedIdsRevs], + {ok, Lookups, IdBtree2} = couch_btree:query_modify(IdBtree, Ids, [], Ids), + + % now populate the dictionary with all the keys to delete + ViewKeysToRemoveDict = lists:foldl( + fun({ok,{DocId,ViewNumRowKeys}}, ViewDictAcc) -> + lists:foldl( + fun({ViewNum, RowKey}, ViewDictAcc2) -> + dict:append(ViewNum, {RowKey, DocId}, ViewDictAcc2) + end, ViewDictAcc, ViewNumRowKeys); + ({not_found, _}, ViewDictAcc) -> + ViewDictAcc + end, dict:new(), Lookups), + + % Now remove the values from the btrees + Views2 = lists:map( + fun(#view{id_num=Num,btree=Btree}=View) -> + case dict:find(Num, ViewKeysToRemoveDict) of + {ok, RemoveKeys} -> + {ok, Btree2} = couch_btree:add_remove(Btree, [], RemoveKeys), + View#view{btree=Btree2}; + error -> % no keys to remove in this view + View + end + end, Views), + Group#group{id_btree=IdBtree2, + views=Views2, + purge_seq=couch_db:get_purge_seq(Db)}. + + +load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign) -> + #doc_info{id=DocId, high_seq=Seq, revs=[#rev_info{deleted=Deleted}|_]} = DocInfo, + case {IncludeDesign, DocId} of + {false, <<?DESIGN_DOC_PREFIX, _/binary>>} -> % we skip design docs + ok; + _ -> + if Deleted -> + couch_work_queue:queue(MapQueue, {Seq, #doc{id=DocId, deleted=true}}); + true -> + {ok, Doc} = couch_db:open_doc_int(Db, DocInfo, DocOpts), + couch_work_queue:queue(MapQueue, {Seq, Doc}) + end + end. + +do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) -> + case couch_work_queue:dequeue(MapQueue) of + closed -> + couch_work_queue:close(WriteQueue), + couch_query_servers:stop_doc_map(Group#group.query_server); + {ok, Queue} -> + Docs = [Doc || {_,#doc{deleted=false}=Doc} <- Queue], + DelKVs = [{Id, []} || {_, #doc{deleted=true,id=Id}} <- Queue], + LastSeq = lists:max([Seq || {Seq, _Doc} <- Queue]), + {Group1, Results} = view_compute(Group, Docs), + {ViewKVs, DocIdViewIdKeys} = view_insert_query_results(Docs, + Results, ViewEmptyKVs, DelKVs), + couch_work_queue:queue(WriteQueue, {LastSeq, ViewKVs, DocIdViewIdKeys}), + do_maps(Group1, MapQueue, WriteQueue, ViewEmptyKVs) + end. + +do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) -> + case couch_work_queue:dequeue(WriteQueue) of + closed -> + Parent ! {new_group, Group}; + {ok, Queue} -> + {NewSeq, ViewKeyValues, DocIdViewIdKeys} = lists:foldl( + fun({Seq, ViewKVs, DocIdViewIdKeys}, nil) -> + {Seq, ViewKVs, DocIdViewIdKeys}; + ({Seq, ViewKVs, DocIdViewIdKeys}, Acc) -> + {Seq2, AccViewKVs, AccDocIdViewIdKeys} = Acc, + AccViewKVs2 = lists:zipwith( + fun({View, KVsIn}, {_View, KVsAcc}) -> + {View, KVsIn ++ KVsAcc} + end, ViewKVs, AccViewKVs), + {lists:max([Seq, Seq2]), + AccViewKVs2, DocIdViewIdKeys ++ AccDocIdViewIdKeys} + end, nil, Queue), + Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys, NewSeq, + InitialBuild), + case Owner of + nil -> ok; + _ -> ok = gen_server:cast(Owner, {partial_update, Parent, Group2}) + end, + do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild) + end. 
+ +view_insert_query_results([], [], ViewKVs, DocIdViewIdKeysAcc) -> + {ViewKVs, DocIdViewIdKeysAcc}; +view_insert_query_results([Doc|RestDocs], [QueryResults | RestResults], ViewKVs, DocIdViewIdKeysAcc) -> + {NewViewKVs, NewViewIdKeys} = view_insert_doc_query_results(Doc, QueryResults, ViewKVs, [], []), + NewDocIdViewIdKeys = [{Doc#doc.id, NewViewIdKeys} | DocIdViewIdKeysAcc], + view_insert_query_results(RestDocs, RestResults, NewViewKVs, NewDocIdViewIdKeys). + + +view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) -> + {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)}; +view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs|RestResults], [{View, KVs}|RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) -> + % Take any identical keys and combine the values + ResultKVs2 = lists:foldl( + fun({Key,Value}, [{PrevKey,PrevVal}|AccRest]) -> + case Key == PrevKey of + true -> + case PrevVal of + {dups, Dups} -> + [{PrevKey, {dups, [Value|Dups]}} | AccRest]; + _ -> + [{PrevKey, {dups, [Value,PrevVal]}} | AccRest] + end; + false -> + [{Key,Value},{PrevKey,PrevVal}|AccRest] + end; + (KV, []) -> + [KV] + end, [], lists:sort(ResultKVs)), + NewKVs = [{{Key, DocId}, Value} || {Key, Value} <- ResultKVs2], + NewViewKVsAcc = [{View, NewKVs ++ KVs} | ViewKVsAcc], + NewViewIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- ResultKVs2], + NewViewIdKeysAcc = NewViewIdKeys ++ ViewIdKeysAcc, + view_insert_doc_query_results(Doc, RestResults, RestViewKVs, NewViewKVsAcc, NewViewIdKeysAcc). + +view_compute(Group, []) -> + {Group, []}; +view_compute(#group{def_lang=DefLang, query_server=QueryServerIn}=Group, Docs) -> + {ok, QueryServer} = + case QueryServerIn of + nil -> % doc map not started + Definitions = [View#view.def || View <- Group#group.views], + couch_query_servers:start_doc_map(DefLang, Definitions); + _ -> + {ok, QueryServerIn} + end, + {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs), + {Group#group{query_server=QueryServer}, Results}. + + + +write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild) -> + #group{id_btree=IdBtree} = Group, + + AddDocIdViewIdKeys = [{DocId, ViewIdKeys} || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys /= []], + if InitialBuild -> + RemoveDocIds = [], + LookupDocIds = []; + true -> + RemoveDocIds = [DocId || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys == []], + LookupDocIds = [DocId || {DocId, _ViewIdKeys} <- DocIdViewIdKeys] + end, + {ok, LookupResults, IdBtree2} + = couch_btree:query_modify(IdBtree, LookupDocIds, AddDocIdViewIdKeys, RemoveDocIds), + KeysToRemoveByView = lists:foldl( + fun(LookupResult, KeysToRemoveByViewAcc) -> + case LookupResult of + {ok, {DocId, ViewIdKeys}} -> + lists:foldl( + fun({ViewId, Key}, KeysToRemoveByViewAcc2) -> + dict:append(ViewId, {Key, DocId}, KeysToRemoveByViewAcc2) + end, + KeysToRemoveByViewAcc, ViewIdKeys); + {not_found, _} -> + KeysToRemoveByViewAcc + end + end, + dict:new(), LookupResults), + Views2 = lists:zipwith(fun(View, {_View, AddKeyValues}) -> + KeysToRemove = couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []), + {ok, ViewBtree2} = couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove), + View#view{btree = ViewBtree2} + end, Group#group.views, ViewKeyValuesToAdd), + Group#group{views=Views2, current_seq=NewSeq, id_btree=IdBtree2}. 
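
The duplicate-key folding in view_insert_doc_query_results/5 above can be traced with a tiny example: for a document <<"doc1">> whose map emitted [{<<"k">>, 1}, {<<"k">>, 2}, {<<"j">>, 3}], the code produces

    %% ResultKVs2 = [{<<"k">>, {dups, [2, 1]}}, {<<"j">>, 3}]
    %% NewKVs     = [{{<<"k">>, <<"doc1">>}, {dups, [2, 1]}},
    %%               {{<<"j">>, <<"doc1">>}, 3}]

so equal keys emitted by one document are stored as a single {dups, Vals} value under the {Key, DocId} btree key.
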
+ + diff --git a/apps/couch/src/couch_work_queue.erl b/apps/couch/src/couch_work_queue.erl new file mode 100644 index 00000000..decfcad8 --- /dev/null +++ b/apps/couch/src/couch_work_queue.erl @@ -0,0 +1,115 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_work_queue). +-behaviour(gen_server). + +-export([new/2,queue/2,dequeue/1,dequeue/2,close/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]). + +-record(q, { + queue=queue:new(), + blocked=[], + max_size, + max_items, + items=0, + size=0, + work_waiter=nil, + close_on_dequeue=false +}). + +new(MaxSize, MaxItems) -> + gen_server:start_link(couch_work_queue, {MaxSize, MaxItems}, []). + +queue(Wq, Item) -> + gen_server:call(Wq, {queue, Item}, infinity). + +dequeue(Wq) -> + dequeue(Wq, all). + +dequeue(Wq, MaxItems) -> + try gen_server:call(Wq, {dequeue, MaxItems}, infinity) + catch + _:_ -> closed + end. + +close(Wq) -> + gen_server:cast(Wq, close). + + +init({MaxSize,MaxItems}) -> + {ok, #q{max_size=MaxSize, max_items=MaxItems}}. + +terminate(_Reason, #q{work_waiter=nil}) -> + ok; +terminate(_Reason, #q{work_waiter={WWFrom, _}}) -> + gen_server:reply(WWFrom, closed). + +handle_call({queue, Item}, From, #q{work_waiter=nil}=Q0) -> + Q = Q0#q{size=Q0#q.size + byte_size(term_to_binary(Item)), + items=Q0#q.items + 1, + queue=queue:in(Item, Q0#q.queue)}, + case (Q#q.size >= Q#q.max_size) orelse + (Q#q.items >= Q#q.max_items) of + true -> + {noreply, Q#q{blocked=[From | Q#q.blocked]}}; + false -> + {reply, ok, Q} + end; +handle_call({queue, Item}, _From, #q{work_waiter={WWFrom, _Max}}=Q) -> + gen_server:reply(WWFrom, {ok, [Item]}), + {reply, ok, Q#q{work_waiter=nil}}; +handle_call({dequeue, _Max}, _From, #q{work_waiter=WW}) when WW /= nil -> + exit("Only one caller allowed to wait for work at a time"); +handle_call({dequeue, Max}, From, #q{items=0}=Q) -> + {noreply, Q#q{work_waiter={From, Max}}}; +handle_call({dequeue, Max}, _From, #q{queue=Queue, max_size=MaxSize, + max_items=MaxItems, items=Items,close_on_dequeue=Close}=Q) -> + if Max >= Items orelse Max == all -> + [gen_server:reply(From, ok) || From <- Q#q.blocked], + Q2 = #q{max_size=MaxSize, max_items=MaxItems}, + if Close -> + {stop, normal, {ok, queue:to_list(Queue)}, Q2}; + true -> + {reply, {ok, queue:to_list(Queue)}, Q2} + end; + true -> + {DequeuedItems, Queue2, Blocked2} = + dequeue_items(Max, Queue, Q#q.blocked, []), + {reply, {ok, DequeuedItems}, + Q#q{items=Items-Max,blocked=Blocked2,queue=Queue2}} + end. + +dequeue_items(0, Queue, Blocked, DequeuedAcc) -> + {lists:reverse(DequeuedAcc), Queue, Blocked}; +dequeue_items(NumItems, Queue, Blocked, DequeuedAcc) -> + {{value, Item}, Queue2} = queue:out(Queue), + case Blocked of + [] -> + Blocked2 = Blocked; + [From|Blocked2] -> + gen_server:reply(From, ok) + end, + dequeue_items(NumItems-1, Queue2, Blocked2, [Item | DequeuedAcc]). + + +handle_cast(close, #q{items=0}=Q) -> + {stop, normal, Q}; +handle_cast(close, Q) -> + {noreply, Q#q{close_on_dequeue=true}}. 
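
A minimal producer/consumer sketch of the couch_work_queue API; the buffer sizes and item values are arbitrary, and demo/0 and drain/1 are made-up names for illustration:

    demo() ->
        {ok, Q} = couch_work_queue:new(100000, 500),
        % producer: queue ten items, then close the queue
        spawn_link(fun() ->
            [ok = couch_work_queue:queue(Q, N) || N <- lists:seq(1, 10)],
            couch_work_queue:close(Q)
        end),
        drain(Q).

    drain(Q) ->
        % consumer: dequeue batches until the queue reports closed
        case couch_work_queue:dequeue(Q) of
            closed -> done;
            {ok, Items} -> io:format("dequeued ~p~n", [Items]), drain(Q)
        end.
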
+ + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info(X, Q) -> + {stop, X, Q}. diff --git a/apps/etap/src/etap.app.src b/apps/etap/src/etap.app.src new file mode 100644 index 00000000..fe6af267 --- /dev/null +++ b/apps/etap/src/etap.app.src @@ -0,0 +1,6 @@ +{application, etap, [ + {description, "TAP compliant testing library"}, + {vsn, "unknown"}, + {registered, []}, + {applications, [kernel, stdlib]} +]}. diff --git a/apps/etap/src/etap.erl b/apps/etap/src/etap.erl new file mode 100644 index 00000000..5ad5dba3 --- /dev/null +++ b/apps/etap/src/etap.erl @@ -0,0 +1,416 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @author Nick Gerakines <nick@gerakines.net> [http://socklabs.com/] +%% @author Jeremy Wall <jeremy@marzhillstudios.com> +%% @version 0.3.4 +%% @copyright 2007-2008 Jeremy Wall, 2008-2009 Nick Gerakines +%% @reference http://testanything.org/wiki/index.php/Main_Page +%% @reference http://en.wikipedia.org/wiki/Test_Anything_Protocol +%% @todo Finish implementing the skip directive. +%% @todo Document the messages handled by this receive loop. +%% @todo Explain in documentation why we use a process to handle test input. +%% @doc etap is a TAP testing module for Erlang components and applications. +%% This module allows developers to test their software using the TAP method. +%% +%% <blockquote cite="http://en.wikipedia.org/wiki/Test_Anything_Protocol"><p> +%% TAP, the Test Anything Protocol, is a simple text-based interface between +%% testing modules in a test harness. TAP started life as part of the test +%% harness for Perl but now has implementations in C/C++, Python, PHP, Perl +%% and probably others by the time you read this. +%% </p></blockquote> +%% +%% The testing process begins by defining a plan using etap:plan/1, running +%% a number of etap tests and then calling eta:end_tests/0. Please refer to +%% the Erlang modules in the t directory of this project for example tests. +-module(etap). +-export([ + ensure_test_server/0, start_etap_server/0, test_server/1, + diag/1, diag/2, plan/1, end_tests/0, not_ok/2, ok/2, is/3, isnt/3, + any/3, none/3, fun_is/3, is_greater/3, skip/1, skip/2, + ensure_coverage_starts/0, ensure_coverage_ends/0, coverage_report/0, + datetime/1, skip/3, bail/0, bail/1 +]). +-record(test_state, {planned = 0, count = 0, pass = 0, fail = 0, skip = 0, skip_reason = ""}). +-vsn("0.3.4"). 
+ +%% @spec plan(N) -> Result +%% N = unknown | skip | {skip, string()} | integer() +%% Result = ok +%% @doc Create a test plan and boot strap the test server. +plan(unknown) -> + ensure_coverage_starts(), + ensure_test_server(), + etap_server ! {self(), plan, unknown}, + ok; +plan(skip) -> + io:format("1..0 # skip~n"); +plan({skip, Reason}) -> + io:format("1..0 # skip ~s~n", [Reason]); +plan(N) when is_integer(N), N > 0 -> + ensure_coverage_starts(), + ensure_test_server(), + etap_server ! {self(), plan, N}, + ok. + +%% @spec end_tests() -> ok +%% @doc End the current test plan and output test results. +%% @todo This should probably be done in the test_server process. +end_tests() -> + ensure_coverage_ends(), + etap_server ! {self(), state}, + State = receive X -> X end, + if + State#test_state.planned == -1 -> + io:format("1..~p~n", [State#test_state.count]); + true -> + ok + end, + case whereis(etap_server) of + undefined -> ok; + _ -> etap_server ! done, ok + end. + +%% @private +ensure_coverage_starts() -> + case os:getenv("COVER") of + false -> ok; + _ -> + BeamDir = case os:getenv("COVER_BIN") of false -> "ebin"; X -> X end, + cover:compile_beam_directory(BeamDir) + end. + +%% @private +%% @doc Attempts to write out any collected coverage data to the cover/ +%% directory. This function should not be called externally, but it could be. +ensure_coverage_ends() -> + case os:getenv("COVER") of + false -> ok; + _ -> + filelib:ensure_dir("cover/"), + Name = lists:flatten([ + io_lib:format("~.16b", [X]) || X <- binary_to_list(erlang:md5( + term_to_binary({make_ref(), now()}) + )) + ]), + cover:export("cover/" ++ Name ++ ".coverdata") + end. + +%% @spec coverage_report() -> ok +%% @doc Use the cover module's covreage report builder to create code coverage +%% reports from recently created coverdata files. +coverage_report() -> + [cover:import(File) || File <- filelib:wildcard("cover/*.coverdata")], + lists:foreach( + fun(Mod) -> + cover:analyse_to_file(Mod, atom_to_list(Mod) ++ "_coverage.txt", []) + end, + cover:imported_modules() + ), + ok. + +bail() -> + bail(""). + +bail(Reason) -> + etap_server ! {self(), diag, "Bail out! " ++ Reason}, + ensure_coverage_ends(), + etap_server ! done, ok, + ok. + + +%% @spec diag(S) -> ok +%% S = string() +%% @doc Print a debug/status message related to the test suite. +diag(S) -> etap_server ! {self(), diag, "# " ++ S}, ok. + +%% @spec diag(Format, Data) -> ok +%% Format = atom() | string() | binary() +%% Data = [term()] +%% UnicodeList = [Unicode] +%% Unicode = int() +%% @doc Print a debug/status message related to the test suite. +%% Function arguments are passed through io_lib:format/2. +diag(Format, Data) -> diag(io_lib:format(Format, Data)). + +%% @spec ok(Expr, Desc) -> Result +%% Expr = true | false +%% Desc = string() +%% Result = true | false +%% @doc Assert that a statement is true. +ok(Expr, Desc) -> mk_tap(Expr == true, Desc). + +%% @spec not_ok(Expr, Desc) -> Result +%% Expr = true | false +%% Desc = string() +%% Result = true | false +%% @doc Assert that a statement is false. +not_ok(Expr, Desc) -> mk_tap(Expr == false, Desc). + +%% @spec is(Got, Expected, Desc) -> Result +%% Got = any() +%% Expected = any() +%% Desc = string() +%% Result = true | false +%% @doc Assert that two values are the same. +is(Got, Expected, Desc) -> + case mk_tap(Got == Expected, Desc) of + false -> + etap_server ! {self(), diag, " ---"}, + etap_server ! {self(), diag, io_lib:format(" description: ~p", [Desc])}, + etap_server ! 
{self(), diag, io_lib:format(" found: ~p", [Got])}, + etap_server ! {self(), diag, io_lib:format(" wanted: ~p", [Expected])}, + etap_server ! {self(), diag, " ..."}, + false; + true -> true + end. + +%% @spec isnt(Got, Expected, Desc) -> Result +%% Got = any() +%% Expected = any() +%% Desc = string() +%% Result = true | false +%% @doc Assert that two values are not the same. +isnt(Got, Expected, Desc) -> mk_tap(Got /= Expected, Desc). + +%% @spec is_greater(ValueA, ValueB, Desc) -> Result +%% ValueA = number() +%% ValueB = number() +%% Desc = string() +%% Result = true | false +%% @doc Assert that an integer is greater than another. +is_greater(ValueA, ValueB, Desc) when is_integer(ValueA), is_integer(ValueB) -> + mk_tap(ValueA > ValueB, Desc). + +%% @spec any(Got, Items, Desc) -> Result +%% Got = any() +%% Items = [any()] +%% Desc = string() +%% Result = true | false +%% @doc Assert that an item is in a list. +any(Got, Items, Desc) -> + is(lists:member(Got, Items), true, Desc). + +%% @spec none(Got, Items, Desc) -> Result +%% Got = any() +%% Items = [any()] +%% Desc = string() +%% Result = true | false +%% @doc Assert that an item is not in a list. +none(Got, Items, Desc) -> + is(lists:member(Got, Items), false, Desc). + +%% @spec fun_is(Fun, Expected, Desc) -> Result +%% Fun = function() +%% Expected = any() +%% Desc = string() +%% Result = true | false +%% @doc Use an anonymous function to assert a pattern match. +fun_is(Fun, Expected, Desc) when is_function(Fun) -> + is(Fun(Expected), true, Desc). + +%% @equiv skip(TestFun, "") +skip(TestFun) when is_function(TestFun) -> + skip(TestFun, ""). + +%% @spec skip(TestFun, Reason) -> ok +%% TestFun = function() +%% Reason = string() +%% @doc Skip a test. +skip(TestFun, Reason) when is_function(TestFun), is_list(Reason) -> + begin_skip(Reason), + catch TestFun(), + end_skip(), + ok. + +%% @spec skip(Q, TestFun, Reason) -> ok +%% Q = true | false | function() +%% TestFun = function() +%% Reason = string() +%% @doc Skips a test conditionally. The first argument to this function can +%% either be the 'true' or 'false' atoms or a function that returns 'true' or +%% 'false'. +skip(QFun, TestFun, Reason) when is_function(QFun), is_function(TestFun), is_list(Reason) -> + case QFun() of + true -> begin_skip(Reason), TestFun(), end_skip(); + _ -> TestFun() + end, + ok; + +skip(Q, TestFun, Reason) when is_function(TestFun), is_list(Reason), Q == true -> + begin_skip(Reason), + TestFun(), + end_skip(), + ok; + +skip(_, TestFun, Reason) when is_function(TestFun), is_list(Reason) -> + TestFun(), + ok. + +%% @private +begin_skip(Reason) -> + etap_server ! {self(), begin_skip, Reason}. + +%% @private +end_skip() -> + etap_server ! {self(), end_skip}. + +% --- +% Internal / Private functions + +%% @private +%% @doc Start the etap_server process if it is not running already. +ensure_test_server() -> + case whereis(etap_server) of + undefined -> + proc_lib:start(?MODULE, start_etap_server,[]); + _ -> + diag("The test server is already running.") + end. + +%% @private +%% @doc Start the etap_server loop and register itself as the etap_server +%% process. +start_etap_server() -> + catch register(etap_server, self()), + proc_lib:init_ack(ok), + etap:test_server(#test_state{ + planned = 0, + count = 0, + pass = 0, + fail = 0, + skip = 0, + skip_reason = "" + }). + + +%% @private +%% @doc The main etap_server receive/run loop. The etap_server receive loop +%% responds to seven messages apperatining to failure or passing of tests. 
+%% It is also used to initiate the testing process with the {_, plan, _} +%% message that clears the current test state. +test_server(State) -> + NewState = receive + {_From, plan, unknown} -> + io:format("# Current time local ~s~n", [datetime(erlang:localtime())]), + io:format("# Using etap version ~p~n", [ proplists:get_value(vsn, proplists:get_value(attributes, etap:module_info())) ]), + State#test_state{ + planned = -1, + count = 0, + pass = 0, + fail = 0, + skip = 0, + skip_reason = "" + }; + {_From, plan, N} -> + io:format("# Current time local ~s~n", [datetime(erlang:localtime())]), + io:format("# Using etap version ~p~n", [ proplists:get_value(vsn, proplists:get_value(attributes, etap:module_info())) ]), + io:format("1..~p~n", [N]), + State#test_state{ + planned = N, + count = 0, + pass = 0, + fail = 0, + skip = 0, + skip_reason = "" + }; + {_From, begin_skip, Reason} -> + State#test_state{ + skip = 1, + skip_reason = Reason + }; + {_From, end_skip} -> + State#test_state{ + skip = 0, + skip_reason = "" + }; + {_From, pass, Desc} -> + FullMessage = skip_diag( + " - " ++ Desc, + State#test_state.skip, + State#test_state.skip_reason + ), + io:format("ok ~p ~s~n", [State#test_state.count + 1, FullMessage]), + State#test_state{ + count = State#test_state.count + 1, + pass = State#test_state.pass + 1 + }; + + {_From, fail, Desc} -> + FullMessage = skip_diag( + " - " ++ Desc, + State#test_state.skip, + State#test_state.skip_reason + ), + io:format("not ok ~p ~s~n", [State#test_state.count + 1, FullMessage]), + State#test_state{ + count = State#test_state.count + 1, + fail = State#test_state.fail + 1 + }; + {From, state} -> + From ! State, + State; + {_From, diag, Message} -> + io:format("~s~n", [Message]), + State; + {From, count} -> + From ! State#test_state.count, + State; + {From, is_skip} -> + From ! State#test_state.skip, + State; + done -> + exit(normal) + end, + test_server(NewState). + +%% @private +%% @doc Process the result of a test and send it to the etap_server process. +mk_tap(Result, Desc) -> + IsSkip = lib:sendw(etap_server, is_skip), + case [IsSkip, Result] of + [_, true] -> + etap_server ! {self(), pass, Desc}, + true; + [1, _] -> + etap_server ! {self(), pass, Desc}, + true; + _ -> + etap_server ! {self(), fail, Desc}, + false + end. + +%% @private +%% @doc Format a date/time string. +datetime(DateTime) -> + {{Year, Month, Day}, {Hour, Min, Sec}} = DateTime, + io_lib:format("~4.10.0B-~2.10.0B-~2.10.0B ~2.10.0B:~2.10.0B:~2.10.0B", [Year, Month, Day, Hour, Min, Sec]). + +%% @private +%% @doc Craft an output message taking skip/todo into consideration. +skip_diag(Message, 0, _) -> + Message; +skip_diag(_Message, 1, "") -> + " # SKIP"; +skip_diag(_Message, 1, Reason) -> + " # SKIP : " ++ Reason. 
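For orientation, a minimal TAP test script in the style this documentation describes might look like the following; the file name and assertions are invented for the example:

    #!/usr/bin/env escript
    %% Illustrative etap test script (e.g. t/001-example.t).
    main(_) ->
        etap:plan(3),                                              % announce three tests
        etap:ok(1 + 1 =:= 2, "arithmetic still works"),
        etap:is(lists:reverse([1, 2, 3]), [3, 2, 1], "lists:reverse/1"),
        etap:isnt(foo, bar, "distinct atoms are not equal"),
        etap:end_tests().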
diff --git a/apps/etap/src/etap_application.erl b/apps/etap/src/etap_application.erl new file mode 100644 index 00000000..98b52751 --- /dev/null +++ b/apps/etap/src/etap_application.erl @@ -0,0 +1,72 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @author Nick Gerakines <nick@gerakines.net> [http://socklabs.com/] +%% @copyright 2008 Nick Gerakines +%% @reference http://testanything.org/wiki/index.php/Main_Page +%% @reference http://en.wikipedia.org/wiki/Test_Anything_Protocol +%% @todo Explain in documentation why we use a process to handle test input. +%% @todo Add test to verify the number of members in a pg2 group. +%% @doc Provide test functionality to the application and related behaviors. +-module(etap_application). +-export([ + start_ok/2, ensure_loaded/3, load_ok/2, + pg2_group_exists/2, pg2_group_doesntexist/2 +]). + +%% @spec load_ok(string(), string()) -> true | false +%% @doc Assert that an application can be loaded successfully. +load_ok(AppName, Desc) -> + etap:ok(application:load(AppName) == ok, Desc). + +%% @spec start_ok(string(), string()) -> true | false +%% @doc Assert that an application can be started successfully. +start_ok(AppName, Desc) -> + etap:ok(application:start(AppName) == ok, Desc). + +%% @spec ensure_loaded(string(), string(), string()) -> true | false +%% @doc Assert that an application has been loaded successfully. +ensure_loaded(AppName, AppVsn, Desc) -> + etap:any( + fun(Match) -> case Match of {AppName, _, AppVsn} -> true; _ -> false end end, + application:loaded_applications(), + Desc + ). + +%% @spec pg2_group_exists(string(), string()) -> true | false +%% @doc Assert that a pg2 group exists. +pg2_group_exists(GroupName, Desc) -> + etap:any( + fun(Match) -> Match == GroupName end, + pg2:which_groups(), + Desc + ). + +%% @spec pg2_group_doesntexist(string(), string()) -> true | false +%% @doc Assert that a pg2 group does not exists. +pg2_group_doesntexist(GroupName, Desc) -> + etap:none( + fun(Match) -> Match == GroupName end, + pg2:which_groups(), + Desc + ). 
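A hedged usage sketch for these helpers; the application (sasl) and the version string are placeholders, and load_ok/2 reports a failure if the application was already loaded:

    % Illustrative only.
    main(_) ->
        etap:plan(3),
        etap_application:load_ok(sasl, "sasl loads"),
        etap_application:start_ok(sasl, "sasl starts"),
        etap_application:ensure_loaded(sasl, "2.1.9", "sasl loaded at the assumed version"),
        etap:end_tests().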
diff --git a/apps/etap/src/etap_can.erl b/apps/etap/src/etap_can.erl new file mode 100644 index 00000000..552b7174 --- /dev/null +++ b/apps/etap/src/etap_can.erl @@ -0,0 +1,79 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @reference http://testanything.org/wiki/index.php/Main_Page +%% @reference http://en.wikipedia.org/wiki/Test_Anything_Protocol +%% @doc Provide test functionality modules +-module(etap_can). + +-export([ + loaded_ok/2, can_ok/2, can_ok/3, + has_attrib/2, is_attrib/3, is_behaviour/2 +]). + +%% @spec loaded_ok(atom(), string()) -> true | false +%% @doc Assert that a module has been loaded successfully. +loaded_ok(M, Desc) when is_atom(M) -> + etap:fun_is(fun({module, _}) -> true; (_) -> false end, code:load_file(M), Desc). + +%% @spec can_ok(atom(), atom()) -> true | false +%% @doc Assert that a module exports a given function. +can_ok(M, F) when is_atom(M), is_atom(F) -> + Matches = [X || {X, _} <- M:module_info(exports), X == F], + etap:ok(Matches > 0, lists:concat([M, " can ", F])). + +%% @spec can_ok(atom(), atom(), integer()) -> true | false +%% @doc Assert that a module exports a given function with a given arity. +can_ok(M, F, A) when is_atom(M); is_atom(F), is_number(A) -> + Matches = [X || X <- M:module_info(exports), X == {F, A}], + etap:ok(Matches > 0, lists:concat([M, " can ", F, "/", A])). + +%% @spec has_attrib(M, A) -> true | false +%% M = atom() +%% A = atom() +%% @doc Asserts that a module has a given attribute. +has_attrib(M, A) when is_atom(M), is_atom(A) -> + etap:isnt( + proplists:get_value(A, M:module_info(attributes), 'asdlkjasdlkads'), + 'asdlkjasdlkads', + lists:concat([M, " has attribute ", A]) + ). + +%% @spec has_attrib(M, A. V) -> true | false +%% M = atom() +%% A = atom() +%% V = any() +%% @doc Asserts that a module has a given attribute with a given value. +is_attrib(M, A, V) when is_atom(M) andalso is_atom(A) -> + etap:is( + proplists:get_value(A, M:module_info(attributes)), + [V], + lists:concat([M, "'s ", A, " is ", V]) + ). + +%% @spec is_behavior(M, B) -> true | false +%% M = atom() +%% B = atom() +%% @doc Asserts that a given module has a specific behavior. +is_behaviour(M, B) when is_atom(M) andalso is_atom(B) -> + is_attrib(M, behaviour, B). 
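A short usage sketch for the introspection helpers above; the inspected modules are arbitrary examples (couch_work_queue is the gen_server added earlier in this changeset):

    % Illustrative only.
    main(_) ->
        etap:plan(3),
        etap_can:loaded_ok(lists, "lists module loads"),
        etap_can:can_ok(lists, reverse, 1),
        etap_can:is_behaviour(couch_work_queue, gen_server),
        etap:end_tests().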
diff --git a/apps/etap/src/etap_exception.erl b/apps/etap/src/etap_exception.erl new file mode 100644 index 00000000..ba660727 --- /dev/null +++ b/apps/etap/src/etap_exception.erl @@ -0,0 +1,66 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @reference http://testanything.org/wiki/index.php/Main_Page +%% @reference http://en.wikipedia.org/wiki/Test_Anything_Protocol +%% @doc Adds exception based testing to the etap suite. +-module(etap_exception). + +-export([dies_ok/2, lives_ok/2, throws_ok/3]). + +% --- +% External / Public functions + +%% @doc Assert that an exception is raised when running a given function. +dies_ok(F, Desc) -> + case (catch F()) of + {'EXIT', _} -> etap:ok(true, Desc); + _ -> etap:ok(false, Desc) + end. + +%% @doc Assert that an exception is not raised when running a given function. +lives_ok(F, Desc) -> + etap:is(try_this(F), success, Desc). + +%% @doc Assert that the exception thrown by a function matches the given exception. +throws_ok(F, Exception, Desc) -> + try F() of + _ -> etap:ok(nok, Desc) + catch + _:E -> + etap:is(E, Exception, Desc) + end. + +% --- +% Internal / Private functions + +%% @private +%% @doc Run a function and catch any exceptions. +try_this(F) when is_function(F, 0) -> + try F() of + _ -> success + catch + throw:E -> {throw, E}; + error:E -> {error, E}; + exit:E -> {exit, E} + end. diff --git a/apps/etap/src/etap_process.erl b/apps/etap/src/etap_process.erl new file mode 100644 index 00000000..69f5ba00 --- /dev/null +++ b/apps/etap/src/etap_process.erl @@ -0,0 +1,42 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. 
+%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @doc Adds process/pid testing to the etap suite. +-module(etap_process). + +-export([is_pid/2, is_alive/2, is_mfa/3]). + +% --- +% External / Public functions + +%% @doc Assert that a given variable is a pid. +is_pid(Pid, Desc) when is_pid(Pid) -> etap:ok(true, Desc); +is_pid(_, Desc) -> etap:ok(false, Desc). + +%% @doc Assert that a given process/pid is alive. +is_alive(Pid, Desc) -> + etap:ok(erlang:is_process_alive(Pid), Desc). + +%% @doc Assert that the current function of a pid is a given {M, F, A} tuple. +is_mfa(Pid, MFA, Desc) -> + etap:is({current_function, MFA}, erlang:process_info(Pid, current_function), Desc). diff --git a/apps/etap/src/etap_report.erl b/apps/etap/src/etap_report.erl new file mode 100644 index 00000000..6d692fb6 --- /dev/null +++ b/apps/etap/src/etap_report.erl @@ -0,0 +1,343 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @doc A module for creating nice looking code coverage reports. +-module(etap_report). +-export([create/0]). + +%% @spec create() -> ok +%% @doc Create html code coverage reports for each module that code coverage +%% data exists for. +create() -> + [cover:import(File) || File <- filelib:wildcard("cover/*.coverdata")], + Modules = lists:foldl( + fun(Module, Acc) -> + [{Module, file_report(Module)} | Acc] + end, + [], + cover:imported_modules() + ), + index(Modules). 
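The process assertions defined just above (is_pid/2, is_alive/2, is_mfa/3) are typically used along these lines; the spawned function is purely illustrative:

    % Illustrative only.
    main(_) ->
        etap:plan(2),
        Pid = spawn(fun() -> receive stop -> ok end end),
        etap_process:is_pid(Pid, "spawn/1 returned a pid"),
        etap_process:is_alive(Pid, "the process is still running"),
        Pid ! stop,
        etap:end_tests().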
+ +%% @private +index(Modules) -> + {ok, IndexFD} = file:open("cover/index.html", [write]), + io:format(IndexFD, "<html><head><style> + table.percent_graph { height: 12px; border:1px solid #E2E6EF; empty-cells: show; } + table.percent_graph td.covered { height: 10px; background: #00f000; } + table.percent_graph td.uncovered { height: 10px; background: #e00000; } + .odd { background-color: #ddd; } + .even { background-color: #fff; } + </style></head>", []), + io:format(IndexFD, "<body>", []), + lists:foldl( + fun({Module, {Good, Bad, Source}}, LastRow) -> + case {Good + Bad, Source} of + {0, _} -> LastRow; + {_, none} -> LastRow; + _ -> + CovPer = round((Good / (Good + Bad)) * 100), + UnCovPer = round((Bad / (Good + Bad)) * 100), + RowClass = case LastRow of 1 -> "odd"; _ -> "even" end, + io:format(IndexFD, "<div class=\"~s\">", [RowClass]), + io:format(IndexFD, "<a href=\"~s\">~s</a>", [atom_to_list(Module) ++ "_report.html", atom_to_list(Module)]), + io:format(IndexFD, " + <table cellspacing='0' cellpadding='0' align='right'> + <tr> + <td><tt>~p%</tt> </td><td> + <table cellspacing='0' class='percent_graph' cellpadding='0' width='100'> + <tr><td class='covered' width='~p' /><td class='uncovered' width='~p' /></tr> + </table> + </td> + </tr> + </table> + ", [CovPer, CovPer, UnCovPer]), + io:format(IndexFD, "</div>", []), + case LastRow of + 1 -> 0; + 0 -> 1 + end + end + end, + 0, + lists:sort(Modules) + ), + {TotalGood, TotalBad} = lists:foldl( + fun({_, {Good, Bad, Source}}, {TGood, TBad}) -> + case Source of none -> {TGood, TBad}; _ -> {TGood + Good, TBad + Bad} end + end, + {0, 0}, + Modules + ), + io:format(IndexFD, "<p>Generated on ~s.</p>~n", [etap:datetime({date(), time()})]), + case TotalGood + TotalBad of + 0 -> ok; + _ -> + TotalCovPer = round((TotalGood / (TotalGood + TotalBad)) * 100), + TotalUnCovPer = round((TotalBad / (TotalGood + TotalBad)) * 100), + io:format(IndexFD, "<div>", []), + io:format(IndexFD, "Total + <table cellspacing='0' cellpadding='0' align='right'> + <tr> + <td><tt>~p%</tt> </td><td> + <table cellspacing='0' class='percent_graph' cellpadding='0' width='100'> + <tr><td class='covered' width='~p' /><td class='uncovered' width='~p' /></tr> + </table> + </td> + </tr> + </table> + ", [TotalCovPer, TotalCovPer, TotalUnCovPer]), + io:format(IndexFD, "</div>", []) + end, + io:format(IndexFD, "</body></html>", []), + file:close(IndexFD), + ok. + +%% @private +file_report(Module) -> + {ok, Data} = cover:analyse(Module, calls, line), + Source = find_source(Module), + {Good, Bad} = collect_coverage(Data, {0, 0}), + case {Source, Good + Bad} of + {none, _} -> ok; + {_, 0} -> ok; + _ -> + {ok, SourceFD} = file:open(Source, [read]), + {ok, WriteFD} = file:open("cover/" ++ atom_to_list(Module) ++ "_report.html", [write]), + io:format(WriteFD, "~s", [header(Module, Good, Bad)]), + output_lines(Data, WriteFD, SourceFD, 1), + io:format(WriteFD, "~s", [footer()]), + file:close(WriteFD), + file:close(SourceFD), + ok + end, + {Good, Bad, Source}. + +%% @private +collect_coverage([], Acc) -> Acc; +collect_coverage([{{_, _}, 0} | Data], {Good, Bad}) -> + collect_coverage(Data, {Good, Bad + 1}); +collect_coverage([_ | Data], {Good, Bad}) -> + collect_coverage(Data, {Good + 1, Bad}). 
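collect_coverage/2 above simply counts a line as uncovered when cover recorded zero calls for it; a worked example with invented data:

    % Inside this module, with line data in cover's {{Module, Line}, Calls} form:
    %     collect_coverage([{{m, 1}, 4}, {{m, 2}, 0}, {{m, 3}, 1}], {0, 0})
    % returns {2, 1}: two lines executed at least once, one line never executed.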
+ +%% @private +output_lines(Data, WriteFD, SourceFD, LineNumber) -> + {Match, NextData} = datas_match(Data, LineNumber), + case io:get_line(SourceFD, '') of + eof -> ok; + Line = "%% @todo" ++ _ -> + io:format(WriteFD, "~s", [out_line(LineNumber, highlight, Line)]), + output_lines(NextData, WriteFD, SourceFD, LineNumber + 1); + Line = "% " ++ _ -> + io:format(WriteFD, "~s", [out_line(LineNumber, none, Line)]), + output_lines(NextData, WriteFD, SourceFD, LineNumber + 1); + Line -> + case Match of + {true, CC} -> + io:format(WriteFD, "~s", [out_line(LineNumber, CC, Line)]), + output_lines(NextData, WriteFD, SourceFD, LineNumber + 1); + false -> + io:format(WriteFD, "~s", [out_line(LineNumber, none, Line)]), + output_lines(NextData, WriteFD, SourceFD, LineNumber + 1) + end + end. + +%% @private +out_line(Number, none, Line) -> + PadNu = string:right(integer_to_list(Number), 5, $.), + io_lib:format("<span class=\"marked\"><a name=\"line~p\"></a>~s ~s</span>", [Number, PadNu, Line]); +out_line(Number, highlight, Line) -> + PadNu = string:right(integer_to_list(Number), 5, $.), + io_lib:format("<span class=\"highlight\"><a name=\"line~p\"></a>~s ~s</span>", [Number, PadNu, Line]); +out_line(Number, 0, Line) -> + PadNu = string:right(integer_to_list(Number), 5, $.), + io_lib:format("<span class=\"uncovered\"><a name=\"line~p\"></a>~s ~s</span>", [Number, PadNu, Line]); +out_line(Number, _, Line) -> + PadNu = string:right(integer_to_list(Number), 5, $.), + io_lib:format("<span class=\"covered\"><a name=\"line~p\"></a>~s ~s</span>", [Number, PadNu, Line]). + +%% @private +datas_match([], _) -> {false, []}; +datas_match([{{_, Line}, CC} | Datas], LineNumber) when Line == LineNumber -> {{true, CC}, Datas}; +datas_match(Data, _) -> {false, Data}. + +%% @private +find_source(Module) when is_atom(Module) -> + Root = filename:rootname(Module), + Dir = filename:dirname(Root), + XDir = case os:getenv("SRC") of false -> "src"; X -> X end, + find_source([ + filename:join([Dir, Root ++ ".erl"]), + filename:join([Dir, "..", "src", Root ++ ".erl"]), + filename:join([Dir, "src", Root ++ ".erl"]), + filename:join([Dir, "elibs", Root ++ ".erl"]), + filename:join([Dir, "..", "elibs", Root ++ ".erl"]), + filename:join([Dir, XDir, Root ++ ".erl"]) + ]); +find_source([]) -> none; +find_source([Test | Tests]) -> + case filelib:is_file(Test) of + true -> Test; + false -> find_source(Tests) + end. 
+ +%% @private +header(Module, Good, Bad) -> + io:format("Good ~p~n", [Good]), + io:format("Bad ~p~n", [Bad]), + CovPer = round((Good / (Good + Bad)) * 100), + UnCovPer = round((Bad / (Good + Bad)) * 100), + io:format("CovPer ~p~n", [CovPer]), + io_lib:format("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"> + <html lang='en' xml:lang='en' xmlns='http://www.w3.org/1999/xhtml'> + <head> + <title>~s - C0 code coverage information</title> + <style type='text/css'>body { background-color: rgb(240, 240, 245); }</style> + <style type='text/css'>span.marked0 { + background-color: rgb(185, 210, 200); + display: block; + } + span.marked { display: block; background-color: #ffffff; } + span.highlight { display: block; background-color: #fff9d7; } + span.covered { display: block; background-color: #f7f7f7 ; } + span.uncovered { display: block; background-color: #ffebe8 ; } + span.overview { + border-bottom: 1px solid #E2E6EF; + } + div.overview { + border-bottom: 1px solid #E2E6EF; + } + body { + font-family: verdana, arial, helvetica; + } + div.footer { + font-size: 68%; + margin-top: 1.5em; + } + h1, h2, h3, h4, h5, h6 { + margin-bottom: 0.5em; + } + h5 { + margin-top: 0.5em; + } + .hidden { + display: none; + } + div.separator { + height: 10px; + } + table.percent_graph { + height: 12px; + border: 1px solid #E2E6EF; + empty-cells: show; + } + table.percent_graph td.covered { + height: 10px; + background: #00f000; + } + table.percent_graph td.uncovered { + height: 10px; + background: #e00000; + } + table.percent_graph td.NA { + height: 10px; + background: #eaeaea; + } + table.report { + border-collapse: collapse; + width: 100%; + } + table.report td.heading { + background: #dcecff; + border: 1px solid #E2E6EF; + font-weight: bold; + text-align: center; + } + table.report td.heading:hover { + background: #c0ffc0; + } + table.report td.text { + border: 1px solid #E2E6EF; + } + table.report td.value { + text-align: right; + border: 1px solid #E2E6EF; + } + table.report tr.light { + background-color: rgb(240, 240, 245); + } + table.report tr.dark { + background-color: rgb(230, 230, 235); + } + </style> + </head> + <body> + <h3>C0 code coverage information</h3> + <p>Generated on ~s with <a href='http://github.com/ngerakines/etap'>etap 0.3.4</a>. + </p> + <table class='report'> + <thead> + <tr> + <td class='heading'>Name</td> + <td class='heading'>Total lines</td> + <td class='heading'>Lines of code</td> + <td class='heading'>Total coverage</td> + <td class='heading'>Code coverage</td> + </tr> + </thead> + <tbody> + <tr class='light'> + + <td> + <a href='~s'>~s</a> + </td> + <td class='value'> + <tt>??</tt> + </td> + <td class='value'> + <tt>??</tt> + </td> + <td class='value'> + <tt>??</tt> + </td> + <td> + <table cellspacing='0' cellpadding='0' align='right'> + <tr> + <td><tt>~p%</tt> </td><td> + <table cellspacing='0' class='percent_graph' cellpadding='0' width='100'> + <tr><td class='covered' width='~p' /><td class='uncovered' width='~p' /></tr> + </table> + </td> + </tr> + </table> + </td> + </tr> + </tbody> + </table><pre>", [Module, etap:datetime({date(), time()}), atom_to_list(Module) ++ "_report.html", Module, CovPer, CovPer, UnCovPer]). + +%% @private +footer() -> + "</pre><hr /><p>Generated using <a href='http://github.com/ngerakines/etap'>etap 0.3.4</a>.</p> + </body> + </html> + ". 
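Tying the coverage pieces together: etap exports .coverdata files when the COVER environment variable is set (see ensure_coverage_ends/0 earlier), and etap_report:create/0 renders them as HTML. A rough workflow, with the test path assumed rather than prescribed:

    % Run the suite with coverage enabled, e.g.
    %     COVER=1 COVER_BIN=ebin escript t/001-example.t
    % which leaves cover/<hash>.coverdata files behind. Then, from an Erlang
    % shell in the same directory:
    ok = etap_report:create().   % writes cover/index.html plus per-module reports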
diff --git a/apps/etap/src/etap_request.erl b/apps/etap/src/etap_request.erl new file mode 100644 index 00000000..9fd23aca --- /dev/null +++ b/apps/etap/src/etap_request.erl @@ -0,0 +1,89 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @doc Provides test functionality against a specific web request. Many of +%% the exported methods can be used to build your own more complex tests. +-module(etap_request, [Method, Url, InHeaders, InBody, Status, OutHeaders, OutBody]). + +-export([status_is/2]). + +-export([ + method/0, url/0, status/0, status_code/0, status_line/0, rheaders/0, + has_rheader/1, rheader/1, rbody/0, header_is/3, body_is/2, + body_has_string/2 +]). + +% --- +% Tests + +%% @doc Assert that response status code is the given status code. +status_is(Code, Desc) -> + etap:is(status_code(), Code, Desc). + +header_is(Name, Value, Desc) -> + etap:is(rheader(Name), Value, Desc). + +body_is(Value, Desc) -> + etap:is(rbody(), Value, Desc). + +body_has_string(String, Desc) when is_list(OutBody), is_list(String) -> + etap_string:contains_ok(OutBody, String, Desc). + +% --- +% Accessor functions + +%% @doc Access a request's method. +method() -> Method. + +%% @doc Access a request's URL. +url() -> Url. + +%% @doc Access a request's status. +status() -> Status. + +%% @doc Access a request's status code. +status_code() -> + {_, Code, _} = Status, + Code. + +%% @doc Access a request's status line. +status_line() -> + {_, _, Line} = Status, + Line. + +%% @doc Access a request's headers. +rheaders() -> OutHeaders. + +%% @doc Dertermine if a specific request header exists. +has_rheader(Key) -> + lists:keymember(Key, 1, OutHeaders). + +%% @doc Return a specific request header. +rheader(Key) -> + case lists:keysearch(Key, 1, OutHeaders) of + false -> undefined; + {value, {Key, Value}} -> Value + end. + +%% @doc Access the request's body. +rbody() -> OutBody. 
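etap_request is a parameterized module; instances are normally produced by etap_web (below), but one can be constructed directly for illustration. The URL, headers and body here are made up, and an etap plan must already be running for the assertions to be reported:

    % Illustrative only; the status tuple mirrors what the http client returns.
    Req = etap_request:new(get, "http://localhost:5984/", [], [],
        {"HTTP/1.1", 200, "OK"}, [{"server", "CouchDB"}], "{\"couchdb\":\"Welcome\"}"),
    Req:status_is(200, "root URL returns 200"),
    Req:header_is("server", "CouchDB", "server header is present"),
    Req:body_has_string("Welcome", "body mentions Welcome").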
diff --git a/apps/etap/src/etap_string.erl b/apps/etap/src/etap_string.erl new file mode 100644 index 00000000..67aa3d54 --- /dev/null +++ b/apps/etap/src/etap_string.erl @@ -0,0 +1,47 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. +%% +%% @author Nick Gerakines <nick@gerakines.net> [http://socklabs.com/] +%% @copyright 2008 Nick Gerakines +%% @doc Provide testing functionality for strings. +-module(etap_string). + +-export([contains_ok/3, is_before/4]). + +%% @spec contains_ok(string(), string(), string()) -> true | false +%% @doc Assert that a string is contained in another string. +contains_ok(Source, String, Desc) -> + etap:isnt( + string:str(Source, String), + 0, + Desc + ). + +%% @spec is_before(string(), string(), string(), string()) -> true | false +%% @doc Assert that a string comes before another string within a larger body. +is_before(Source, StringA, StringB, Desc) -> + etap:is_greater( + string:str(Source, StringB), + string:str(Source, StringA), + Desc + ). diff --git a/apps/etap/src/etap_web.erl b/apps/etap/src/etap_web.erl new file mode 100644 index 00000000..fb7aee16 --- /dev/null +++ b/apps/etap/src/etap_web.erl @@ -0,0 +1,65 @@ +%% Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> +%% +%% Permission is hereby granted, free of charge, to any person +%% obtaining a copy of this software and associated documentation +%% files (the "Software"), to deal in the Software without +%% restriction, including without limitation the rights to use, +%% copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the +%% Software is furnished to do so, subject to the following +%% conditions: +%% +%% The above copyright notice and this permission notice shall be +%% included in all copies or substantial portions of the Software. +%% +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +%% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +%% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +%% NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +%% HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +%% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +%% OTHER DEALINGS IN THE SOFTWARE. 
+%% +%% @author Nick Gerakines <nick@gerakines.net> [http://socklabs.com/] +%% @copyright 2008 Nick Gerakines +%% @todo Support cookies. +%% @doc Provide testing functionality for web requests. +-module(etap_web). + +-export([simple_200/2, simple_404/2, build_request/4]). + +%% @doc Fetch a url and verify that it returned a 200 status. +simple_200(Url, Desc) -> + Request = build_request(get, Url, [], []), + Request:status_is(200, Desc). + +%% @doc Fetch a url and verify that it returned a 404 status. +simple_404(Url, Desc) -> + Request = build_request(get, Url, [], []), + Request:status_is(404, Desc). + +%% @doc Create and return a request structure. +build_request(Method, Url, Headers, Body) + when Method==options;Method==get;Method==head;Method==delete;Method==trace -> + try http:request(Method, {Url, Headers}, [{autoredirect, false}], []) of + {ok, {OutStatus, OutHeaders, OutBody}} -> + etap_request:new(Method, Url, Headers, Body, OutStatus, OutHeaders, OutBody); + _ -> error + catch + _:_ -> error + end; + +%% @doc Create and return a request structure. +build_request(Method, Url, Headers, Body) when Method == post; Method == put -> + ContentType = case lists:keysearch("Content-Type", 1, Headers) of + {value, {"Content-Type", X}} -> X; + _ -> [] + end, + try http:request(Method, {Url, Headers, ContentType, Body}, [{autoredirect, false}], []) of + {ok, {OutStatus, OutHeaders, OutBody}} -> + etap_request:new(Method, Url, Headers, Body, OutStatus, OutHeaders, OutBody); + _ -> error + catch + _:_ -> error + end. diff --git a/apps/fabric/ebin/fabric.app b/apps/fabric/ebin/fabric.app new file mode 100644 index 00000000..8a565d8a --- /dev/null +++ b/apps/fabric/ebin/fabric.app @@ -0,0 +1,28 @@ +{application, fabric, [ + {description, "Routing and proxying layer for CouchDB cluster"}, + {vsn, "1.0.3"}, + {modules, [ + fabric, + fabric_db_create, + fabric_db_delete, + fabric_db_doc_count, + fabric_db_info, + fabric_db_meta, + fabric_dict, + fabric_doc_attachments, + fabric_doc_missing_revs, + fabric_doc_open, + fabric_doc_open_revs, + fabric_doc_update, + fabric_group_info, + fabric_rpc, + fabric_util, + fabric_view, + fabric_view_all_docs, + fabric_view_changes, + fabric_view_map, + fabric_view_reduce + ]}, + {registered, []}, + {applications, [kernel, stdlib, couch, rexi, mem3]} +]}. diff --git a/apps/fabric/ebin/fabric.appup b/apps/fabric/ebin/fabric.appup new file mode 100644 index 00000000..ef5dc496 --- /dev/null +++ b/apps/fabric/ebin/fabric.appup @@ -0,0 +1,3 @@ +{"1.0.3",[{"1.0.2",[ + {load_module, fabric_view_changes} +]}],[{"1.0.2",[]}]}. diff --git a/apps/fabric/include/fabric.hrl b/apps/fabric/include/fabric.hrl new file mode 100644 index 00000000..6ec17b34 --- /dev/null +++ b/apps/fabric/include/fabric.hrl @@ -0,0 +1,22 @@ +-include_lib("eunit/include/eunit.hrl"). + +-record(collector, { + query_args, + callback, + counters, + buffer_size, + blocked = [], + total_rows = 0, + offset = 0, + rows = [], + skip, + limit, + keys, + os_proc, + reducer, + lang, + sorted, + user_acc +}). + +-record(view_row, {key, id, value, doc, worker}). diff --git a/apps/fabric/src/fabric.erl b/apps/fabric/src/fabric.erl new file mode 100644 index 00000000..1be97a98 --- /dev/null +++ b/apps/fabric/src/fabric.erl @@ -0,0 +1,225 @@ +-module(fabric). + +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). 
+ +% DBs +-export([all_dbs/0, all_dbs/1, create_db/1, create_db/2, delete_db/1, + delete_db/2, get_db_info/1, get_doc_count/1, set_revs_limit/3, + set_security/3, get_revs_limit/1, get_security/1]). + +% Documents +-export([open_doc/3, open_revs/4, get_missing_revs/2, update_doc/3, + update_docs/3, att_receiver/2]). + +% Views +-export([all_docs/4, changes/4, query_view/3, query_view/4, query_view/6, + get_view_group_info/2]). + +% miscellany +-export([design_docs/1, reset_validation_funs/1]). + +-include("fabric.hrl"). + +% db operations + +all_dbs() -> + all_dbs(<<>>). + +all_dbs(Prefix) when is_list(Prefix) -> + all_dbs(list_to_binary(Prefix)); +all_dbs(Prefix) when is_binary(Prefix) -> + Length = byte_size(Prefix), + MatchingDbs = ets:foldl(fun(#shard{dbname=DbName}, Acc) -> + case DbName of + <<Prefix:Length/binary, _/binary>> -> + [DbName | Acc]; + _ -> + Acc + end + end, [], partitions), + {ok, lists:usort(MatchingDbs)}. + +get_db_info(DbName) -> + fabric_db_info:go(dbname(DbName)). + +get_doc_count(DbName) -> + fabric_db_doc_count:go(dbname(DbName)). + +create_db(DbName) -> + create_db(DbName, []). + +create_db(DbName, Options) -> + fabric_db_create:go(dbname(DbName), opts(Options)). + +delete_db(DbName) -> + delete_db(DbName, []). + +delete_db(DbName, Options) -> + fabric_db_delete:go(dbname(DbName), opts(Options)). + +set_revs_limit(DbName, Limit, Options) when is_integer(Limit), Limit > 0 -> + fabric_db_meta:set_revs_limit(dbname(DbName), Limit, opts(Options)). + +get_revs_limit(DbName) -> + {ok, Db} = fabric_util:get_db(dbname(DbName)), + try couch_db:get_revs_limit(Db) after catch couch_db:close(Db) end. + +set_security(DbName, SecObj, Options) -> + fabric_db_meta:set_security(dbname(DbName), SecObj, opts(Options)). + +get_security(DbName) -> + {ok, Db} = fabric_util:get_db(dbname(DbName)), + try couch_db:get_security(Db) after catch couch_db:close(Db) end. + +% doc operations +open_doc(DbName, Id, Options) -> + fabric_doc_open:go(dbname(DbName), docid(Id), opts(Options)). + +open_revs(DbName, Id, Revs, Options) -> + fabric_doc_open_revs:go(dbname(DbName), docid(Id), Revs, opts(Options)). + +get_missing_revs(DbName, IdsRevs) when is_list(IdsRevs) -> + Sanitized = [idrevs(IdR) || IdR <- IdsRevs], + fabric_doc_missing_revs:go(dbname(DbName), Sanitized). + +update_doc(DbName, Doc, Options) -> + case update_docs(DbName, [Doc], opts(Options)) of + {ok, [{ok, NewRev}]} -> + {ok, NewRev}; + {ok, [Error]} -> + throw(Error); + {ok, []} -> + % replication success + #doc{revs = {Pos, [RevId | _]}} = doc(Doc), + {ok, {Pos, RevId}} + end. + +update_docs(DbName, Docs, Options) -> + try fabric_doc_update:go(dbname(DbName), docs(Docs), opts(Options)) + catch {aborted, PreCommitFailures} -> + {aborted, PreCommitFailures} + end. + +att_receiver(Req, Length) -> + fabric_doc_attachments:receiver(Req, Length). + +all_docs(DbName, Callback, Acc0, #view_query_args{} = QueryArgs) when + is_function(Callback, 2) -> + fabric_view_all_docs:go(dbname(DbName), QueryArgs, Callback, Acc0). + +changes(DbName, Callback, Acc0, Options) -> + % TODO use a keylist for Options instead of #changes_args, BugzID 10281 + Feed = Options#changes_args.feed, + fabric_view_changes:go(dbname(DbName), Feed, Options, Callback, Acc0). + +query_view(DbName, DesignName, ViewName) -> + query_view(DbName, DesignName, ViewName, #view_query_args{}). + +query_view(DbName, DesignName, ViewName, QueryArgs) -> + Callback = fun default_callback/2, + query_view(DbName, DesignName, ViewName, Callback, [], QueryArgs). 
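A rough sketch of how the database and document calls above compose; the names, the document body and the exact success shapes assumed for open_doc and get_db_info are illustrative:

    % Minimal sketch, not taken verbatim from the docs.
    ok = fabric:create_db(<<"accounts">>),
    {ok, _Rev} = fabric:update_doc(<<"accounts">>,
        {[{<<"_id">>, <<"doc1">>}, {<<"balance">>, 100}]}, []),
    {ok, _Doc} = fabric:open_doc(<<"accounts">>, <<"doc1">>, []),
    {ok, _Info} = fabric:get_db_info(<<"accounts">>).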
+ +query_view(DbName, Design, ViewName, Callback, Acc0, QueryArgs) -> + Db = dbname(DbName), View = name(ViewName), + case is_reduce_view(Db, Design, View, QueryArgs) of + true -> + Mod = fabric_view_reduce; + false -> + Mod = fabric_view_map + end, + Mod:go(Db, Design, View, QueryArgs, Callback, Acc0). + +get_view_group_info(DbName, DesignId) -> + fabric_group_info:go(dbname(DbName), design_doc(DesignId)). + +design_docs(DbName) -> + QueryArgs = #view_query_args{start_key = <<"_design/">>, include_docs=true}, + Callback = fun({total_and_offset, _, _}, []) -> + {ok, []}; + ({row, {Props}}, Acc) -> + case couch_util:get_value(id, Props) of + <<"_design/", _/binary>> -> + {ok, [couch_util:get_value(doc, Props) | Acc]}; + _ -> + {stop, Acc} + end; + (complete, Acc) -> + {ok, lists:reverse(Acc)} + end, + fabric:all_docs(dbname(DbName), Callback, [], QueryArgs). + +reset_validation_funs(DbName) -> + [rexi:cast(Node, {fabric_rpc, reset_validation_funs, [Name]}) || + #shard{node=Node, name=Name} <- mem3:shards(DbName)]. + +%% some simple type validation and transcoding + +dbname(DbName) when is_list(DbName) -> + list_to_binary(DbName); +dbname(DbName) when is_binary(DbName) -> + DbName; +dbname(#db{name=Name}) -> + Name; +dbname(DbName) -> + erlang:error({illegal_database_name, DbName}). + +name(Thing) -> + couch_util:to_binary(Thing). + +docid(DocId) when is_list(DocId) -> + list_to_binary(DocId); +docid(DocId) when is_binary(DocId) -> + DocId; +docid(DocId) -> + erlang:error({illegal_docid, DocId}). + +docs(Docs) when is_list(Docs) -> + [doc(D) || D <- Docs]; +docs(Docs) -> + erlang:error({illegal_docs_list, Docs}). + +doc(#doc{} = Doc) -> + Doc; +doc({_} = Doc) -> + couch_doc:from_json_obj(Doc); +doc(Doc) -> + erlang:error({illegal_doc_format, Doc}). + +design_doc(#doc{} = DDoc) -> + DDoc; +design_doc(DocId) when is_list(DocId) -> + design_doc(list_to_binary(DocId)); +design_doc(<<"_design/", _/binary>> = DocId) -> + DocId; +design_doc(GroupName) -> + <<"_design/", GroupName/binary>>. + +idrevs({Id, Revs}) when is_list(Revs) -> + {docid(Id), [rev(R) || R <- Revs]}. + +rev(Rev) when is_list(Rev); is_binary(Rev) -> + couch_doc:parse_rev(Rev); +rev({Seq, Hash} = Rev) when is_integer(Seq), is_binary(Hash) -> + Rev. + +opts(Options) -> + case couch_util:get_value(user_ctx, Options) of + undefined -> + case erlang:get(user_ctx) of + #user_ctx{} = Ctx -> + [{user_ctx, Ctx} | Options]; + _ -> + Options + end; + _ -> + Options + end. + +default_callback(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_callback(Row, Acc) -> + {ok, [Row | Acc]}. + +is_reduce_view(_, _, _, #view_query_args{view_type=Reduce}) -> + Reduce =:= reduce. diff --git a/apps/fabric/src/fabric_db_create.erl b/apps/fabric/src/fabric_db_create.erl new file mode 100644 index 00000000..d10bcc22 --- /dev/null +++ b/apps/fabric/src/fabric_db_create.erl @@ -0,0 +1,65 @@ +-module(fabric_db_create). +-export([go/2]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(DBNAME_REGEX, "^[a-z][a-z0-9\\_\\$()\\+\\-\\/\\s.]*$"). 
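For reference, the name check performed below accepts only names that begin with a lowercase letter and use the characters listed in ?DBNAME_REGEX; a few hedged examples:

    % Illustrative checks, evaluated inside this module:
    match   = re:run(<<"my_db-2010">>, ?DBNAME_REGEX, [{capture, none}]),
    nomatch = re:run(<<"MyDB">>,       ?DBNAME_REGEX, [{capture, none}]),   % first char must be a-z
    nomatch = re:run(<<"_users">>,     ?DBNAME_REGEX, [{capture, none}]).   % leading underscore rejected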
+ +%% @doc Create a new database, and all its partition files across the cluster +%% Options is proplist with user_ctx, n, q +go(DbName, Options) -> + case re:run(DbName, ?DBNAME_REGEX, [{capture,none}]) of + match -> + Shards = mem3:choose_shards(DbName, Options), + Doc = make_document(Shards), + Workers = fabric_util:submit_jobs(Shards, create_db, [Options, Doc]), + Acc0 = fabric_dict:init(Workers, nil), + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0) of + {ok, _} -> + ok; + Else -> + Else + end; + nomatch -> + {error, illegal_database_name} + end. + +handle_message(Msg, Shard, Counters) -> + C1 = fabric_dict:store(Shard, Msg, Counters), + case fabric_dict:any(nil, C1) of + true -> + {ok, C1}; + false -> + final_answer(C1) + end. + +make_document([#shard{dbname=DbName}|_] = Shards) -> + {RawOut, ByNodeOut, ByRangeOut} = + lists:foldl(fun(#shard{node=N, range=[B,E]}, {Raw, ByNode, ByRange}) -> + Range = ?l2b([couch_util:to_hex(<<B:32/integer>>), "-", + couch_util:to_hex(<<E:32/integer>>)]), + Node = couch_util:to_binary(N), + {[[<<"add">>, Range, Node] | Raw], orddict:append(Node, Range, ByNode), + orddict:append(Range, Node, ByRange)} + end, {[], [], []}, Shards), + #doc{id=DbName, body = {[ + {<<"changelog">>, lists:sort(RawOut)}, + {<<"by_node">>, {[{K,lists:sort(V)} || {K,V} <- ByNodeOut]}}, + {<<"by_range">>, {[{K,lists:sort(V)} || {K,V} <- ByRangeOut]}} + ]}}. + +final_answer(Counters) -> + Successes = [X || {_, M} = X <- Counters, M == ok orelse M == file_exists], + case fabric_view:is_progress_possible(Successes) of + true -> + case lists:keymember(file_exists, 2, Successes) of + true -> + {error, file_exists}; + false -> + {stop, ok} + end; + false -> + {error, internal_server_error} + end. diff --git a/apps/fabric/src/fabric_db_delete.erl b/apps/fabric/src/fabric_db_delete.erl new file mode 100644 index 00000000..57eefa9e --- /dev/null +++ b/apps/fabric/src/fabric_db_delete.erl @@ -0,0 +1,41 @@ +-module(fabric_db_delete). +-export([go/2]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +go(DbName, Options) -> + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, delete_db, [Options, DbName]), + Acc0 = fabric_dict:init(Workers, nil), + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0) of + {ok, ok} -> + ok; + {ok, not_found} -> + erlang:error(database_does_not_exist); + Error -> + Error + end. + +handle_message(Msg, Shard, Counters) -> + C1 = fabric_dict:store(Shard, Msg, Counters), + case fabric_dict:any(nil, C1) of + true -> + {ok, C1}; + false -> + final_answer(C1) + end. + +final_answer(Counters) -> + Successes = [X || {_, M} = X <- Counters, M == ok orelse M == not_found], + case fabric_view:is_progress_possible(Successes) of + true -> + case lists:keymember(ok, 2, Successes) of + true -> + {stop, ok}; + false -> + {stop, not_found} + end; + false -> + {error, internal_server_error} + end. diff --git a/apps/fabric/src/fabric_db_doc_count.erl b/apps/fabric/src/fabric_db_doc_count.erl new file mode 100644 index 00000000..12d5cbf8 --- /dev/null +++ b/apps/fabric/src/fabric_db_doc_count.erl @@ -0,0 +1,32 @@ +-module(fabric_db_doc_count). + +-export([go/1]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName) -> + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, get_doc_count, []), + Acc0 = {fabric_dict:init(Workers, nil), 0}, + fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0). 
+ +handle_message({ok, Count}, Shard, {Counters, Acc}) -> + case fabric_dict:lookup_element(Shard, Counters) of + undefined -> + % already heard from someone else in this range + {ok, {Counters, Acc}}; + nil -> + C1 = fabric_dict:store(Shard, ok, Counters), + C2 = fabric_view:remove_overlapping_shards(Shard, C1), + case fabric_dict:any(nil, C2) of + true -> + {ok, {C2, Count+Acc}}; + false -> + {stop, Count+Acc} + end + end; +handle_message(_, _, Acc) -> + {ok, Acc}. + diff --git a/apps/fabric/src/fabric_db_info.erl b/apps/fabric/src/fabric_db_info.erl new file mode 100644 index 00000000..3758c5c3 --- /dev/null +++ b/apps/fabric/src/fabric_db_info.erl @@ -0,0 +1,52 @@ +-module(fabric_db_info). + +-export([go/1]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +go(DbName) -> + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, get_db_info, []), + Acc0 = {fabric_dict:init(Workers, nil), []}, + fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0). + +handle_message({ok, Info}, #shard{dbname=Name} = Shard, {Counters, Acc}) -> + case fabric_dict:lookup_element(Shard, Counters) of + undefined -> + % already heard from someone else in this range + {ok, {Counters, Acc}}; + nil -> + C1 = fabric_dict:store(Shard, ok, Counters), + C2 = fabric_view:remove_overlapping_shards(Shard, C1), + case fabric_dict:any(nil, C2) of + true -> + {ok, {C2, [Info|Acc]}}; + false -> + {stop, [{db_name,Name}|merge_results(lists:flatten([Info|Acc]))]} + end + end; +handle_message(_, _, Acc) -> + {ok, Acc}. + +merge_results(Info) -> + Dict = lists:foldl(fun({K,V},D0) -> orddict:append(K,V,D0) end, + orddict:new(), Info), + orddict:fold(fun + (doc_count, X, Acc) -> + [{doc_count, lists:sum(X)} | Acc]; + (doc_del_count, X, Acc) -> + [{doc_del_count, lists:sum(X)} | Acc]; + (update_seq, X, Acc) -> + [{update_seq, lists:sum(X)} | Acc]; + (purge_seq, X, Acc) -> + [{purge_seq, lists:sum(X)} | Acc]; + (compact_running, X, Acc) -> + [{compact_running, lists:member(true, X)} | Acc]; + (disk_size, X, Acc) -> + [{disk_size, lists:sum(X)} | Acc]; + (disk_format_version, X, Acc) -> + [{disk_format_version, lists:max(X)} | Acc]; + (_, _, Acc) -> + Acc + end, [{instance_start_time, <<"0">>}], Dict). diff --git a/apps/fabric/src/fabric_db_meta.erl b/apps/fabric/src/fabric_db_meta.erl new file mode 100644 index 00000000..ee15fc72 --- /dev/null +++ b/apps/fabric/src/fabric_db_meta.erl @@ -0,0 +1,35 @@ +-module(fabric_db_meta). + +-export([set_revs_limit/3, set_security/3]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +set_revs_limit(DbName, Limit, Options) -> + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, set_revs_limit, [Limit, Options]), + Waiting = length(Workers) - 1, + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Waiting) of + {ok, ok} -> + ok; + Error -> + Error + end. + +set_security(DbName, SecObj, Options) -> + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, set_security, [SecObj, Options]), + Waiting = length(Workers) - 1, + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Waiting) of + {ok, ok} -> + ok; + Error -> + Error + end. + +handle_message(ok, _, 0) -> + {stop, ok}; +handle_message(ok, _, Waiting) -> + {ok, Waiting - 1}; +handle_message(Error, _, _Waiting) -> + {error, Error}.
\ No newline at end of file diff --git a/apps/fabric/src/fabric_dict.erl b/apps/fabric/src/fabric_dict.erl new file mode 100644 index 00000000..42d46b34 --- /dev/null +++ b/apps/fabric/src/fabric_dict.erl @@ -0,0 +1,37 @@ +-module(fabric_dict). +-compile(export_all). + +% Instead of ets, let's use an ordered keylist. We'll need to revisit if we +% have >> 100 shards, so a private interface is a good idea. - APK June 2010 + +init(Keys, InitialValue) -> + orddict:from_list([{Key, InitialValue} || Key <- Keys]). + + +decrement_all(Dict) -> + [{K,V-1} || {K,V} <- Dict]. + +store(Key, Value, Dict) -> + orddict:store(Key, Value, Dict). + +erase(Key, Dict) -> + orddict:erase(Key, Dict). + +update_counter(Key, Incr, Dict0) -> + orddict:update_counter(Key, Incr, Dict0). + + +lookup_element(Key, Dict) -> + couch_util:get_value(Key, Dict). + +size(Dict) -> + orddict:size(Dict). + +any(Value, Dict) -> + lists:keymember(Value, 2, Dict). + +filter(Fun, Dict) -> + orddict:filter(Fun, Dict). + +fold(Fun, Acc0, Dict) -> + orddict:fold(Fun, Acc0, Dict). diff --git a/apps/fabric/src/fabric_doc_attachments.erl b/apps/fabric/src/fabric_doc_attachments.erl new file mode 100644 index 00000000..aecdaaef --- /dev/null +++ b/apps/fabric/src/fabric_doc_attachments.erl @@ -0,0 +1,102 @@ +-module(fabric_doc_attachments). + +-include("fabric.hrl"). + +%% couch api calls +-export([receiver/2]). + +receiver(_Req, undefined) -> + <<"">>; +receiver(_Req, {unknown_transfer_encoding, Unknown}) -> + exit({unknown_transfer_encoding, Unknown}); +receiver(Req, chunked) -> + MiddleMan = spawn(fun() -> middleman(Req, chunked) end), + fun(4096, ChunkFun, ok) -> + write_chunks(MiddleMan, ChunkFun) + end; +receiver(_Req, 0) -> + <<"">>; +receiver(Req, Length) when is_integer(Length) -> + Middleman = spawn(fun() -> middleman(Req, Length) end), + fun() -> + Middleman ! {self(), gimme_data}, + receive {Middleman, Data} -> Data end + end; +receiver(_Req, Length) -> + exit({length_not_integer, Length}). + +%% +%% internal +%% + +write_chunks(MiddleMan, ChunkFun) -> + MiddleMan ! {self(), gimme_data}, + receive + {MiddleMan, {0, _Footers}} -> + % MiddleMan ! {self(), done}, + ok; + {MiddleMan, ChunkRecord} -> + ChunkFun(ChunkRecord, ok), + write_chunks(MiddleMan, ChunkFun) + end. + +receive_unchunked_attachment(_Req, 0) -> + ok; +receive_unchunked_attachment(Req, Length) -> + receive {MiddleMan, go} -> + Data = couch_httpd:recv(Req, 0), + MiddleMan ! {self(), Data} + end, + receive_unchunked_attachment(Req, Length - size(Data)). + +middleman(Req, chunked) -> + % spawn a process to actually receive the uploaded data + RcvFun = fun(ChunkRecord, ok) -> + receive {From, go} -> From ! {self(), ChunkRecord} end, ok + end, + Receiver = spawn(fun() -> couch_httpd:recv_chunked(Req,4096,RcvFun,ok) end), + + % take requests from the DB writers and get data from the receiver + N = erlang:list_to_integer(couch_config:get("cluster","n")), + middleman_loop(Receiver, N, dict:new(), 0, []); + +middleman(Req, Length) -> + Receiver = spawn(fun() -> receive_unchunked_attachment(Req, Length) end), + N = erlang:list_to_integer(couch_config:get("cluster","n")), + middleman_loop(Receiver, N, dict:new(), 0, []). 
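%% A rough sketch of the message protocol mediated by middleman_loop/5 below,
%% with hypothetical processes W1..Wn (one shard writer per copy), M (the
%% middleman spawned above) and R (the receiver reading the HTTP request):
%%
%%   W1 -> M : {W1, gimme_data}     writer asks for its next chunk
%%   M  -> R : {M, go}              middleman pulls a chunk if none is buffered
%%   R  -> M : {R, ChunkRecord}     receiver hands over data from the request
%%   M  -> W1: {M, ChunkRecord}     chunk is buffered and passed to the writer
%%
%% A buffered chunk is dropped from the head of the list only once all N
%% writers have advanced past it.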
+ +middleman_loop(Receiver, N, Counters, Offset, ChunkList) -> + receive {From, gimme_data} -> + % figure out how far along this writer (From) is in the list + {NewCounters, WhichChunk} = case dict:find(From, Counters) of + {ok, I} -> + {dict:update_counter(From, 1, Counters), I}; + error -> + {dict:store(From, 2, Counters), 1} + end, + ListIndex = WhichChunk - Offset, + + % talk to the receiver to get another chunk if necessary + ChunkList1 = if ListIndex > length(ChunkList) -> + Receiver ! {self(), go}, + receive {Receiver, ChunkRecord} -> ChunkList ++ [ChunkRecord] end; + true -> ChunkList end, + + % reply to the writer + From ! {self(), lists:nth(ListIndex, ChunkList1)}, + + % check if we can drop a chunk from the head of the list + SmallestIndex = dict:fold(fun(_, Val, Acc) -> lists:min([Val,Acc]) end, + WhichChunk+1, NewCounters), + Size = dict:size(NewCounters), + + {NewChunkList, NewOffset} = + if Size == N andalso (SmallestIndex - Offset) == 2 -> + {tl(ChunkList1), Offset+1}; + true -> + {ChunkList1, Offset} + end, + middleman_loop(Receiver, N, NewCounters, NewOffset, NewChunkList) + after 10000 -> + ok + end. diff --git a/apps/fabric/src/fabric_doc_missing_revs.erl b/apps/fabric/src/fabric_doc_missing_revs.erl new file mode 100644 index 00000000..9a368783 --- /dev/null +++ b/apps/fabric/src/fabric_doc_missing_revs.erl @@ -0,0 +1,64 @@ +-module(fabric_doc_missing_revs). + +-export([go/2]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +go(DbName, AllIdsRevs) -> + Workers = lists:map(fun({#shard{name=Name, node=Node} = Shard, IdsRevs}) -> + Ref = rexi:cast(Node, {fabric_rpc, get_missing_revs, [Name, IdsRevs]}), + Shard#shard{ref=Ref} + end, group_idrevs_by_shard(DbName, AllIdsRevs)), + ResultDict = dict:from_list([{Id, {nil,Revs}} || {Id, Revs} <- AllIdsRevs]), + Acc0 = {length(Workers), ResultDict}, + fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0). + +handle_message({rexi_DOWN, _, _, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message({rexi_EXIT, _, _, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message({ok, Results}, _Worker, {1, D0}) -> + D = update_dict(D0, Results), + {stop, dict:fold(fun force_reply/3, [], D)}; +handle_message({ok, Results}, _Worker, {WaitingCount, D0}) -> + D = update_dict(D0, Results), + case dict:fold(fun maybe_reply/3, {stop, []}, D) of + continue -> + % still haven't heard about some Ids + {ok, {WaitingCount - 1, D}}; + {stop, FinalReply} -> + {stop, FinalReply} + end. + +force_reply(Id, {nil,Revs}, Acc) -> + % never heard about this ID, assume it's missing + [{Id, Revs} | Acc]; +force_reply(_, [], Acc) -> + Acc; +force_reply(Id, Revs, Acc) -> + [{Id, Revs} | Acc]. + +maybe_reply(_, _, continue) -> + continue; +maybe_reply(_, {nil, _}, _) -> + continue; +maybe_reply(_, [], {stop, Acc}) -> + {stop, Acc}; +maybe_reply(Id, Revs, {stop, Acc}) -> + {stop, [{Id, Revs} | Acc]}. + +group_idrevs_by_shard(DbName, IdsRevs) -> + dict:to_list(lists:foldl(fun({Id, Revs}, D0) -> + lists:foldl(fun(Shard, D1) -> + dict:append(Shard, {Id, Revs}, D1) + end, D0, mem3:shards(DbName,Id)) + end, dict:new(), IdsRevs)). + +update_dict(D0, KVs) -> + lists:foldl(fun({K,V,_}, D1) -> dict:store(K, V, D1) end, D0, KVs). + +skip_message({1, Dict}) -> + {stop, dict:fold(fun force_reply/3, [], Dict)}; +skip_message({WaitingCount, Dict}) -> + {ok, {WaitingCount-1, Dict}}. 
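%% A worked example of the fold above (ids and revs are hypothetical): calling
%%
%%   fabric_doc_missing_revs:go(<<"db">>, [{<<"doc1">>, [Rev1]}, {<<"doc2">>, [Rev2]}])
%%
%% returns {ok, [{<<"doc2">>, [Rev2]}]} when some shard copy already has Rev1
%% (that shard reports [] and force_reply/3 drops the entry) but no copy knows
%% Rev2, so doc2 is reported with the revisions that are still missing.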
diff --git a/apps/fabric/src/fabric_doc_open.erl b/apps/fabric/src/fabric_doc_open.erl new file mode 100644 index 00000000..5c5699c3 --- /dev/null +++ b/apps/fabric/src/fabric_doc_open.erl @@ -0,0 +1,66 @@ +-module(fabric_doc_open). + +-export([go/3]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, Id, Options) -> + Workers = fabric_util:submit_jobs(mem3:shards(DbName,Id), open_doc, + [Id, [deleted|Options]]), + SuppressDeletedDoc = not lists:member(deleted, Options), + R = couch_util:get_value(r, Options, couch_config:get("cluster","r","2")), + Acc0 = {length(Workers), list_to_integer(R), []}, + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0) of + {ok, {ok, #doc{deleted=true}}} when SuppressDeletedDoc -> + {not_found, deleted}; + {ok, Else} -> + Else; + Error -> + Error + end. + +handle_message({rexi_DOWN, _, _, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message({rexi_EXIT, _Reason}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message(Reply, _Worker, {WaitingCount, R, Replies}) -> + case merge_read_reply(make_key(Reply), Reply, Replies) of + {_, KeyCount} when KeyCount =:= R -> + {stop, Reply}; + {NewReplies, KeyCount} when KeyCount < R -> + if WaitingCount =:= 1 -> + % last message arrived, but still no quorum + repair_read_quorum_failure(NewReplies); + true -> + {ok, {WaitingCount-1, R, NewReplies}} + end + end. + +skip_message({1, _R, Replies}) -> + repair_read_quorum_failure(Replies); +skip_message({WaitingCount, R, Replies}) -> + {ok, {WaitingCount-1, R, Replies}}. + +merge_read_reply(Key, Reply, Replies) -> + case lists:keyfind(Key, 1, Replies) of + false -> + {[{Key, Reply, 1} | Replies], 1}; + {Key, _, N} -> + {lists:keyreplace(Key, 1, Replies, {Key, Reply, N+1}), N+1} + end. + +make_key({ok, #doc{id=Id, revs=Revs}}) -> + {Id, Revs}; +make_key(Else) -> + Else. + +repair_read_quorum_failure(Replies) -> + case [Doc || {_Key, {ok, Doc}, _Count} <- Replies] of + [] -> + {stop, {not_found, missing}}; + [Doc|_] -> + % TODO merge docs to find the winner as determined by replication + {stop, {ok, Doc}} + end.
\ No newline at end of file diff --git a/apps/fabric/src/fabric_doc_open_revs.erl b/apps/fabric/src/fabric_doc_open_revs.erl new file mode 100644 index 00000000..61ff466f --- /dev/null +++ b/apps/fabric/src/fabric_doc_open_revs.erl @@ -0,0 +1,65 @@ +-module(fabric_doc_open_revs). + +-export([go/4]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, Id, Revs, Options) -> + Workers = fabric_util:submit_jobs(mem3:shards(DbName,Id), open_revs, + [Id, Revs, Options]), + R = couch_util:get_value(r, Options, couch_config:get("cluster","r","2")), + Acc0 = {length(Workers), list_to_integer(R), []}, + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0) of + {ok, {ok, Reply}} -> + {ok, Reply}; + Else -> + Else + end. + +handle_message({rexi_DOWN, _, _, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message({rexi_EXIT, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message(Reply, _Worker, {WaitingCount, R, Replies}) -> + case merge_read_reply(make_key(Reply), Reply, Replies) of + {_, KeyCount} when KeyCount =:= R -> + {stop, Reply}; + {NewReplies, KeyCount} when KeyCount < R -> + if WaitingCount =:= 1 -> + % last message arrived, but still no quorum + repair_read_quorum_failure(NewReplies); + true -> + {ok, {WaitingCount-1, R, NewReplies}} + end + end. + +skip_message({1, _R, Replies}) -> + repair_read_quorum_failure(Replies); +skip_message({WaitingCount, R, Replies}) -> + {ok, {WaitingCount-1, R, Replies}}. + +merge_read_reply(Key, Reply, Replies) -> + case lists:keyfind(Key, 1, Replies) of + false -> + {[{Key, Reply, 1} | Replies], 1}; + {Key, _, N} -> + {lists:keyreplace(Key, 1, Replies, {Key, Reply, N+1}), N+1} + end. + +make_key({ok, #doc{id=Id, revs=Revs}}) -> + {Id, Revs}; +make_key(Else) -> + Else. + +repair_read_quorum_failure(Replies) -> + case [Doc || {_Key, {ok, Doc}, _Count} <- Replies] of + [] -> + {stop, {not_found, missing}}; + [Doc|_] -> + % TODO merge docs to find the winner as determined by replication + {stop, {ok, Doc}} + end. + +
\ No newline at end of file diff --git a/apps/fabric/src/fabric_doc_update.erl b/apps/fabric/src/fabric_doc_update.erl new file mode 100644 index 00000000..f0fcf112 --- /dev/null +++ b/apps/fabric/src/fabric_doc_update.erl @@ -0,0 +1,127 @@ +-module(fabric_doc_update). + +-export([go/3]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(_, [], _) -> + {ok, []}; +go(DbName, AllDocs, Opts) -> + validate_atomic_update(DbName, AllDocs, lists:member(all_or_nothing, Opts)), + Options = lists:delete(all_or_nothing, Opts), + GroupedDocs = lists:map(fun({#shard{name=Name, node=Node} = Shard, Docs}) -> + Ref = rexi:cast(Node, {fabric_rpc, update_docs, [Name, Docs, Options]}), + {Shard#shard{ref=Ref}, Docs} + end, group_docs_by_shard(DbName, AllDocs)), + {Workers, _} = lists:unzip(GroupedDocs), + W = couch_util:get_value(w, Options, couch_config:get("cluster","w","2")), + Acc0 = {length(Workers), length(AllDocs), list_to_integer(W), GroupedDocs, + dict:from_list([{Doc,[]} || Doc <- AllDocs])}, + case fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0) of + {ok, Results} -> + Reordered = couch_util:reorder_results(AllDocs, Results), + {ok, [R || R <- Reordered, R =/= noreply]}; + Else -> + Else + end. + +handle_message({rexi_DOWN, _, _, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message({rexi_EXIT, _}, _Worker, Acc0) -> + skip_message(Acc0); +handle_message({ok, Replies}, Worker, Acc0) -> + {WaitingCount, DocCount, W, GroupedDocs, DocReplyDict0} = Acc0, + Docs = couch_util:get_value(Worker, GroupedDocs), + DocReplyDict = append_update_replies(Docs, Replies, DocReplyDict0), + case {WaitingCount, dict:size(DocReplyDict)} of + {1, _} -> + % last message has arrived, we need to conclude things + {W, Reply} = dict:fold(fun force_reply/3, {W,[]}, DocReplyDict), + {stop, Reply}; + {_, DocCount} -> + % we've got at least one reply for each document, let's take a look + case dict:fold(fun maybe_reply/3, {stop,W,[]}, DocReplyDict) of + continue -> + {ok, {WaitingCount - 1, DocCount, W, GroupedDocs, DocReplyDict}}; + {stop, W, FinalReplies} -> + {stop, FinalReplies} + end; + {_, N} when N < DocCount -> + % no point in trying to finalize anything yet + {ok, {WaitingCount - 1, DocCount, W, GroupedDocs, DocReplyDict}} + end; +handle_message({missing_stub, Stub}, _, _) -> + throw({missing_stub, Stub}); +handle_message({not_found, no_db_file} = X, Worker, Acc0) -> + {_, _, _, GroupedDocs, _} = Acc0, + Docs = couch_util:get_value(Worker, GroupedDocs), + handle_message({ok, [X || _D <- Docs]}, Worker, Acc0). + +force_reply(Doc, [], {W, Acc}) -> + {W, [{Doc, {error, internal_server_error}} | Acc]}; +force_reply(Doc, [FirstReply|_] = Replies, {W, Acc}) -> + case update_quorum_met(W, Replies) of + {true, Reply} -> + {W, [{Doc,Reply} | Acc]}; + false -> + ?LOG_ERROR("write quorum (~p) failed, reply ~p", [W, FirstReply]), + % TODO make a smarter choice than just picking the first reply + {W, [{Doc,FirstReply} | Acc]} + end. + +maybe_reply(_, _, continue) -> + % we didn't meet quorum for all docs, so we're fast-forwarding the fold + continue; +maybe_reply(Doc, Replies, {stop, W, Acc}) -> + case update_quorum_met(W, Replies) of + {true, Reply} -> + {stop, W, [{Doc, Reply} | Acc]}; + false -> + continue + end. 
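%% A worked example of the write-quorum check driven by maybe_reply/3 above and
%% update_quorum_met/2 below, assuming the default W = 2 and three copies of a
%% hypothetical document (the revision is made up):
%%
%%   Replies = [{ok, {1, <<"abc">>}}, {ok, {1, <<"abc">>}}],
%%   update_quorum_met(2, Replies)  %% -> {true, {ok, {1, <<"abc">>}}}
%%
%% Two matching replies meet the quorum, so the coordinator can answer before
%% the third copy responds; a single reply would keep the fold going.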
+ +update_quorum_met(W, Replies) -> + Counters = lists:foldl(fun(R,D) -> orddict:update_counter(R,1,D) end, + orddict:new(), Replies), + case lists:dropwhile(fun({_, Count}) -> Count < W end, Counters) of + [] -> + false; + [{FinalReply, _} | _] -> + {true, FinalReply} + end. + +-spec group_docs_by_shard(binary(), [#doc{}]) -> [{#shard{}, [#doc{}]}]. +group_docs_by_shard(DbName, Docs) -> + dict:to_list(lists:foldl(fun(#doc{id=Id} = Doc, D0) -> + lists:foldl(fun(Shard, D1) -> + dict:append(Shard, Doc, D1) + end, D0, mem3:shards(DbName,Id)) + end, dict:new(), Docs)). + +append_update_replies([], [], DocReplyDict) -> + DocReplyDict; +append_update_replies([Doc|Rest], [], Dict0) -> + % icky, if replicated_changes only errors show up in result + append_update_replies(Rest, [], dict:append(Doc, noreply, Dict0)); +append_update_replies([Doc|Rest1], [Reply|Rest2], Dict0) -> + % TODO what if the same document shows up twice in one update_docs call? + append_update_replies(Rest1, Rest2, dict:append(Doc, Reply, Dict0)). + +skip_message(Acc0) -> + % TODO fix this + {ok, Acc0}. + +validate_atomic_update(_, _, false) -> + ok; +validate_atomic_update(_DbName, AllDocs, true) -> + % TODO actually perform the validation. This requires some hackery, we need + % to basically extract the prep_and_validate_updates function from couch_db + % and only run that, without actually writing in case of a success. + Error = {not_implemented, <<"all_or_nothing is not supported yet">>}, + PreCommitFailures = lists:map(fun(#doc{id=Id, revs = {Pos,Revs}}) -> + case Revs of [] -> RevId = <<>>; [RevId|_] -> ok end, + {{Id, {Pos, RevId}}, Error} + end, AllDocs), + throw({aborted, PreCommitFailures}). diff --git a/apps/fabric/src/fabric_group_info.erl b/apps/fabric/src/fabric_group_info.erl new file mode 100644 index 00000000..04605a66 --- /dev/null +++ b/apps/fabric/src/fabric_group_info.erl @@ -0,0 +1,52 @@ +-module(fabric_group_info). + +-export([go/2]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, GroupId) when is_binary(GroupId) -> + {ok, DDoc} = fabric:open_doc(DbName, GroupId, []), + go(DbName, DDoc); + +go(DbName, #doc{} = DDoc) -> + Group = couch_view_group:design_doc_to_view_group(#db{name=DbName}, DDoc), + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, group_info, [Group]), + Acc0 = {fabric_dict:init(Workers, nil), []}, + fabric_util:recv(Workers, #shard.ref, fun handle_message/3, Acc0). + +handle_message({ok, Info}, Shard, {Counters, Acc}) -> + case fabric_dict:lookup_element(Shard, Counters) of + undefined -> + % already heard from someone else in this range + {ok, {Counters, Acc}}; + nil -> + C1 = fabric_dict:store(Shard, ok, Counters), + C2 = fabric_view:remove_overlapping_shards(Shard, C1), + case fabric_dict:any(nil, C2) of + true -> + {ok, {C2, [Info|Acc]}}; + false -> + {stop, merge_results(lists:flatten([Info|Acc]))} + end + end; +handle_message(_, _, Acc) -> + {ok, Acc}. + +merge_results(Info) -> + Dict = lists:foldl(fun({K,V},D0) -> orddict:append(K,V,D0) end, + orddict:new(), Info), + orddict:fold(fun + (signature, [X|_], Acc) -> + [{signature, X} | Acc]; + (language, [X|_], Acc) -> + [{language, X} | Acc]; + (disk_size, X, Acc) -> + [{disk_size, lists:sum(X)} | Acc]; + (compact_running, X, Acc) -> + [{compact_running, lists:member(true, X)} | Acc]; + (_, _, Acc) -> + Acc + end, [], Dict). 
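%% For example (numbers and signature are hypothetical), two shard responses
%%
%%   [{signature, <<"sig">>}, {language, <<"javascript">>}, {disk_size, 100}, {compact_running, false}]
%%   [{signature, <<"sig">>}, {language, <<"javascript">>}, {disk_size, 150}, {compact_running, true}]
%%
%% are merged by merge_results/1 above into a single info list carrying the
%% shared signature and language, disk_size = 250, and compact_running = true
%% because at least one copy is compacting.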
diff --git a/apps/fabric/src/fabric_rpc.erl b/apps/fabric/src/fabric_rpc.erl new file mode 100644 index 00000000..f56e3f68 --- /dev/null +++ b/apps/fabric/src/fabric_rpc.erl @@ -0,0 +1,388 @@ +-module(fabric_rpc). + +-export([get_db_info/1, get_doc_count/1, get_update_seq/1]). +-export([open_doc/3, open_revs/4, get_missing_revs/2, update_docs/3]). +-export([all_docs/2, changes/3, map_view/4, reduce_view/4, group_info/2]). +-export([create_db/3, delete_db/3, reset_validation_funs/1, set_security/3, + set_revs_limit/3]). + +-include("fabric.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-record (view_acc, { + db, + limit, + include_docs, + offset = nil, + total_rows, + reduce_fun = fun couch_db:enum_docs_reduce_to_count/1, + group_level = 0 +}). + +%% rpc endpoints +%% call to with_db will supply your M:F with a #db{} and then remaining args + +all_docs(DbName, #view_query_args{keys=nil} = QueryArgs) -> + {ok, Db} = couch_db:open(DbName, []), + #view_query_args{ + start_key = StartKey, + start_docid = StartDocId, + end_key = EndKey, + end_docid = EndDocId, + limit = Limit, + skip = Skip, + include_docs = IncludeDocs, + direction = Dir, + inclusive_end = Inclusive + } = QueryArgs, + {ok, Total} = couch_db:get_doc_count(Db), + Acc0 = #view_acc{ + db = Db, + include_docs = IncludeDocs, + limit = Limit+Skip, + total_rows = Total + }, + EndKeyType = if Inclusive -> end_key; true -> end_key_gt end, + Options = [ + {dir, Dir}, + {start_key, if is_binary(StartKey) -> StartKey; true -> StartDocId end}, + {EndKeyType, if is_binary(EndKey) -> EndKey; true -> EndDocId end} + ], + {ok, _, Acc} = couch_db:enum_docs(Db, fun view_fold/3, Acc0, Options), + final_response(Total, Acc#view_acc.offset). + +changes(DbName, Args, StartSeq) -> + #changes_args{style=Style, dir=Dir} = Args, + case couch_db:open(DbName, []) of + {ok, Db} -> + Enum = fun changes_enumerator/2, + Opts = [{dir,Dir}], + Acc0 = {Db, StartSeq, Args}, + try + {ok, {_, LastSeq, _}} = + couch_db:changes_since(Db, Style, StartSeq, Enum, Opts, Acc0), + rexi:reply({complete, LastSeq}) + after + couch_db:close(Db) + end; + Error -> + rexi:reply(Error) + end. + +map_view(DbName, DDoc, ViewName, QueryArgs) -> + {ok, Db} = couch_db:open(DbName, []), + #view_query_args{ + limit = Limit, + skip = Skip, + keys = Keys, + include_docs = IncludeDocs, + stale = Stale, + view_type = ViewType + } = QueryArgs, + MinSeq = if Stale == ok -> 0; true -> couch_db:get_update_seq(Db) end, + Group0 = couch_view_group:design_doc_to_view_group(Db, DDoc), + {ok, Pid} = gen_server:call(couch_view, {get_group_server, DbName, Group0}), + {ok, Group} = couch_view_group:request_group(Pid, MinSeq), + View = fabric_view:extract_view(Pid, ViewName, Group#group.views, ViewType), + {ok, Total} = couch_view:get_row_count(View), + Acc0 = #view_acc{ + db = Db, + include_docs = IncludeDocs, + limit = Limit+Skip, + total_rows = Total, + reduce_fun = fun couch_view:reduce_to_count/1 + }, + case Keys of + nil -> + Options = couch_httpd_view:make_key_options(QueryArgs), + {ok, _, Acc} = couch_view:fold(View, fun view_fold/3, Acc0, Options); + _ -> + Acc = lists:foldl(fun(Key, AccIn) -> + KeyArgs = QueryArgs#view_query_args{start_key=Key, end_key=Key}, + Options = couch_httpd_view:make_key_options(KeyArgs), + {_Go, _, Out} = couch_view:fold(View, fun view_fold/3, AccIn, + Options), + Out + end, Acc0, Keys) + end, + final_response(Total, Acc#view_acc.offset). 
+ +reduce_view(DbName, Group0, ViewName, QueryArgs) -> + {ok, Db} = couch_db:open(DbName, []), + #view_query_args{ + group_level = GroupLevel, + limit = Limit, + skip = Skip, + keys = Keys, + stale = Stale + } = QueryArgs, + GroupFun = group_rows_fun(GroupLevel), + MinSeq = if Stale == ok -> 0; true -> couch_db:get_update_seq(Db) end, + {ok, Pid} = gen_server:call(couch_view, {get_group_server, DbName, Group0}), + {ok, #group{views=Views, def_lang=Lang}} = couch_view_group:request_group( + Pid, MinSeq), + {NthRed, View} = fabric_view:extract_view(Pid, ViewName, Views, reduce), + ReduceView = {reduce, NthRed, Lang, View}, + Acc0 = #view_acc{group_level = GroupLevel, limit = Limit+Skip}, + case Keys of + nil -> + Options0 = couch_httpd_view:make_key_options(QueryArgs), + Options = [{key_group_fun, GroupFun} | Options0], + couch_view:fold_reduce(ReduceView, fun reduce_fold/3, Acc0, Options); + _ -> + lists:map(fun(Key) -> + KeyArgs = QueryArgs#view_query_args{start_key=Key, end_key=Key}, + Options0 = couch_httpd_view:make_key_options(KeyArgs), + Options = [{key_group_fun, GroupFun} | Options0], + couch_view:fold_reduce(ReduceView, fun reduce_fold/3, Acc0, Options) + end, Keys) + end, + rexi:reply(complete). + +create_db(DbName, Options, Doc) -> + mem3_util:write_db_doc(Doc), + rexi:reply(case couch_server:create(DbName, Options) of + {ok, _} -> + ok; + Error -> + Error + end). + +delete_db(DbName, Options, DocId) -> + mem3_util:delete_db_doc(DocId), + rexi:reply(couch_server:delete(DbName, Options)). + +get_db_info(DbName) -> + with_db(DbName, [], {couch_db, get_db_info, []}). + +get_doc_count(DbName) -> + with_db(DbName, [], {couch_db, get_doc_count, []}). + +get_update_seq(DbName) -> + with_db(DbName, [], {couch_db, get_update_seq, []}). + +set_security(DbName, SecObj, Options) -> + with_db(DbName, Options, {couch_db, set_security, [SecObj]}). + +set_revs_limit(DbName, Limit, Options) -> + with_db(DbName, Options, {couch_db, set_revs_limit, [Limit]}). + +open_doc(DbName, DocId, Options) -> + with_db(DbName, Options, {couch_db, open_doc, [DocId, Options]}). + +open_revs(DbName, Id, Revs, Options) -> + with_db(DbName, Options, {couch_db, open_doc_revs, [Id, Revs, Options]}). + +get_missing_revs(DbName, IdRevsList) -> + % reimplement here so we get [] for Ids with no missing revs in response + rexi:reply(case couch_db:open(DbName, []) of + {ok, Db} -> + Ids = [Id1 || {Id1, _Revs} <- IdRevsList], + {ok, lists:zipwith(fun({Id, Revs}, FullDocInfoResult) -> + case FullDocInfoResult of + {ok, #full_doc_info{rev_tree=RevisionTree} = FullInfo} -> + MissingRevs = couch_key_tree:find_missing(RevisionTree, Revs), + {Id, MissingRevs, possible_ancestors(FullInfo, MissingRevs)}; + not_found -> + {Id, Revs, []} + end + end, IdRevsList, couch_btree:lookup(Db#db.id_tree, Ids))}; + Error -> + Error + end). + +update_docs(DbName, Docs0, Options) -> + case proplists:get_value(replicated_changes, Options) of + true -> + X = replicated_changes; + _ -> + X = interactive_edit + end, + Docs = make_att_readers(Docs0), + with_db(DbName, Options, {couch_db, update_docs, [Docs, Options, X]}). + +group_info(DbName, Group0) -> + {ok, Pid} = gen_server:call(couch_view, {get_group_server, DbName, Group0}), + rexi:reply(couch_view_group:request_group_info(Pid)). + +reset_validation_funs(DbName) -> + case couch_db:open(DbName, []) of + {ok, #db{main_pid = Pid}} -> + gen_server:cast(Pid, {load_validation_funs, undefined}); + _ -> + ok + end. 
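%% Each endpoint above is invoked from the coordinator side via rexi; a minimal
%% sketch, with a hypothetical node and shard name:
%%
%%   Ref = rexi:cast('node1@host',
%%       {fabric_rpc, get_doc_count, [<<"shards/00000000-7fffffff/db">>]}),
%%
%% On the worker node, with_db/3 below opens that shard file, applies
%% couch_db:get_doc_count(Db), and sends the result back with rexi:reply/1,
%% which arrives at the coordinator tagged with the same Ref.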
+ +%% +%% internal +%% + +with_db(DbName, Options, {M,F,A}) -> + case couch_db:open(DbName, Options) of + {ok, Db} -> + rexi:reply(try + apply(M, F, [Db | A]) + catch Exception -> + Exception; + error:Reason -> + ?LOG_ERROR("~p ~p ~p~n~p", [?MODULE, {M,F}, Reason, + erlang:get_stacktrace()]), + {error, Reason} + end); + Error -> + rexi:reply(Error) + end. + +view_fold(#full_doc_info{} = FullDocInfo, OffsetReds, Acc) -> + % matches for _all_docs and translates #full_doc_info{} -> KV pair + case couch_doc:to_doc_info(FullDocInfo) of + #doc_info{revs=[#rev_info{deleted=false, rev=Rev}|_]} -> + Id = FullDocInfo#full_doc_info.id, + Value = {[{rev,couch_doc:rev_to_str(Rev)}]}, + view_fold({{Id,Id}, Value}, OffsetReds, Acc); + #doc_info{revs=[#rev_info{deleted=true}|_]} -> + {ok, Acc} + end; +view_fold(KV, OffsetReds, #view_acc{offset=nil, total_rows=Total} = Acc) -> + % calculates the offset for this shard + #view_acc{reduce_fun=Reduce} = Acc, + Offset = Reduce(OffsetReds), + case rexi:sync_reply({total_and_offset, Total, Offset}) of + ok -> + view_fold(KV, OffsetReds, Acc#view_acc{offset=Offset}); + stop -> + exit(normal); + timeout -> + exit(timeout) + end; +view_fold(_KV, _Offset, #view_acc{limit=0} = Acc) -> + % we scanned through limit+skip local rows + {stop, Acc}; +view_fold({{Key,Id}, Value}, _Offset, Acc) -> + % the normal case + #view_acc{ + db = Db, + limit = Limit, + include_docs = IncludeDocs + } = Acc, + Doc = if not IncludeDocs -> undefined; true -> + case couch_db:open_doc(Db, Id, []) of + {not_found, deleted} -> + null; + {not_found, missing} -> + undefined; + {ok, Doc0} -> + couch_doc:to_json_obj(Doc0, []) + end + end, + case rexi:sync_reply(#view_row{key=Key, id=Id, value=Value, doc=Doc}) of + ok -> + {ok, Acc#view_acc{limit=Limit-1}}; + timeout -> + exit(timeout) + end. + +final_response(Total, nil) -> + case rexi:sync_reply({total_and_offset, Total, Total}) of ok -> + rexi:reply(complete); + stop -> + ok; + timeout -> + exit(timeout) + end; +final_response(_Total, _Offset) -> + rexi:reply(complete). + +group_rows_fun(exact) -> + fun({Key1,_}, {Key2,_}) -> Key1 == Key2 end; +group_rows_fun(0) -> + fun(_A, _B) -> true end; +group_rows_fun(GroupLevel) when is_integer(GroupLevel) -> + fun({[_|_] = Key1,_}, {[_|_] = Key2,_}) -> + lists:sublist(Key1, GroupLevel) == lists:sublist(Key2, GroupLevel); + ({Key1,_}, {Key2,_}) -> + Key1 == Key2 + end. + +reduce_fold(_Key, _Red, #view_acc{limit=0} = Acc) -> + {stop, Acc}; +reduce_fold(_Key, Red, #view_acc{group_level=0} = Acc) -> + send(null, Red, Acc); +reduce_fold(Key, Red, #view_acc{group_level=exact} = Acc) -> + send(Key, Red, Acc); +reduce_fold(K, Red, #view_acc{group_level=I} = Acc) when I > 0, is_list(K) -> + send(lists:sublist(K, I), Red, Acc). + +send(Key, Value, #view_acc{limit=Limit} = Acc) -> + case rexi:sync_reply(#view_row{key=Key, value=Value}) of + ok -> + {ok, Acc#view_acc{limit=Limit-1}}; + stop -> + exit(normal); + timeout -> + exit(timeout) + end. + +changes_enumerator(DocInfo, {Db, _Seq, Args}) -> + #changes_args{include_docs=IncludeDocs, filter=FilterFun} = Args, + #doc_info{id=Id, high_seq=Seq, revs=[#rev_info{deleted=Del,rev=Rev}|_]} + = DocInfo, + case [Result || Result <- FilterFun(DocInfo), Result /= null] of + [] -> + {ok, {Db, Seq, Args}}; + Results -> + ChangesRow = changes_row(Db, Seq, Id, Results, Rev, Del, IncludeDocs), + Go = rexi:sync_reply(ChangesRow), + {Go, {Db, Seq, Args}} + end. 
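%% Taken together, view_fold/3, final_response/2 and send/3 above make a worker
%% stream a shard's rows to its coordinator roughly as follows (row contents
%% are illustrative):
%%
%%   rexi:sync_reply({total_and_offset, Total, Offset})    once, before any rows
%%   rexi:sync_reply(#view_row{key=K, id=Id, value=V})     one call per row
%%   rexi:reply(complete)                                   when the fold ends
%%
%% sync_reply blocks until the coordinator answers ok or stop, which is what
%% lets the coordinator pause a worker that is running ahead (see
%% maybe_pause_worker/3 in fabric_view).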
+ +changes_row(_, Seq, Id, Results, _, true, true) -> + #view_row{key=Seq, id=Id, value=Results, doc=deleted}; +changes_row(_, Seq, Id, Results, _, true, false) -> + #view_row{key=Seq, id=Id, value=Results, doc=deleted}; +changes_row(Db, Seq, Id, Results, Rev, false, true) -> + #view_row{key=Seq, id=Id, value=Results, doc=doc_member(Db, Id, Rev)}; +changes_row(_, Seq, Id, Results, _, false, false) -> + #view_row{key=Seq, id=Id, value=Results}. + +doc_member(Shard, Id, Rev) -> + case couch_db:open_doc_revs(Shard, Id, [Rev], []) of + {ok, [{ok,Doc}]} -> + couch_doc:to_json_obj(Doc, []); + Error -> + Error + end. + +possible_ancestors(_FullInfo, []) -> + []; +possible_ancestors(FullInfo, MissingRevs) -> + #doc_info{revs=RevsInfo} = couch_doc:to_doc_info(FullInfo), + LeafRevs = [Rev || #rev_info{rev=Rev} <- RevsInfo], + % Find the revs that are possible parents of this rev + lists:foldl(fun({LeafPos, LeafRevId}, Acc) -> + % this leaf is a "possible ancenstor" of the missing + % revs if this LeafPos lessthan any of the missing revs + case lists:any(fun({MissingPos, _}) -> + LeafPos < MissingPos end, MissingRevs) of + true -> + [{LeafPos, LeafRevId} | Acc]; + false -> + Acc + end + end, [], LeafRevs). + +make_att_readers([]) -> + []; +make_att_readers([#doc{atts=Atts0} = Doc | Rest]) -> + % % go through the attachments looking for 'follows' in the data, + % % replace with function that reads the data from MIME stream. + Atts = [Att#att{data=make_att_reader(D)} || #att{data=D} = Att <- Atts0], + [Doc#doc{atts = Atts} | make_att_readers(Rest)]. + +make_att_reader({follows, Parser}) -> + fun() -> + Parser ! {get_bytes, self()}, + receive {bytes, Bytes} -> Bytes end + end; +make_att_reader(Else) -> + Else. diff --git a/apps/fabric/src/fabric_util.erl b/apps/fabric/src/fabric_util.erl new file mode 100644 index 00000000..639a32e7 --- /dev/null +++ b/apps/fabric/src/fabric_util.erl @@ -0,0 +1,89 @@ +-module(fabric_util). + +-export([submit_jobs/3, cleanup/1, recv/4, receive_loop/4, receive_loop/6, + get_db/1]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +submit_jobs(Shards, EndPoint, ExtraArgs) -> + lists:map(fun(#shard{node=Node, name=ShardName} = Shard) -> + Ref = rexi:cast(Node, {fabric_rpc, EndPoint, [ShardName | ExtraArgs]}), + Shard#shard{ref = Ref} + end, Shards). + +cleanup(Workers) -> + [rexi:kill(Node, Ref) || #shard{node=Node, ref=Ref} <- Workers]. + +recv(Workers, Keypos, Fun, Acc0) -> + receive_loop(Workers, Keypos, Fun, Acc0). + +receive_loop(Workers, Keypos, Fun, Acc0) -> + case couch_config:get("fabric", "request_timeout", "60000") of + "infinity" -> + Timeout = infinity; + N -> + Timeout = list_to_integer(N) + end, + receive_loop(Workers, Keypos, Fun, Acc0, Timeout, infinity). + +%% @doc set up the receive loop with an overall timeout +-spec receive_loop([any()], integer(), function(), any(), timeout(), timeout()) -> + {ok, any()} | timeout | {error, any()}. +receive_loop(RefPartMap, Keypos, Fun, Acc0, infinity, PerMsgTO) -> + process_mailbox(RefPartMap, Keypos, Fun, Acc0, nil, PerMsgTO); +receive_loop(RefPartMap, Keypos, Fun, Acc0, GlobalTimeout, PerMsgTO) -> + TimeoutRef = erlang:make_ref(), + {ok, TRef} = timer:send_after(GlobalTimeout, {timeout, TimeoutRef}), + try + process_mailbox(RefPartMap, Keypos, Fun, Acc0, TimeoutRef, PerMsgTO) + after + timer:cancel(TRef) + end. 
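%% This submit_jobs/recv pair is the pattern every coordinator module in this
%% patch follows; a minimal sketch, where the database name and the callback
%% are hypothetical:
%%
%%   Workers = fabric_util:submit_jobs(mem3:shards(<<"db">>), get_update_seq, []),
%%   Handler = fun(Seq, _Worker, Acc) ->
%%       %% return {ok, NewAcc} to keep listening, {stop, Result} to finish
%%       {stop, [Seq | Acc]}
%%   end,
%%   {ok, Seqs} = fabric_util:recv(Workers, #shard.ref, Handler, []).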
+ +process_mailbox(RefList, Keypos, Fun, Acc0, TimeoutRef, PerMsgTO) -> + case process_message(RefList, Keypos, Fun, Acc0, TimeoutRef, PerMsgTO) of + {ok, Acc} -> + process_mailbox(RefList, Keypos, Fun, Acc, TimeoutRef, PerMsgTO); + {stop, Acc} -> + {ok, Acc}; + Error -> + Error + end. + +process_message(RefList, Keypos, Fun, Acc0, TimeoutRef, PerMsgTO) -> + receive + {timeout, TimeoutRef} -> + timeout; + {Ref, Msg} -> + case lists:keyfind(Ref, Keypos, RefList) of + false -> + % this was some non-matching message which we will ignore + {ok, Acc0}; + Worker -> + Fun(Msg, Worker, Acc0) + end; + {Ref, From, Msg} -> + case lists:keyfind(Ref, Keypos, RefList) of + false -> + {ok, Acc0}; + Worker -> + Fun(Msg, {Worker, From}, Acc0) + end; + {rexi_DOWN, _RexiMonPid, ServerPid, Reason} = Msg -> + showroom_log:message(alert, "rexi_DOWN ~p ~p", [ServerPid, Reason]), + Fun(Msg, nil, Acc0) + after PerMsgTO -> + timeout + end. + +get_db(DbName) -> + Shards = mem3:shards(DbName), + case lists:partition(fun(#shard{node = N}) -> N =:= node() end, Shards) of + {[#shard{name = ShardName}|_], _} -> + % prefer node-local DBs + couch_db:open(ShardName, []); + {[], [#shard{node = Node, name = ShardName}|_]} -> + % but don't require them + rpc:call(Node, couch_db, open, [ShardName, []]) + end. diff --git a/apps/fabric/src/fabric_view.erl b/apps/fabric/src/fabric_view.erl new file mode 100644 index 00000000..49a3a55a --- /dev/null +++ b/apps/fabric/src/fabric_view.erl @@ -0,0 +1,218 @@ +-module(fabric_view). + +-export([is_progress_possible/1, remove_overlapping_shards/2, maybe_send_row/1, + maybe_pause_worker/3, maybe_resume_worker/2, transform_row/1, keydict/1, + extract_view/4]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +%% @doc looks for a fully covered keyrange in the list of counters +-spec is_progress_possible([{#shard{}, term()}]) -> boolean(). +is_progress_possible([]) -> + false; +is_progress_possible(Counters) -> + Ranges = fabric_dict:fold(fun(#shard{range=[X,Y]}, _, A) -> [{X,Y}|A] end, + [], Counters), + [{Start, Tail0} | Rest] = lists:ukeysort(1, Ranges), + Result = lists:foldl(fun + (_, fail) -> + % we've already declared failure + fail; + (_, complete) -> + % this is the success condition, we can fast-forward + complete; + ({X,_}, Tail) when X > (Tail+1) -> + % gap in the keyrange, we're dead + fail; + ({_,Y}, Tail) -> + case erlang:max(Tail, Y) of + End when (End+1) =:= (2 bsl 31) -> + complete; + Else -> + % the normal condition, adding to the tail + Else + end + end, if (Tail0+1) =:= (2 bsl 31) -> complete; true -> Tail0 end, Rest), + (Start =:= 0) andalso (Result =:= complete). + +-spec remove_overlapping_shards(#shard{}, [{#shard{}, any()}]) -> + [{#shard{}, any()}]. +remove_overlapping_shards(#shard{range=[A,B]} = Shard0, Shards) -> + fabric_dict:filter(fun(#shard{range=[X,Y]} = Shard, _Value) -> + if Shard =:= Shard0 -> + % we can't remove ourselves + true; + A < B, X >= A, X < B -> + % lower bound is inside our range + false; + A < B, Y > A, Y =< B -> + % upper bound is inside our range + false; + B < A, X >= A orelse B < A, X < B -> + % target shard wraps the key range, lower bound is inside + false; + B < A, Y > A orelse B < A, Y =< B -> + % target shard wraps the key range, upper bound is inside + false; + true -> + true + end + end, Shards). 
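%% A concrete reading of is_progress_possible/1 above: shard ranges are
%% [Start, End] pairs over the 32-bit key space, and progress is only possible
%% while the remaining workers still cover [0, 2 bsl 31) without a gap. For a
%% hypothetical two-range layout:
%%
%%   ranges [0, 16#7fffffff] and [16#80000000, 16#ffffffff]  -> true
%%   range  [0, 16#7fffffff] alone                           -> false (gap at the top)
%%
%% remove_overlapping_shards/2 above then drops redundant copies of a range
%% once one copy has started answering.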
+ +maybe_pause_worker(Worker, From, State) -> + #collector{buffer_size = BufferSize, counters = Counters} = State, + case fabric_dict:lookup_element(Worker, Counters) of + BufferSize -> + State#collector{blocked = [{Worker,From} | State#collector.blocked]}; + _Count -> + gen_server:reply(From, ok), + State + end. + +maybe_resume_worker(Worker, State) -> + #collector{buffer_size = Buffer, counters = C, blocked = B} = State, + case fabric_dict:lookup_element(Worker, C) of + Count when Count < Buffer/2 -> + case couch_util:get_value(Worker, B) of + undefined -> + State; + From -> + gen_server:reply(From, ok), + State#collector{blocked = lists:keydelete(Worker, 1, B)} + end; + _Other -> + State + end. + +maybe_send_row(#collector{limit=0} = State) -> + #collector{user_acc=AccIn, callback=Callback} = State, + {_, Acc} = Callback(complete, AccIn), + {stop, State#collector{user_acc=Acc}}; +maybe_send_row(State) -> + #collector{ + callback = Callback, + counters = Counters, + skip = Skip, + limit = Limit, + user_acc = AccIn + } = State, + case fabric_dict:any(0, Counters) of + true -> + {ok, State}; + false -> + try get_next_row(State) of + {_, NewState} when Skip > 0 -> + maybe_send_row(NewState#collector{skip=Skip-1, limit=Limit-1}); + {Row, NewState} -> + case Callback(transform_row(Row), AccIn) of + {stop, Acc} -> + {stop, NewState#collector{user_acc=Acc, limit=Limit-1}}; + {ok, Acc} -> + maybe_send_row(NewState#collector{user_acc=Acc, limit=Limit-1}) + end + catch complete -> + {_, Acc} = Callback(complete, AccIn), + {stop, State#collector{user_acc=Acc}} + end + end. + +keydict(nil) -> + undefined; +keydict(Keys) -> + {Dict,_} = lists:foldl(fun(K, {D,I}) -> {dict:store(K,I,D), I+1} end, + {dict:new(),0}, Keys), + Dict. + +%% internal %% + +get_next_row(#collector{rows = []}) -> + throw(complete); +get_next_row(#collector{reducer = RedSrc} = St) when RedSrc =/= undefined -> + #collector{ + query_args = #view_query_args{direction=Dir}, + keys = Keys, + rows = RowDict, + os_proc = Proc, + counters = Counters0 + } = St, + {Key, RestKeys} = find_next_key(Keys, Dir, RowDict), + case dict:find(Key, RowDict) of + {ok, Records} -> + NewRowDict = dict:erase(Key, RowDict), + Counters = lists:foldl(fun(#view_row{worker=Worker}, CountersAcc) -> + fabric_dict:update_counter(Worker, -1, CountersAcc) + end, Counters0, Records), + Wrapped = [[V] || #view_row{value=V} <- Records], + {ok, [Reduced]} = couch_query_servers:rereduce(Proc, [RedSrc], Wrapped), + NewSt = St#collector{keys=RestKeys, rows=NewRowDict, counters=Counters}, + {#view_row{key=Key, id=reduced, value=Reduced}, NewSt}; + error -> + get_next_row(St#collector{keys=RestKeys}) + end; +get_next_row(State) -> + #collector{rows = [Row|Rest], counters = Counters0} = State, + Worker = Row#view_row.worker, + Counters1 = fabric_dict:update_counter(Worker, -1, Counters0), + NewState = maybe_resume_worker(Worker, State#collector{counters=Counters1}), + {Row, NewState#collector{rows = Rest}}. + +find_next_key(nil, Dir, RowDict) -> + case lists:sort(sort_fun(Dir), dict:fetch_keys(RowDict)) of + [] -> + throw(complete); + [Key|_] -> + {Key, nil} + end; +find_next_key([], _, _) -> + throw(complete); +find_next_key([Key|Rest], _, _) -> + {Key, Rest}. 
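%% The pause/resume pair above is simple flow control: a worker's
%% rexi:sync_reply is left unanswered once buffer_size of its rows sit unread
%% in the merge queue, and it is acked again only after its queue drains below
%% half the buffer. With the default map_buffer_size of "2", for example, a
%% worker is parked after its second unconsumed row and resumed once its
%% counter is back to 0.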
+ +transform_row(#view_row{key=Key, id=reduced, value=Value}) -> + {row, {[{key,Key}, {value,Value}]}}; +transform_row(#view_row{key=Key, id=undefined}) -> + {row, {[{key,Key}, {error,not_found}]}}; +transform_row(#view_row{key=Key, id=Id, value=Value, doc=undefined}) -> + {row, {[{id,Id}, {key,Key}, {value,Value}]}}; +transform_row(#view_row{key=Key, id=Id, value=Value, doc={error,Reason}}) -> + {row, {[{id,Id}, {key,Key}, {value,Value}, {error,Reason}]}}; +transform_row(#view_row{key=Key, id=Id, value=Value, doc=Doc}) -> + {row, {[{id,Id}, {key,Key}, {value,Value}, {doc,Doc}]}}. + +sort_fun(fwd) -> + fun(A,A) -> true; (A,B) -> couch_view:less_json(A,B) end; +sort_fun(rev) -> + fun(A,A) -> true; (A,B) -> couch_view:less_json(B,A) end. + +extract_view(Pid, ViewName, [], _ViewType) -> + ?LOG_ERROR("missing_named_view ~p", [ViewName]), + exit(Pid, kill), + exit(missing_named_view); +extract_view(Pid, ViewName, [View|Rest], ViewType) -> + case lists:member(ViewName, view_names(View, ViewType)) of + true -> + if ViewType == reduce -> + {index_of(ViewName, view_names(View, reduce)), View}; + true -> + View + end; + false -> + extract_view(Pid, ViewName, Rest, ViewType) + end. + +view_names(View, Type) when Type == red_map; Type == reduce -> + [Name || {Name, _} <- View#view.reduce_funs]; +view_names(View, map) -> + View#view.map_names. + +index_of(X, List) -> + index_of(X, List, 1). + +index_of(_X, [], _I) -> + not_found; +index_of(X, [X|_Rest], I) -> + I; +index_of(X, [_|Rest], I) -> + index_of(X, Rest, I+1). diff --git a/apps/fabric/src/fabric_view_all_docs.erl b/apps/fabric/src/fabric_view_all_docs.erl new file mode 100644 index 00000000..d51a2831 --- /dev/null +++ b/apps/fabric/src/fabric_view_all_docs.erl @@ -0,0 +1,167 @@ +-module(fabric_view_all_docs). + +-export([go/4]). +-export([open_doc/3]). % exported for spawn + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, #view_query_args{keys=nil} = QueryArgs, Callback, Acc0) -> + Workers = lists:map(fun(#shard{name=Name, node=Node} = Shard) -> + Ref = rexi:cast(Node, {fabric_rpc, all_docs, [Name, QueryArgs]}), + Shard#shard{ref = Ref} + end, mem3:shards(DbName)), + BufferSize = couch_config:get("fabric", "map_buffer_size", "2"), + #view_query_args{limit = Limit, skip = Skip} = QueryArgs, + State = #collector{ + query_args = QueryArgs, + callback = Callback, + buffer_size = list_to_integer(BufferSize), + counters = fabric_dict:init(Workers, 0), + skip = Skip, + limit = Limit, + user_acc = Acc0 + }, + try fabric_util:receive_loop(Workers, #shard.ref, fun handle_message/3, + State, infinity, 5000) of + {ok, NewState} -> + {ok, NewState#collector.user_acc}; + Error -> + Error + after + fabric_util:cleanup(Workers) + end; + +go(DbName, QueryArgs, Callback, Acc0) -> + #view_query_args{ + direction = Dir, + include_docs = IncludeDocs, + limit = Limit0, + skip = Skip0, + keys = Keys + } = QueryArgs, + {_, Ref0} = spawn_monitor(fun() -> exit(fabric:get_doc_count(DbName)) end), + Monitors0 = [spawn_monitor(?MODULE, open_doc, [DbName, Id, IncludeDocs]) || + Id <- Keys], + Monitors = if Dir=:=fwd -> Monitors0; true -> lists:reverse(Monitors0) end, + receive {'DOWN', Ref0, _, _, {ok, TotalRows}} -> + {ok, Acc1} = Callback({total_and_offset, TotalRows, 0}, Acc0), + {ok, Acc2} = doc_receive_loop(Monitors, Skip0, Limit0, Callback, Acc1), + Callback(complete, Acc2) + after 10000 -> + Callback(timeout, Acc0) + end. 
+ +handle_message({rexi_DOWN, _, _, _}, nil, State) -> + % TODO see if progress can be made here, possibly by removing all shards + % from that node and checking is_progress_possible + {ok, State}; + +handle_message({rexi_EXIT, _}, Worker, State) -> + #collector{callback=Callback, counters=Counters0, user_acc=Acc} = State, + Counters = fabric_dict:erase(Worker, Counters0), + case fabric_view:is_progress_possible(Counters) of + true -> + {ok, State#collector{counters = Counters}}; + false -> + Callback({error, dead_shards}, Acc), + {error, dead_shards} + end; + +handle_message({total_and_offset, Tot, Off}, {Worker, From}, State) -> + #collector{ + callback = Callback, + counters = Counters0, + total_rows = Total0, + offset = Offset0, + user_acc = AccIn + } = State, + case fabric_dict:lookup_element(Worker, Counters0) of + undefined -> + % this worker lost the race with other partition copies, terminate + gen_server:reply(From, stop), + {ok, State}; + 0 -> + gen_server:reply(From, ok), + Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), + Counters2 = fabric_view:remove_overlapping_shards(Worker, Counters1), + Total = Total0 + Tot, + Offset = Offset0 + Off, + case fabric_dict:any(0, Counters2) of + true -> + {ok, State#collector{ + counters = Counters2, + total_rows = Total, + offset = Offset + }}; + false -> + FinalOffset = erlang:min(Total, Offset+State#collector.skip), + {Go, Acc} = Callback({total_and_offset, Total, FinalOffset}, AccIn), + {Go, State#collector{ + counters = fabric_dict:decrement_all(Counters2), + total_rows = Total, + offset = FinalOffset, + user_acc = Acc + }} + end + end; + +handle_message(#view_row{} = Row, {Worker, From}, State) -> + #collector{query_args = Args, counters = Counters0, rows = Rows0} = State, + Dir = Args#view_query_args.direction, + Rows = merge_row(Dir, Row#view_row{worker=Worker}, Rows0), + Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), + State1 = State#collector{rows=Rows, counters=Counters1}, + State2 = fabric_view:maybe_pause_worker(Worker, From, State1), + fabric_view:maybe_send_row(State2); + +handle_message(complete, Worker, State) -> + Counters = fabric_dict:update_counter(Worker, 1, State#collector.counters), + fabric_view:maybe_send_row(State#collector{counters = Counters}). + + +merge_row(fwd, Row, Rows) -> + lists:keymerge(#view_row.id, [Row], Rows); +merge_row(rev, Row, Rows) -> + lists:rkeymerge(#view_row.id, [Row], Rows). + +doc_receive_loop([], _, _, _, Acc) -> + {ok, Acc}; +doc_receive_loop(_, _, 0, _, Acc) -> + {ok, Acc}; +doc_receive_loop([{Pid,Ref}|Rest], Skip, Limit, Callback, Acc) when Skip > 0 -> + receive {'DOWN', Ref, process, Pid, #view_row{}} -> + doc_receive_loop(Rest, Skip-1, Limit-1, Callback, Acc) + after 10000 -> + timeout + end; +doc_receive_loop([{Pid,Ref}|Rest], 0, Limit, Callback, AccIn) -> + receive {'DOWN', Ref, process, Pid, #view_row{} = Row} -> + case Callback(fabric_view:transform_row(Row), AccIn) of + {ok, Acc} -> + doc_receive_loop(Rest, 0, Limit-1, Callback, Acc); + {stop, Acc} -> + {ok, Acc} + end + after 10000 -> + timeout + end. 
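%% A small arithmetic example for the total_and_offset clause above (per-shard
%% numbers are hypothetical): three ranges reporting {total_and_offset, 10, 3},
%% {total_and_offset, 8, 0} and {total_and_offset, 12, 5} give Total = 30 and
%% Offset = 8, and with skip = 0 the user callback is sent
%% {total_and_offset, 30, min(30, 8)} = {total_and_offset, 30, 8} before any
%% rows are forwarded.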
+ +open_doc(DbName, Id, IncludeDocs) -> + Row = case fabric:open_doc(DbName, Id, [deleted]) of + {not_found, missing} -> + Doc = undefined, + #view_row{key=Id}; + {ok, #doc{deleted=true, revs=Revs}} -> + Doc = null, + {RevPos, [RevId|_]} = Revs, + Value = {[{rev,couch_doc:rev_to_str({RevPos, RevId})}, {deleted,true}]}, + #view_row{key=Id, id=Id, value=Value}; + {ok, #doc{revs=Revs} = Doc0} -> + Doc = couch_doc:to_json_obj(Doc0, []), + {RevPos, [RevId|_]} = Revs, + Value = {[{rev,couch_doc:rev_to_str({RevPos, RevId})}]}, + #view_row{key=Id, id=Id, value=Value} + end, + exit(if IncludeDocs -> Row#view_row{doc=Doc}; true -> Row end). diff --git a/apps/fabric/src/fabric_view_changes.erl b/apps/fabric/src/fabric_view_changes.erl new file mode 100644 index 00000000..6030df1d --- /dev/null +++ b/apps/fabric/src/fabric_view_changes.erl @@ -0,0 +1,251 @@ +-module(fabric_view_changes). + +-export([go/5, start_update_notifier/1]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, Feed, Options, Callback, Acc0) when Feed == "continuous" orelse + Feed == "longpoll" -> + Args = make_changes_args(Options), + {ok, Acc} = Callback(start, Acc0), + Notifiers = start_update_notifiers(DbName), + {Timeout, TimeoutFun} = couch_changes:get_changes_timeout(Args, Callback), + try + keep_sending_changes( + DbName, + Args, + Callback, + get_start_seq(DbName, Args), + Acc, + Timeout, + TimeoutFun + ) + after + stop_update_notifiers(Notifiers), + couch_changes:get_rest_db_updated() + end; + +go(DbName, "normal", Options, Callback, Acc0) -> + Args = make_changes_args(Options), + {ok, Acc} = Callback(start, Acc0), + {ok, #collector{counters=Seqs, user_acc=AccOut}} = send_changes( + DbName, + Args, + Callback, + get_start_seq(DbName, Args), + Acc + ), + Callback({stop, pack_seqs(Seqs)}, AccOut). + +keep_sending_changes(DbName, Args, Callback, Seqs, AccIn, Timeout, TFun) -> + #changes_args{limit=Limit, feed=Feed} = Args, + {ok, Collector} = send_changes(DbName, Args, Callback, Seqs, AccIn), + #collector{limit=Limit2, counters=NewSeqs, user_acc=AccOut} = Collector, + LastSeq = pack_seqs(NewSeqs), + if Limit > Limit2, Feed == "longpoll" -> + Callback({stop, LastSeq}, AccOut); + true -> + case couch_changes:wait_db_updated(Timeout, TFun) of + updated -> + keep_sending_changes( + DbName, + Args#changes_args{limit=Limit2}, + Callback, + LastSeq, + AccIn, + Timeout, + TFun + ); + stop -> + Callback({stop, LastSeq}, AccOut) + end + end. 
+ +send_changes(DbName, ChangesArgs, Callback, PackedSeqs, AccIn) -> + AllShards = mem3:shards(DbName), + Seqs = lists:flatmap(fun({#shard{name=Name, node=N} = Shard, Seq}) -> + case lists:member(Shard, AllShards) of + true -> + Ref = rexi:cast(N, {fabric_rpc, changes, [Name,ChangesArgs,Seq]}), + [{Shard#shard{ref = Ref}, Seq}]; + false -> + % Find some replacement shards to cover the missing range + % TODO It's possible in rare cases of shard merging to end up + % with overlapping shard ranges from this technique + lists:map(fun(#shard{name=Name2, node=N2} = NewShard) -> + Ref = rexi:cast(N2, {fabric_rpc, changes, [Name2,ChangesArgs,0]}), + {NewShard#shard{ref = Ref}, 0} + end, find_replacement_shards(Shard, AllShards)) + end + end, unpack_seqs(PackedSeqs, DbName)), + {Workers, _} = lists:unzip(Seqs), + State = #collector{ + query_args = ChangesArgs, + callback = Callback, + counters = fabric_dict:init(Workers, 0), + user_acc = AccIn, + limit = ChangesArgs#changes_args.limit, + rows = Seqs % store sequence positions instead + }, + try fabric_util:receive_loop(Workers, #shard.ref, fun handle_message/3, + State, infinity, 5000) + after + fabric_util:cleanup(Workers) + end. + +handle_message({rexi_DOWN, _, _, _}, nil, State) -> + % TODO see if progress can be made here, possibly by removing all shards + % from that node and checking is_progress_possible + {ok, State}; + +handle_message({rexi_EXIT, Reason}, Worker, State) -> + ?LOG_ERROR("~p rexi_EXIT ~p", [?MODULE, Reason]), + #collector{ + callback=Callback, + counters=Counters0, + rows = Seqs0, + user_acc=Acc + } = State, + Counters = fabric_dict:erase(Worker, Counters0), + Seqs = fabric_dict:erase(Worker, Seqs0), + case fabric_view:is_progress_possible(Counters) of + true -> + {ok, State#collector{counters = Counters, rows=Seqs}}; + false -> + Callback({error, dead_shards}, Acc), + {error, dead_shards} + end; + +handle_message(_, _, #collector{limit=0} = State) -> + {stop, State}; + +handle_message(#view_row{key=Seq} = Row0, {Worker, From}, St) -> + #collector{ + query_args = #changes_args{include_docs=IncludeDocs}, + callback = Callback, + counters = S0, + limit = Limit, + user_acc = AccIn + } = St, + case fabric_dict:lookup_element(Worker, S0) of + undefined -> + % this worker lost the race with other partition copies, terminate it + gen_server:reply(From, stop), + {ok, St}; + _ -> + S1 = fabric_dict:store(Worker, Seq, S0), + S2 = fabric_view:remove_overlapping_shards(Worker, S1), + Row = Row0#view_row{key = pack_seqs(S2)}, + {Go, Acc} = Callback(changes_row(Row, IncludeDocs), AccIn), + gen_server:reply(From, Go), + {Go, St#collector{counters=S2, limit=Limit-1, user_acc=Acc}} + end; + +handle_message({complete, EndSeq}, Worker, State) -> + #collector{ + counters = S0, + total_rows = Completed % override + } = State, + case fabric_dict:lookup_element(Worker, S0) of + undefined -> + {ok, State}; + _ -> + S1 = fabric_dict:store(Worker, EndSeq, S0), + % unlikely to have overlaps here, but possible w/ filters + S2 = fabric_view:remove_overlapping_shards(Worker, S1), + NewState = State#collector{counters=S2, total_rows=Completed+1}, + case fabric_dict:size(S2) =:= (Completed+1) of + true -> + {stop, NewState}; + false -> + {ok, NewState} + end + end. 
+ +make_changes_args(#changes_args{style=main_only, filter=undefined}=Args) -> + Args#changes_args{filter = fun couch_changes:main_only_filter/1}; +make_changes_args(#changes_args{style=all_docs, filter=undefined}=Args) -> + Args#changes_args{filter = fun couch_changes:all_docs_filter/1}; +make_changes_args(Args) -> + Args. + +get_start_seq(_DbName, #changes_args{dir=fwd, since=Since}) -> + Since; +get_start_seq(DbName, #changes_args{dir=rev}) -> + Shards = mem3:shards(DbName), + Workers = fabric_util:submit_jobs(Shards, get_update_seq, []), + {ok, Since} = fabric_util:recv(Workers, #shard.ref, + fun collect_update_seqs/3, fabric_dict:init(Workers, -1)), + Since. + +collect_update_seqs(Seq, Shard, Counters) when is_integer(Seq) -> + case fabric_dict:lookup_element(Shard, Counters) of + undefined -> + % already heard from someone else in this range + {ok, Counters}; + -1 -> + C1 = fabric_dict:store(Shard, Seq, Counters), + C2 = fabric_view:remove_overlapping_shards(Shard, C1), + case fabric_dict:any(-1, C2) of + true -> + {ok, C2}; + false -> + {stop, pack_seqs(C2)} + end + end. + +pack_seqs(Workers) -> + SeqList = [{N,R,S} || {#shard{node=N, range=R}, S} <- Workers], + SeqSum = lists:sum(element(2, lists:unzip(Workers))), + Opaque = couch_util:encodeBase64Url(term_to_binary(SeqList, [compressed])), + list_to_binary([integer_to_list(SeqSum), $-, Opaque]). + +unpack_seqs(0, DbName) -> + fabric_dict:init(mem3:shards(DbName), 0); + +unpack_seqs("0", DbName) -> + fabric_dict:init(mem3:shards(DbName), 0); + +unpack_seqs(Packed, DbName) -> + {match, [Opaque]} = re:run(Packed, "^([0-9]+-)?(?<opaque>.*)", [{capture, + [opaque], binary}]), + % TODO relies on internal structure of fabric_dict as keylist + lists:map(fun({Node, [A,B], Seq}) -> + Shard = #shard{node=Node, range=[A,B], dbname=DbName}, + {mem3_util:name_shard(Shard), Seq} + end, binary_to_term(couch_util:decodeBase64Url(Opaque))). + +start_update_notifiers(DbName) -> + lists:map(fun(#shard{node=Node, name=Name}) -> + {Node, rexi:cast(Node, {?MODULE, start_update_notifier, [Name]})} + end, mem3:shards(DbName)). + +% rexi endpoint +start_update_notifier(DbName) -> + {Caller, _} = get(rexi_from), + Fun = fun({_, X}) when X == DbName -> Caller ! db_updated; (_) -> ok end, + Id = {couch_db_update_notifier, make_ref()}, + ok = gen_event:add_sup_handler(couch_db_update, Id, Fun), + receive {gen_event_EXIT, Id, Reason} -> + rexi:reply({gen_event_EXIT, DbName, Reason}) + end. + +stop_update_notifiers(Notifiers) -> + [rexi:kill(Node, Ref) || {Node, Ref} <- Notifiers]. + +changes_row(#view_row{key=Seq, id=Id, value=Value, doc=deleted}, true) -> + {change, {[{seq,Seq}, {id,Id}, {changes,Value}, {deleted, true}, {doc, null}]}}; +changes_row(#view_row{key=Seq, id=Id, value=Value, doc=deleted}, false) -> + {change, {[{seq,Seq}, {id,Id}, {changes,Value}, {deleted, true}]}}; +changes_row(#view_row{key=Seq, id=Id, value=Value, doc={error,Reason}}, true) -> + {change, {[{seq,Seq}, {id,Id}, {changes,Value}, {error,Reason}]}}; +changes_row(#view_row{key=Seq, id=Id, value=Value, doc=Doc}, true) -> + {change, {[{seq,Seq}, {id,Id}, {changes,Value}, {doc,Doc}]}}; +changes_row(#view_row{key=Seq, id=Id, value=Value}, false) -> + {change, {[{seq,Seq}, {id,Id}, {changes,Value}]}}. + +find_replacement_shards(#shard{range=Range}, AllShards) -> + % TODO make this moar betta -- we might have split or merged the partition + [Shard || Shard <- AllShards, Shard#shard.range =:= Range]. 
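%% The opaque sequence string produced by pack_seqs/1 above is the sum of the
%% per-shard sequences, a dash, and a base64url-encoded term_to_binary of
%% [{Node, Range, Seq}, ...]. For a hypothetical two-shard database at shard
%% sequences 7 and 8 it looks like
%%
%%   <<"15-g1AAAA...">>
%%
%% and unpack_seqs/2 strips the numeric prefix and decodes the rest to rebuild
%% the per-shard positions (falling back to 0 for brand-new feeds).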
diff --git a/apps/fabric/src/fabric_view_map.erl b/apps/fabric/src/fabric_view_map.erl new file mode 100644 index 00000000..ce8dd625 --- /dev/null +++ b/apps/fabric/src/fabric_view_map.erl @@ -0,0 +1,138 @@ +-module(fabric_view_map). + +-export([go/6]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, GroupId, View, Args, Callback, Acc0) when is_binary(GroupId) -> + {ok, DDoc} = fabric:open_doc(DbName, <<"_design/", GroupId/binary>>, []), + go(DbName, DDoc, View, Args, Callback, Acc0); + +go(DbName, DDoc, View, Args, Callback, Acc0) -> + Workers = lists:map(fun(#shard{name=Name, node=Node} = Shard) -> + Ref = rexi:cast(Node, {fabric_rpc, map_view, [Name, DDoc, View, Args]}), + Shard#shard{ref = Ref} + end, mem3:shards(DbName)), + BufferSize = couch_config:get("fabric", "map_buffer_size", "2"), + #view_query_args{limit = Limit, skip = Skip, keys = Keys} = Args, + State = #collector{ + query_args = Args, + callback = Callback, + buffer_size = list_to_integer(BufferSize), + counters = fabric_dict:init(Workers, 0), + skip = Skip, + limit = Limit, + keys = fabric_view:keydict(Keys), + sorted = Args#view_query_args.sorted, + user_acc = Acc0 + }, + try fabric_util:receive_loop(Workers, #shard.ref, fun handle_message/3, + State, infinity, 1000 * 60 * 60) of + {ok, NewState} -> + {ok, NewState#collector.user_acc}; + Error -> + Error + after + fabric_util:cleanup(Workers) + end. + +handle_message({rexi_DOWN, _, _, _}, nil, State) -> + % TODO see if progress can be made here, possibly by removing all shards + % from that node and checking is_progress_possible + {ok, State}; + +handle_message({rexi_EXIT, Reason}, Worker, State) -> + ?LOG_ERROR("~p rexi_EXIT ~p", [?MODULE, Reason]), + #collector{callback=Callback, counters=Counters0, user_acc=Acc} = State, + Counters = fabric_dict:erase(Worker, Counters0), + case fabric_view:is_progress_possible(Counters) of + true -> + {ok, State#collector{counters = Counters}}; + false -> + Callback({error, dead_shards}, Acc), + {error, dead_shards} + end; + +handle_message({total_and_offset, Tot, Off}, {Worker, From}, State) -> + #collector{ + callback = Callback, + counters = Counters0, + total_rows = Total0, + offset = Offset0, + user_acc = AccIn + } = State, + case fabric_dict:lookup_element(Worker, Counters0) of + undefined -> + % this worker lost the race with other partition copies, terminate + gen_server:reply(From, stop), + {ok, State}; + 0 -> + gen_server:reply(From, ok), + Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), + Counters2 = fabric_view:remove_overlapping_shards(Worker, Counters1), + Total = Total0 + Tot, + Offset = Offset0 + Off, + case fabric_dict:any(0, Counters2) of + true -> + {ok, State#collector{ + counters = Counters2, + total_rows = Total, + offset = Offset + }}; + false -> + FinalOffset = erlang:min(Total, Offset+State#collector.skip), + {Go, Acc} = Callback({total_and_offset, Total, FinalOffset}, AccIn), + {Go, State#collector{ + counters = fabric_dict:decrement_all(Counters2), + total_rows = Total, + offset = FinalOffset, + user_acc = Acc + }} + end + end; + +handle_message(#view_row{}, {_, _}, #collector{limit=0} = State) -> + #collector{callback=Callback} = State, + {_, Acc} = Callback(complete, State#collector.user_acc), + {stop, State#collector{user_acc=Acc}}; + +handle_message(#view_row{} = Row, {_,From}, #collector{sorted=false} = St) -> + #collector{callback=Callback, user_acc=AccIn, limit=Limit} = St, + {Go, Acc} = 
Callback(fabric_view:transform_row(Row), AccIn), + gen_server:reply(From, ok), + {Go, St#collector{user_acc=Acc, limit=Limit-1}}; + +handle_message(#view_row{} = Row, {Worker, From}, State) -> + #collector{ + query_args = #view_query_args{direction=Dir}, + counters = Counters0, + rows = Rows0, + keys = KeyDict + } = State, + Rows = merge_row(Dir, KeyDict, Row#view_row{worker=Worker}, Rows0), + Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), + State1 = State#collector{rows=Rows, counters=Counters1}, + State2 = fabric_view:maybe_pause_worker(Worker, From, State1), + fabric_view:maybe_send_row(State2); + +handle_message(complete, Worker, State) -> + Counters = fabric_dict:update_counter(Worker, 1, State#collector.counters), + fabric_view:maybe_send_row(State#collector{counters = Counters}). + +merge_row(fwd, undefined, Row, Rows) -> + lists:merge(fun(#view_row{key=KeyA, id=IdA}, #view_row{key=KeyB, id=IdB}) -> + couch_view:less_json([KeyA, IdA], [KeyB, IdB]) + end, [Row], Rows); +merge_row(rev, undefined, Row, Rows) -> + lists:merge(fun(#view_row{key=KeyA, id=IdA}, #view_row{key=KeyB, id=IdB}) -> + couch_view:less_json([KeyB, IdB], [KeyA, IdA]) + end, [Row], Rows); +merge_row(_, KeyDict, Row, Rows) -> + lists:merge(fun(#view_row{key=A, id=IdA}, #view_row{key=B, id=IdB}) -> + if A =:= B -> IdA < IdB; true -> + dict:fetch(A, KeyDict) < dict:fetch(B, KeyDict) + end + end, [Row], Rows). + diff --git a/apps/fabric/src/fabric_view_reduce.erl b/apps/fabric/src/fabric_view_reduce.erl new file mode 100644 index 00000000..ddde9f22 --- /dev/null +++ b/apps/fabric/src/fabric_view_reduce.erl @@ -0,0 +1,85 @@ +-module(fabric_view_reduce). + +-export([go/6]). + +-include("fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +go(DbName, GroupId, View, Args, Callback, Acc0) when is_binary(GroupId) -> + {ok, DDoc} = fabric:open_doc(DbName, <<"_design/", GroupId/binary>>, []), + go(DbName, DDoc, View, Args, Callback, Acc0); + +go(DbName, DDoc, VName, Args, Callback, Acc0) -> + #group{def_lang=Lang, views=Views} = Group = + couch_view_group:design_doc_to_view_group(#db{name=DbName}, DDoc), + {NthRed, View} = fabric_view:extract_view(nil, VName, Views, reduce), + {VName, RedSrc} = lists:nth(NthRed, View#view.reduce_funs), + Workers = lists:map(fun(#shard{name=Name, node=N} = Shard) -> + Ref = rexi:cast(N, {fabric_rpc, reduce_view, [Name,Group,VName,Args]}), + Shard#shard{ref = Ref} + end, mem3:shards(DbName)), + BufferSize = couch_config:get("fabric", "reduce_buffer_size", "20"), + #view_query_args{limit = Limit, skip = Skip} = Args, + State = #collector{ + query_args = Args, + callback = Callback, + buffer_size = list_to_integer(BufferSize), + counters = fabric_dict:init(Workers, 0), + keys = Args#view_query_args.keys, + skip = Skip, + limit = Limit, + lang = Group#group.def_lang, + os_proc = couch_query_servers:get_os_process(Lang), + reducer = RedSrc, + rows = dict:new(), + user_acc = Acc0 + }, + try fabric_util:receive_loop(Workers, #shard.ref, fun handle_message/3, + State, infinity, 1000 * 60 * 60) of + {ok, NewState} -> + {ok, NewState#collector.user_acc}; + Error -> + Error + after + fabric_util:cleanup(Workers), + catch couch_query_servers:ret_os_process(State#collector.os_proc) + end. 
+ +handle_message({rexi_DOWN, _, _, _}, nil, State) -> + % TODO see if progress can be made here, possibly by removing all shards + % from that node and checking is_progress_possible + {ok, State}; + +handle_message({rexi_EXIT, Reason}, Worker, State) -> + ?LOG_ERROR("~p rexi_EXIT ~p", [?MODULE, Reason]), + #collector{callback=Callback, counters=Counters0, user_acc=Acc} = State, + Counters = fabric_dict:erase(Worker, Counters0), + case fabric_view:is_progress_possible(Counters) of + true -> + {ok, State#collector{counters = Counters}}; + false -> + Callback({error, dead_shards}, Acc), + {error, dead_shards} + end; + +handle_message(#view_row{key=Key} = Row, {Worker, From}, State) -> + #collector{counters = Counters0, rows = Rows0} = State, + case fabric_dict:lookup_element(Worker, Counters0) of + undefined -> + % this worker lost the race with other partition copies, terminate it + gen_server:reply(From, stop), + {ok, State}; + _ -> + Rows = dict:append(Key, Row#view_row{worker=Worker}, Rows0), + C1 = fabric_dict:update_counter(Worker, 1, Counters0), + % TODO time this call, if slow don't do it every time + C2 = fabric_view:remove_overlapping_shards(Worker, C1), + State1 = State#collector{rows=Rows, counters=C2}, + State2 = fabric_view:maybe_pause_worker(Worker, From, State1), + fabric_view:maybe_send_row(State2) + end; + +handle_message(complete, Worker, State) -> + Counters = fabric_dict:update_counter(Worker, 1, State#collector.counters), + fabric_view:maybe_send_row(State#collector{counters = Counters}). diff --git a/apps/ibrowse/include/ibrowse.hrl b/apps/ibrowse/include/ibrowse.hrl new file mode 100644 index 00000000..ebf3bb33 --- /dev/null +++ b/apps/ibrowse/include/ibrowse.hrl @@ -0,0 +1,12 @@ +-ifndef(IBROWSE_HRL). +-define(IBROWSE_HRL, "ibrowse.hrl"). + +-record(url, {abspath, host, port, username, password, path, protocol}). + +-record(lb_pid, {host_port, pid}). + +-record(client_conn, {key, cur_pipeline_size = 0, reqs_served = 0}). + +-record(ibrowse_conf, {key, value}). + +-endif. diff --git a/apps/ibrowse/src/ibrowse.app.src b/apps/ibrowse/src/ibrowse.app.src new file mode 100644 index 00000000..4f43dd92 --- /dev/null +++ b/apps/ibrowse/src/ibrowse.app.src @@ -0,0 +1,13 @@ +{application, ibrowse, + [{description, "HTTP client application"}, + {vsn, "1.5.1"}, + {modules, [ ibrowse, + ibrowse_http_client, + ibrowse_app, + ibrowse_sup, + ibrowse_lib, + ibrowse_lb ]}, + {registered, []}, + {applications, [kernel,stdlib,sasl]}, + {env, []}, + {mod, {ibrowse_app, []}}]}. diff --git a/apps/ibrowse/src/ibrowse.erl b/apps/ibrowse/src/ibrowse.erl new file mode 100644 index 00000000..1913ef59 --- /dev/null +++ b/apps/ibrowse/src/ibrowse.erl @@ -0,0 +1,760 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse.erl +%%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : Load balancer process for HTTP client connections. +%%% +%%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +%% @author Chandrashekhar Mullaparthi <chandrashekhar dot mullaparthi at gmail dot com> +%% @copyright 2005-2009 Chandrashekhar Mullaparthi +%% @version 1.5.2 +%% @doc The ibrowse application implements an HTTP 1.1 client. This +%% module implements the API of the HTTP client. There is one named +%% process called 'ibrowse' which assists in load balancing and maintaining configuration. 
There is one load balancing process per unique webserver. There is +%% one process to handle one TCP connection to a webserver +%% (implemented in the module ibrowse_http_client). Multiple connections to a +%% webserver are setup based on the settings for each webserver. The +%% ibrowse process also determines which connection to pipeline a +%% certain request on. The functions to call are send_req/3, +%% send_req/4, send_req/5, send_req/6. +%% +%% <p>Here are a few sample invocations.</p> +%% +%% <code> +%% ibrowse:send_req("http://intranet/messenger/", [], get). +%% <br/><br/> +%% +%% ibrowse:send_req("http://www.google.com/", [], get, [], +%% [{proxy_user, "XXXXX"}, +%% {proxy_password, "XXXXX"}, +%% {proxy_host, "proxy"}, +%% {proxy_port, 8080}], 1000). +%% <br/><br/> +%% +%%ibrowse:send_req("http://www.erlang.org/download/otp_src_R10B-3.tar.gz", [], get, [], +%% [{proxy_user, "XXXXX"}, +%% {proxy_password, "XXXXX"}, +%% {proxy_host, "proxy"}, +%% {proxy_port, 8080}, +%% {save_response_to_file, true}], 1000). +%% <br/><br/> +%% +%% ibrowse:send_req("http://www.erlang.org", [], head). +%% +%% <br/><br/> +%% ibrowse:send_req("http://www.sun.com", [], options). +%% +%% <br/><br/> +%% ibrowse:send_req("http://www.bbc.co.uk", [], trace). +%% +%% <br/><br/> +%% ibrowse:send_req("http://www.google.com", [], get, [], +%% [{stream_to, self()}]). +%% </code> +%% +%% <p>A driver exists which implements URL encoding in C, but the +%% speed achieved using only erlang has been good enough, so the +%% driver isn't actually used.</p> + +-module(ibrowse). +-vsn('$Id: ibrowse.erl,v 1.8 2009/07/01 22:43:19 chandrusf Exp $ '). + +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([start_link/0, start/0, stop/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% API interface +-export([ + rescan_config/0, + rescan_config/1, + get_config_value/1, + get_config_value/2, + spawn_worker_process/2, + spawn_link_worker_process/2, + stop_worker_process/1, + send_req/3, + send_req/4, + send_req/5, + send_req/6, + send_req_direct/4, + send_req_direct/5, + send_req_direct/6, + send_req_direct/7, + stream_next/1, + set_max_sessions/3, + set_max_pipeline_size/3, + set_dest/3, + trace_on/0, + trace_off/0, + trace_on/2, + trace_off/2, + all_trace_off/0, + show_dest_status/0, + show_dest_status/2 + ]). + +-ifdef(debug). +-compile(export_all). +-endif. + +-import(ibrowse_lib, [ + parse_url/1, + get_value/3, + do_trace/2 + ]). + +-record(state, {trace = false}). + +-include("ibrowse.hrl"). +-include_lib("stdlib/include/ms_transform.hrl"). + +-define(DEF_MAX_SESSIONS,10). +-define(DEF_MAX_PIPELINE_SIZE,10). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +%% @doc Starts the ibrowse process linked to the calling process. Usually invoked by the supervisor ibrowse_sup +%% @spec start_link() -> {ok, pid()} +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 
+ +%% @doc Starts the ibrowse process without linking. Useful when testing using the shell +start() -> + gen_server:start({local, ?MODULE}, ?MODULE, [], [{debug, []}]). + +%% @doc Stop the ibrowse process. Useful when testing using the shell. +stop() -> + catch gen_server:call(ibrowse, stop). + +%% @doc This is the basic function to send a HTTP request. +%% The Status return value indicates the HTTP status code returned by the webserver +%% @spec send_req(Url::string(), Headers::headerList(), Method::method()) -> response() +%% headerList() = [{header(), value()}] +%% header() = atom() | string() +%% value() = term() +%% method() = get | post | head | options | put | delete | trace | mkcol | propfind | proppatch | lock | unlock | move | copy +%% Status = string() +%% ResponseHeaders = [respHeader()] +%% respHeader() = {headerName(), headerValue()} +%% headerName() = string() +%% headerValue() = string() +%% response() = {ok, Status, ResponseHeaders, ResponseBody} | {ibrowse_req_id, req_id() } | {error, Reason} +%% req_id() = term() +%% ResponseBody = string() | {file, Filename} +%% Reason = term() +send_req(Url, Headers, Method) -> + send_req(Url, Headers, Method, [], []). + +%% @doc Same as send_req/3. +%% If a list is specified for the body it has to be a flat list. The body can also be a fun/0 or a fun/1. <br/> +%% If fun/0, the connection handling process will repeatdely call the fun until it returns an error or eof. <pre>Fun() = {ok, Data} | eof</pre><br/> +%% If fun/1, the connection handling process will repeatedly call the fun with the supplied state until it returns an error or eof. <pre>Fun(State) = {ok, Data} | {ok, Data, NewState} | eof</pre> +%% @spec send_req(Url, Headers, Method::method(), Body::body()) -> response() +%% body() = [] | string() | binary() | fun_arity_0() | {fun_arity_1(), initial_state()} +%% initial_state() = term() +send_req(Url, Headers, Method, Body) -> + send_req(Url, Headers, Method, Body, []). + +%% @doc Same as send_req/4. +%% For a description of SSL Options, look in the <a href="http://www.erlang.org/doc/apps/ssl/index.html">ssl</a> manpage. If the +%% HTTP Version to use is not specified, the default is 1.1. +%% <br/> +%% <p>The <code>host_header</code> option is useful in the case where ibrowse is +%% connecting to a component such as <a +%% href="http://www.stunnel.org">stunnel</a> which then sets up a +%% secure connection to a webserver. In this case, the URL supplied to +%% ibrowse must have the stunnel host/port details, but that won't +%% make sense to the destination webserver. This option can then be +%% used to specify what should go in the <code>Host</code> header in +%% the request.</p> +%% <ul> +%% <li>The <code>stream_to</code> option can be used to have the HTTP +%% response streamed to a process as messages as data arrives on the +%% socket. If the calling process wishes to control the rate at which +%% data is received from the server, the option <code>{stream_to, +%% {process(), once}}</code> can be specified. The calling process +%% will have to invoke <code>ibrowse:stream_next(Request_id)</code> to +%% receive the next packet.</li> +%% +%% <li>When both the options <code>save_response_to_file</code> and <code>stream_to</code> +%% are specified, the former takes precedence.</li> +%% +%% <li>For the <code>save_response_to_file</code> option, the response body is saved to +%% file only if the status code is in the 200-299 range. 
If not, the response body is returned +%% as a string.</li> +%% <li>Whenever an error occurs in the processing of a request, ibrowse will return as much +%% information as it has, such as HTTP Status Code and HTTP Headers. When this happens, the response +%% is of the form <code>{error, {Reason, {stat_code, StatusCode}, HTTP_headers}}</code></li> +%% +%% <li>The <code>inactivity_timeout</code> option is useful when +%% dealing with large response bodies and/or slow links. In these +%% cases, it might be hard to estimate how long a request will take to +%% complete. In such cases, the client might want to timeout if no +%% data has been received on the link for a certain time interval.</li> +%% +%% <li> +%% The <code>connect_timeout</code> option is to specify how long the +%% client process should wait for connection establishment. This is +%% useful in scenarios where connections to servers are usually setup +%% very fast, but responses might take much longer compared to +%% connection setup. In such cases, it is better for the calling +%% process to timeout faster if there is a problem (DNS lookup +%% delays/failures, network routing issues, etc). The total timeout +%% value specified for the request will enforced. To illustrate using +%% an example: +%% <code> +%% ibrowse:send_req("http://www.example.com/cgi-bin/request", [], get, [], [{connect_timeout, 100}], 1000). +%% </code> +%% In the above invocation, if the connection isn't established within +%% 100 milliseconds, the request will fail with +%% <code>{error, conn_failed}</code>.<br/> +%% If connection setup succeeds, the total time allowed for the +%% request to complete will be 1000 milliseconds minus the time taken +%% for connection setup. +%% </li> +%% </ul> +%% +%% @spec send_req(Url::string(), Headers::headerList(), Method::method(), Body::body(), Options::optionList()) -> response() +%% optionList() = [option()] +%% option() = {max_sessions, integer()} | +%% {response_format,response_format()}| +%% {stream_chunk_size, integer()} | +%% {max_pipeline_size, integer()} | +%% {trace, boolean()} | +%% {is_ssl, boolean()} | +%% {ssl_options, [SSLOpt]} | +%% {pool_name, atom()} | +%% {proxy_host, string()} | +%% {proxy_port, integer()} | +%% {proxy_user, string()} | +%% {proxy_password, string()} | +%% {use_absolute_uri, boolean()} | +%% {basic_auth, {username(), password()}} | +%% {cookie, string()} | +%% {content_length, integer()} | +%% {content_type, string()} | +%% {save_response_to_file, srtf()} | +%% {stream_to, stream_to()} | +%% {http_vsn, {MajorVsn, MinorVsn}} | +%% {host_header, string()} | +%% {inactivity_timeout, integer()} | +%% {connect_timeout, integer()} | +%% {transfer_encoding, {chunked, ChunkSize}} +%% +%% stream_to() = process() | {process(), once} +%% process() = pid() | atom() +%% username() = string() +%% password() = string() +%% SSLOpt = term() +%% ChunkSize = integer() +%% srtf() = boolean() | filename() +%% filename() = string() +%% response_format() = list | binary +send_req(Url, Headers, Method, Body, Options) -> + send_req(Url, Headers, Method, Body, Options, 30000). + +%% @doc Same as send_req/5. +%% All timeout values are in milliseconds. 
+%% @spec send_req(Url, Headers::headerList(), Method::method(), Body::body(), Options::optionList(), Timeout) -> response() +%% Timeout = integer() | infinity +send_req(Url, Headers, Method, Body, Options, Timeout) -> + case catch parse_url(Url) of + #url{host = Host, + port = Port, + protocol = Protocol} = Parsed_url -> + Lb_pid = case ets:lookup(ibrowse_lb, {Host, Port}) of + [] -> + get_lb_pid(Parsed_url); + [#lb_pid{pid = Lb_pid_1}] -> + Lb_pid_1 + end, + Max_sessions = get_max_sessions(Host, Port, Options), + Max_pipeline_size = get_max_pipeline_size(Host, Port, Options), + Options_1 = merge_options(Host, Port, Options), + {SSLOptions, IsSSL} = + case (Protocol == https) orelse + get_value(is_ssl, Options_1, false) of + false -> {[], false}; + true -> {get_value(ssl_options, Options_1, []), true} + end, + case ibrowse_lb:spawn_connection(Lb_pid, Parsed_url, + Max_sessions, + Max_pipeline_size, + {SSLOptions, IsSSL}) of + {ok, Conn_Pid} -> + do_send_req(Conn_Pid, Parsed_url, Headers, + Method, Body, Options_1, Timeout); + Err -> + Err + end; + Err -> + {error, {url_parsing_failed, Err}} + end. + +merge_options(Host, Port, Options) -> + Config_options = get_config_value({options, Host, Port}, []), + lists:foldl( + fun({Key, Val}, Acc) -> + case lists:keysearch(Key, 1, Options) of + false -> + [{Key, Val} | Acc]; + _ -> + Acc + end + end, Options, Config_options). + +get_lb_pid(Url) -> + gen_server:call(?MODULE, {get_lb_pid, Url}). + +get_max_sessions(Host, Port, Options) -> + get_value(max_sessions, Options, + get_config_value({max_sessions, Host, Port}, ?DEF_MAX_SESSIONS)). + +get_max_pipeline_size(Host, Port, Options) -> + get_value(max_pipeline_size, Options, + get_config_value({max_pipeline_size, Host, Port}, ?DEF_MAX_PIPELINE_SIZE)). + +%% @doc Deprecated. Use set_max_sessions/3 and set_max_pipeline_size/3 +%% for achieving the same effect. +set_dest(Host, Port, [{max_sessions, Max} | T]) -> + set_max_sessions(Host, Port, Max), + set_dest(Host, Port, T); +set_dest(Host, Port, [{max_pipeline_size, Max} | T]) -> + set_max_pipeline_size(Host, Port, Max), + set_dest(Host, Port, T); +set_dest(Host, Port, [{trace, Bool} | T]) when Bool == true; Bool == false -> + ibrowse ! {trace, true, Host, Port}, + set_dest(Host, Port, T); +set_dest(_Host, _Port, [H | _]) -> + exit({invalid_option, H}); +set_dest(_, _, []) -> + ok. + +%% @doc Set the maximum number of connections allowed to a specific Host:Port. +%% @spec set_max_sessions(Host::string(), Port::integer(), Max::integer()) -> ok +set_max_sessions(Host, Port, Max) when is_integer(Max), Max > 0 -> + gen_server:call(?MODULE, {set_config_value, {max_sessions, Host, Port}, Max}). + +%% @doc Set the maximum pipeline size for each connection to a specific Host:Port. +%% @spec set_max_pipeline_size(Host::string(), Port::integer(), Max::integer()) -> ok +set_max_pipeline_size(Host, Port, Max) when is_integer(Max), Max > 0 -> + gen_server:call(?MODULE, {set_config_value, {max_pipeline_size, Host, Port}, Max}). + +do_send_req(Conn_Pid, Parsed_url, Headers, Method, Body, Options, Timeout) -> + case catch ibrowse_http_client:send_req(Conn_Pid, Parsed_url, + Headers, Method, ensure_bin(Body), + Options, Timeout) of + {'EXIT', {timeout, _}} -> + {error, req_timedout}; + {'EXIT', Reason} -> + {error, {'EXIT', Reason}}; + {ok, St_code, Headers, Body} = Ret when is_binary(Body) -> + case get_value(response_format, Options, list) of + list -> + {ok, St_code, Headers, binary_to_list(Body)}; + binary -> + Ret + end; + Ret -> + Ret + end. 
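A short usage sketch of the per-destination limits and the response_format handling defined above; the host, port and URL are hypothetical and the ibrowse application is assumed to be running:

    ok = ibrowse:set_max_sessions("intranet", 80, 20),
    ok = ibrowse:set_max_pipeline_size("intranet", 80, 5),
    %% with response_format set to binary, do_send_req/7 returns the body untouched
    {ok, "200", _Headers, Body} =
        ibrowse:send_req("http://intranet/", [], get, [],
                         [{response_format, binary}], 5000),
    true = is_binary(Body).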
+ +ensure_bin(L) when is_list(L) -> list_to_binary(L); +ensure_bin(B) when is_binary(B) -> B; +ensure_bin(Fun) when is_function(Fun) -> Fun; +ensure_bin({Fun}) when is_function(Fun) -> Fun; +ensure_bin({Fun, _} = Body) when is_function(Fun) -> Body. + +%% @doc Creates a HTTP client process to the specified Host:Port which +%% is not part of the load balancing pool. This is useful in cases +%% where some requests to a webserver might take a long time whereas +%% some might take a very short time. To avoid getting these quick +%% requests stuck in the pipeline behind time consuming requests, use +%% this function to get a handle to a connection process. <br/> +%% <b>Note:</b> Calling this function only creates a worker process. No connection +%% is setup. The connection attempt is made only when the first +%% request is sent via any of the send_req_direct/4,5,6,7 functions.<br/> +%% <b>Note:</b> It is the responsibility of the calling process to control +%% pipeline size on such connections. +%% +%% @spec spawn_worker_process(Host::string(), Port::integer()) -> {ok, pid()} +spawn_worker_process(Host, Port) -> + ibrowse_http_client:start({Host, Port}). + +%% @doc Same as spawn_worker_process/2 except the the calling process +%% is linked to the worker process which is spawned. +spawn_link_worker_process(Host, Port) -> + ibrowse_http_client:start_link({Host, Port}). + +%% @doc Terminate a worker process spawned using +%% spawn_worker_process/2 or spawn_link_worker_process/2. Requests in +%% progress will get the error response <pre>{error, closing_on_request}</pre> +%% @spec stop_worker_process(Conn_pid::pid()) -> ok +stop_worker_process(Conn_pid) -> + ibrowse_http_client:stop(Conn_pid). + +%% @doc Same as send_req/3 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method) -> + send_req_direct(Conn_pid, Url, Headers, Method, [], []). + +%% @doc Same as send_req/4 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method, Body) -> + send_req_direct(Conn_pid, Url, Headers, Method, Body, []). + +%% @doc Same as send_req/5 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method, Body, Options) -> + send_req_direct(Conn_pid, Url, Headers, Method, Body, Options, 30000). + +%% @doc Same as send_req/6 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method, Body, Options, Timeout) -> + case catch parse_url(Url) of + #url{host = Host, + port = Port} = Parsed_url -> + Options_1 = merge_options(Host, Port, Options), + case do_send_req(Conn_pid, Parsed_url, Headers, Method, Body, Options_1, Timeout) of + {error, {'EXIT', {noproc, _}}} -> + {error, worker_is_dead}; + Ret -> + Ret + end; + Err -> + {error, {url_parsing_failed, Err}} + end. + +%% @doc Tell ibrowse to stream the next chunk of data to the +%% caller. Should be used in conjunction with the +%% <code>stream_to</code> option +%% @spec stream_next(Req_id :: req_id()) -> ok | {error, unknown_req_id} +stream_next(Req_id) -> + case ets:lookup(ibrowse_stream, {req_id_pid, Req_id}) of + [] -> + {error, unknown_req_id}; + [{_, Pid}] -> + catch Pid ! {stream_next, Req_id}, + ok + end. 
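A minimal sketch of the flow-controlled streaming described in the send_req/5 documentation above, combining {stream_to, {Pid, once}} with stream_next/1. The URL is hypothetical, error handling is omitted, and it assumes the usual ibrowse_async_headers / ibrowse_async_response messages delivered by the connection process:

    {ibrowse_req_id, ReqId} =
        ibrowse:send_req("http://intranet/large.bin", [], get, [],
                         [{stream_to, {self(), once}},
                          {response_format, binary}], 60000),
    receive {ibrowse_async_headers, ReqId, _Status, _Headers} -> ok end,
    %% ask for the next packet only when we are ready for it
    ok = ibrowse:stream_next(ReqId),
    receive {ibrowse_async_response, ReqId, FirstChunk} -> FirstChunk end.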
+ +%% @doc Turn tracing on for the ibrowse process +trace_on() -> + ibrowse ! {trace, true}. +%% @doc Turn tracing off for the ibrowse process +trace_off() -> + ibrowse ! {trace, false}. + +%% @doc Turn tracing on for all connections to the specified HTTP +%% server. Host is whatever is specified as the domain name in the URL +%% @spec trace_on(Host, Port) -> ok +%% Host = string() +%% Port = integer() +trace_on(Host, Port) -> + ibrowse ! {trace, true, Host, Port}, + ok. + +%% @doc Turn tracing OFF for all connections to the specified HTTP +%% server. +%% @spec trace_off(Host, Port) -> ok +trace_off(Host, Port) -> + ibrowse ! {trace, false, Host, Port}, + ok. + +%% @doc Turn Off ALL tracing +%% @spec all_trace_off() -> ok +all_trace_off() -> + ibrowse ! all_trace_off, + ok. + +show_dest_status() -> + Dests = lists:filter(fun({lb_pid, {Host, Port}, _}) when is_list(Host), + is_integer(Port) -> + true; + (_) -> + false + end, ets:tab2list(ibrowse_lb)), + All_ets = ets:all(), + io:format("~-40.40s | ~-5.5s | ~-10.10s | ~s~n", + ["Server:port", "ETS", "Num conns", "LB Pid"]), + io:format("~80.80.=s~n", [""]), + lists:foreach(fun({lb_pid, {Host, Port}, Lb_pid}) -> + case lists:dropwhile( + fun(Tid) -> + ets:info(Tid, owner) /= Lb_pid + end, All_ets) of + [] -> + io:format("~40.40s | ~-5.5s | ~-5.5s | ~s~n", + [Host ++ ":" ++ integer_to_list(Port), + "", + "", + io_lib:format("~p", [Lb_pid])] + ); + [Tid | _] -> + catch ( + begin + Size = ets:info(Tid, size), + io:format("~40.40s | ~-5.5s | ~-5.5s | ~s~n", + [Host ++ ":" ++ integer_to_list(Port), + integer_to_list(Tid), + integer_to_list(Size), + io_lib:format("~p", [Lb_pid])] + ) + end + ) + end + end, Dests). + +%% @doc Shows some internal information about load balancing to a +%% specified Host:Port. Info about workers spawned using +%% spawn_worker_process/2 or spawn_link_worker_process/2 is not +%% included. +show_dest_status(Host, Port) -> + case ets:lookup(ibrowse_lb, {Host, Port}) of + [] -> + no_active_processes; + [#lb_pid{pid = Lb_pid}] -> + io:format("Load Balancer Pid : ~p~n", [Lb_pid]), + io:format("LB process msg q size : ~p~n", [(catch process_info(Lb_pid, message_queue_len))]), + case lists:dropwhile( + fun(Tid) -> + ets:info(Tid, owner) /= Lb_pid + end, ets:all()) of + [] -> + io:format("Couldn't locate ETS table for ~p~n", [Lb_pid]); + [Tid | _] -> + First = ets:first(Tid), + Last = ets:last(Tid), + Size = ets:info(Tid, size), + io:format("LB ETS table id : ~p~n", [Tid]), + io:format("Num Connections : ~p~n", [Size]), + case Size of + 0 -> + ok; + _ -> + {First_p_sz, _} = First, + {Last_p_sz, _} = Last, + io:format("Smallest pipeline : ~1000.p~n", [First_p_sz]), + io:format("Largest pipeline : ~1000.p~n", [Last_p_sz]) + end + end + end. + +%% @doc Clear current configuration for ibrowse and load from the file +%% ibrowse.conf in the IBROWSE_EBIN/../priv directory. Current +%% configuration is cleared only if the ibrowse.conf file is readable +%% using file:consult/1 +rescan_config() -> + gen_server:call(?MODULE, rescan_config). + +%% Clear current configuration for ibrowse and load from the specified +%% file. Current configuration is cleared only if the specified +%% file is readable using file:consult/1 +rescan_config(File) when is_list(File) -> + gen_server:call(?MODULE, {rescan_config, File}). 
+ +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%%-------------------------------------------------------------------- +init(_) -> + process_flag(trap_exit, true), + State = #state{}, + put(my_trace_flag, State#state.trace), + put(ibrowse_trace_token, "ibrowse"), + ets:new(ibrowse_lb, [named_table, public, {keypos, 2}]), + ets:new(ibrowse_conf, [named_table, protected, {keypos, 2}]), + ets:new(ibrowse_stream, [named_table, public]), + import_config(), + {ok, #state{}}. + +import_config() -> + case code:priv_dir(ibrowse) of + {error, _} = Err -> + Err; + PrivDir -> + Filename = filename:join(PrivDir, "ibrowse.conf"), + import_config(Filename) + end. + +import_config(Filename) -> + case file:consult(Filename) of + {ok, Terms} -> + ets:delete_all_objects(ibrowse_conf), + Fun = fun({dest, Host, Port, MaxSess, MaxPipe, Options}) + when is_list(Host), is_integer(Port), + is_integer(MaxSess), MaxSess > 0, + is_integer(MaxPipe), MaxPipe > 0, is_list(Options) -> + I = [{{max_sessions, Host, Port}, MaxSess}, + {{max_pipeline_size, Host, Port}, MaxPipe}, + {{options, Host, Port}, Options}], + lists:foreach( + fun({X, Y}) -> + ets:insert(ibrowse_conf, + #ibrowse_conf{key = X, + value = Y}) + end, I); + ({K, V}) -> + ets:insert(ibrowse_conf, + #ibrowse_conf{key = K, + value = V}); + (X) -> + io:format("Skipping unrecognised term: ~p~n", [X]) + end, + lists:foreach(Fun, Terms); + Err -> + Err + end. + +%% @doc Internal export +get_config_value(Key) -> + [#ibrowse_conf{value = V}] = ets:lookup(ibrowse_conf, Key), + V. + +%% @doc Internal export +get_config_value(Key, DefVal) -> + case ets:lookup(ibrowse_conf, Key) of + [] -> + DefVal; + [#ibrowse_conf{value = V}] -> + V + end. + +set_config_value(Key, Val) -> + ets:insert(ibrowse_conf, #ibrowse_conf{key = Key, value = Val}). +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | (terminate/2 is called) +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_call({get_lb_pid, #url{host = Host, port = Port} = Url}, _From, State) -> + Pid = do_get_connection(Url, ets:lookup(ibrowse_lb, {Host, Port})), + {reply, Pid, State}; + +handle_call(stop, _From, State) -> + do_trace("IBROWSE shutting down~n", []), + {stop, normal, ok, State}; + +handle_call({set_config_value, Key, Val}, _From, State) -> + set_config_value(Key, Val), + {reply, ok, State}; + +handle_call(rescan_config, _From, State) -> + Ret = (catch import_config()), + {reply, Ret, State}; + +handle_call({rescan_config, File}, _From, State) -> + Ret = (catch import_config(File)), + {reply, Ret, State}; + +handle_call(Request, _From, State) -> + Reply = {unknown_request, Request}, + {reply, Reply, State}. 
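For reference, import_config/1 above reads the configuration with file:consult/1; a hypothetical ibrowse.conf showing the two term shapes it accepts, a {dest, Host, Port, MaxSess, MaxPipe, Options} tuple and a plain {Key, Value} pair (max_headers_size is the key consulted by parse_response further down):

    %% ibrowse.conf (hypothetical contents)
    {dest, "intranet", 80, 10, 5, [{is_ssl, false}]}.
    {max_headers_size, 65536}.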
+ +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- + +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_info(all_trace_off, State) -> + Mspec = [{{ibrowse_conf,{trace,'$1','$2'},true},[],[{{'$1','$2'}}]}], + Trace_on_dests = ets:select(ibrowse_conf, Mspec), + Fun = fun(#lb_pid{host_port = {H, P}, pid = Pid}, _) -> + case lists:member({H, P}, Trace_on_dests) of + false -> + ok; + true -> + catch Pid ! {trace, false} + end; + (_, Acc) -> + Acc + end, + ets:foldl(Fun, undefined, ibrowse_lb), + ets:select_delete(ibrowse_conf, [{{ibrowse_conf,{trace,'$1','$2'},true},[],['true']}]), + {noreply, State}; + +handle_info({trace, Bool}, State) -> + put(my_trace_flag, Bool), + {noreply, State}; + +handle_info({trace, Bool, Host, Port}, State) -> + Fun = fun(#lb_pid{host_port = {H, P}, pid = Pid}, _) + when H == Host, + P == Port -> + catch Pid ! {trace, Bool}; + (_, Acc) -> + Acc + end, + ets:foldl(Fun, undefined, ibrowse_lb), + ets:insert(ibrowse_conf, #ibrowse_conf{key = {trace, Host, Port}, + value = Bool}), + {noreply, State}; + +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- +do_get_connection(#url{host = Host, port = Port}, []) -> + {ok, Pid} = ibrowse_lb:start_link([Host, Port]), + ets:insert(ibrowse_lb, #lb_pid{host_port = {Host, Port}, pid = Pid}), + Pid; +do_get_connection(_Url, [#lb_pid{pid = Pid}]) -> + Pid. diff --git a/apps/ibrowse/src/ibrowse_app.erl b/apps/ibrowse/src/ibrowse_app.erl new file mode 100644 index 00000000..8c83e8f1 --- /dev/null +++ b/apps/ibrowse/src/ibrowse_app.erl @@ -0,0 +1,64 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_app.erl +%%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : +%%% +%%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_app). +-vsn('$Id: ibrowse_app.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(application). 
+%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +%%-------------------------------------------------------------------- +-export([ + start/2, + stop/1 + ]). + +%%-------------------------------------------------------------------- +%% Internal exports +%%-------------------------------------------------------------------- +-export([ + ]). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Records +%%-------------------------------------------------------------------- + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Func: start/2 +%% Returns: {ok, Pid} | +%% {ok, Pid, State} | +%% {error, Reason} +%%-------------------------------------------------------------------- +start(_Type, _StartArgs) -> + case ibrowse_sup:start_link() of + {ok, Pid} -> + {ok, Pid}; + Error -> + Error + end. + +%%-------------------------------------------------------------------- +%% Func: stop/1 +%% Returns: any +%%-------------------------------------------------------------------- +stop(_State) -> + ok. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/apps/ibrowse/src/ibrowse_http_client.erl b/apps/ibrowse/src/ibrowse_http_client.erl new file mode 100644 index 00000000..65d9cb9c --- /dev/null +++ b/apps/ibrowse/src/ibrowse_http_client.erl @@ -0,0 +1,1476 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_http_client.erl +%%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : The name says it all +%%% +%%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_http_client). +-vsn('$Id: ibrowse_http_client.erl,v 1.19 2009/07/01 22:43:19 chandrusf Exp $ '). + +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([ + start_link/1, + start/1, + stop/1, + send_req/7 + ]). + +-ifdef(debug). +-compile(export_all). +-endif. + +%% gen_server callbacks +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 + ]). + +-include("ibrowse.hrl"). + +-record(state, {host, port, + use_proxy = false, proxy_auth_digest, + ssl_options = [], is_ssl = false, socket, + reqs=queue:new(), cur_req, status=idle, http_status_code, + reply_buffer = <<>>, rep_buf_size=0, streamed_size = 0, + recvd_headers=[], + is_closing, send_timer, content_length, + deleted_crlf = false, transfer_encoding, + chunk_size, chunk_size_buffer = <<>>, recvd_chunk_size, + lb_ets_tid, cur_pipeline_size = 0, prev_req_id + }). 
+ +-record(request, {url, method, options, from, + stream_to, caller_controls_socket = false, + req_id, + stream_chunk_size, + save_response_to_file = false, + tmp_file_name, tmp_file_fd, + response_format}). + +-import(ibrowse_lib, [ + get_value/2, + get_value/3, + do_trace/2 + ]). + +-define(DEFAULT_STREAM_CHUNK_SIZE, 1024*1024). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +start(Args) -> + gen_server:start(?MODULE, Args, []). + +start_link(Args) -> + gen_server:start_link(?MODULE, Args, []). + +stop(Conn_pid) -> + gen_server:call(Conn_pid, stop). + +send_req(Conn_Pid, Url, Headers, Method, Body, Options, Timeout) -> + gen_server:call( + Conn_Pid, + {send_req, {Url, Headers, Method, Body, Options, Timeout}}, Timeout). + +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%%-------------------------------------------------------------------- +init({Lb_Tid, #url{host = Host, port = Port}, {SSLOptions, Is_ssl}}) -> + State = #state{host = Host, + port = Port, + ssl_options = SSLOptions, + is_ssl = Is_ssl, + lb_ets_tid = Lb_Tid}, + put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), + put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), + {ok, State}; +init({Host, Port}) -> + State = #state{host = Host, + port = Port}, + put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), + put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), + {ok, State}; +init(#url{host=Host, port=Port, protocol=Protocol}) -> + State = #state{ + host = Host, + port = Port, + is_ssl = (Protocol == https), + ssl_options = [{ssl_imp, new}, {depth, 9}] + }, + put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), + put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), + {ok, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | (terminate/2 is called) +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +%% Received a request when the remote server has already sent us a +%% Connection: Close header +handle_call({send_req, _}, _From, #state{is_closing = true} = State) -> + {reply, {error, connection_closing}, State}; + +handle_call({send_req, {Url, Headers, Method, Body, Options, Timeout}}, + From, State) -> + send_req_1(From, Url, Headers, Method, Body, Options, Timeout, State); + +handle_call(stop, _From, State) -> + do_close(State), + do_error_reply(State, closing_on_request), + {stop, normal, ok, State#state{socket=undefined}}; + +handle_call(Request, _From, State) -> + Reply = {unknown_request, Request}, + {reply, Reply, State}. 
+ +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_info({tcp, _Sock, Data}, #state{status = Status} = State) -> + do_trace("Data recvd in state: ~p. Size: ~p. ~p~n~n", [Status, size(Data), Data]), + handle_sock_data(Data, State); +handle_info({ssl, _Sock, Data}, State) -> + handle_sock_data(Data, State); + +handle_info({stream_next, Req_id}, #state{socket = Socket, + is_ssl = Is_ssl, + cur_req = #request{req_id = Req_id}} = State) -> + do_setopts(Socket, [{active, once}], Is_ssl), + {noreply, State}; + +handle_info({stream_next, _Req_id}, State) -> + {noreply, State}; + +handle_info({tcp_closed, _Sock}, State) -> + do_trace("TCP connection closed by peer!~n", []), + handle_sock_closed(State), + {stop, normal, State}; +handle_info({ssl_closed, _Sock}, State) -> + do_trace("SSL connection closed by peer!~n", []), + handle_sock_closed(State), + {stop, normal, State}; + +handle_info({tcp_error, _Sock}, State) -> + io:format("Error on connection to ~1000.p:~1000.p~n", [State#state.host, State#state.port]), + handle_sock_closed(State), + {stop, normal, State}; +handle_info({ssl_error, _Sock}, State) -> + io:format("Error on SSL connection to ~1000.p:~1000.p~n", [State#state.host, State#state.port]), + handle_sock_closed(State), + {stop, normal, State}; + +handle_info({req_timedout, From}, State) -> + case lists:keysearch(From, #request.from, queue:to_list(State#state.reqs)) of + false -> + {noreply, State}; + {value, _} -> + shutting_down(State), + do_error_reply(State, req_timedout), + {stop, normal, State} + end; + +handle_info(timeout, State) -> + shutting_down(State), + do_error_reply(State, req_timedout), + {stop, normal, State}; + +handle_info({trace, Bool}, State) -> + put(my_trace_flag, Bool), + {noreply, State}; + +handle_info(Info, State) -> + io:format("Unknown message recvd for ~1000.p:~1000.p -> ~p~n", + [State#state.host, State#state.port, Info]), + io:format("Recvd unknown message ~p when in state: ~p~n", [Info, State]), + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, State) -> + do_close(State). + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
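send_req/7 above is a plain gen_server:call into this connection process, so a worker created with ibrowse:spawn_worker_process/2 (described earlier) can be driven directly; a sketch with a hypothetical host and port:

    {ok, Pid} = ibrowse:spawn_worker_process("intranet", 80),
    {ok, "200", _Hdrs, _Body} =
        ibrowse:send_req_direct(Pid, "http://intranet/", [], get, [], [], 10000),
    ok = ibrowse:stop_worker_process(Pid).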
+ +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Handles data recvd on the socket +%%-------------------------------------------------------------------- +handle_sock_data(Data, #state{status=idle}=State) -> + do_trace("Data recvd on socket in state idle!. ~1000.p~n", [Data]), + shutting_down(State), + do_error_reply(State, data_in_status_idle), + do_close(State), + {stop, normal, State}; + +handle_sock_data(Data, #state{status = get_header}=State) -> + case parse_response(Data, State) of + {error, _Reason} -> + shutting_down(State), + {stop, normal, State}; + stop -> + shutting_down(State), + {stop, normal, State}; + State_1 -> + active_once(State_1), + {noreply, State_1, get_inac_timeout(State_1)} + end; + +handle_sock_data(Data, #state{status = get_body, + content_length = CL, + http_status_code = StatCode, + recvd_headers = Headers, + chunk_size = CSz} = State) -> + case (CL == undefined) and (CSz == undefined) of + true -> + case accumulate_response(Data, State) of + {error, Reason} -> + shutting_down(State), + fail_pipelined_requests(State, + {error, {Reason, {stat_code, StatCode}, Headers}}), + {stop, normal, State}; + State_1 -> + active_once(State_1), + {noreply, State_1, get_inac_timeout(State_1)} + end; + _ -> + case parse_11_response(Data, State) of + {error, Reason} -> + shutting_down(State), + fail_pipelined_requests(State, + {error, {Reason, {stat_code, StatCode}, Headers}}), + {stop, normal, State}; + stop -> + shutting_down(State), + {stop, normal, State}; + State_1 -> + active_once(State_1), + {noreply, State_1, get_inac_timeout(State_1)} + end + end. 
+ +accumulate_response(Data, + #state{ + cur_req = #request{save_response_to_file = true, + tmp_file_fd = undefined} = CurReq, + http_status_code=[$2 | _]}=State) -> + TmpFilename = make_tmp_filename(), + case file:open(TmpFilename, [write, delayed_write, raw]) of + {ok, Fd} -> + accumulate_response(Data, State#state{ + cur_req = CurReq#request{ + tmp_file_fd = Fd, + tmp_file_name = TmpFilename}}); + {error, Reason} -> + {error, {file_open_error, Reason}} + end; +accumulate_response(Data, #state{cur_req = #request{save_response_to_file = true, + tmp_file_fd = Fd}, + transfer_encoding=chunked, + reply_buffer = Reply_buf, + http_status_code=[$2 | _] + } = State) -> + case file:write(Fd, [Reply_buf, Data]) of + ok -> + State#state{reply_buffer = <<>>}; + {error, Reason} -> + {error, {file_write_error, Reason}} + end; +accumulate_response(Data, #state{cur_req = #request{save_response_to_file = true, + tmp_file_fd = Fd}, + reply_buffer = RepBuf, + http_status_code=[$2 | _] + } = State) -> + case file:write(Fd, [RepBuf, Data]) of + ok -> + State#state{reply_buffer = <<>>}; + {error, Reason} -> + {error, {file_write_error, Reason}} + end; +accumulate_response(<<>>, State) -> + State; +accumulate_response(Data, #state{reply_buffer = RepBuf, + rep_buf_size = RepBufSize, + streamed_size = Streamed_size, + cur_req = CurReq}=State) -> + #request{stream_to=StreamTo, req_id=ReqId, + stream_chunk_size = Stream_chunk_size, + response_format = Response_format, + caller_controls_socket = Caller_controls_socket} = CurReq, + RepBuf_1 = list_to_binary([RepBuf, Data]), + New_data_size = RepBufSize - Streamed_size, + case StreamTo of + undefined -> + State#state{reply_buffer = RepBuf_1}; + _ when Caller_controls_socket == true -> + do_interim_reply(StreamTo, Response_format, ReqId, RepBuf_1), + State#state{reply_buffer = <<>>, + streamed_size = Streamed_size + size(RepBuf_1)}; + _ when New_data_size >= Stream_chunk_size -> + {Stream_chunk, Rem_data} = split_binary(RepBuf_1, Stream_chunk_size), + do_interim_reply(StreamTo, Response_format, ReqId, Stream_chunk), + accumulate_response( + Rem_data, + State#state{ + reply_buffer = <<>>, + streamed_size = Streamed_size + Stream_chunk_size}); + _ -> + State#state{reply_buffer = RepBuf_1} + end. + +make_tmp_filename() -> + DownloadDir = ibrowse:get_config_value(download_dir, filename:absname("./")), + {A,B,C} = now(), + filename:join([DownloadDir, + "ibrowse_tmp_file_"++ + integer_to_list(A) ++ + integer_to_list(B) ++ + integer_to_list(C)]). + + +%%-------------------------------------------------------------------- +%% Handles the case when the server closes the socket +%%-------------------------------------------------------------------- +handle_sock_closed(#state{status=get_header}=State) -> + shutting_down(State), + do_error_reply(State, connection_closed); + +handle_sock_closed(#state{cur_req=undefined} = State) -> + shutting_down(State); + +%% We check for IsClosing because this the server could have sent a +%% Connection-Close header and has closed the socket to indicate end +%% of response. There maybe requests pipelined which need a response. 
+handle_sock_closed(#state{reply_buffer = Buf, reqs = Reqs, http_status_code = SC, + is_closing = IsClosing, + cur_req = #request{tmp_file_name=TmpFilename, + tmp_file_fd=Fd} = CurReq, + status = get_body, recvd_headers = Headers}=State) -> + #request{from=From, stream_to=StreamTo, req_id=ReqId, + response_format = Resp_format} = CurReq, + case IsClosing of + true -> + {_, Reqs_1} = queue:out(Reqs), + case TmpFilename of + undefined -> + do_reply(State, From, StreamTo, ReqId, Resp_format, + {ok, SC, Headers, Buf}); + _ -> + file:close(Fd), + do_reply(State, From, StreamTo, ReqId, Resp_format, + {ok, SC, Headers, {file, TmpFilename}}) + end, + do_error_reply(State#state{reqs = Reqs_1}, connection_closed), + State; + _ -> + do_error_reply(State, connection_closed), + State + end. + +do_connect(Host, Port, _Options, #state{is_ssl=true, ssl_options=SSLOptions}, Timeout) -> + ssl:connect(Host, Port, + [binary, {nodelay, true}, {active, false} | SSLOptions], + Timeout); +do_connect(Host, Port, _Options, _State, Timeout) -> + gen_tcp:connect(Host, Port, + [binary, {nodelay, true}, {active, false}], + Timeout). + +do_send(Req, #state{socket = Sock, is_ssl = true}) -> ssl:send(Sock, Req); +do_send(Req, #state{socket = Sock, is_ssl = false}) -> gen_tcp:send(Sock, Req). + +%% @spec do_send_body(Sock::socket_descriptor(), Source::source_descriptor(), IsSSL::boolean()) -> ok | error() +%% source_descriptor() = fun_arity_0 | +%% {fun_arity_0} | +%% {fun_arity_1, term()} +%% error() = term() +do_send_body(Source, State) when is_function(Source) -> + do_send_body({Source}, State); +do_send_body({Source}, State) when is_function(Source) -> + do_send_body1(Source, Source(), State); +do_send_body({Source, Source_state}, State) when is_function(Source) -> + do_send_body1(Source, Source(Source_state), State); +do_send_body(Body, State) -> + do_send(Body, State). + +do_send_body1(Source, Resp, State) -> + case Resp of + {ok, Data} -> + do_send(Data, State), + do_send_body({Source}, State); + {ok, Data, New_source_state} -> + do_send(Data, State), + do_send_body({Source, New_source_state}, State); + eof -> + ok; + Err -> + Err + end. + +do_close(#state{socket = undefined}) -> ok; +do_close(#state{socket = Sock, is_ssl = true}) -> ssl:close(Sock); +do_close(#state{socket = Sock, is_ssl = false}) -> gen_tcp:close(Sock). + +active_once(#state{cur_req = #request{caller_controls_socket = true}}) -> + ok; +active_once(#state{socket = Socket, is_ssl = Is_ssl}) -> + do_setopts(Socket, [{active, once}], Is_ssl). + +do_setopts(Sock, Opts, true) -> ssl:setopts(Sock, Opts); +do_setopts(Sock, Opts, false) -> inet:setopts(Sock, Opts). + +check_ssl_options(Options, State) -> + case get_value(is_ssl, Options, false) of + false -> + State; + true -> + State#state{is_ssl=true, ssl_options=get_value(ssl_options, Options)} + end. 
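A small sketch of a body source accepted by do_send_body/2 above, in the {fun_arity_1, State} form from the send_req/4 documentation: do_send_body1/3 keeps calling the fun with the updated state until it returns eof. The chunk contents and count are made up, and the caller supplies the Content-Length header itself since make_request/7 below does not add one for fun-based bodies:

    BodyFun = {fun(0) -> eof;
                  (N) -> {ok, <<"chunk-of-data">>, N - 1}  % 13 bytes per call
               end, 3},
    ibrowse:send_req("http://intranet/upload", [{"Content-Length", "39"}],
                     put, BodyFun, [], 30000).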
+ +send_req_1(From, + #url{host = Host, + port = Port} = Url, + Headers, Method, Body, Options, Timeout, + #state{socket = undefined} = State) -> + {Host_1, Port_1, State_1} = + case get_value(proxy_host, Options, false) of + false -> + {Host, Port, State}; + PHost -> + ProxyUser = get_value(proxy_user, Options, []), + ProxyPassword = get_value(proxy_password, Options, []), + Digest = http_auth_digest(ProxyUser, ProxyPassword), + {PHost, get_value(proxy_port, Options, 80), + State#state{use_proxy = true, + proxy_auth_digest = Digest}} + end, + State_2 = check_ssl_options(Options, State_1), + do_trace("Connecting...~n", []), + Start_ts = now(), + Conn_timeout = get_value(connect_timeout, Options, Timeout), + case do_connect(Host_1, Port_1, Options, State_2, Conn_timeout) of + {ok, Sock} -> + do_trace("Connected!~n", []), + End_ts = now(), + Timeout_1 = case Timeout of + infinity -> + infinity; + _ -> + Timeout - trunc(round(timer:now_diff(End_ts, Start_ts) / 1000)) + end, + State_3 = State_2#state{socket = Sock}, + send_req_1(From, Url, Headers, Method, Body, Options, Timeout_1, State_3); + Err -> + shutting_down(State_2), + do_trace("Error connecting. Reason: ~1000.p~n", [Err]), + gen_server:reply(From, {error, conn_failed}), + {stop, normal, State_2} + end; +send_req_1(From, + #url{abspath = AbsPath, + host = Host, + port = Port, + path = RelPath} = Url, + Headers, Method, Body, Options, Timeout, + #state{status = Status} = State) -> + ReqId = make_req_id(), + Resp_format = get_value(response_format, Options, list), + {StreamTo, Caller_controls_socket} = + case get_value(stream_to, Options, undefined) of + {Caller, once} when is_pid(Caller) or + is_atom(Caller) -> + Async_pid_rec = {{req_id_pid, ReqId}, self()}, + true = ets:insert(ibrowse_stream, Async_pid_rec), + {Caller, true}; + undefined -> + {undefined, false}; + Caller when is_pid(Caller) or + is_atom(Caller) -> + {Caller, false}; + Stream_to_inv -> + exit({invalid_option, {stream_to, Stream_to_inv}}) + end, + SaveResponseToFile = get_value(save_response_to_file, Options, false), + NewReq = #request{url = Url, + method = Method, + stream_to = StreamTo, + caller_controls_socket = Caller_controls_socket, + options = Options, + req_id = ReqId, + save_response_to_file = SaveResponseToFile, + stream_chunk_size = get_stream_chunk_size(Options), + response_format = Resp_format, + from = From}, + State_1 = State#state{reqs=queue:in(NewReq, State#state.reqs)}, + Headers_1 = add_auth_headers(Url, Options, Headers, State), + HostHeaderValue = case lists:keysearch(host_header, 1, Options) of + false -> + case Port of + 80 -> Host; + _ -> [Host, ":", integer_to_list(Port)] + end; + {value, {_, Host_h_val}} -> + Host_h_val + end, + {Req, Body_1} = make_request(Method, + [{"Host", HostHeaderValue} | Headers_1], + AbsPath, RelPath, Body, Options, State#state.use_proxy), + case get(my_trace_flag) of + true -> + %%Avoid the binary operations if trace is not on... 
+ NReq = binary_to_list(list_to_binary(Req)), + do_trace("Sending request: ~n" + "--- Request Begin ---~n~s~n" + "--- Request End ---~n", [NReq]); + _ -> ok + end, + case do_send(Req, State) of + ok -> + case do_send_body(Body_1, State) of + ok -> + State_2 = inc_pipeline_counter(State_1), + active_once(State_1), + Ref = case Timeout of + infinity -> + undefined; + _ -> + erlang:send_after(Timeout, self(), {req_timedout, From}) + end, + State_3 = case Status of + idle -> + State_2#state{status = get_header, + cur_req = NewReq, + send_timer = Ref}; + _ -> + State_2#state{send_timer = Ref} + end, + case StreamTo of + undefined -> + ok; + _ -> + gen_server:reply(From, {ibrowse_req_id, ReqId}) + end, + {noreply, State_3, get_inac_timeout(State_3)}; + Err -> + shutting_down(State_1), + do_trace("Send failed... Reason: ~p~n", [Err]), + gen_server:reply(From, {error, send_failed}), + {stop, normal, State_1} + end; + Err -> + shutting_down(State_1), + do_trace("Send failed... Reason: ~p~n", [Err]), + gen_server:reply(From, {error, send_failed}), + {stop, normal, State_1} + end. + +add_auth_headers(#url{username = User, + password = UPw}, + Options, + Headers, + #state{use_proxy = UseProxy, + proxy_auth_digest = ProxyAuthDigest}) -> + Headers_1 = case User of + undefined -> + case get_value(basic_auth, Options, undefined) of + undefined -> + Headers; + {U,P} -> + [{"Authorization", ["Basic ", http_auth_digest(U, P)]} | Headers] + end; + _ -> + [{"Authorization", ["Basic ", http_auth_digest(User, UPw)]} | Headers] + end, + case UseProxy of + false -> + Headers_1; + true when ProxyAuthDigest == [] -> + Headers_1; + true -> + [{"Proxy-Authorization", ["Basic ", ProxyAuthDigest]} | Headers_1] + end. + +http_auth_digest([], []) -> + []; +http_auth_digest(Username, Password) -> + encode_base64(Username ++ [$: | Password]). + +encode_base64([]) -> + []; +encode_base64([A]) -> + [e(A bsr 2), e((A band 3) bsl 4), $=, $=]; +encode_base64([A,B]) -> + [e(A bsr 2), e(((A band 3) bsl 4) bor (B bsr 4)), e((B band 15) bsl 2), $=]; +encode_base64([A,B,C|Ls]) -> + encode_base64_do(A,B,C, Ls). +encode_base64_do(A,B,C, Rest) -> + BB = (A bsl 16) bor (B bsl 8) bor C, + [e(BB bsr 18), e((BB bsr 12) band 63), + e((BB bsr 6) band 63), e(BB band 63)|encode_base64(Rest)]. + +e(X) when X >= 0, X < 26 -> X+65; +e(X) when X>25, X<52 -> X+71; +e(X) when X>51, X<62 -> X-4; +e(62) -> $+; +e(63) -> $/; +e(X) -> exit({bad_encode_base64_token, X}). + +make_request(Method, Headers, AbsPath, RelPath, Body, Options, UseProxy) -> + HttpVsn = http_vsn_string(get_value(http_vsn, Options, {1,1})), + Headers_1 = + case get_value(content_length, Headers, false) of + false when (Body == []) or + (Body == <<>>) or + is_tuple(Body) or + is_function(Body) -> + Headers; + false when is_binary(Body) -> + [{"content-length", integer_to_list(size(Body))} | Headers]; + false -> + [{"content-length", integer_to_list(length(Body))} | Headers]; + _ -> + Headers + end, + {Headers_2, Body_1} = + case get_value(transfer_encoding, Options, false) of + false -> + {Headers_1, Body}; + {chunked, ChunkSize} -> + {[{X, Y} || {X, Y} <- Headers_1, + X /= "Content-Length", + X /= "content-length", + X /= content_length] ++ + [{"Transfer-Encoding", "chunked"}], + chunk_request_body(Body, ChunkSize)} + end, + Headers_3 = cons_headers(Headers_2), + Uri = case get_value(use_absolute_uri, Options, false) or UseProxy of + true -> + AbsPath; + false -> + RelPath + end, + {[method(Method), " ", Uri, " ", HttpVsn, crnl(), Headers_3, crnl()], Body_1}. 
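To make add_auth_headers/4 and http_auth_digest/2 above concrete: with hypothetical credentials, http_auth_digest/2 Base64-encodes "User:Password" and the result is spliced into an Authorization header, e.g.

    %% hypothetical credentials
    "YWxhZGRpbjpvcGVuc2VzYW1l" = http_auth_digest("aladdin", "opensesame"),
    %% add_auth_headers/4 then prepends
    %% {"Authorization", ["Basic ", "YWxhZGRpbjpvcGVuc2VzYW1l"]}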
+ +http_vsn_string({0,9}) -> "HTTP/0.9"; +http_vsn_string({1,0}) -> "HTTP/1.0"; +http_vsn_string({1,1}) -> "HTTP/1.1". + +cons_headers(Headers) -> + cons_headers(Headers, []). +cons_headers([], Acc) -> + encode_headers(Acc); +cons_headers([{basic_auth, {U,P}} | T], Acc) -> + cons_headers(T, [{"Authorization", + ["Basic ", ibrowse_lib:encode_base64(U++":"++P)]} | Acc]); +cons_headers([{cookie, Cookie} | T], Acc) -> + cons_headers(T, [{"Cookie", Cookie} | Acc]); +cons_headers([{content_length, L} | T], Acc) -> + cons_headers(T, [{"Content-Length", L} | Acc]); +cons_headers([{content_type, L} | T], Acc) -> + cons_headers(T, [{"Content-Type", L} | Acc]); +cons_headers([H | T], Acc) -> + cons_headers(T, [H | Acc]). + +encode_headers(L) -> + encode_headers(L, []). +encode_headers([{http_vsn, _Val} | T], Acc) -> + encode_headers(T, Acc); +encode_headers([{Name,Val} | T], Acc) when is_list(Name) -> + encode_headers(T, [[Name, ": ", fmt_val(Val), crnl()] | Acc]); +encode_headers([{Name,Val} | T], Acc) when is_atom(Name) -> + encode_headers(T, [[atom_to_list(Name), ": ", fmt_val(Val), crnl()] | Acc]); +encode_headers([], Acc) -> + lists:reverse(Acc). + +chunk_request_body(Body, ChunkSize) -> + chunk_request_body(Body, ChunkSize, []). + +chunk_request_body(Body, _ChunkSize, Acc) when Body == <<>>; Body == [] -> + LastChunk = "0\r\n", + lists:reverse(["\r\n", LastChunk | Acc]); +chunk_request_body(Body, ChunkSize, Acc) when is_binary(Body), + size(Body) >= ChunkSize -> + <<ChunkBody:ChunkSize/binary, Rest/binary>> = Body, + Chunk = [ibrowse_lib:dec2hex(4, ChunkSize),"\r\n", + ChunkBody, "\r\n"], + chunk_request_body(Rest, ChunkSize, [Chunk | Acc]); +chunk_request_body(Body, _ChunkSize, Acc) when is_binary(Body) -> + BodySize = size(Body), + Chunk = [ibrowse_lib:dec2hex(4, BodySize),"\r\n", + Body, "\r\n"], + LastChunk = "0\r\n", + lists:reverse(["\r\n", LastChunk, Chunk | Acc]); +chunk_request_body(Body, ChunkSize, Acc) when is_list(Body), + length(Body) >= ChunkSize -> + {ChunkBody, Rest} = split_list_at(Body, ChunkSize), + Chunk = [ibrowse_lib:dec2hex(4, ChunkSize),"\r\n", + ChunkBody, "\r\n"], + chunk_request_body(Rest, ChunkSize, [Chunk | Acc]); +chunk_request_body(Body, _ChunkSize, Acc) when is_list(Body) -> + BodySize = length(Body), + Chunk = [ibrowse_lib:dec2hex(4, BodySize),"\r\n", + Body, "\r\n"], + LastChunk = "0\r\n", + lists:reverse(["\r\n", LastChunk, Chunk | Acc]). 
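+
+%% Illustrative sketch, not part of the original module: the framing produced
+%% by chunk_request_body/2 for a small binary body. The body and chunk size
+%% are made-up values for the example.
+chunked_body_example() ->
+    IoList = chunk_request_body(<<"hello world">>, 4),
+    %% Flattens to
+    %% <<"0004\r\nhell\r\n0004\r\no wo\r\n0003\r\nrld\r\n0\r\n\r\n">>,
+    %% i.e. each chunk is a 4-digit hex size, CRLF, the data and CRLF,
+    %% terminated by a zero-sized chunk and a final CRLF.
+    iolist_to_binary(IoList).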
+ + +parse_response(_Data, #state{cur_req = undefined}=State) -> + State#state{status = idle}; +parse_response(Data, #state{reply_buffer = Acc, reqs = Reqs, + cur_req = CurReq} = State) -> + #request{from=From, stream_to=StreamTo, req_id=ReqId, + method=Method, response_format = Resp_format} = CurReq, + MaxHeaderSize = ibrowse:get_config_value(max_headers_size, infinity), + case scan_header(Acc, Data) of + {yes, Headers, Data_1} -> + do_trace("Recvd Header Data -> ~s~n----~n", [Headers]), + do_trace("Recvd headers~n--- Headers Begin ---~n~s~n--- Headers End ---~n~n", [Headers]), + {HttpVsn, StatCode, Headers_1} = parse_headers(Headers), + do_trace("HttpVsn: ~p StatusCode: ~p Headers_1 -> ~1000.p~n", [HttpVsn, StatCode, Headers_1]), + LCHeaders = [{to_lower(X), Y} || {X,Y} <- Headers_1], + ConnClose = to_lower(get_value("connection", LCHeaders, "false")), + IsClosing = is_connection_closing(HttpVsn, ConnClose), + case IsClosing of + true -> + shutting_down(State); + false -> + ok + end, + State_1 = State#state{recvd_headers=Headers_1, status=get_body, + reply_buffer = <<>>, + http_status_code=StatCode, is_closing=IsClosing}, + put(conn_close, ConnClose), + TransferEncoding = to_lower(get_value("transfer-encoding", LCHeaders, "false")), + case get_value("content-length", LCHeaders, undefined) of + _ when Method == head -> + {_, Reqs_1} = queue:out(Reqs), + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + State_1_1 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, + {ok, StatCode, Headers_1, []}), + cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), + State_2 = reset_state(State_1_1), + State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), + parse_response(Data_1, State_3); + _ when hd(StatCode) == $1 -> + %% No message body is expected. Server may send + %% one or more 1XX responses before a proper + %% response. + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + do_trace("Recvd a status code of ~p. Ignoring and waiting for a proper response~n", [StatCode]), + parse_response(Data_1, State_1#state{recvd_headers = [], + status = get_header}); + _ when StatCode == "204"; + StatCode == "304" -> + %% No message body is expected for these Status Codes. 
+ %% RFC2616 - Sec 4.4 + {_, Reqs_1} = queue:out(Reqs), + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + State_1_1 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, + {ok, StatCode, Headers_1, []}), + cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), + State_2 = reset_state(State_1_1), + State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), + parse_response(Data_1, State_3); + _ when TransferEncoding == "chunked" -> + do_trace("Chunked encoding detected...~n",[]), + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + case parse_11_response(Data_1, State_1#state{transfer_encoding=chunked, + chunk_size=chunk_start, + reply_buffer = <<>>}) of + {error, Reason} -> + fail_pipelined_requests(State_1, + {error, {Reason, + {stat_code, StatCode}, Headers_1}}), + {error, Reason}; + State_2 -> + State_2 + end; + undefined when HttpVsn == "HTTP/1.0"; + ConnClose == "close" -> + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + State_1#state{reply_buffer = Data_1}; + undefined -> + fail_pipelined_requests(State_1, + {error, {content_length_undefined, + {stat_code, StatCode}, Headers}}), + {error, content_length_undefined}; + V -> + case catch list_to_integer(V) of + V_1 when is_integer(V_1), V_1 >= 0 -> + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + do_trace("Recvd Content-Length of ~p~n", [V_1]), + State_2 = State_1#state{rep_buf_size=0, + reply_buffer = <<>>, + content_length=V_1}, + case parse_11_response(Data_1, State_2) of + {error, Reason} -> + fail_pipelined_requests(State_1, + {error, {Reason, + {stat_code, StatCode}, Headers_1}}), + {error, Reason}; + State_3 -> + State_3 + end; + _ -> + fail_pipelined_requests(State_1, + {error, {content_length_undefined, + {stat_code, StatCode}, Headers}}), + {error, content_length_undefined} + end + end; + {no, Acc_1} when MaxHeaderSize == infinity -> + State#state{reply_buffer = Acc_1}; + {no, Acc_1} when size(Acc_1) < MaxHeaderSize -> + State#state{reply_buffer = Acc_1}; + {no, _Acc_1} -> + fail_pipelined_requests(State, {error, max_headers_size_exceeded}), + {error, max_headers_size_exceeded} + end. + +is_connection_closing("HTTP/0.9", _) -> true; +is_connection_closing(_, "close") -> true; +is_connection_closing("HTTP/1.0", "false") -> true; +is_connection_closing(_, _) -> false. + +%% This clause determines the chunk size when given data from the beginning of the chunk +parse_11_response(DataRecvd, + #state{transfer_encoding = chunked, + chunk_size = chunk_start, + chunk_size_buffer = Chunk_sz_buf + } = State) -> + case scan_crlf(Chunk_sz_buf, DataRecvd) of + {yes, ChunkHeader, Data_1} -> + case parse_chunk_header(ChunkHeader) of + {error, Reason} -> + {error, Reason}; + ChunkSize -> + %% + %% Do we have to preserve the chunk encoding when + %% streaming? NO. This should be transparent to the client + %% process. Chunked encoding was only introduced to make + %% it efficient for the server. + %% + RemLen = size(Data_1), + do_trace("Determined chunk size: ~p. 
Already recvd: ~p~n", [ChunkSize, RemLen]), + parse_11_response(Data_1, State#state{chunk_size_buffer = <<>>, + deleted_crlf = true, + recvd_chunk_size = 0, + chunk_size = ChunkSize}) + end; + {no, Data_1} -> + State#state{chunk_size_buffer = Data_1} + end; + +%% This clause is to remove the CRLF between two chunks +%% +parse_11_response(DataRecvd, + #state{transfer_encoding = chunked, + chunk_size = tbd, + chunk_size_buffer = Buf}=State) -> + case scan_crlf(Buf, DataRecvd) of + {yes, _, NextChunk} -> + State_1 = State#state{chunk_size = chunk_start, + chunk_size_buffer = <<>>, + deleted_crlf = true}, + parse_11_response(NextChunk, State_1); + {no, Data_1} -> + State#state{chunk_size_buffer = Data_1} + end; + +%% This clause deals with the end of a chunked transfer. ibrowse does +%% not support Trailers in the Chunked Transfer encoding. Any trailer +%% received is silently discarded. +parse_11_response(DataRecvd, + #state{transfer_encoding = chunked, chunk_size = 0, + cur_req = CurReq, + deleted_crlf = DelCrlf, + chunk_size_buffer = Trailer, reqs = Reqs}=State) -> + do_trace("Detected end of chunked transfer...~n", []), + DataRecvd_1 = case DelCrlf of + false -> + DataRecvd; + true -> + <<$\r, $\n, DataRecvd/binary>> + end, + case scan_header(Trailer, DataRecvd_1) of + {yes, _TEHeaders, Rem} -> + {_, Reqs_1} = queue:out(Reqs), + State_1 = handle_response(CurReq, State#state{reqs = Reqs_1}), + parse_response(Rem, reset_state(State_1)); + {no, Rem} -> + State#state{chunk_size_buffer = Rem, deleted_crlf = false} + end; + +%% This clause extracts a chunk, given the size. +parse_11_response(DataRecvd, + #state{transfer_encoding = chunked, + chunk_size = CSz, + recvd_chunk_size = Recvd_csz, + rep_buf_size = RepBufSz} = State) -> + NeedBytes = CSz - Recvd_csz, + DataLen = size(DataRecvd), + do_trace("Recvd more data: size: ~p. NeedBytes: ~p~n", [DataLen, NeedBytes]), + case DataLen >= NeedBytes of + true -> + {RemChunk, RemData} = split_binary(DataRecvd, NeedBytes), + do_trace("Recvd another chunk...~n", []), + do_trace("RemData -> ~p~n", [RemData]), + case accumulate_response(RemChunk, State) of + {error, Reason} -> + do_trace("Error accumulating response --> ~p~n", [Reason]), + {error, Reason}; + #state{} = State_1 -> + State_2 = State_1#state{chunk_size=tbd}, + parse_11_response(RemData, State_2) + end; + false -> + accumulate_response(DataRecvd, + State#state{rep_buf_size = RepBufSz + DataLen, + recvd_chunk_size = Recvd_csz + DataLen}) + end; + +%% This clause to extract the body when Content-Length is specified +parse_11_response(DataRecvd, + #state{content_length=CL, rep_buf_size=RepBufSz, + reqs=Reqs}=State) -> + NeedBytes = CL - RepBufSz, + DataLen = size(DataRecvd), + case DataLen >= NeedBytes of + true -> + {RemBody, Rem} = split_binary(DataRecvd, NeedBytes), + {_, Reqs_1} = queue:out(Reqs), + State_1 = accumulate_response(RemBody, State), + State_2 = handle_response(State_1#state.cur_req, State_1#state{reqs=Reqs_1}), + State_3 = reset_state(State_2), + parse_response(Rem, State_3); + false -> + accumulate_response(DataRecvd, State#state{rep_buf_size = (RepBufSz+DataLen)}) + end. 
+ +handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId, + response_format = Resp_format, + save_response_to_file = SaveResponseToFile, + tmp_file_name = TmpFilename, + tmp_file_fd = Fd + }, + #state{http_status_code = SCode, + send_timer = ReqTimer, + reply_buffer = RepBuf, + recvd_headers = RespHeaders}=State) when SaveResponseToFile /= false -> + Body = RepBuf, + State_1 = set_cur_request(State), + file:close(Fd), + ResponseBody = case TmpFilename of + undefined -> + Body; + _ -> + {file, TmpFilename} + end, + State_2 = do_reply(State_1, From, StreamTo, ReqId, Resp_format, + {ok, SCode, RespHeaders, ResponseBody}), + cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), + State_2; +handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId, + response_format = Resp_format}, + #state{http_status_code=SCode, recvd_headers=RespHeaders, + reply_buffer = RepBuf, + send_timer=ReqTimer}=State) -> + Body = RepBuf, +%% State_1 = set_cur_request(State), + State_1 = case get(conn_close) of + "close" -> + do_reply(State, From, StreamTo, ReqId, Resp_format, + {ok, SCode, RespHeaders, Body}), + exit(normal); + _ -> + State_1_1 = do_reply(State, From, StreamTo, ReqId, Resp_format, + {ok, SCode, RespHeaders, Body}), + cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), + State_1_1 + end, + set_cur_request(State_1). + +reset_state(State) -> + State#state{status = get_header, + rep_buf_size = 0, + streamed_size = 0, + content_length = undefined, + reply_buffer = <<>>, + chunk_size_buffer = <<>>, + recvd_headers = [], + deleted_crlf = false, + http_status_code = undefined, + chunk_size = undefined, + transfer_encoding = undefined}. + +set_cur_request(#state{reqs = Reqs} = State) -> + case queue:to_list(Reqs) of + [] -> + State#state{cur_req = undefined}; + [NextReq | _] -> + State#state{cur_req = NextReq} + end. + +parse_headers(Headers) -> + case scan_crlf(Headers) of + {yes, StatusLine, T} -> + parse_headers(StatusLine, T); + {no, StatusLine} -> + parse_headers(StatusLine, <<>>) + end. + +parse_headers(StatusLine, Headers) -> + Headers_1 = parse_headers_1(Headers), + case parse_status_line(StatusLine) of + {ok, HttpVsn, StatCode, _Msg} -> + put(http_prot_vsn, HttpVsn), + {HttpVsn, StatCode, Headers_1}; + _ -> %% A HTTP 0.9 response? + put(http_prot_vsn, "HTTP/0.9"), + {"HTTP/0.9", undefined, Headers} + end. + +% From RFC 2616 +% +% HTTP/1.1 header field values can be folded onto multiple lines if +% the continuation line begins with a space or horizontal tab. All +% linear white space, including folding, has the same semantics as +% SP. A recipient MAY replace any linear white space with a single +% SP before interpreting the field value or forwarding the message +% downstream. + parse_headers_1(B) when is_binary(B) -> + parse_headers_1(binary_to_list(B)); + parse_headers_1(String) -> + parse_headers_1(String, [], []). + +parse_headers_1([$\n, H |T], [$\r | L], Acc) when H == 32; + H == $\t -> + parse_headers_1(lists:dropwhile(fun(X) -> + is_whitespace(X) + end, T), [32 | L], Acc); +parse_headers_1([$\n|T], [$\r | L], Acc) -> + case parse_header(lists:reverse(L)) of + invalid -> + parse_headers_1(T, [], Acc); + NewHeader -> + parse_headers_1(T, [], [NewHeader | Acc]) + end; +parse_headers_1([H|T], L, Acc) -> + parse_headers_1(T, [H|L], Acc); +parse_headers_1([], [], Acc) -> + lists:reverse(Acc); +parse_headers_1([], L, Acc) -> + Acc_1 = case parse_header(lists:reverse(L)) of + invalid -> + Acc; + NewHeader -> + [NewHeader | Acc] + end, + lists:reverse(Acc_1). 
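+
+%% Illustrative sketch, not part of the original module: how parse_headers_1/1
+%% handles a plain header followed by a folded continuation line, as described
+%% in the RFC 2616 quote above. Header names and values are made up.
+folded_header_example() ->
+    Raw = "Content-Type: text/plain\r\nX-Long: a\r\n b\r\n",
+    %% The folded second line of X-Long is joined with a single space:
+    %% [{"Content-Type", "text/plain"}, {"X-Long", "a b"}]
+    parse_headers_1(Raw).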
+ +parse_status_line(Line) when is_binary(Line) -> + parse_status_line(binary_to_list(Line)); +parse_status_line(Line) -> + parse_status_line(Line, get_prot_vsn, [], []). +parse_status_line([32 | T], get_prot_vsn, ProtVsn, StatCode) -> + parse_status_line(T, get_status_code, ProtVsn, StatCode); +parse_status_line([32 | T], get_status_code, ProtVsn, StatCode) -> + {ok, lists:reverse(ProtVsn), lists:reverse(StatCode), T}; +parse_status_line([H | T], get_prot_vsn, ProtVsn, StatCode) -> + parse_status_line(T, get_prot_vsn, [H|ProtVsn], StatCode); +parse_status_line([H | T], get_status_code, ProtVsn, StatCode) -> + parse_status_line(T, get_status_code, ProtVsn, [H | StatCode]); +parse_status_line([], _, _, _) -> + http_09. + +parse_header(B) when is_binary(B) -> + parse_header(binary_to_list(B)); +parse_header(L) -> + parse_header(L, []). +parse_header([$: | V], Acc) -> + {lists:reverse(Acc), string:strip(V)}; +parse_header([H | T], Acc) -> + parse_header(T, [H | Acc]); +parse_header([], _) -> + invalid. + +scan_header(Bin) -> + case get_crlf_crlf_pos(Bin, 0) of + {yes, Pos} -> + {Headers, <<_:4/binary, Body/binary>>} = split_binary(Bin, Pos), + {yes, Headers, Body}; + no -> + {no, Bin} + end. + +scan_header(Bin1, Bin2) when size(Bin1) < 4 -> + scan_header(<<Bin1/binary, Bin2/binary>>); +scan_header(Bin1, <<>>) -> + scan_header(Bin1); +scan_header(Bin1, Bin2) -> + Bin1_already_scanned_size = size(Bin1) - 4, + <<Headers_prefix:Bin1_already_scanned_size/binary, Rest/binary>> = Bin1, + Bin_to_scan = <<Rest/binary, Bin2/binary>>, + case get_crlf_crlf_pos(Bin_to_scan, 0) of + {yes, Pos} -> + {Headers_suffix, <<_:4/binary, Body/binary>>} = split_binary(Bin_to_scan, Pos), + {yes, <<Headers_prefix/binary, Headers_suffix/binary>>, Body}; + no -> + {no, <<Bin1/binary, Bin2/binary>>} + end. + +get_crlf_crlf_pos(<<$\r, $\n, $\r, $\n, _/binary>>, Pos) -> {yes, Pos}; +get_crlf_crlf_pos(<<_, Rest/binary>>, Pos) -> get_crlf_crlf_pos(Rest, Pos + 1); +get_crlf_crlf_pos(<<>>, _) -> no. + +scan_crlf(Bin) -> + case get_crlf_pos(Bin) of + {yes, Pos} -> + {Prefix, <<_, _, Suffix/binary>>} = split_binary(Bin, Pos), + {yes, Prefix, Suffix}; + no -> + {no, Bin} + end. + +scan_crlf(<<>>, Bin2) -> + scan_crlf(Bin2); +scan_crlf(Bin1, Bin2) when size(Bin1) < 2 -> + scan_crlf(<<Bin1/binary, Bin2/binary>>); +scan_crlf(Bin1, Bin2) -> + scan_crlf_1(size(Bin1) - 2, Bin1, Bin2). + +scan_crlf_1(Bin1_head_size, Bin1, Bin2) -> + <<Bin1_head:Bin1_head_size/binary, Bin1_tail/binary>> = Bin1, + Bin3 = <<Bin1_tail/binary, Bin2/binary>>, + case get_crlf_pos(Bin3) of + {yes, Pos} -> + {Prefix, <<_, _, Suffix/binary>>} = split_binary(Bin3, Pos), + {yes, list_to_binary([Bin1_head, Prefix]), Suffix}; + no -> + {no, list_to_binary([Bin1, Bin2])} + end. + +get_crlf_pos(Bin) -> + get_crlf_pos(Bin, 0). + +get_crlf_pos(<<$\r, $\n, _/binary>>, Pos) -> {yes, Pos}; +get_crlf_pos(<<_, Rest/binary>>, Pos) -> get_crlf_pos(Rest, Pos + 1); +get_crlf_pos(<<>>, _) -> no. + +%% scan_crlf(<<$\n, T/binary>>, [$\r | L]) -> {yes, lists:reverse(L), T}; +%% scan_crlf(<<H, T/binary>>, L) -> scan_crlf(T, [H|L]); +%% scan_crlf(<<>>, L) -> {no, L}; +%% scan_crlf([$\n|T], [$\r | L]) -> {yes, lists:reverse(L), T}; +%% scan_crlf([H|T], L) -> scan_crlf(T, [H|L]); +%% scan_crlf([], L) -> {no, L}. + +fmt_val(L) when is_list(L) -> L; +fmt_val(I) when is_integer(I) -> integer_to_list(I); +fmt_val(A) when is_atom(A) -> atom_to_list(A); +fmt_val(Term) -> io_lib:format("~p", [Term]). + +crnl() -> "\r\n". 
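+
+%% Illustrative sketch, not part of the original module: how scan_header/1
+%% splits a buffered response at the blank line between headers and body.
+%% The response bytes are made up for the example.
+scan_header_example() ->
+    Buf = <<"HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello">>,
+    %% Returns {yes, <<"HTTP/1.1 200 OK\r\nContent-Length: 5">>, <<"hello">>};
+    %% with no CRLFCRLF in the buffer it would return {no, Buf} instead.
+    scan_header(Buf).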
+ +method(get) -> "GET"; +method(post) -> "POST"; +method(head) -> "HEAD"; +method(options) -> "OPTIONS"; +method(put) -> "PUT"; +method(delete) -> "DELETE"; +method(trace) -> "TRACE"; +method(mkcol) -> "MKCOL"; +method(propfind) -> "PROPFIND"; +method(proppatch) -> "PROPPATCH"; +method(lock) -> "LOCK"; +method(unlock) -> "UNLOCK"; +method(move) -> "MOVE"; +method(copy) -> "COPY". + +%% From RFC 2616 +%% +% The chunked encoding modifies the body of a message in order to +% transfer it as a series of chunks, each with its own size indicator, +% followed by an OPTIONAL trailer containing entity-header +% fields. This allows dynamically produced content to be transferred +% along with the information necessary for the recipient to verify +% that it has received the full message. +% Chunked-Body = *chunk +% last-chunk +% trailer +% CRLF +% chunk = chunk-size [ chunk-extension ] CRLF +% chunk-data CRLF +% chunk-size = 1*HEX +% last-chunk = 1*("0") [ chunk-extension ] CRLF +% chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +% chunk-ext-name = token +% chunk-ext-val = token | quoted-string +% chunk-data = chunk-size(OCTET) +% trailer = *(entity-header CRLF) +% The chunk-size field is a string of hex digits indicating the size +% of the chunk. The chunked encoding is ended by any chunk whose size +% is zero, followed by the trailer, which is terminated by an empty +% line. +%% +%% The parsing implemented here discards all chunk extensions. It also +%% strips trailing spaces from the chunk size fields as Apache 1.3.27 was +%% sending them. +parse_chunk_header([]) -> + throw({error, invalid_chunk_size}); +parse_chunk_header(ChunkHeader) -> + parse_chunk_header(ChunkHeader, []). + +parse_chunk_header(<<$;, _/binary>>, Acc) -> + hexlist_to_integer(lists:reverse(Acc)); +parse_chunk_header(<<H, T/binary>>, Acc) -> + case is_whitespace(H) of + true -> + parse_chunk_header(T, Acc); + false -> + parse_chunk_header(T, [H | Acc]) + end; +parse_chunk_header(<<>>, Acc) -> + hexlist_to_integer(lists:reverse(Acc)). + +is_whitespace($\s) -> true; +is_whitespace($\r) -> true; +is_whitespace($\n) -> true; +is_whitespace($\t) -> true; +is_whitespace(_) -> false. + + +send_async_headers(_ReqId, undefined, _StatCode, _Headers) -> + ok; +send_async_headers(ReqId, StreamTo, StatCode, Headers) -> + catch StreamTo ! {ibrowse_async_headers, ReqId, StatCode, Headers}. + +format_response_data(Resp_format, Body) -> + case Resp_format of + list when is_list(Body) -> + flatten(Body); + list when is_binary(Body) -> + binary_to_list(Body); + binary when is_list(Body) -> + list_to_binary(Body); + _ -> + %% This is to cater for sending messages such as + %% {chunk_start, _}, chunk_end etc + Body + end. + +do_reply(State, From, undefined, _, Resp_format, {ok, St_code, Headers, Body}) -> + Msg_1 = {ok, St_code, Headers, format_response_data(Resp_format, Body)}, + gen_server:reply(From, Msg_1), + dec_pipeline_counter(State); +do_reply(State, From, undefined, _, _, Msg) -> + gen_server:reply(From, Msg), + dec_pipeline_counter(State); +do_reply(#state{prev_req_id = Prev_req_id} = State, + _From, StreamTo, ReqId, Resp_format, {ok, _, _, Body}) -> + State_1 = dec_pipeline_counter(State), + case Body of + [] -> + ok; + _ -> + Body_1 = format_response_data(Resp_format, Body), + catch StreamTo ! {ibrowse_async_response, ReqId, Body_1} + end, + catch StreamTo ! 
{ibrowse_async_response_end, ReqId}, + %% We don't want to delete the Req-id to Pid mapping straightaway + %% as the client may send a stream_next message just while we are + %% sending back this ibrowse_async_response_end message. If we + %% deleted this mapping straightaway, the caller will see a + %% {error, unknown_req_id} when it calls ibrowse:stream_next/1. To + %% get around this, we store the req id, and clear it after the + %% next request. If there are wierd combinations of stream, + %% stream_once and sync requests on the same connection, it will + %% take a while for the req_id-pid mapping to get cleared, but it + %% should do no harm. + ets:delete(ibrowse_stream, {req_id_pid, Prev_req_id}), + State_1#state{prev_req_id = ReqId}; +do_reply(State, _From, StreamTo, ReqId, Resp_format, Msg) -> + State_1 = dec_pipeline_counter(State), + Msg_1 = format_response_data(Resp_format, Msg), + catch StreamTo ! {ibrowse_async_response, ReqId, Msg_1}, + State_1. + +do_interim_reply(undefined, _, _ReqId, _Msg) -> + ok; +do_interim_reply(StreamTo, Response_format, ReqId, Msg) -> + Msg_1 = format_response_data(Response_format, Msg), + catch StreamTo ! {ibrowse_async_response, ReqId, Msg_1}. + +do_error_reply(#state{reqs = Reqs} = State, Err) -> + ReqList = queue:to_list(Reqs), + lists:foreach(fun(#request{from=From, stream_to=StreamTo, req_id=ReqId, + response_format = Resp_format}) -> + ets:delete(ibrowse_stream, {req_id_pid, ReqId}), + do_reply(State, From, StreamTo, ReqId, Resp_format, {error, Err}) + end, ReqList). + +fail_pipelined_requests(#state{reqs = Reqs, cur_req = CurReq} = State, Reply) -> + {_, Reqs_1} = queue:out(Reqs), + #request{from=From, stream_to=StreamTo, req_id=ReqId, + response_format = Resp_format} = CurReq, + do_reply(State, From, StreamTo, ReqId, Resp_format, Reply), + do_error_reply(State#state{reqs = Reqs_1}, previous_request_failed). + +split_list_at(List, N) -> + split_list_at(List, N, []). +split_list_at([], _, Acc) -> + {lists:reverse(Acc), []}; +split_list_at(List2, 0, List1) -> + {lists:reverse(List1), List2}; +split_list_at([H | List2], N, List1) -> + split_list_at(List2, N-1, [H | List1]). + +hexlist_to_integer(List) -> + hexlist_to_integer(lists:reverse(List), 1, 0). +hexlist_to_integer([H | T], Multiplier, Acc) -> + hexlist_to_integer(T, Multiplier*16, Multiplier*to_ascii(H) + Acc); +hexlist_to_integer([], _, Acc) -> + Acc. + +to_ascii($A) -> 10; +to_ascii($a) -> 10; +to_ascii($B) -> 11; +to_ascii($b) -> 11; +to_ascii($C) -> 12; +to_ascii($c) -> 12; +to_ascii($D) -> 13; +to_ascii($d) -> 13; +to_ascii($E) -> 14; +to_ascii($e) -> 14; +to_ascii($F) -> 15; +to_ascii($f) -> 15; +to_ascii($1) -> 1; +to_ascii($2) -> 2; +to_ascii($3) -> 3; +to_ascii($4) -> 4; +to_ascii($5) -> 5; +to_ascii($6) -> 6; +to_ascii($7) -> 7; +to_ascii($8) -> 8; +to_ascii($9) -> 9; +to_ascii($0) -> 0. + +cancel_timer(undefined) -> ok; +cancel_timer(Ref) -> erlang:cancel_timer(Ref). + +cancel_timer(Ref, {eat_message, Msg}) -> + cancel_timer(Ref), + receive + Msg -> + ok + after 0 -> + ok + end. + +make_req_id() -> + now(). + +to_lower(Str) -> + to_lower(Str, []). +to_lower([H|T], Acc) when H >= $A, H =< $Z -> + to_lower(T, [H+32|Acc]); +to_lower([H|T], Acc) -> + to_lower(T, [H|Acc]); +to_lower([], Acc) -> + lists:reverse(Acc). + +shutting_down(#state{lb_ets_tid = undefined}) -> + ok; +shutting_down(#state{lb_ets_tid = Tid, + cur_pipeline_size = Sz}) -> + catch ets:delete(Tid, {Sz, self()}). 
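+
+%% Illustrative sketch, not part of the original module: hexlist_to_integer/1
+%% is what turns a chunk-size field such as "1aF" (see parse_chunk_header/2
+%% above) into an integer. The input is a made-up value for the example.
+chunk_size_example() ->
+    431 = hexlist_to_integer("1aF"),   %% 16#1AF
+    431.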
+ +inc_pipeline_counter(#state{is_closing = true} = State) -> + State; +inc_pipeline_counter(#state{cur_pipeline_size = Pipe_sz} = State) -> + State#state{cur_pipeline_size = Pipe_sz + 1}. + +dec_pipeline_counter(#state{is_closing = true} = State) -> + State; +dec_pipeline_counter(#state{lb_ets_tid = undefined} = State) -> + State; +dec_pipeline_counter(#state{cur_pipeline_size = Pipe_sz, + lb_ets_tid = Tid} = State) -> + ets:delete(Tid, {Pipe_sz, self()}), + ets:insert(Tid, {{Pipe_sz - 1, self()}, []}), + State#state{cur_pipeline_size = Pipe_sz - 1}. + +flatten([H | _] = L) when is_integer(H) -> + L; +flatten([H | _] = L) when is_list(H) -> + lists:flatten(L); +flatten([]) -> + []. + +get_stream_chunk_size(Options) -> + case lists:keysearch(stream_chunk_size, 1, Options) of + {value, {_, V}} when V > 0 -> + V; + _ -> + ?DEFAULT_STREAM_CHUNK_SIZE + end. + +get_inac_timeout(#state{cur_req = #request{options = Opts}}) -> + get_value(inactivity_timeout, Opts, infinity); +get_inac_timeout(#state{cur_req = undefined}) -> + infinity. diff --git a/apps/ibrowse/src/ibrowse_lb.erl b/apps/ibrowse/src/ibrowse_lb.erl new file mode 100644 index 00000000..834054a7 --- /dev/null +++ b/apps/ibrowse/src/ibrowse_lb.erl @@ -0,0 +1,216 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_lb.erl +%%% Author : chandru <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : +%%% +%%% Created : 6 Mar 2008 by chandru <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_lb). + +-vsn('$Id: ibrowse_lb.erl,v 1.2 2009/07/01 22:43:19 chandrusf Exp $ '). +-author(chandru). +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([ + start_link/1, + spawn_connection/5 + ]). + +%% gen_server callbacks +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 + ]). + +-record(state, {parent_pid, + ets_tid, + host, + port, + max_sessions, + max_pipeline_size, + num_cur_sessions = 0}). + +-include("ibrowse.hrl"). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +start_link(Args) -> + gen_server:start_link(?MODULE, Args, []). 
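+
+%% Illustrative sketch, not part of the original module: how a load balancer
+%% for one {Host, Port} pair might be started and then asked for a connection
+%% via spawn_connection/5 (defined below). The host, port and limits are
+%% made-up values, and this assumes the ibrowse application is already running.
+start_lb_example() ->
+    {ok, Lb_pid} = start_link(["www.example.com", 80]),
+    Url = ibrowse_lib:parse_url("http://www.example.com/"),
+    spawn_connection(Lb_pid, Url, 10, 10, []).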
+ +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%%-------------------------------------------------------------------- +init([Host, Port]) -> + process_flag(trap_exit, true), + Max_sessions = ibrowse:get_config_value({max_sessions, Host, Port}, 10), + Max_pipe_sz = ibrowse:get_config_value({max_pipeline_size, Host, Port}, 10), + put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), + put(ibrowse_trace_token, ["LB: ", Host, $:, integer_to_list(Port)]), + Tid = ets:new(ibrowse_lb, [public, ordered_set]), + {ok, #state{parent_pid = whereis(ibrowse), + host = Host, + port = Port, + ets_tid = Tid, + max_pipeline_size = Max_pipe_sz, + max_sessions = Max_sessions}}. + +spawn_connection(Lb_pid, Url, + Max_sessions, + Max_pipeline_size, + SSL_options) + when is_pid(Lb_pid), + is_record(Url, url), + is_integer(Max_pipeline_size), + is_integer(Max_sessions) -> + gen_server:call(Lb_pid, + {spawn_connection, Url, Max_sessions, Max_pipeline_size, SSL_options}). +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | (terminate/2 is called) +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +% handle_call({spawn_connection, _Url, Max_sess, Max_pipe, _}, _From, +% #state{max_sessions = Max_sess, +% ets_tid = Tid, +% max_pipeline_size = Max_pipe_sz, +% num_cur_sessions = Num} = State) +% when Num >= Max -> +% Reply = find_best_connection(Tid), +% {reply, sorry_dude_reuse, State}; + +%% Update max_sessions in #state with supplied value +handle_call({spawn_connection, _Url, Max_sess, Max_pipe, _}, _From, + #state{num_cur_sessions = Num} = State) + when Num >= Max_sess -> + State_1 = maybe_create_ets(State), + Reply = find_best_connection(State_1#state.ets_tid, Max_pipe), + {reply, Reply, State_1#state{max_sessions = Max_sess}}; + +handle_call({spawn_connection, Url, _Max_sess, _Max_pipe, SSL_options}, _From, + #state{num_cur_sessions = Cur} = State) -> + State_1 = maybe_create_ets(State), + Tid = State_1#state.ets_tid, + {ok, Pid} = ibrowse_http_client:start_link({Tid, Url, SSL_options}), + ets:insert(Tid, {{1, Pid}, []}), + {reply, {ok, Pid}, State_1#state{num_cur_sessions = Cur + 1}}; + +handle_call(Request, _From, State) -> + Reply = {unknown_request, Request}, + {reply, Reply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> + {noreply, State}. 
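+
+%% Illustrative sketch, not part of the original module: why the connection
+%% table is an ordered_set keyed on {PipelineSize, Pid}. ets:first/1 returns
+%% the least loaded connection, which is what find_best_connection/2 (further
+%% down) relies on. The pipeline sizes are made-up values for the example.
+lb_key_order_example() ->
+    T = ets:new(example_lb, [ordered_set]),
+    Self = self(),
+    true = ets:insert(T, [{{3, Self}, []}, {{1, Self}, []}, {{2, Self}, []}]),
+    {1, Self} = ets:first(T),   %% smallest pipeline size sorts first
+    ets:delete(T).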
+ +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_info({'EXIT', Parent, _Reason}, #state{parent_pid = Parent} = State) -> + {stop, normal, State}; + +handle_info({'EXIT', _Pid, _Reason}, #state{ets_tid = undefined} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, _Reason}, + #state{num_cur_sessions = Cur, + ets_tid = Tid} = State) -> + ets:match_delete(Tid, {{'_', Pid}, '_'}), + Cur_1 = Cur - 1, + State_1 = case Cur_1 of + 0 -> + ets:delete(Tid), + State#state{ets_tid = undefined}; + _ -> + State + end, + {noreply, State_1#state{num_cur_sessions = Cur_1}}; + +handle_info({trace, Bool}, #state{ets_tid = undefined} = State) -> + put(my_trace_flag, Bool), + {noreply, State}; + +handle_info({trace, Bool}, #state{ets_tid = Tid} = State) -> + ets:foldl(fun({{_, Pid}, _}, Acc) when is_pid(Pid) -> + catch Pid ! {trace, Bool}, + Acc; + (_, Acc) -> + Acc + end, undefined, Tid), + put(my_trace_flag, Bool), + {noreply, State}; + +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- +find_best_connection(Tid, Max_pipe) -> + case ets:first(Tid) of + {Cur_sz, Pid} when Cur_sz < Max_pipe -> + ets:delete(Tid, {Cur_sz, Pid}), + ets:insert(Tid, {{Cur_sz + 1, Pid}, []}), + {ok, Pid}; + _ -> + {error, retry_later} + end. + +maybe_create_ets(#state{ets_tid = undefined} = State) -> + Tid = ets:new(ibrowse_lb, [public, ordered_set]), + State#state{ets_tid = Tid}; +maybe_create_ets(State) -> + State. diff --git a/apps/ibrowse/src/ibrowse_lib.erl b/apps/ibrowse/src/ibrowse_lib.erl new file mode 100644 index 00000000..6c7b1546 --- /dev/null +++ b/apps/ibrowse/src/ibrowse_lib.erl @@ -0,0 +1,399 @@ +%%% File : ibrowse_lib.erl +%%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : +%%% Created : 27 Feb 2004 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%% @doc Module with a few useful functions + +-module(ibrowse_lib). +-vsn('$Id: ibrowse_lib.erl,v 1.6 2008/03/27 01:35:50 chandrusf Exp $ '). +-author('chandru'). +-ifdef(debug). +-compile(export_all). +-endif. + +-include("ibrowse.hrl"). + +-export([ + get_trace_status/2, + do_trace/2, + do_trace/3, + url_encode/1, + decode_rfc822_date/1, + status_code/1, + dec2hex/2, + drv_ue/1, + drv_ue/2, + encode_base64/1, + decode_base64/1, + get_value/2, + get_value/3, + parse_url/1, + printable_date/0 + ]). + +get_trace_status(Host, Port) -> + ibrowse:get_config_value({trace, Host, Port}, false). 
+ +drv_ue(Str) -> + [{port, Port}| _] = ets:lookup(ibrowse_table, port), + drv_ue(Str, Port). +drv_ue(Str, Port) -> + case erlang:port_control(Port, 1, Str) of + [] -> + Str; + Res -> + Res + end. + +%% @doc URL-encodes a string based on RFC 1738. Returns a flat list. +%% @spec url_encode(Str) -> UrlEncodedStr +%% Str = string() +%% UrlEncodedStr = string() +url_encode(Str) when is_list(Str) -> + url_encode_char(lists:reverse(Str), []). + +url_encode_char([X | T], Acc) when X >= $0, X =< $9 -> + url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X >= $a, X =< $z -> + url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X >= $A, X =< $Z -> + url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X == $-; X == $_; X == $. -> + url_encode_char(T, [X | Acc]); +url_encode_char([32 | T], Acc) -> + url_encode_char(T, [$+ | Acc]); +url_encode_char([X | T], Acc) -> + url_encode_char(T, [$%, d2h(X bsr 4), d2h(X band 16#0f) | Acc]); +url_encode_char([], Acc) -> + Acc. + +d2h(N) when N<10 -> N+$0; +d2h(N) -> N+$a-10. + +decode_rfc822_date(String) when is_list(String) -> + case catch decode_rfc822_date_1(string:tokens(String, ", \t\r\n")) of + {'EXIT', _} -> + {error, invalid_date}; + Res -> + Res + end. + +% TODO: Have to handle the Zone +decode_rfc822_date_1([_,DayInt,Month,Year, Time,Zone]) -> + decode_rfc822_date_1([DayInt,Month,Year, Time,Zone]); +decode_rfc822_date_1([Day,Month,Year, Time,_Zone]) -> + DayI = list_to_integer(Day), + MonthI = month_int(Month), + YearI = list_to_integer(Year), + TimeTup = case string:tokens(Time, ":") of + [H,M] -> + {list_to_integer(H), + list_to_integer(M), + 0}; + [H,M,S] -> + {list_to_integer(H), + list_to_integer(M), + list_to_integer(S)} + end, + {{YearI,MonthI,DayI}, TimeTup}. + +month_int("Jan") -> 1; +month_int("Feb") -> 2; +month_int("Mar") -> 3; +month_int("Apr") -> 4; +month_int("May") -> 5; +month_int("Jun") -> 6; +month_int("Jul") -> 7; +month_int("Aug") -> 8; +month_int("Sep") -> 9; +month_int("Oct") -> 10; +month_int("Nov") -> 11; +month_int("Dec") -> 12. + +%% @doc Given a status code, returns an atom describing the status code. 
+%% @spec status_code(StatusCode::status_code()) -> StatusDescription +%% status_code() = string() | integer() +%% StatusDescription = atom() +status_code(100) -> continue; +status_code(101) -> switching_protocols; +status_code(102) -> processing; +status_code(200) -> ok; +status_code(201) -> created; +status_code(202) -> accepted; +status_code(203) -> non_authoritative_information; +status_code(204) -> no_content; +status_code(205) -> reset_content; +status_code(206) -> partial_content; +status_code(207) -> multi_status; +status_code(300) -> multiple_choices; +status_code(301) -> moved_permanently; +status_code(302) -> found; +status_code(303) -> see_other; +status_code(304) -> not_modified; +status_code(305) -> use_proxy; +status_code(306) -> unused; +status_code(307) -> temporary_redirect; +status_code(400) -> bad_request; +status_code(401) -> unauthorized; +status_code(402) -> payment_required; +status_code(403) -> forbidden; +status_code(404) -> not_found; +status_code(405) -> method_not_allowed; +status_code(406) -> not_acceptable; +status_code(407) -> proxy_authentication_required; +status_code(408) -> request_timeout; +status_code(409) -> conflict; +status_code(410) -> gone; +status_code(411) -> length_required; +status_code(412) -> precondition_failed; +status_code(413) -> request_entity_too_large; +status_code(414) -> request_uri_too_long; +status_code(415) -> unsupported_media_type; +status_code(416) -> requested_range_not_satisfiable; +status_code(417) -> expectation_failed; +status_code(422) -> unprocessable_entity; +status_code(423) -> locked; +status_code(424) -> failed_dependency; +status_code(500) -> internal_server_error; +status_code(501) -> not_implemented; +status_code(502) -> bad_gateway; +status_code(503) -> service_unavailable; +status_code(504) -> gateway_timeout; +status_code(505) -> http_version_not_supported; +status_code(507) -> insufficient_storage; +status_code(X) when is_list(X) -> status_code(list_to_integer(X)); +status_code(_) -> unknown_status_code. + +%% @doc dec2hex taken from gtk.erl in std dist +%% M = integer() -- number of hex digits required +%% N = integer() -- the number to represent as hex +%% @spec dec2hex(M::integer(), N::integer()) -> string() +dec2hex(M,N) -> dec2hex(M,N,[]). + +dec2hex(0,_N,Ack) -> Ack; +dec2hex(M,N,Ack) -> dec2hex(M-1,N bsr 4,[d2h(N band 15)|Ack]). + +%% @doc Implements the base64 encoding algorithm. The output data type matches in the input data type. +%% @spec encode_base64(In) -> Out +%% In = string() | binary() +%% Out = string() | binary() +encode_base64(List) when is_list(List) -> + encode_base64_1(list_to_binary(List)); +encode_base64(Bin) when is_binary(Bin) -> + List = encode_base64_1(Bin), + list_to_binary(List). + +encode_base64_1(<<A:6, B:6, C:6, D:6, Rest/binary>>) -> + [int_to_b64(A), int_to_b64(B), + int_to_b64(C), int_to_b64(D) | encode_base64_1(Rest)]; +encode_base64_1(<<A:6, B:6, C:4>>) -> + [int_to_b64(A), int_to_b64(B), int_to_b64(C bsl 2), $=]; +encode_base64_1(<<A:6, B:2>>) -> + [int_to_b64(A), int_to_b64(B bsl 4), $=, $=]; +encode_base64_1(<<>>) -> + []. + +%% @doc Implements the base64 decoding algorithm. The output data type matches in the input data type. +%% @spec decode_base64(In) -> Out | exit({error, invalid_input}) +%% In = string() | binary() +%% Out = string() | binary() +decode_base64(List) when is_list(List) -> + decode_base64_1(List, []); +decode_base64(Bin) when is_binary(Bin) -> + List = decode_base64_1(binary_to_list(Bin), []), + list_to_binary(List). 
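+
+%% Illustrative sketch, not part of the original module: a round trip through
+%% encode_base64/1 and decode_base64/1 above. The input string is a made-up
+%% value for the example.
+base64_roundtrip_example() ->
+    "dXNlcjpwYXNz" = encode_base64("user:pass"),
+    "user:pass" = decode_base64("dXNlcjpwYXNz"),
+    ok.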
+
+%% Skip linear whitespace between the encoded characters
+decode_base64_1([H | T], Acc) when ((H == $\t) or
+                                    (H == 32) or
+                                    (H == $\r) or
+                                    (H == $\n)) ->
+    decode_base64_1(T, Acc);
+
+decode_base64_1([$=, $=], Acc) ->
+    lists:reverse(Acc);
+decode_base64_1([$=, _ | _], _Acc) ->
+    exit({error, invalid_input});
+
+%% Two encoded characters plus two pad characters carry one octet
+decode_base64_1([A1, B1, $=, $=], Acc) ->
+    A = b64_to_int(A1),
+    B = b64_to_int(B1),
+    Oct1 = (A bsl 2) bor (B bsr 4),
+    decode_base64_1([], [Oct1 | Acc]);
+%% Three encoded characters plus one pad character carry two octets
+decode_base64_1([A1, B1, C1, $=], Acc) ->
+    A = b64_to_int(A1),
+    B = b64_to_int(B1),
+    C = b64_to_int(C1),
+    Oct1 = (A bsl 2) bor (B bsr 4),
+    Oct2 = ((B band 16#f) bsl 4) bor (C bsr 2),
+    decode_base64_1([], [Oct2, Oct1 | Acc]);
+%% Four encoded characters carry three octets
+decode_base64_1([A1, B1, C1, D1 | T], Acc) ->
+    A = b64_to_int(A1),
+    B = b64_to_int(B1),
+    C = b64_to_int(C1),
+    D = b64_to_int(D1),
+    Oct1 = (A bsl 2) bor (B bsr 4),
+    Oct2 = ((B band 16#f) bsl 4) bor (C bsr 2),
+    Oct3 = ((C band 2#11) bsl 6) bor D,
+    decode_base64_1(T, [Oct3, Oct2, Oct1 | Acc]);
+decode_base64_1([], Acc) ->
+    lists:reverse(Acc).
+
+%% Taken from httpd_util.erl
+int_to_b64(X) when X >= 0, X =< 25 -> X + $A;
+int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a;
+int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0;
+int_to_b64(62) -> $+;
+int_to_b64(63) -> $/.
+
+%% Taken from httpd_util.erl
+b64_to_int(X) when X >= $A, X =< $Z -> X - $A;
+b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26;
+b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52;
+b64_to_int($+) -> 62;
+b64_to_int($/) -> 63.
+
+get_value(Tag, TVL, DefVal) ->
+    case lists:keysearch(Tag, 1, TVL) of
+        false ->
+            DefVal;
+        {value, {_, Val}} ->
+            Val
+    end.
+
+get_value(Tag, TVL) ->
+    {value, {_, V}} = lists:keysearch(Tag, 1, TVL),
+    V.
+
+parse_url(Url) ->
+    parse_url(Url, get_protocol, #url{abspath=Url}, []).
+
+parse_url([$:, $/, $/ | _], get_protocol, Url, []) ->
+    {invalid_uri_1, Url};
+parse_url([$:, $/, $/ | T], get_protocol, Url, TmpAcc) ->
+    Prot = list_to_atom(lists:reverse(TmpAcc)),
+    parse_url(T, get_username,
+              Url#url{protocol = Prot},
+              []);
+parse_url([$/ | T], get_username, Url, TmpAcc) ->
+    %% No username/password. No port number
+    Url#url{host = lists:reverse(TmpAcc),
+            port = default_port(Url#url.protocol),
+            path = [$/ | T]};
+parse_url([$: | T], get_username, Url, TmpAcc) ->
+    %% It is possible that no username/password has been
+    %% specified. But we'll continue with the assumption that there is
+    %% a username/password. If we encounter a '@' later on, there is a
+    %% username/password indeed. 
If we encounter a '/', it was + %% actually the hostname + parse_url(T, get_password, + Url#url{username = lists:reverse(TmpAcc)}, + []); +parse_url([$@ | T], get_username, Url, TmpAcc) -> + parse_url(T, get_host, + Url#url{username = lists:reverse(TmpAcc), + password = ""}, + []); +parse_url([$@ | T], get_password, Url, TmpAcc) -> + parse_url(T, get_host, + Url#url{password = lists:reverse(TmpAcc)}, + []); +parse_url([$/ | T], get_password, Url, TmpAcc) -> + %% Ok, what we thought was the username/password was the hostname + %% and portnumber + #url{username=User} = Url, + Port = list_to_integer(lists:reverse(TmpAcc)), + Url#url{host = User, + port = Port, + username = undefined, + password = undefined, + path = [$/ | T]}; +parse_url([$: | T], get_host, #url{} = Url, TmpAcc) -> + parse_url(T, get_port, + Url#url{host = lists:reverse(TmpAcc)}, + []); +parse_url([$/ | T], get_host, #url{protocol=Prot} = Url, TmpAcc) -> + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Prot), + path = [$/ | T]}; +parse_url([$/ | T], get_port, #url{protocol=Prot} = Url, TmpAcc) -> + Port = case TmpAcc of + [] -> + default_port(Prot); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{port = Port, path = [$/ | T]}; +parse_url([H | T], State, Url, TmpAcc) -> + parse_url(T, State, Url, [H | TmpAcc]); +parse_url([], get_host, Url, TmpAcc) when TmpAcc /= [] -> + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Url#url.protocol), + path = "/"}; +parse_url([], get_username, Url, TmpAcc) when TmpAcc /= [] -> + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Url#url.protocol), + path = "/"}; +parse_url([], get_port, #url{protocol=Prot} = Url, TmpAcc) -> + Port = case TmpAcc of + [] -> + default_port(Prot); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{port = Port, + path = "/"}; +parse_url([], get_password, Url, TmpAcc) -> + %% Ok, what we thought was the username/password was the hostname + %% and portnumber + #url{username=User} = Url, + Port = case TmpAcc of + [] -> + default_port(Url#url.protocol); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{host = User, + port = Port, + username = undefined, + password = undefined, + path = "/"}; +parse_url([], State, Url, TmpAcc) -> + {invalid_uri_2, State, Url, TmpAcc}. + +default_port(http) -> 80; +default_port(https) -> 443; +default_port(ftp) -> 21. + +printable_date() -> + {{Y,Mo,D},{H, M, S}} = calendar:local_time(), + {_,_,MicroSecs} = now(), + [integer_to_list(Y), + $-, + integer_to_list(Mo), + $-, + integer_to_list(D), + $_, + integer_to_list(H), + $:, + integer_to_list(M), + $:, + integer_to_list(S), + $:, + integer_to_list(MicroSecs div 1000)]. + +do_trace(Fmt, Args) -> + do_trace(get(my_trace_flag), Fmt, Args). + +-ifdef(DEBUG). +do_trace(_, Fmt, Args) -> + io:format("~s -- (~s) - "++Fmt, + [printable_date(), + get(ibrowse_trace_token) | Args]). +-else. +do_trace(true, Fmt, Args) -> + io:format("~s -- (~s) - "++Fmt, + [printable_date(), + get(ibrowse_trace_token) | Args]); +do_trace(_, _, _) -> + ok. +-endif. 
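+
+%% Illustrative sketch, not part of the original module: what parse_url/1
+%% returns for a typical absolute URL. The URL is a made-up value and only
+%% the record fields used elsewhere in this patch are matched.
+parse_url_example() ->
+    Url = parse_url("http://www.example.com:8080/foo?q=1"),
+    #url{protocol = http,
+         host = "www.example.com",
+         port = 8080,
+         path = "/foo?q=1"} = Url,
+    Url.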
diff --git a/apps/ibrowse/src/ibrowse_sup.erl b/apps/ibrowse/src/ibrowse_sup.erl new file mode 100644 index 00000000..1b9b863a --- /dev/null +++ b/apps/ibrowse/src/ibrowse_sup.erl @@ -0,0 +1,65 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_sup.erl +%%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : +%%% +%%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_sup). +-vsn('$Id: ibrowse_sup.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(supervisor). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +%%-------------------------------------------------------------------- +-export([ + start_link/0 + ]). + +%%-------------------------------------------------------------------- +%% Internal exports +%%-------------------------------------------------------------------- +-export([ + init/1 + ]). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- +-define(SERVER, ?MODULE). + +%%-------------------------------------------------------------------- +%% Records +%%-------------------------------------------------------------------- + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the supervisor +%%-------------------------------------------------------------------- +start_link() -> + supervisor:start_link({local, ?SERVER}, ?MODULE, []). + +%%==================================================================== +%% Server functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Func: init/1 +%% Returns: {ok, {SupFlags, [ChildSpec]}} | +%% ignore | +%% {error, Reason} +%%-------------------------------------------------------------------- +init([]) -> + AChild = {ibrowse,{ibrowse,start_link,[]}, + permanent,2000,worker,[ibrowse, ibrowse_http_client]}, + {ok,{{one_for_all,10,1}, [AChild]}}. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/apps/ibrowse/src/ibrowse_test.erl b/apps/ibrowse/src/ibrowse_test.erl new file mode 100644 index 00000000..3dc66ecf --- /dev/null +++ b/apps/ibrowse/src/ibrowse_test.erl @@ -0,0 +1,377 @@ +%%% File : ibrowse_test.erl +%%% Author : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : Test ibrowse +%%% Created : 14 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> + +-module(ibrowse_test). +-vsn('$Id: ibrowse_test.erl,v 1.4 2009/07/01 22:43:19 chandrusf Exp $ '). 
+-export([ + load_test/3, + send_reqs_1/3, + do_send_req/2, + unit_tests/0, + unit_tests/1, + unit_tests_1/2, + drv_ue_test/0, + drv_ue_test/1, + ue_test/0, + ue_test/1, + verify_chunked_streaming/0, + verify_chunked_streaming/1, + i_do_async_req_list/4, + test_stream_once/3, + test_stream_once/4 + ]). + +test_stream_once(Url, Method, Options) -> + test_stream_once(Url, Method, Options, 5000). + +test_stream_once(Url, Method, Options, Timeout) -> + case ibrowse:send_req(Url, [], Method, [], [{stream_to, {self(), once}} | Options], Timeout) of + {ibrowse_req_id, Req_id} -> + case ibrowse:stream_next(Req_id) of + ok -> + test_stream_once(Req_id); + Err -> + Err + end; + Err -> + Err + end. + +test_stream_once(Req_id) -> + receive + {ibrowse_async_headers, Req_id, StatCode, Headers} -> + io:format("Recvd headers~n~p~n", [{ibrowse_async_headers, Req_id, StatCode, Headers}]), + case ibrowse:stream_next(Req_id) of + ok -> + test_stream_once(Req_id); + Err -> + Err + end; + {ibrowse_async_response, Req_id, {error, Err}} -> + io:format("Recvd error: ~p~n", [Err]); + {ibrowse_async_response, Req_id, Body_1} -> + io:format("Recvd body part: ~n~p~n", [{ibrowse_async_response, Req_id, Body_1}]), + case ibrowse:stream_next(Req_id) of + ok -> + test_stream_once(Req_id); + Err -> + Err + end; + {ibrowse_async_response_end, Req_id} -> + ok + end. +%% Use ibrowse:set_max_sessions/3 and ibrowse:set_max_pipeline_size/3 to +%% tweak settings before running the load test. The defaults are 10 and 10. +load_test(Url, NumWorkers, NumReqsPerWorker) when is_list(Url), + is_integer(NumWorkers), + is_integer(NumReqsPerWorker), + NumWorkers > 0, + NumReqsPerWorker > 0 -> + proc_lib:spawn(?MODULE, send_reqs_1, [Url, NumWorkers, NumReqsPerWorker]). + +send_reqs_1(Url, NumWorkers, NumReqsPerWorker) -> + Start_time = now(), + ets:new(pid_table, [named_table, public]), + ets:new(ibrowse_test_results, [named_table, public]), + ets:new(ibrowse_errors, [named_table, public, ordered_set]), + init_results(), + process_flag(trap_exit, true), + log_msg("Starting spawning of workers...~n", []), + spawn_workers(Url, NumWorkers, NumReqsPerWorker), + log_msg("Finished spawning workers...~n", []), + do_wait(), + End_time = now(), + log_msg("All workers are done...~n", []), + log_msg("ibrowse_test_results table: ~n~p~n", [ets:tab2list(ibrowse_test_results)]), + log_msg("Start time: ~1000.p~n", [calendar:now_to_local_time(Start_time)]), + log_msg("End time : ~1000.p~n", [calendar:now_to_local_time(End_time)]), + Elapsed_time_secs = trunc(timer:now_diff(End_time, Start_time) / 1000000), + log_msg("Elapsed : ~p~n", [Elapsed_time_secs]), + log_msg("Reqs/sec : ~p~n", [round(trunc((NumWorkers*NumReqsPerWorker) / Elapsed_time_secs))]), + dump_errors(). + +init_results() -> + ets:insert(ibrowse_test_results, {crash, 0}), + ets:insert(ibrowse_test_results, {send_failed, 0}), + ets:insert(ibrowse_test_results, {other_error, 0}), + ets:insert(ibrowse_test_results, {success, 0}), + ets:insert(ibrowse_test_results, {retry_later, 0}), + ets:insert(ibrowse_test_results, {trid_mismatch, 0}), + ets:insert(ibrowse_test_results, {success_no_trid, 0}), + ets:insert(ibrowse_test_results, {failed, 0}), + ets:insert(ibrowse_test_results, {timeout, 0}), + ets:insert(ibrowse_test_results, {req_id, 0}). + +spawn_workers(_Url, 0, _) -> + ok; +spawn_workers(Url, NumWorkers, NumReqsPerWorker) -> + Pid = proc_lib:spawn_link(?MODULE, do_send_req, [Url, NumReqsPerWorker]), + ets:insert(pid_table, {Pid, []}), + spawn_workers(Url, NumWorkers - 1, NumReqsPerWorker). 
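+
+%% Illustrative sketch, not part of the original module: one way to kick off
+%% the load test above from a shell, using the tuning calls mentioned in the
+%% comment before load_test/3. The URL and counts are made-up values.
+load_test_example() ->
+    ibrowse:set_max_sessions("www.example.com", 80, 20),
+    ibrowse:set_max_pipeline_size("www.example.com", 80, 20),
+    load_test("http://www.example.com/", 10, 100).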
+ +do_wait() -> + receive + {'EXIT', _, normal} -> + do_wait(); + {'EXIT', Pid, Reason} -> + ets:delete(pid_table, Pid), + ets:insert(ibrowse_errors, {Pid, Reason}), + ets:update_counter(ibrowse_test_results, crash, 1), + do_wait(); + Msg -> + io:format("Recvd unknown message...~p~n", [Msg]), + do_wait() + after 1000 -> + case ets:info(pid_table, size) of + 0 -> + done; + _ -> + do_wait() + end + end. + +do_send_req(Url, NumReqs) -> + do_send_req_1(Url, NumReqs). + +do_send_req_1(_Url, 0) -> + ets:delete(pid_table, self()); +do_send_req_1(Url, NumReqs) -> + Counter = integer_to_list(ets:update_counter(ibrowse_test_results, req_id, 1)), + case ibrowse:send_req(Url, [{"ib_req_id", Counter}], get, [], [], 10000) of + {ok, _Status, Headers, _Body} -> + case lists:keysearch("ib_req_id", 1, Headers) of + {value, {_, Counter}} -> + ets:update_counter(ibrowse_test_results, success, 1); + {value, _} -> + ets:update_counter(ibrowse_test_results, trid_mismatch, 1); + false -> + ets:update_counter(ibrowse_test_results, success_no_trid, 1) + end; + {error, req_timedout} -> + ets:update_counter(ibrowse_test_results, timeout, 1); + {error, send_failed} -> + ets:update_counter(ibrowse_test_results, send_failed, 1); + {error, retry_later} -> + ets:update_counter(ibrowse_test_results, retry_later, 1); + Err -> + ets:insert(ibrowse_errors, {now(), Err}), + ets:update_counter(ibrowse_test_results, other_error, 1), + ok + end, + do_send_req_1(Url, NumReqs-1). + +dump_errors() -> + case ets:info(ibrowse_errors, size) of + 0 -> + ok; + _ -> + {A, B, C} = now(), + Filename = lists:flatten( + io_lib:format("ibrowse_errors_~p_~p_~p.txt" , [A, B, C])), + case file:open(Filename, [write, delayed_write, raw]) of + {ok, Iod} -> + dump_errors(ets:first(ibrowse_errors), Iod); + Err -> + io:format("failed to create file ~s. Reason: ~p~n", [Filename, Err]), + ok + end + end. + +dump_errors('$end_of_table', Iod) -> + file:close(Iod); +dump_errors(Key, Iod) -> + [{_, Term}] = ets:lookup(ibrowse_errors, Key), + file:write(Iod, io_lib:format("~p~n", [Term])), + dump_errors(ets:next(ibrowse_errors, Key), Iod). 
+ +%%------------------------------------------------------------------------------ +%% Unit Tests +%%------------------------------------------------------------------------------ +-define(TEST_LIST, [{"http://intranet/messenger", get}, + {"http://www.google.co.uk", get}, + {"http://www.google.com", get}, + {"http://www.google.com", options}, + {"http://www.sun.com", get}, + {"http://www.oracle.com", get}, + {"http://www.bbc.co.uk", get}, + {"http://www.bbc.co.uk", trace}, + {"http://www.bbc.co.uk", options}, + {"http://yaws.hyber.org", get}, + {"http://jigsaw.w3.org/HTTP/ChunkedScript", get}, + {"http://jigsaw.w3.org/HTTP/TE/foo.txt", get}, + {"http://jigsaw.w3.org/HTTP/TE/bar.txt", get}, + {"http://jigsaw.w3.org/HTTP/connection.html", get}, + {"http://jigsaw.w3.org/HTTP/cc.html", get}, + {"http://jigsaw.w3.org/HTTP/cc-private.html", get}, + {"http://jigsaw.w3.org/HTTP/cc-proxy-revalidate.html", get}, + {"http://jigsaw.w3.org/HTTP/cc-nocache.html", get}, + {"http://jigsaw.w3.org/HTTP/h-content-md5.html", get}, + {"http://jigsaw.w3.org/HTTP/h-retry-after.html", get}, + {"http://jigsaw.w3.org/HTTP/h-retry-after-date.html", get}, + {"http://jigsaw.w3.org/HTTP/neg", get}, + {"http://jigsaw.w3.org/HTTP/negbad", get}, + {"http://jigsaw.w3.org/HTTP/400/toolong/", get}, + {"http://jigsaw.w3.org/HTTP/300/", get}, + {"http://jigsaw.w3.org/HTTP/Basic/", get, [{basic_auth, {"guest", "guest"}}]}, + {"http://jigsaw.w3.org/HTTP/CL/", get}, + {"http://www.httpwatch.com/httpgallery/chunked/", get} + ]). + +unit_tests() -> + unit_tests([]). + +unit_tests(Options) -> + Options_1 = Options ++ [{connect_timeout, 5000}], + {Pid, Ref} = erlang:spawn_monitor(?MODULE, unit_tests_1, [self(), Options_1]), + receive + {done, Pid} -> + ok; + {'DOWN', Ref, _, _, Info} -> + io:format("Test process crashed: ~p~n", [Info]) + after 60000 -> + exit(Pid, kill), + io:format("Timed out waiting for tests to complete~n", []) + end. + +unit_tests_1(Parent, Options) -> + lists:foreach(fun({Url, Method}) -> + execute_req(Url, Method, Options); + ({Url, Method, X_Opts}) -> + execute_req(Url, Method, X_Opts ++ Options) + end, ?TEST_LIST), + Parent ! {done, self()}. + +verify_chunked_streaming() -> + verify_chunked_streaming([]). + +verify_chunked_streaming(Options) -> + Url = "http://www.httpwatch.com/httpgallery/chunked/", + io:format("URL: ~s~n", [Url]), + io:format("Fetching data without streaming...~n", []), + Result_without_streaming = ibrowse:send_req( + Url, [], get, [], + [{response_format, binary} | Options]), + io:format("Fetching data with streaming as list...~n", []), + Async_response_list = do_async_req_list( + Url, get, [{response_format, list} | Options]), + io:format("Fetching data with streaming as binary...~n", []), + Async_response_bin = do_async_req_list( + Url, get, [{response_format, binary} | Options]), + compare_responses(Result_without_streaming, Async_response_list, Async_response_bin). 
+ +compare_responses({ok, St_code, _, Body}, {ok, St_code, _, Body}, {ok, St_code, _, Body}) -> + success; +compare_responses({ok, St_code, _, Body_1}, {ok, St_code, _, Body_2}, {ok, St_code, _, Body_3}) -> + case Body_1 of + Body_2 -> + io:format("Body_1 and Body_2 match~n", []); + Body_3 -> + io:format("Body_1 and Body_3 match~n", []); + _ when Body_2 == Body_3 -> + io:format("Body_2 and Body_3 match~n", []); + _ -> + io:format("All three bodies are different!~n", []) + end, + io:format("Body_1 -> ~p~n", [Body_1]), + io:format("Body_2 -> ~p~n", [Body_2]), + io:format("Body_3 -> ~p~n", [Body_3]), + fail_bodies_mismatch; +compare_responses(R1, R2, R3) -> + io:format("R1 -> ~p~n", [R1]), + io:format("R2 -> ~p~n", [R2]), + io:format("R3 -> ~p~n", [R3]), + fail. + +%% do_async_req_list(Url) -> +%% do_async_req_list(Url, get). + +%% do_async_req_list(Url, Method) -> +%% do_async_req_list(Url, Method, [{stream_to, self()}, +%% {stream_chunk_size, 1000}]). + +do_async_req_list(Url, Method, Options) -> + {Pid,_} = erlang:spawn_monitor(?MODULE, i_do_async_req_list, + [self(), Url, Method, + Options ++ [{stream_chunk_size, 1000}]]), + io:format("Spawned process ~p~n", [Pid]), + wait_for_resp(Pid). + +wait_for_resp(Pid) -> + receive + {async_result, Pid, Res} -> + Res; + {async_result, Other_pid, _} -> + io:format("~p: Waiting for result from ~p: got from ~p~n", [self(), Pid, Other_pid]), + wait_for_resp(Pid); + {'DOWN', _, _, Pid, Reason} -> + {'EXIT', Reason}; + {'DOWN', _, _, _, _} -> + wait_for_resp(Pid); + Msg -> + io:format("Recvd unknown message: ~p~n", [Msg]), + wait_for_resp(Pid) + after 10000 -> + {error, timeout} + end. + +i_do_async_req_list(Parent, Url, Method, Options) -> + Res = ibrowse:send_req(Url, [], Method, [], [{stream_to, self()} | Options]), + case Res of + {ibrowse_req_id, Req_id} -> + Result = wait_for_async_resp(Req_id, undefined, undefined, []), + Parent ! {async_result, self(), Result}; + Err -> + Parent ! {async_result, self(), Err} + end. + +wait_for_async_resp(Req_id, Acc_Stat_code, Acc_Headers, Body) -> + receive + {ibrowse_async_headers, Req_id, StatCode, Headers} -> + wait_for_async_resp(Req_id, StatCode, Headers, Body); + {ibrowse_async_response_end, Req_id} -> + Body_1 = list_to_binary(lists:reverse(Body)), + {ok, Acc_Stat_code, Acc_Headers, Body_1}; + {ibrowse_async_response, Req_id, Data} -> + wait_for_async_resp(Req_id, Acc_Stat_code, Acc_Headers, [Data | Body]); + Err -> + {ok, Acc_Stat_code, Acc_Headers, Err} + end. + +execute_req(Url, Method, Options) -> + io:format("~7.7w, ~50.50s: ", [Method, Url]), + Result = (catch ibrowse:send_req(Url, [], Method, [], Options)), + case Result of + {ok, SCode, _H, _B} -> + io:format("Status code: ~p~n", [SCode]); + Err -> + io:format("Err -> ~p~n", [Err]) + end. + +drv_ue_test() -> + drv_ue_test(lists:duplicate(1024, 127)). +drv_ue_test(Data) -> + [{port, Port}| _] = ets:lookup(ibrowse_table, port), +% erl_ddll:unload_driver("ibrowse_drv"), +% timer:sleep(1000), +% erl_ddll:load_driver("../priv", "ibrowse_drv"), +% Port = open_port({spawn, "ibrowse_drv"}, []), + {Time, Res} = timer:tc(ibrowse_lib, drv_ue, [Data, Port]), + io:format("Time -> ~p~n", [Time]), + io:format("Data Length -> ~p~n", [length(Data)]), + io:format("Res Length -> ~p~n", [length(Res)]). +% io:format("Result -> ~s~n", [Res]). + +ue_test() -> + ue_test(lists:duplicate(1024, $?)). 
+ue_test(Data) -> + {Time, Res} = timer:tc(ibrowse_lib, url_encode, [Data]), + io:format("Time -> ~p~n", [Time]), + io:format("Data Length -> ~p~n", [length(Data)]), + io:format("Res Length -> ~p~n", [length(Res)]). +% io:format("Result -> ~s~n", [Res]). + +log_msg(Fmt, Args) -> + io:format("~s -- " ++ Fmt, + [ibrowse_lib:printable_date() | Args]). diff --git a/apps/mem3/ebin/mem3.app b/apps/mem3/ebin/mem3.app new file mode 100644 index 00000000..0613ab26 --- /dev/null +++ b/apps/mem3/ebin/mem3.app @@ -0,0 +1,24 @@ +{application, mem3, [ + {description, "CouchDB Cluster Membership"}, + {mod, {mem3_app, []}}, + {vsn, "1.0.1"}, + {modules, [ + mem3, + mem3_app, + mem3_cache, + mem3_httpd, + mem3_nodes, + mem3_sup, + mem3_sync, + mem3_sync_event, + mem3_util + ]}, + {registered, [ + mem3_cache, + mem3_events, + mem3_nodes, + mem3_sync, + mem3_sup + ]}, + {applications, [kernel, stdlib, sasl, crypto, mochiweb, couch]} +]}. diff --git a/apps/mem3/ebin/mem3.appup b/apps/mem3/ebin/mem3.appup new file mode 100644 index 00000000..6e9ebe71 --- /dev/null +++ b/apps/mem3/ebin/mem3.appup @@ -0,0 +1,3 @@ +{"1.0.1",[{"1.0",[ + {load_module, mem3_httpd} +]}],[{"1.0",[]}]}. diff --git a/apps/mem3/include/mem3.hrl b/apps/mem3/include/mem3.hrl new file mode 100644 index 00000000..b9359b44 --- /dev/null +++ b/apps/mem3/include/mem3.hrl @@ -0,0 +1,30 @@ +% type specification hacked to suppress dialyzer warning re: match spec +-record(shard, { + name :: binary() | '_', + node :: node() | '_', + dbname :: binary(), + range :: [non_neg_integer() | '$1' | '$2'], + ref :: reference() | 'undefined' | '_' +}). + +%% types +-type join_type() :: init | join | replace | leave. +-type join_order() :: non_neg_integer(). +-type options() :: list(). +-type mem_node() :: {join_order(), node(), options()}. +-type mem_node_list() :: [mem_node()]. +-type arg_options() :: {test, boolean()}. +-type args() :: [] | [arg_options()]. +-type test() :: undefined | node(). +-type epoch() :: float(). +-type clock() :: {node(), epoch()}. +-type vector_clock() :: [clock()]. +-type ping_node() :: node() | nil. +-type gossip_fun() :: call | cast. + +-type part() :: #shard{}. +-type fullmap() :: [part()]. +-type ref_part_map() :: {reference(), part()}. +-type tref() :: reference(). +-type np() :: {node(), part()}. +-type beg_acc() :: [integer()]. diff --git a/apps/mem3/src/mem3.erl b/apps/mem3/src/mem3.erl new file mode 100644 index 00000000..1485c7fe --- /dev/null +++ b/apps/mem3/src/mem3.erl @@ -0,0 +1,103 @@ +-module(mem3). + +-export([start/0, stop/0, restart/0, nodes/0, shards/1, shards/2, + choose_shards/2]). +-export([compare_nodelists/0, compare_shards/1]). + +-include("mem3.hrl"). + +start() -> + application:start(mem3). + +stop() -> + application:stop(mem3). + +restart() -> + stop(), + start(). + +%% @doc Detailed report of cluster-wide membership state. Queries the state +%% on all member nodes and builds a dictionary with unique states as the +%% key and the nodes holding that state as the value. Also reports member +%% nodes which fail to respond and nodes which are connected but are not +%% cluster members. Useful for debugging. +-spec compare_nodelists() -> [{{cluster_nodes, [node()]} | bad_nodes + | non_member_nodes, [node()]}]. 
+compare_nodelists() -> + Nodes = mem3:nodes(), + AllNodes = erlang:nodes([this, visible]), + {Replies, BadNodes} = gen_server:multi_call(Nodes, mem3_nodes, get_nodelist), + Dict = lists:foldl(fun({Node, Nodelist}, D) -> + orddict:append({cluster_nodes, Nodelist}, Node, D) + end, orddict:new(), Replies), + [{non_member_nodes, AllNodes -- Nodes}, {bad_nodes, BadNodes} | Dict]. + +-spec compare_shards(DbName::iodata()) -> [{bad_nodes | [#shard{}], [node()]}]. +compare_shards(DbName) when is_list(DbName) -> + compare_shards(list_to_binary(DbName)); +compare_shards(DbName) -> + Nodes = mem3:nodes(), + {Replies, BadNodes} = rpc:multicall(mem3, shards, [DbName]), + GoodNodes = [N || N <- Nodes, not lists:member(N, BadNodes)], + Dict = lists:foldl(fun({Shards, Node}, D) -> + orddict:append(Shards, Node, D) + end, orddict:new(), lists:zip(Replies, GoodNodes)), + [{bad_nodes, BadNodes} | Dict]. + +-spec nodes() -> [node()]. +nodes() -> + mem3_nodes:get_nodelist(). + +-spec shards(DbName::iodata()) -> [#shard{}]. +shards(DbName) when is_list(DbName) -> + shards(list_to_binary(DbName)); +shards(DbName) -> + try ets:lookup(partitions, DbName) of + [] -> + mem3_util:load_shards_from_disk(DbName); + Else -> + Else + catch error:badarg -> + mem3_util:load_shards_from_disk(DbName) + end. + +-spec shards(DbName::iodata(), DocId::binary()) -> [#shard{}]. +shards(DbName, DocId) when is_list(DbName) -> + shards(list_to_binary(DbName), DocId); +shards(DbName, DocId) when is_list(DocId) -> + shards(DbName, list_to_binary(DocId)); +shards(DbName, DocId) -> + HashKey = mem3_util:hash(DocId), + Head = #shard{ + name = '_', + node = '_', + dbname = DbName, + range = ['$1','$2'], + ref = '_' + }, + Conditions = [{'<', '$1', HashKey}, {'=<', HashKey, '$2'}], + try ets:select(partitions, [{Head, Conditions, ['$_']}]) of + [] -> + mem3_util:load_shards_from_disk(DbName, DocId); + Shards -> + Shards + catch error:badarg -> + mem3_util:load_shards_from_disk(DbName, DocId) + end. + +-spec choose_shards(DbName::iodata(), Options::list()) -> [#shard{}]. +choose_shards(DbName, Options) when is_list(DbName) -> + choose_shards(list_to_binary(DbName), Options); +choose_shards(DbName, Options) -> + try shards(DbName) + catch error:E when E==database_does_not_exist; E==badarg -> + Nodes = mem3:nodes(), + NodeCount = length(Nodes), + N = mem3_util:n_val(couch_util:get_value(n, Options), NodeCount), + Q = mem3_util:to_integer(couch_util:get_value(q, Options, + couch_config:get("cluster", "q", "8"))), + % rotate to a random entry in the nodelist for even distribution + {A, B} = lists:split(crypto:rand_uniform(1,length(Nodes)+1), Nodes), + RotatedNodes = B ++ A, + mem3_util:create_partition_map(DbName, N, Q, RotatedNodes) + end. diff --git a/apps/mem3/src/mem3_app.erl b/apps/mem3/src/mem3_app.erl new file mode 100644 index 00000000..88cd1ea1 --- /dev/null +++ b/apps/mem3/src/mem3_app.erl @@ -0,0 +1,9 @@ +-module(mem3_app). +-behaviour(application). +-export([start/2, stop/1]). + +start(_Type, []) -> + mem3_sup:start_link(). + +stop([]) -> + ok. diff --git a/apps/mem3/src/mem3_cache.erl b/apps/mem3/src/mem3_cache.erl new file mode 100644 index 00000000..2a29ca4c --- /dev/null +++ b/apps/mem3/src/mem3_cache.erl @@ -0,0 +1,92 @@ +-module(mem3_cache). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0]). + +-record(state, {changes_pid}). + +-include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). 
+ +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + ets:new(partitions, [bag, public, named_table, {keypos,#shard.dbname}]), + {Pid, _} = spawn_monitor(fun() -> listen_for_changes(0) end), + {ok, #state{changes_pid = Pid}}. + +handle_call(_Call, _From, State) -> + {noreply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', _, _, Pid, {badarg, [{ets,delete,[partitions,_]}|_]}}, + #state{changes_pid=Pid} = State) -> + % fatal error, somebody deleted our ets table + {stop, ets_table_error, State}; +handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> + ?LOG_INFO("~p changes listener died ~p", [?MODULE, Reason]), + Seq = case Reason of {seq, EndSeq} -> EndSeq; _ -> 0 end, + timer:send_after(5000, {start_listener, Seq}), + {noreply, State}; +handle_info({start_listener, Seq}, State) -> + {NewPid, _} = spawn_monitor(fun() -> listen_for_changes(Seq) end), + {noreply, State#state{changes_pid=NewPid}}; +handle_info(_Msg, State) -> + {noreply, State}. + +terminate(_Reason, #state{changes_pid=Pid}) -> + exit(Pid, kill), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% internal functions + +listen_for_changes(Since) -> + DbName = ?l2b(couch_config:get("mem3", "db", "dbs")), + {ok, Db} = ensure_exists(DbName), + Args = #changes_args{ + feed = "continuous", + since = Since, + heartbeat = true, + include_docs = true + }, + ChangesFun = couch_changes:handle_changes(Args, nil, Db), + ChangesFun(fun changes_callback/2). + +ensure_exists(DbName) -> + Options = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}], + case couch_db:open(DbName, Options) of + {ok, Db} -> + {ok, Db}; + _ -> + couch_server:create(DbName, Options) + end. + +changes_callback(start, _) -> + {ok, nil}; +changes_callback({stop, EndSeq}, _) -> + exit({seq, EndSeq}); +changes_callback({change, {Change}, _}, _) -> + DbName = couch_util:get_value(<<"id">>, Change), + case couch_util:get_value(deleted, Change, false) of + true -> + ets:delete(partitions, DbName); + false -> + case couch_util:get_value(doc, Change) of + {error, Reason} -> + ?LOG_ERROR("missing partition table for ~s: ~p", [DbName, Reason]); + {Doc} -> + ets:delete(partitions, DbName), + ets:insert(partitions, mem3_util:build_shards(DbName, Doc)) + end + end, + {ok, couch_util:get_value(<<"seq">>, Change)}; +changes_callback(timeout, _) -> + {ok, nil}. diff --git a/apps/mem3/src/mem3_httpd.erl b/apps/mem3/src/mem3_httpd.erl new file mode 100644 index 00000000..cbfaea95 --- /dev/null +++ b/apps/mem3/src/mem3_httpd.erl @@ -0,0 +1,39 @@ +-module(mem3_httpd). + +-export([handle_membership_req/1]). + +%% includes +-include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +handle_membership_req(#httpd{method='GET', + path_parts=[<<"_membership">>]} = Req) -> + ClusterNodes = try mem3:nodes() + catch _:_ -> {ok,[]} end, + couch_httpd:send_json(Req, {[ + {all_nodes, lists:sort([node()|nodes()])}, + {cluster_nodes, lists:sort(ClusterNodes)} + ]}); +handle_membership_req(#httpd{method='GET', + path_parts=[<<"_membership">>, <<"parts">>, DbName]} = Req) -> + ClusterNodes = try mem3:nodes() + catch _:_ -> {ok,[]} end, + Shards = mem3:shards(DbName), + JsonShards = json_shards(Shards, dict:new()), + couch_httpd:send_json(Req, {[ + {all_nodes, lists:sort([node()|nodes()])}, + {cluster_nodes, lists:sort(ClusterNodes)}, + {partitions, JsonShards} + ]}). 
+ +%% +%% internal +%% + +json_shards([], AccIn) -> + List = dict:to_list(AccIn), + {lists:sort(List)}; +json_shards([#shard{node=Node, range=[B,_E]} | Rest], AccIn) -> + HexBeg = couch_util:to_hex(<<B:32/integer>>), + json_shards(Rest, dict:append(HexBeg, Node, AccIn)). diff --git a/apps/mem3/src/mem3_nodes.erl b/apps/mem3/src/mem3_nodes.erl new file mode 100644 index 00000000..6cbf3d9a --- /dev/null +++ b/apps/mem3/src/mem3_nodes.erl @@ -0,0 +1,120 @@ +-module(mem3_nodes). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0, get_nodelist/0]). + +-include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-record(state, {changes_pid, update_seq, nodes}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +get_nodelist() -> + gen_server:call(?MODULE, get_nodelist). + +init([]) -> + {Nodes, UpdateSeq} = initialize_nodelist(), + {Pid, _} = spawn_monitor(fun() -> listen_for_changes(UpdateSeq) end), + {ok, #state{changes_pid = Pid, update_seq = UpdateSeq, nodes = Nodes}}. + +handle_call(get_nodelist, _From, State) -> + {reply, State#state.nodes, State}; +handle_call({add_node, Node}, _From, #state{nodes=Nodes} = State) -> + gen_event:notify(mem3_events, {add_node, Node}), + {reply, ok, State#state{nodes = lists:umerge([Node], Nodes)}}; +handle_call({remove_node, Node}, _From, #state{nodes=Nodes} = State) -> + gen_event:notify(mem3_events, {remove_node, Node}), + {reply, ok, State#state{nodes = lists:delete(Node, Nodes)}}; +handle_call(_Call, _From, State) -> + {noreply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> + ?LOG_INFO("~p changes listener died ~p", [?MODULE, Reason]), + StartSeq = State#state.update_seq, + Seq = case Reason of {seq, EndSeq} -> EndSeq; _ -> StartSeq end, + timer:send_after(5000, start_listener), + {noreply, State#state{update_seq = Seq}}; +handle_info(start_listener, #state{update_seq = Seq} = State) -> + {NewPid, _} = spawn_monitor(fun() -> listen_for_changes(Seq) end), + {noreply, State#state{changes_pid=NewPid}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% internal functions + +initialize_nodelist() -> + DbName = couch_config:get("mem3", "nodedb", "nodes"), + {ok, Db} = ensure_exists(DbName), + {ok, _, Nodes0} = couch_btree:fold(Db#db.id_tree, fun first_fold/3, [], []), + % add self if not already present + case lists:member(node(), Nodes0) of + true -> + Nodes = Nodes0; + false -> + Doc = #doc{id = couch_util:to_binary(node())}, + {ok, _} = couch_db:update_doc(Db, Doc, []), + Nodes = [node() | Nodes0] + end, + couch_db:close(Db), + {lists:sort(Nodes), Db#db.update_seq}. + +first_fold(#full_doc_info{id = <<"_design/", _/binary>>}, _, Acc) -> + {ok, Acc}; +first_fold(#full_doc_info{deleted=true}, _, Acc) -> + {ok, Acc}; +first_fold(#full_doc_info{id=Id}, _, Acc) -> + {ok, [mem3_util:to_atom(Id) | Acc]}. + +listen_for_changes(Since) -> + DbName = ?l2b(couch_config:get("mem3", "nodedb", "nodes")), + {ok, Db} = ensure_exists(DbName), + Args = #changes_args{ + feed = "continuous", + since = Since, + heartbeat = true, + include_docs = true + }, + ChangesFun = couch_changes:handle_changes(Args, nil, Db), + ChangesFun(fun changes_callback/2). 
+ +ensure_exists(DbName) when is_list(DbName) -> + ensure_exists(list_to_binary(DbName)); +ensure_exists(DbName) -> + Options = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}}], + case couch_db:open(DbName, Options) of + {ok, Db} -> + {ok, Db}; + _ -> + couch_server:create(DbName, Options) + end. + +changes_callback(start, _) -> + {ok, nil}; +changes_callback({stop, EndSeq}, _) -> + exit({seq, EndSeq}); +changes_callback({change, {Change}, _}, _) -> + Node = couch_util:get_value(<<"id">>, Change), + case Node of <<"_design/", _/binary>> -> ok; _ -> + case couch_util:get_value(deleted, Change, false) of + false -> + gen_server:call(?MODULE, {add_node, mem3_util:to_atom(Node)}); + true -> + gen_server:call(?MODULE, {remove_node, mem3_util:to_atom(Node)}) + end + end, + {ok, couch_util:get_value(<<"seq">>, Change)}; +changes_callback(timeout, _) -> + {ok, nil}. diff --git a/apps/mem3/src/mem3_sup.erl b/apps/mem3/src/mem3_sup.erl new file mode 100644 index 00000000..58d0bbf5 --- /dev/null +++ b/apps/mem3/src/mem3_sup.erl @@ -0,0 +1,21 @@ +-module(mem3_sup). +-behaviour(supervisor). +-export([start_link/0, init/1]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init(_Args) -> + Children = [ + child(mem3_events), + child(mem3_sync), + child(mem3_cache), + child(mem3_nodes) + ], + {ok, {{one_for_one,10,1}, Children}}. + +child(mem3_events) -> + MFA = {gen_event, start_link, [{local, mem3_events}]}, + {mem3_events, MFA, permanent, 1000, worker, dynamic}; +child(Child) -> + {Child, {Child, start_link, []}, permanent, 1000, worker, [Child]}. diff --git a/apps/mem3/src/mem3_sync.erl b/apps/mem3/src/mem3_sync.erl new file mode 100644 index 00000000..d3b3ea51 --- /dev/null +++ b/apps/mem3/src/mem3_sync.erl @@ -0,0 +1,215 @@ +-module(mem3_sync). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0, get_active/0, get_queue/0, push/2, remove_node/1]). + +-include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-record(state, { + active = [], + count = 0, + limit, + dict = dict:new(), + waiting = [], + update_notifier +}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +get_active() -> + gen_server:call(?MODULE, get_active). + +get_queue() -> + gen_server:call(?MODULE, get_queue). + +push(Db, Node) -> + gen_server:cast(?MODULE, {push, Db, Node}). + +remove_node(Node) -> + gen_server:cast(?MODULE, {remove_node, Node}). + +init([]) -> + process_flag(trap_exit, true), + Concurrency = couch_config:get("mem3", "sync_concurrency", "10"), + gen_event:add_handler(mem3_events, mem3_sync_event, []), + {ok, Pid} = start_update_notifier(), + spawn(fun initial_sync/0), + {ok, #state{limit = list_to_integer(Concurrency), update_notifier=Pid}}. + +handle_call(get_active, _From, State) -> + {reply, State#state.active, State}; + +handle_call(get_queue, _From, State) -> + {reply, State#state.waiting, State}. 
+ +handle_cast({push, DbName, Node}, #state{count=Count, limit=Limit} = State) + when Count >= Limit -> + {noreply, add_to_queue(State, DbName, Node)}; + +handle_cast({push, DbName, Node}, State) -> + #state{active = L, count = C} = State, + case is_running(DbName, Node, L) of + true -> + {noreply, add_to_queue(State, DbName, Node)}; + false -> + Pid = start_push_replication(DbName, Node), + {noreply, State#state{active=[{DbName, Node, Pid}|L], count=C+1}} + end; + +handle_cast({remove_node, Node}, State) -> + Waiting = [{S,N} || {S,N} <- State#state.waiting, N =/= Node], + Dict = lists:foldl(fun(DbName,D) -> dict:erase({DbName,Node}, D) end, + State#state.dict, [S || {S,N} <- Waiting, N =:= Node]), + {noreply, State#state{dict = Dict, waiting = Waiting}}. + +handle_info({'EXIT', Pid, _}, #state{update_notifier=Pid} = State) -> + {ok, NewPid} = start_update_notifier(), + {noreply, State#state{update_notifier=NewPid}}; + +handle_info({'EXIT', Active, normal}, State) -> + handle_replication_exit(State, Active); + +handle_info({'EXIT', Active, Reason}, State) -> + case lists:keyfind(Active, 3, State#state.active) of + {OldDbName, OldNode, _} -> + ?LOG_ERROR("~p replication ~s -> ~p died:~n~p", [?MODULE, OldDbName, + OldNode, Reason]), + timer:apply_after(5000, ?MODULE, push, [OldDbName, OldNode]); + false -> ok end, + handle_replication_exit(State, Active); + +handle_info(Msg, State) -> + ?LOG_ERROR("unexpected msg at replication manager ~p", [Msg]), + {noreply, State}. + +terminate(_Reason, State) -> + [exit(Pid, shutdown) || {_,_,Pid} <- State#state.active], + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_replication_exit(#state{waiting=[]} = State, Pid) -> + NewActive = lists:keydelete(Pid, 3, State#state.active), + {noreply, State#state{active=NewActive, count=length(NewActive)}}; +handle_replication_exit(State, Pid) -> + #state{active=Active, limit=Limit, dict=D, waiting=Waiting} = State, + Active1 = lists:keydelete(Pid, 3, Active), + Count = length(Active1), + NewState = if Count < Limit -> + case next_replication(Active1, Waiting) of + nil -> % all waiting replications are also active + State#state{active = Active1, count = Count}; + {DbName, Node, StillWaiting} -> + NewPid = start_push_replication(DbName, Node), + State#state{ + active = [{DbName, Node, NewPid} | Active1], + count = Count+1, + dict = dict:erase({DbName,Node}, D), + waiting = StillWaiting + } + end; + true -> + State#state{active = Active1, count=Count} + end, + {noreply, NewState}. 
+ +start_push_replication(DbName, Node) -> + PostBody = {[ + {<<"source">>, DbName}, + {<<"target">>, {[{<<"node">>, Node}, {<<"name">>, DbName}]}}, + {<<"continuous">>, false}, + {<<"async">>, true} + ]}, + ?LOG_INFO("starting ~s -> ~p internal replication", [DbName, Node]), + UserCtx = #user_ctx{name = <<"replicator">>, roles = [<<"_admin">>]}, + case (catch couch_rep:replicate(PostBody, UserCtx)) of + Pid when is_pid(Pid) -> + link(Pid), + Pid; + {db_not_found, _Msg} -> + case couch_db:open(DbName, []) of + {ok, Db} -> + % source exists, let's (re)create the target + couch_db:close(Db), + case rpc:call(Node, couch_api, create_db, [DbName, []]) of + {ok, Target} -> + ?LOG_INFO("~p successfully created ~s on ~p", [?MODULE, DbName, + Node]), + couch_db:close(Target), + start_push_replication(DbName, Node); + file_exists -> + start_push_replication(DbName, Node); + Error -> + ?LOG_ERROR("~p couldn't create ~s on ~p because ~p", + [?MODULE, DbName, Node, Error]), + exit(shutdown) + end; + {not_found, no_db_file} -> + % source is gone, so this is a hack to skip it + ?LOG_INFO("~p tried to push ~s to ~p but it was already deleted", + [?MODULE, DbName, Node]), + spawn_link(fun() -> ok end) + end; + {node_not_connected, _} -> + % we'll get this one when the node rejoins + ?LOG_ERROR("~p exiting because ~p is not connected", [?MODULE, Node]), + spawn_link(fun() -> ok end); + CatchAll -> + ?LOG_INFO("~p strange error ~p", [?MODULE, CatchAll]), + case lists:member(Node, nodes()) of + true -> + timer:apply_after(5000, ?MODULE, push, [DbName, Node]); + false -> + ok + end, + spawn_link(fun() -> ok end) + end. + +add_to_queue(State, DbName, Node) -> + #state{dict=D, waiting=Waiting} = State, + case dict:is_key({DbName, Node}, D) of + true -> + State; + false -> + ?LOG_DEBUG("adding ~s -> ~p to internal queue", [DbName, Node]), + State#state{ + dict = dict:store({DbName,Node}, ok, D), + waiting = Waiting ++ [{DbName,Node}] + } + end. + +initial_sync() -> + Db1 = ?l2b(couch_config:get("mem3", "node_db", "nodes")), + Db2 = ?l2b(couch_config:get("mem3", "shard_db", "dbs")), + Nodes = mem3:nodes(), + Live = nodes(), + [[push(Db, N) || Db <- [Db1,Db2]] || N <- Nodes, lists:member(N, Live)]. + +start_update_notifier() -> + Db1 = ?l2b(couch_config:get("mem3", "node_db", "nodes")), + Db2 = ?l2b(couch_config:get("mem3", "shard_db", "dbs")), + couch_db_update_notifier:start_link(fun + ({updated, Db}) when Db == Db1; Db == Db2 -> + Nodes = mem3:nodes(), + Live = nodes(), + [?MODULE:push(Db, N) || N <- Nodes, lists:member(N, Live)]; + (_) -> ok end). + +%% @doc Finds the next {DbName,Node} pair in the list of waiting replications +%% which does not correspond to an already running replication +-spec next_replication(list(), list()) -> {binary(),node(),list()} | nil. +next_replication(Active, Waiting) -> + case lists:splitwith(fun({S,N}) -> is_running(S,N,Active) end, Waiting) of + {_, []} -> + nil; + {Running, [{DbName,Node}|Rest]} -> + {DbName, Node, Running ++ Rest} + end. + +is_running(DbName, Node, ActiveList) -> + [] =/= [true || {S,N,_} <- ActiveList, S=:=DbName, N=:=Node]. diff --git a/apps/mem3/src/mem3_sync_event.erl b/apps/mem3/src/mem3_sync_event.erl new file mode 100644 index 00000000..45fcb8aa --- /dev/null +++ b/apps/mem3/src/mem3_sync_event.erl @@ -0,0 +1,44 @@ +-module(mem3_sync_event). +-behaviour(gen_event). + +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). + +init(_) -> + {ok, nil}. 
+ +handle_event({add_node, Node}, State) -> + Db1 = list_to_binary(couch_config:get("mem3", "node_db", "nodes")), + Db2 = list_to_binary(couch_config:get("mem3", "shard_db", "dbs")), + [mem3_sync:push(Db, Node) || Db <- [Db1, Db2]], + {ok, State}; + +handle_event({nodeup, Node}, State) -> + case lists:member(Node, mem3:nodes()) of + true -> + Db1 = list_to_binary(couch_config:get("mem3", "node_db", "nodes")), + Db2 = list_to_binary(couch_config:get("mem3", "shard_db", "dbs")), + [mem3_sync:push(Db, Node) || Db <- [Db1, Db2]]; + false -> + ok + end, + {ok, State}; + +handle_event({Down, Node}, State) when Down == nodedown; Down == remove_node -> + mem3_sync:remove_node(Node), + {ok, State}; + +handle_event(_Event, State) -> + {ok, State}. + +handle_call(_Request, State) -> + {ok, ok, State}. + +handle_info(_Info, State) -> + {ok, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/mem3/src/mem3_util.erl b/apps/mem3/src/mem3_util.erl new file mode 100644 index 00000000..2ed84db6 --- /dev/null +++ b/apps/mem3/src/mem3_util.erl @@ -0,0 +1,139 @@ +-module(mem3_util). + +-export([hash/1, name_shard/1, create_partition_map/4, build_shards/2, + n_val/2, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, + load_shards_from_disk/1, load_shards_from_disk/2]). + +-define(RINGTOP, 2 bsl 31). % CRC32 space + +-include("mem3.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +hash(Item) when is_binary(Item) -> + erlang:crc32(Item); +hash(Item) -> + erlang:crc32(term_to_binary(Item)). + +name_shard(#shard{dbname = DbName, range=[B,E]} = Shard) -> + Name = ["shards/", couch_util:to_hex(<<B:32/integer>>), "-", + couch_util:to_hex(<<E:32/integer>>), "/", DbName], + Shard#shard{name = ?l2b(Name)}. + +create_partition_map(DbName, N, Q, Nodes) -> + UniqueShards = make_key_ranges((?RINGTOP) div Q, 0, []), + Shards0 = lists:flatten([lists:duplicate(N, S) || S <- UniqueShards]), + Shards1 = attach_nodes(Shards0, [], Nodes, []), + [name_shard(S#shard{dbname=DbName}) || S <- Shards1]. + +make_key_ranges(_, CurrentPos, Acc) when CurrentPos >= ?RINGTOP -> + Acc; +make_key_ranges(Increment, Start, Acc) -> + case Start + 2*Increment of + X when X > ?RINGTOP -> + End = ?RINGTOP - 1; + _ -> + End = Start + Increment - 1 + end, + make_key_ranges(Increment, End+1, [#shard{range=[Start, End]} | Acc]). + +attach_nodes([], Acc, _, _) -> + lists:reverse(Acc); +attach_nodes(Shards, Acc, [], UsedNodes) -> + attach_nodes(Shards, Acc, lists:reverse(UsedNodes), []); +attach_nodes([S | Rest], Acc, [Node | Nodes], UsedNodes) -> + attach_nodes(Rest, [S#shard{node=Node} | Acc], Nodes, [Node | UsedNodes]). + +write_db_doc(Doc) -> + {ok, Db} = couch_db:open(<<"dbs">>, []), + try + update_db_doc(Db, Doc) + catch conflict -> + ?LOG_ERROR("conflict writing db doc, must be a race", []) + after + couch_db:close(Db) + end. + +update_db_doc(Db, #doc{id=Id, body=Body} = Doc) -> + case couch_db:open_doc(Db, Id, []) of + {not_found, _} -> + {ok, _} = couch_db:update_doc(Db, Doc, []); + {ok, #doc{body=Body}} -> + ok; + {ok, OldDoc} -> + {ok, _} = couch_db:update_doc(Db, OldDoc#doc{body=Body}, []) + end. + +delete_db_doc(DocId) -> + {ok, Db} = couch_db:open(<<"dbs">>, []), + try + delete_db_doc(Db, DocId) + catch conflict -> + ok + after + couch_db:close(Db) + end. + +delete_db_doc(Db, DocId) -> + case couch_db:open_doc(Db, DocId, []) of + {not_found, _} -> + ok; + {ok, OldDoc} -> + {ok, _} = couch_db:update_doc(Db, OldDoc#doc{deleted=true}, []) + end. 
+ +build_shards(DbName, DocProps) -> + {ByNode} = couch_util:get_value(<<"by_node">>, DocProps, {[]}), + lists:flatmap(fun({Node, Ranges}) -> + lists:map(fun(Range) -> + [B,E] = string:tokens(?b2l(Range), "-"), + Beg = httpd_util:hexlist_to_integer(B), + End = httpd_util:hexlist_to_integer(E), + name_shard(#shard{ + dbname = DbName, + node = to_atom(Node), + range = [Beg, End] + }) + end, Ranges) + end, ByNode). + +to_atom(Node) when is_binary(Node) -> + list_to_atom(binary_to_list(Node)); +to_atom(Node) when is_atom(Node) -> + Node. + +to_integer(N) when is_integer(N) -> + N; +to_integer(N) when is_binary(N) -> + list_to_integer(binary_to_list(N)); +to_integer(N) when is_list(N) -> + list_to_integer(N). + +n_val(undefined, NodeCount) -> + n_val(couch_config:get("cluster", "n", "3"), NodeCount); +n_val(N, NodeCount) when is_list(N) -> + n_val(list_to_integer(N), NodeCount); +n_val(N, NodeCount) when is_integer(NodeCount), N > NodeCount -> + ?LOG_ERROR("Request to create N=~p DB but only ~p node(s)", [N, NodeCount]), + NodeCount; +n_val(N, _) when N < 1 -> + 1; +n_val(N, _) -> + N. + +load_shards_from_disk(DbName) when is_binary(DbName) -> + {ok, Db} = couch_db:open(<<"dbs">>, []), + try load_shards_from_db(Db, DbName) after couch_db:close(Db) end. + +load_shards_from_db(#db{} = ShardDb, DbName) -> + case couch_db:open_doc(ShardDb, DbName, []) of + {ok, #doc{body = {Props}}} -> + ?LOG_INFO("dbs cache miss for ~s", [DbName]), + build_shards(DbName, Props); + {not_found, _} -> + erlang:error(database_does_not_exist) + end. + +load_shards_from_disk(DbName, DocId)-> + Shards = load_shards_from_disk(DbName), + HashKey = hash(DocId), + [S || #shard{range = [B,E]} = S <- Shards, B < HashKey, HashKey =< E]. diff --git a/apps/mem3/test/01-config-default.ini b/apps/mem3/test/01-config-default.ini new file mode 100644 index 00000000..757f7830 --- /dev/null +++ b/apps/mem3/test/01-config-default.ini @@ -0,0 +1,2 @@ +[cluster] +n=3 diff --git a/apps/mem3/test/mem3_util_test.erl b/apps/mem3/test/mem3_util_test.erl new file mode 100644 index 00000000..0f6d24be --- /dev/null +++ b/apps/mem3/test/mem3_util_test.erl @@ -0,0 +1,140 @@ +-module(mem3_util_test). + +-include("mem3.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +hash_test() -> + ?assertEqual(1624516141,mem3_util:hash(0)), + ?assertEqual(3816901808,mem3_util:hash("0")), + ?assertEqual(3523407757,mem3_util:hash(<<0>>)), + ?assertEqual(4108050209,mem3_util:hash(<<"0">>)), + ?assertEqual(3094724072,mem3_util:hash(zero)), + ok. + +name_shard_test() -> + Shard1 = #shard{}, + ?assertError(function_clause, mem3_util:name_shard(Shard1)), + + Shard2 = #shard{dbname = <<"testdb">>, range = [0,100]}, + #shard{name=Name2} = mem3_util:name_shard(Shard2), + ?assertEqual(<<"shards/00000000-00000064/testdb">>, Name2), + + ok. + +create_partition_map_test() -> + {DbName1, N1, Q1, Nodes1} = {<<"testdb1">>, 3, 4, [a,b,c,d]}, + Map1 = mem3_util:create_partition_map(DbName1, N1, Q1, Nodes1), + ?assertEqual(12, length(Map1)), + + {DbName2, N2, Q2, Nodes2} = {<<"testdb2">>, 1, 1, [a,b,c,d]}, + [#shard{name=Name2,node=Node2}] = Map2 = + mem3_util:create_partition_map(DbName2, N2, Q2, Nodes2), + ?assertEqual(1, length(Map2)), + ?assertEqual(<<"shards/00000000-ffffffff/testdb2">>, Name2), + ?assertEqual(a, Node2), + ok. 
+ +build_shards_test() -> + DocProps1 = + [{<<"changelog">>, + [[<<"add">>,<<"00000000-1fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"20000000-3fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"40000000-5fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"60000000-7fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"80000000-9fffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"a0000000-bfffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"c0000000-dfffffff">>, + <<"dbcore@node.local">>], + [<<"add">>,<<"e0000000-ffffffff">>, + <<"dbcore@node.local">>]]}, + {<<"by_node">>, + {[{<<"dbcore@node.local">>, + [<<"00000000-1fffffff">>,<<"20000000-3fffffff">>, + <<"40000000-5fffffff">>,<<"60000000-7fffffff">>, + <<"80000000-9fffffff">>,<<"a0000000-bfffffff">>, + <<"c0000000-dfffffff">>,<<"e0000000-ffffffff">>]}]}}, + {<<"by_range">>, + {[{<<"00000000-1fffffff">>,[<<"dbcore@node.local">>]}, + {<<"20000000-3fffffff">>,[<<"dbcore@node.local">>]}, + {<<"40000000-5fffffff">>,[<<"dbcore@node.local">>]}, + {<<"60000000-7fffffff">>,[<<"dbcore@node.local">>]}, + {<<"80000000-9fffffff">>,[<<"dbcore@node.local">>]}, + {<<"a0000000-bfffffff">>,[<<"dbcore@node.local">>]}, + {<<"c0000000-dfffffff">>,[<<"dbcore@node.local">>]}, + {<<"e0000000-ffffffff">>,[<<"dbcore@node.local">>]}]}}], + Shards1 = mem3_util:build_shards(<<"testdb1">>, DocProps1), + ExpectedShards1 = + [{shard,<<"shards/00000000-1fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [0,536870911], + undefined}, + {shard,<<"shards/20000000-3fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [536870912,1073741823], + undefined}, + {shard,<<"shards/40000000-5fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [1073741824,1610612735], + undefined}, + {shard,<<"shards/60000000-7fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [1610612736,2147483647], + undefined}, + {shard,<<"shards/80000000-9fffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [2147483648,2684354559], + undefined}, + {shard,<<"shards/a0000000-bfffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [2684354560,3221225471], + undefined}, + {shard,<<"shards/c0000000-dfffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [3221225472,3758096383], + undefined}, + {shard,<<"shards/e0000000-ffffffff/testdb1">>, + 'dbcore@node.local',<<"testdb1">>, + [3758096384,4294967295], + undefined}], + ?assertEqual(ExpectedShards1, Shards1), + ok. + + +%% n_val tests + +nval_test() -> + ?assertEqual(2, mem3_util:n_val(2,4)), + ?assertEqual(1, mem3_util:n_val(-1,4)), + ?assertEqual(4, mem3_util:n_val(6,4)), + ok. + +config_01_setup() -> + Ini = filename:join([code:lib_dir(mem3, test), "01-config-default.ini"]), + {ok, Pid} = couch_config:start_link([Ini]), + Pid. + +config_teardown(_Pid) -> + couch_config:stop(). + +n_val_test_() -> + {"n_val tests", + [ + {setup, + fun config_01_setup/0, + fun config_teardown/1, + fun(Pid) -> + {with, Pid, [ + fun n_val_1/1 + ]} + end} + ] + }. + +n_val_1(_Pid) -> + ?assertEqual(3, mem3_util:n_val(undefined, 4)). diff --git a/apps/mochiweb/src/mochifmt.erl b/apps/mochiweb/src/mochifmt.erl new file mode 100644 index 00000000..da0a133a --- /dev/null +++ b/apps/mochiweb/src/mochifmt.erl @@ -0,0 +1,426 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2008 Mochi Media, Inc. + +%% @doc String Formatting for Erlang, inspired by Python 2.6 +%% (<a href="http://www.python.org/dev/peps/pep-3101/">PEP 3101</a>). +%% +-module(mochifmt). +-author('bob@mochimedia.com'). 
+-export([format/2, format_field/2, convert_field/2, get_value/2, get_field/2]). +-export([tokenize/1, format/3, get_field/3, format_field/3]). +-export([bformat/2, bformat/3]). +-export([f/2, f/3]). +-export([test/0]). + +-record(conversion, {length, precision, ctype, align, fill_char, sign}). + +%% @spec tokenize(S::string()) -> tokens() +%% @doc Tokenize a format string into mochifmt's internal format. +tokenize(S) -> + {?MODULE, tokenize(S, "", [])}. + +%% @spec convert_field(Arg, Conversion::conversion()) -> term() +%% @doc Process Arg according to the given explicit conversion specifier. +convert_field(Arg, "") -> + Arg; +convert_field(Arg, "r") -> + repr(Arg); +convert_field(Arg, "s") -> + str(Arg). + +%% @spec get_value(Key::string(), Args::args()) -> term() +%% @doc Get the Key from Args. If Args is a tuple then convert Key to +%% an integer and get element(1 + Key, Args). If Args is a list and Key +%% can be parsed as an integer then use lists:nth(1 + Key, Args), +%% otherwise try and look for Key in Args as a proplist, converting +%% Key to an atom or binary if necessary. +get_value(Key, Args) when is_tuple(Args) -> + element(1 + list_to_integer(Key), Args); +get_value(Key, Args) when is_list(Args) -> + try lists:nth(1 + list_to_integer(Key), Args) + catch error:_ -> + {_K, V} = proplist_lookup(Key, Args), + V + end. + +%% @spec get_field(Key::string(), Args) -> term() +%% @doc Consecutively call get_value/2 on parts of Key delimited by ".", +%% replacing Args with the result of the previous get_value. This +%% is used to implement formats such as {0.0}. +get_field(Key, Args) -> + get_field(Key, Args, ?MODULE). + +%% @spec get_field(Key::string(), Args, Module) -> term() +%% @doc Consecutively call Module:get_value/2 on parts of Key delimited by ".", +%% replacing Args with the result of the previous get_value. This +%% is used to implement formats such as {0.0}. +get_field(Key, Args, Module) -> + {Name, Next} = lists:splitwith(fun (C) -> C =/= $. end, Key), + Res = try Module:get_value(Name, Args) + catch error:undef -> get_value(Name, Args) end, + case Next of + "" -> + Res; + "." ++ S1 -> + get_field(S1, Res, Module) + end. + +%% @spec format(Format::string(), Args) -> iolist() +%% @doc Format Args with Format. +format(Format, Args) -> + format(Format, Args, ?MODULE). + +%% @spec format(Format::string(), Args, Module) -> iolist() +%% @doc Format Args with Format using Module. +format({?MODULE, Parts}, Args, Module) -> + format2(Parts, Args, Module, []); +format(S, Args, Module) -> + format(tokenize(S), Args, Module). + +%% @spec format_field(Arg, Format) -> iolist() +%% @doc Format Arg with Format. +format_field(Arg, Format) -> + format_field(Arg, Format, ?MODULE). + +%% @spec format_field(Arg, Format, _Module) -> iolist() +%% @doc Format Arg with Format. +format_field(Arg, Format, _Module) -> + F = default_ctype(Arg, parse_std_conversion(Format)), + fix_padding(fix_sign(convert2(Arg, F), F), F). + +%% @spec f(Format::string(), Args) -> string() +%% @doc Format Args with Format and return a string(). +f(Format, Args) -> + f(Format, Args, ?MODULE). + +%% @spec f(Format::string(), Args, Module) -> string() +%% @doc Format Args with Format using Module and return a string(). +f(Format, Args, Module) -> + case lists:member(${, Format) of + true -> + binary_to_list(bformat(Format, Args, Module)); + false -> + Format + end. + +%% @spec bformat(Format::string(), Args) -> binary() +%% @doc Format Args with Format and return a binary(). 
+bformat(Format, Args) -> + iolist_to_binary(format(Format, Args)). + +%% @spec bformat(Format::string(), Args, Module) -> binary() +%% @doc Format Args with Format using Module and return a binary(). +bformat(Format, Args, Module) -> + iolist_to_binary(format(Format, Args, Module)). + +%% @spec test() -> ok +%% @doc Run tests. +test() -> + ok = test_tokenize(), + ok = test_format(), + ok = test_std(), + ok = test_records(), + ok. + +%% Internal API + +add_raw("", Acc) -> + Acc; +add_raw(S, Acc) -> + [{raw, lists:reverse(S)} | Acc]. + +tokenize([], S, Acc) -> + lists:reverse(add_raw(S, Acc)); +tokenize("{{" ++ Rest, S, Acc) -> + tokenize(Rest, "{" ++ S, Acc); +tokenize("{" ++ Rest, S, Acc) -> + {Format, Rest1} = tokenize_format(Rest), + tokenize(Rest1, "", [{format, make_format(Format)} | add_raw(S, Acc)]); +tokenize("}}" ++ Rest, S, Acc) -> + tokenize(Rest, "}" ++ S, Acc); +tokenize([C | Rest], S, Acc) -> + tokenize(Rest, [C | S], Acc). + +tokenize_format(S) -> + tokenize_format(S, 1, []). + +tokenize_format("}" ++ Rest, 1, Acc) -> + {lists:reverse(Acc), Rest}; +tokenize_format("}" ++ Rest, N, Acc) -> + tokenize_format(Rest, N - 1, "}" ++ Acc); +tokenize_format("{" ++ Rest, N, Acc) -> + tokenize_format(Rest, 1 + N, "{" ++ Acc); +tokenize_format([C | Rest], N, Acc) -> + tokenize_format(Rest, N, [C | Acc]). + +make_format(S) -> + {Name0, Spec} = case lists:splitwith(fun (C) -> C =/= $: end, S) of + {_, ""} -> + {S, ""}; + {SN, ":" ++ SS} -> + {SN, SS} + end, + {Name, Transform} = case lists:splitwith(fun (C) -> C =/= $! end, Name0) of + {_, ""} -> + {Name0, ""}; + {TN, "!" ++ TT} -> + {TN, TT} + end, + {Name, Transform, Spec}. + +proplist_lookup(S, P) -> + A = try list_to_existing_atom(S) + catch error:_ -> make_ref() end, + B = try list_to_binary(S) + catch error:_ -> make_ref() end, + proplist_lookup2({S, A, B}, P). + +proplist_lookup2({KS, KA, KB}, [{K, V} | _]) + when KS =:= K orelse KA =:= K orelse KB =:= K -> + {K, V}; +proplist_lookup2(Keys, [_ | Rest]) -> + proplist_lookup2(Keys, Rest). + +format2([], _Args, _Module, Acc) -> + lists:reverse(Acc); +format2([{raw, S} | Rest], Args, Module, Acc) -> + format2(Rest, Args, Module, [S | Acc]); +format2([{format, {Key, Convert, Format0}} | Rest], Args, Module, Acc) -> + Format = f(Format0, Args, Module), + V = case Module of + ?MODULE -> + V0 = get_field(Key, Args), + V1 = convert_field(V0, Convert), + format_field(V1, Format); + _ -> + V0 = try Module:get_field(Key, Args) + catch error:undef -> get_field(Key, Args, Module) end, + V1 = try Module:convert_field(V0, Convert) + catch error:undef -> convert_field(V0, Convert) end, + try Module:format_field(V1, Format) + catch error:undef -> format_field(V1, Format, Module) end + end, + format2(Rest, Args, Module, [V | Acc]). + +default_ctype(_Arg, C=#conversion{ctype=N}) when N =/= undefined -> + C; +default_ctype(Arg, C) when is_integer(Arg) -> + C#conversion{ctype=decimal}; +default_ctype(Arg, C) when is_float(Arg) -> + C#conversion{ctype=general}; +default_ctype(_Arg, C) -> + C#conversion{ctype=string}. + +fix_padding(Arg, #conversion{length=undefined}) -> + Arg; +fix_padding(Arg, F=#conversion{length=Length, fill_char=Fill0, align=Align0, + ctype=Type}) -> + Padding = Length - iolist_size(Arg), + Fill = case Fill0 of + undefined -> + $\s; + _ -> + Fill0 + end, + Align = case Align0 of + undefined -> + case Type of + string -> + left; + _ -> + right + end; + _ -> + Align0 + end, + case Padding > 0 of + true -> + do_padding(Arg, Padding, Fill, Align, F); + false -> + Arg + end. 
+ +do_padding(Arg, Padding, Fill, right, _F) -> + [lists:duplicate(Padding, Fill), Arg]; +do_padding(Arg, Padding, Fill, center, _F) -> + LPadding = lists:duplicate(Padding div 2, Fill), + RPadding = case Padding band 1 of + 1 -> + [Fill | LPadding]; + _ -> + LPadding + end, + [LPadding, Arg, RPadding]; +do_padding([$- | Arg], Padding, Fill, sign_right, _F) -> + [[$- | lists:duplicate(Padding, Fill)], Arg]; +do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=$-}) -> + [lists:duplicate(Padding, Fill), Arg]; +do_padding([S | Arg], Padding, Fill, sign_right, #conversion{sign=S}) -> + [[S | lists:duplicate(Padding, Fill)], Arg]; +do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=undefined}) -> + [lists:duplicate(Padding, Fill), Arg]; +do_padding(Arg, Padding, Fill, left, _F) -> + [Arg | lists:duplicate(Padding, Fill)]. + +fix_sign(Arg, #conversion{sign=$+}) when Arg >= 0 -> + [$+, Arg]; +fix_sign(Arg, #conversion{sign=$\s}) when Arg >= 0 -> + [$\s, Arg]; +fix_sign(Arg, _F) -> + Arg. + +ctype($\%) -> percent; +ctype($s) -> string; +ctype($b) -> bin; +ctype($o) -> oct; +ctype($X) -> upper_hex; +ctype($x) -> hex; +ctype($c) -> char; +ctype($d) -> decimal; +ctype($g) -> general; +ctype($f) -> fixed; +ctype($e) -> exp. + +align($<) -> left; +align($>) -> right; +align($^) -> center; +align($=) -> sign_right. + +convert2(Arg, F=#conversion{ctype=percent}) -> + [convert2(100.0 * Arg, F#conversion{ctype=fixed}), $\%]; +convert2(Arg, #conversion{ctype=string}) -> + str(Arg); +convert2(Arg, #conversion{ctype=bin}) -> + erlang:integer_to_list(Arg, 2); +convert2(Arg, #conversion{ctype=oct}) -> + erlang:integer_to_list(Arg, 8); +convert2(Arg, #conversion{ctype=upper_hex}) -> + erlang:integer_to_list(Arg, 16); +convert2(Arg, #conversion{ctype=hex}) -> + string:to_lower(erlang:integer_to_list(Arg, 16)); +convert2(Arg, #conversion{ctype=char}) when Arg < 16#80 -> + [Arg]; +convert2(Arg, #conversion{ctype=char}) -> + xmerl_ucs:to_utf8(Arg); +convert2(Arg, #conversion{ctype=decimal}) -> + integer_to_list(Arg); +convert2(Arg, #conversion{ctype=general, precision=undefined}) -> + try mochinum:digits(Arg) + catch error:undef -> io_lib:format("~g", [Arg]) end; +convert2(Arg, #conversion{ctype=fixed, precision=undefined}) -> + io_lib:format("~f", [Arg]); +convert2(Arg, #conversion{ctype=exp, precision=undefined}) -> + io_lib:format("~e", [Arg]); +convert2(Arg, #conversion{ctype=general, precision=P}) -> + io_lib:format("~." ++ integer_to_list(P) ++ "g", [Arg]); +convert2(Arg, #conversion{ctype=fixed, precision=P}) -> + io_lib:format("~." ++ integer_to_list(P) ++ "f", [Arg]); +convert2(Arg, #conversion{ctype=exp, precision=P}) -> + io_lib:format("~." ++ integer_to_list(P) ++ "e", [Arg]). + +str(A) when is_atom(A) -> + atom_to_list(A); +str(I) when is_integer(I) -> + integer_to_list(I); +str(F) when is_float(F) -> + try mochinum:digits(F) + catch error:undef -> io_lib:format("~g", [F]) end; +str(L) when is_list(L) -> + L; +str(B) when is_binary(B) -> + B; +str(P) -> + repr(P). + +repr(P) when is_float(P) -> + try mochinum:digits(P) + catch error:undef -> float_to_list(P) end; +repr(P) -> + io_lib:format("~p", [P]). + +parse_std_conversion(S) -> + parse_std_conversion(S, #conversion{}). 
+ +parse_std_conversion("", Acc) -> + Acc; +parse_std_conversion([Fill, Align | Spec], Acc) + when Align =:= $< orelse Align =:= $> orelse Align =:= $= orelse Align =:= $^ -> + parse_std_conversion(Spec, Acc#conversion{fill_char=Fill, + align=align(Align)}); +parse_std_conversion([Align | Spec], Acc) + when Align =:= $< orelse Align =:= $> orelse Align =:= $= orelse Align =:= $^ -> + parse_std_conversion(Spec, Acc#conversion{align=align(Align)}); +parse_std_conversion([Sign | Spec], Acc) + when Sign =:= $+ orelse Sign =:= $- orelse Sign =:= $\s -> + parse_std_conversion(Spec, Acc#conversion{sign=Sign}); +parse_std_conversion("0" ++ Spec, Acc) -> + Align = case Acc#conversion.align of + undefined -> + sign_right; + A -> + A + end, + parse_std_conversion(Spec, Acc#conversion{fill_char=$0, align=Align}); +parse_std_conversion(Spec=[D|_], Acc) when D >= $0 andalso D =< $9 -> + {W, Spec1} = lists:splitwith(fun (C) -> C >= $0 andalso C =< $9 end, Spec), + parse_std_conversion(Spec1, Acc#conversion{length=list_to_integer(W)}); +parse_std_conversion([$. | Spec], Acc) -> + case lists:splitwith(fun (C) -> C >= $0 andalso C =< $9 end, Spec) of + {"", Spec1} -> + parse_std_conversion(Spec1, Acc); + {P, Spec1} -> + parse_std_conversion(Spec1, + Acc#conversion{precision=list_to_integer(P)}) + end; +parse_std_conversion([Type], Acc) -> + parse_std_conversion("", Acc#conversion{ctype=ctype(Type)}). + +test_tokenize() -> + {?MODULE, [{raw, "ABC"}]} = tokenize("ABC"), + {?MODULE, [{format, {"0", "", ""}}]} = tokenize("{0}"), + {?MODULE, [{raw, "ABC"}, {format, {"1", "", ""}}, {raw, "DEF"}]} = + tokenize("ABC{1}DEF"), + ok. + +test_format() -> + <<" -4">> = bformat("{0:4}", [-4]), + <<" 4">> = bformat("{0:4}", [4]), + <<" 4">> = bformat("{0:{0}}", [4]), + <<"4 ">> = bformat("{0:4}", ["4"]), + <<"4 ">> = bformat("{0:{0}}", ["4"]), + <<"1.2yoDEF">> = bformat("{2}{0}{1}{3}", {yo, "DE", 1.2, <<"F">>}), + <<"cafebabe">> = bformat("{0:x}", {16#cafebabe}), + <<"CAFEBABE">> = bformat("{0:X}", {16#cafebabe}), + <<"CAFEBABE">> = bformat("{0:X}", {16#cafebabe}), + <<"755">> = bformat("{0:o}", {8#755}), + <<"a">> = bformat("{0:c}", {97}), + %% Horizontal ellipsis + <<226, 128, 166>> = bformat("{0:c}", {16#2026}), + <<"11">> = bformat("{0:b}", {3}), + <<"11">> = bformat("{0:b}", [3]), + <<"11">> = bformat("{three:b}", [{three, 3}]), + <<"11">> = bformat("{three:b}", [{"three", 3}]), + <<"11">> = bformat("{three:b}", [{<<"three">>, 3}]), + <<"\"foo\"">> = bformat("{0!r}", {"foo"}), + <<"2008-5-4">> = bformat("{0.0}-{0.1}-{0.2}", {{2008,5,4}}), + <<"2008-05-04">> = bformat("{0.0:04}-{0.1:02}-{0.2:02}", {{2008,5,4}}), + <<"foo6bar-6">> = bformat("foo{1}{0}-{1}", {bar, 6}), + <<"-'atom test'-">> = bformat("-{arg!r}-", [{arg, 'atom test'}]), + <<"2008-05-04">> = bformat("{0.0:0{1.0}}-{0.1:0{1.1}}-{0.2:0{1.2}}", + {{2008,5,4}, {4, 2, 2}}), + ok. + +test_std() -> + M = mochifmt_std:new(), + <<"01">> = bformat("{0}{1}", [0, 1], M), + ok. + +test_records() -> + M = mochifmt_records:new([{conversion, record_info(fields, conversion)}]), + R = #conversion{length=long, precision=hard, sign=peace}, + long = M:get_value("length", R), + hard = M:get_value("precision", R), + peace = M:get_value("sign", R), + <<"long hard">> = bformat("{length} {precision}", R, M), + <<"long hard">> = bformat("{0.length} {0.precision}", [R], M), + ok. 
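Note (editorial, not part of the patch): the assertions in test_format/0 above double as usage documentation for mochifmt. As a quick orientation, a shell session along the following lines exercises the same API; the expected results are taken directly from those test cases rather than captured from a live node.

    1> mochifmt:bformat("{0:x}", {16#cafebabe}).
    <<"cafebabe">>
    2> mochifmt:bformat("{three:b}", [{three, 3}]).
    <<"11">>
    3> mochifmt:f("{0.0}-{0.1}-{0.2}", {{2008,5,4}}).
    "2008-5-4"

The last call shows the dotted-field syntax handled by get_field/2,3, which repeatedly applies get_value/2 to drill into the tuple argument.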
diff --git a/apps/mochiweb/src/mochifmt_records.erl b/apps/mochiweb/src/mochifmt_records.erl new file mode 100644 index 00000000..94c77978 --- /dev/null +++ b/apps/mochiweb/src/mochifmt_records.erl @@ -0,0 +1,30 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2008 Mochi Media, Inc. + +%% @doc Formatter that understands records. +%% +%% Usage: +%% +%% 1> M = mochifmt_records:new([{rec, record_info(fields, rec)}]), +%% M:format("{0.bar}", [#rec{bar=foo}]). +%% foo + +-module(mochifmt_records, [Recs]). +-author('bob@mochimedia.com'). +-export([get_value/2]). + +get_value(Key, Rec) when is_tuple(Rec) and is_atom(element(1, Rec)) -> + try begin + Atom = list_to_existing_atom(Key), + {_, Fields} = proplists:lookup(element(1, Rec), Recs), + element(get_rec_index(Atom, Fields, 2), Rec) + end + catch error:_ -> mochifmt:get_value(Key, Rec) + end; +get_value(Key, Args) -> + mochifmt:get_value(Key, Args). + +get_rec_index(Atom, [Atom | _], Index) -> + Index; +get_rec_index(Atom, [_ | Rest], Index) -> + get_rec_index(Atom, Rest, 1 + Index). diff --git a/apps/mochiweb/src/mochifmt_std.erl b/apps/mochiweb/src/mochifmt_std.erl new file mode 100644 index 00000000..9442016a --- /dev/null +++ b/apps/mochiweb/src/mochifmt_std.erl @@ -0,0 +1,23 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2008 Mochi Media, Inc. + +%% @doc Template module for a mochifmt formatter. + +-module(mochifmt_std, []). +-author('bob@mochimedia.com'). +-export([format/2, get_value/2, format_field/2, get_field/2, convert_field/2]). + +format(Format, Args) -> + mochifmt:format(Format, Args, THIS). + +get_field(Key, Args) -> + mochifmt:get_field(Key, Args, THIS). + +convert_field(Key, Args) -> + mochifmt:convert_field(Key, Args). + +get_value(Key, Args) -> + mochifmt:get_value(Key, Args). + +format_field(Arg, Format) -> + mochifmt:format_field(Arg, Format, THIS). diff --git a/apps/mochiweb/src/mochihex.erl b/apps/mochiweb/src/mochihex.erl new file mode 100644 index 00000000..7fe6899e --- /dev/null +++ b/apps/mochiweb/src/mochihex.erl @@ -0,0 +1,75 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2006 Mochi Media, Inc. + +%% @doc Utilities for working with hexadecimal strings. + +-module(mochihex). +-author('bob@mochimedia.com'). + +-export([test/0, to_hex/1, to_bin/1, to_int/1, dehex/1, hexdigit/1]). + +%% @type iolist() = [char() | binary() | iolist()] +%% @type iodata() = iolist() | binary() + +%% @spec to_hex(integer | iolist()) -> string() +%% @doc Convert an iolist to a hexadecimal string. +to_hex(0) -> + "0"; +to_hex(I) when is_integer(I), I > 0 -> + to_hex_int(I, []); +to_hex(B) -> + to_hex(iolist_to_binary(B), []). + +%% @spec to_bin(string()) -> binary() +%% @doc Convert a hexadecimal string to a binary. +to_bin(L) -> + to_bin(L, []). + +%% @spec to_int(string()) -> integer() +%% @doc Convert a hexadecimal string to an integer. +to_int(L) -> + erlang:list_to_integer(L, 16). + +%% @spec dehex(char()) -> integer() +%% @doc Convert a hex digit to its integer value. +dehex(C) when C >= $0, C =< $9 -> + C - $0; +dehex(C) when C >= $a, C =< $f -> + C - $a + 10; +dehex(C) when C >= $A, C =< $F -> + C - $A + 10. + +%% @spec hexdigit(integer()) -> char() +%% @doc Convert an integer less than 16 to a hex digit. +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +%% @spec test() -> ok +%% @doc Test this module. 
+test() -> + "ff000ff1" = to_hex([255, 0, 15, 241]), + <<255, 0, 15, 241>> = to_bin("ff000ff1"), + 16#ff000ff1 = to_int("ff000ff1"), + "ff000ff1" = to_hex(16#ff000ff1), + ok. + + +%% Internal API + +to_hex(<<>>, Acc) -> + lists:reverse(Acc); +to_hex(<<C1:4, C2:4, Rest/binary>>, Acc) -> + to_hex(Rest, [hexdigit(C2), hexdigit(C1) | Acc]). + +to_hex_int(0, Acc) -> + Acc; +to_hex_int(I, Acc) -> + to_hex_int(I bsr 4, [hexdigit(I band 15) | Acc]). + +to_bin([], Acc) -> + iolist_to_binary(lists:reverse(Acc)); +to_bin([C1, C2 | Rest], Acc) -> + to_bin(Rest, [(dehex(C1) bsl 4) bor dehex(C2) | Acc]). + diff --git a/apps/mochiweb/src/mochijson.erl b/apps/mochiweb/src/mochijson.erl new file mode 100644 index 00000000..74695a75 --- /dev/null +++ b/apps/mochiweb/src/mochijson.erl @@ -0,0 +1,528 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2006 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. +-module(mochijson). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1]). +-export([binary_encoder/1, binary_encode/1]). +-export([binary_decoder/1, binary_decode/1]). +-export([test/0]). + +% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}). + +%% @type iolist() = [char() | binary() | iolist()] +%% @type iodata() = iolist() | binary() +%% @type json_string() = atom | string() | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = {array, [json_term()]} +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() +%% @type encoding() = utf8 | unicode +%% @type encoder_option() = {input_encoding, encoding()} | +%% {handler, function()} +%% @type decoder_option() = {input_encoding, encoding()} | +%% {object_hook, function()} +%% @type bjson_string() = binary() +%% @type bjson_number() = integer() | float() +%% @type bjson_array() = [bjson_term()] +%% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]} +%% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() | +%% bjson_object() +%% @type binary_encoder_option() = {handler, function()} +%% @type binary_decoder_option() = {object_hook, function()} + +-record(encoder, {input_encoding=unicode, + handler=null}). + +-record(decoder, {input_encoding=utf8, + object_hook=null, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. +decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). + +%% @spec binary_decoder([binary_decoder_option()]) -> function() +%% @doc Create a binary_decoder/1 with the given options. +binary_decoder(Options) -> + mochijson2:decoder(Options). 
+ +%% @spec binary_encoder([binary_encoder_option()]) -> function() +%% @doc Create a binary_encoder/1 with the given options. +binary_encoder(Options) -> + mochijson2:encoder(Options). + +%% @spec binary_encode(bjson_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist, using lists for arrays and +%% binaries for strings. +binary_encode(Any) -> + mochijson2:encode(Any). + +%% @spec binary_decode(iolist()) -> bjson_term() +%% @doc Decode the given iolist to Erlang terms, using lists for arrays and +%% binaries for strings. +binary_decode(S) -> + mochijson2:decode(S). + +test() -> + test_all(), + mochijson2:test(). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{input_encoding, Encoding} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{input_encoding=Encoding}); +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{input_encoding, Encoding} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{input_encoding=Encoding}); +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}). + +json_encode(true, _State) -> + "true"; +json_encode(false, _State) -> + "false"; +json_encode(null, _State) -> + "null"; +json_encode(I, _State) when is_integer(I) -> + integer_to_list(I); +json_encode(F, _State) when is_float(F) -> + mochinum:digits(F); +json_encode(L, State) when is_list(L); is_binary(L); is_atom(L) -> + json_encode_string(L, State); +json_encode({array, Props}, State) when is_list(Props) -> + json_encode_array(Props, State); +json_encode({struct, Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode(Bad, #encoder{handler=null}) -> + exit({json_encode, {bad_term, Bad}}); +json_encode(Bad, State=#encoder{handler=Handler}) -> + json_encode(Handler(Bad), State). + +json_encode_array([], _State) -> + "[]"; +json_encode_array(L, State) -> + F = fun (O, Acc) -> + [$,, json_encode(O, State) | Acc] + end, + [$, | Acc1] = lists:foldl(F, "[", L), + lists:reverse([$\] | Acc1]). + +json_encode_proplist([], _State) -> + "{}"; +json_encode_proplist(Props, State) -> + F = fun ({K, V}, Acc) -> + KS = case K of + K when is_atom(K) -> + json_encode_string_utf8(atom_to_list(K)); + K when is_integer(K) -> + json_encode_string(integer_to_list(K), State); + K when is_list(K); is_binary(K) -> + json_encode_string(K, State) + end, + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = lists:foldl(F, "{", Props), + lists:reverse([$\} | Acc1]). + +json_encode_string(A, _State) when is_atom(A) -> + json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(A))); +json_encode_string(B, _State) when is_binary(B) -> + json_encode_string_unicode(xmerl_ucs:from_utf8(B)); +json_encode_string(S, #encoder{input_encoding=utf8}) -> + json_encode_string_utf8(S); +json_encode_string(S, #encoder{input_encoding=unicode}) -> + json_encode_string_unicode(S). + +json_encode_string_utf8(S) -> + [?Q | json_encode_string_utf8_1(S)]. 
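%% Editor's illustrative sketch (not part of the original module; the
%% function name handler_example/0 is hypothetical): the {handler, Fun}
%% encoder option documented above is consulted for terms json_encode/2
%% does not recognize, letting the caller rewrite them into encodable form.
handler_example() ->
    Enc = mochijson:encoder([{handler,
                              fun (T) when is_tuple(T) ->
                                      %% rewrite unknown tuples as JSON arrays
                                      {array, tuple_to_list(T)}
                              end}]),
    "[1,2,3]" = lists:flatten(Enc({1, 2, 3})),
    ok.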
+ +json_encode_string_utf8_1([C | Cs]) when C >= 0, C =< 16#7f -> + NewC = case C of + $\\ -> "\\\\"; + ?Q -> "\\\""; + _ when C >= $\s, C < 16#7f -> C; + $\t -> "\\t"; + $\n -> "\\n"; + $\r -> "\\r"; + $\f -> "\\f"; + $\b -> "\\b"; + _ when C >= 0, C =< 16#7f -> unihex(C); + _ -> exit({json_encode, {bad_char, C}}) + end, + [NewC | json_encode_string_utf8_1(Cs)]; +json_encode_string_utf8_1(All=[C | _]) when C >= 16#80, C =< 16#10FFFF -> + [?Q | Rest] = json_encode_string_unicode(xmerl_ucs:from_utf8(All)), + Rest; +json_encode_string_utf8_1([]) -> + "\"". + +json_encode_string_unicode(S) -> + [?Q | json_encode_string_unicode_1(S)]. + +json_encode_string_unicode_1([C | Cs]) -> + NewC = case C of + $\\ -> "\\\\"; + ?Q -> "\\\""; + _ when C >= $\s, C < 16#7f -> C; + $\t -> "\\t"; + $\n -> "\\n"; + $\r -> "\\r"; + $\f -> "\\f"; + $\b -> "\\b"; + _ when C >= 0, C =< 16#10FFFF -> unihex(C); + _ -> exit({json_encode, {bad_char, C}}) + end, + [NewC | json_encode_string_unicode_1(Cs)]; +json_encode_string_unicode_1([]) -> + "\"". + +dehex(C) when C >= $0, C =< $9 -> + C - $0; +dehex(C) when C >= $a, C =< $f -> + C - $a + 10; +dehex(C) when C >= $A, C =< $F -> + C - $A + 10. + +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +unihex(C) when C < 16#10000 -> + <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, + Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], + [$\\, $u | Digits]; +unihex(C) when C =< 16#10FFFF -> + N = C - 16#10000, + S1 = 16#d800 bor ((N bsr 10) band 16#3ff), + S2 = 16#dc00 bor (N band 16#3ff), + [unihex(S1), unihex(S2)]. + +json_decode(B, S) when is_binary(B) -> + json_decode(binary_to_list(B), S); +json_decode(L, S) -> + {Res, L1, S1} = decode1(L, S), + {eof, [], _} = tokenize(L1, S1#decoder{state=trim}), + Res. + +decode1(L, S=#decoder{state=null}) -> + case tokenize(L, S#decoder{state=any}) of + {{const, C}, L1, S1} -> + {C, L1, S1}; + {start_array, L1, S1} -> + decode_array(L1, S1#decoder{state=any}, []); + {start_object, L1, S1} -> + decode_object(L1, S1#decoder{state=key}, []) + end. + +make_object(V, #decoder{object_hook=null}) -> + V; +make_object(V, #decoder{object_hook=Hook}) -> + Hook(V). + +decode_object(L, S=#decoder{state=key}, Acc) -> + case tokenize(L, S) of + {end_object, Rest, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, Rest, S1#decoder{state=null}}; + {{const, K}, Rest, S1} when is_list(K) -> + {colon, L2, S2} = tokenize(Rest, S1), + {V, L3, S3} = decode1(L2, S2#decoder{state=null}), + decode_object(L3, S3#decoder{state=comma}, [{K, V} | Acc]) + end; +decode_object(L, S=#decoder{state=comma}, Acc) -> + case tokenize(L, S) of + {end_object, Rest, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, Rest, S1#decoder{state=null}}; + {comma, Rest, S1} -> + decode_object(Rest, S1#decoder{state=key}, Acc) + end. 
+ +decode_array(L, S=#decoder{state=any}, Acc) -> + case tokenize(L, S) of + {end_array, Rest, S1} -> + {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}}; + {start_array, Rest, S1} -> + {Array, Rest1, S2} = decode_array(Rest, S1#decoder{state=any}, []), + decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]); + {start_object, Rest, S1} -> + {Array, Rest1, S2} = decode_object(Rest, S1#decoder{state=key}, []), + decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]); + {{const, Const}, Rest, S1} -> + decode_array(Rest, S1#decoder{state=comma}, [Const | Acc]) + end; +decode_array(L, S=#decoder{state=comma}, Acc) -> + case tokenize(L, S) of + {end_array, Rest, S1} -> + {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}}; + {comma, Rest, S1} -> + decode_array(Rest, S1#decoder{state=any}, Acc) + end. + +tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc) + when is_list(C); is_binary(C); C >= 16#7f -> + List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)), + tokenize_string(List, S#decoder{input_encoding=unicode}, Acc); +tokenize_string("\"" ++ Rest, S, Acc) -> + {lists:reverse(Acc), Rest, ?INC_COL(S)}; +tokenize_string("\\\"" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]); +tokenize_string("\\\\" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]); +tokenize_string("\\/" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]); +tokenize_string("\\b" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]); +tokenize_string("\\f" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]); +tokenize_string("\\n" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]); +tokenize_string("\\r" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]); +tokenize_string("\\t" ++ Rest, S, Acc) -> + tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]); +tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) -> + % coalesce UTF-16 surrogate pair? + C = dehex(C0) bor + (dehex(C1) bsl 4) bor + (dehex(C2) bsl 8) bor + (dehex(C3) bsl 12), + tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]); +tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF -> + tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]). + +tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc) + when is_list(C); is_binary(C); C >= 16#7f -> + List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)), + tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc); +tokenize_number([$- | Rest], sign, S, []) -> + tokenize_number(Rest, int, ?INC_COL(S), [$-]); +tokenize_number(Rest, sign, S, []) -> + tokenize_number(Rest, int, S, []); +tokenize_number([$0 | Rest], int, S, Acc) -> + tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]); +tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 -> + tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]); +tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 -> + tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]); +tokenize_number(Rest, int1, S, Acc) -> + tokenize_number(Rest, frac, S, Acc); +tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 -> + tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); +tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E -> + tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. 
| Acc]); +tokenize_number(Rest, frac, S, Acc) -> + {{int, lists:reverse(Acc)}, Rest, S}; +tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 -> + tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]); +tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E -> + tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]); +tokenize_number(Rest, frac1, S, Acc) -> + {{float, lists:reverse(Acc)}, Rest, S}; +tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ -> + tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]); +tokenize_number(Rest, esign, S, Acc) -> + tokenize_number(Rest, eint, S, Acc); +tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 -> + tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]); +tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 -> + tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]); +tokenize_number(Rest, eint1, S, Acc) -> + {{float, lists:reverse(Acc)}, Rest, S}. + +tokenize([], S=#decoder{state=trim}) -> + {eof, [], S}; +tokenize([L | Rest], S) when is_list(L) -> + tokenize(L ++ Rest, S); +tokenize([B | Rest], S) when is_binary(B) -> + tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S); +tokenize("\r\n" ++ Rest, S) -> + tokenize(Rest, ?INC_LINE(S)); +tokenize("\n" ++ Rest, S) -> + tokenize(Rest, ?INC_LINE(S)); +tokenize([C | Rest], S) when C == $\s; C == $\t -> + tokenize(Rest, ?INC_COL(S)); +tokenize("{" ++ Rest, S) -> + {start_object, Rest, ?INC_COL(S)}; +tokenize("}" ++ Rest, S) -> + {end_object, Rest, ?INC_COL(S)}; +tokenize("[" ++ Rest, S) -> + {start_array, Rest, ?INC_COL(S)}; +tokenize("]" ++ Rest, S) -> + {end_array, Rest, ?INC_COL(S)}; +tokenize("," ++ Rest, S) -> + {comma, Rest, ?INC_COL(S)}; +tokenize(":" ++ Rest, S) -> + {colon, Rest, ?INC_COL(S)}; +tokenize("null" ++ Rest, S) -> + {{const, null}, Rest, ?ADV_COL(S, 4)}; +tokenize("true" ++ Rest, S) -> + {{const, true}, Rest, ?ADV_COL(S, 4)}; +tokenize("false" ++ Rest, S) -> + {{const, false}, Rest, ?ADV_COL(S, 5)}; +tokenize("\"" ++ Rest, S) -> + {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []), + {{const, String}, Rest1, S1}; +tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- -> + case tokenize_number(L, sign, S, []) of + {{int, Int}, Rest, S1} -> + {{const, list_to_integer(Int)}, Rest, S1}; + {{float, Float}, Rest, S1} -> + {{const, list_to_float(Float)}, Rest, S1} + end. + +%% testing constructs borrowed from the Yaws JSON implementation. + +%% Create an object from a list of Key/Value pairs. + +obj_new() -> + {struct, []}. + +is_obj({struct, Props}) -> + F = fun ({K, _}) when is_list(K) -> + true; + (_) -> + false + end, + lists:all(F, Props). + +obj_from_list(Props) -> + Obj = {struct, Props}, + case is_obj(Obj) of + true -> Obj; + false -> exit(json_bad_object) + end. + +%% Test for equivalence of Erlang terms. +%% Due to arbitrary order of construction, equivalent objects might +%% compare unequal as erlang terms, so we need to carefully recurse +%% through aggregates (tuples and objects). + +equiv({struct, Props1}, {struct, Props2}) -> + equiv_object(Props1, Props2); +equiv({array, L1}, {array, L2}) -> + equiv_list(L1, L2); +equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; +equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2; +equiv(true, true) -> true; +equiv(false, false) -> true; +equiv(null, null) -> true. + +%% Object representation and traversal order is unknown. +%% Use the sledgehammer and sort property lists. 
+ +equiv_object(Props1, Props2) -> + L1 = lists:keysort(1, Props1), + L2 = lists:keysort(1, Props2), + Pairs = lists:zip(L1, L2), + true = lists:all(fun({{K1, V1}, {K2, V2}}) -> + equiv(K1, K2) and equiv(V1, V2) + end, Pairs). + +%% Recursively compare tuple elements for equivalence. + +equiv_list([], []) -> + true; +equiv_list([V1 | L1], [V2 | L2]) -> + equiv(V1, V2) andalso equiv_list(L1, L2). + +test_all() -> + test_issue33(), + test_one(e2j_test_vec(utf8), 1). + +test_issue33() -> + %% http://code.google.com/p/mochiweb/issues/detail?id=33 + Js = {struct, [{"key", [194, 163]}]}, + Encoder = encoder([{input_encoding, utf8}]), + "{\"key\":\"\\u00a3\"}" = lists:flatten(Encoder(Js)). + +test_one([], _N) -> + %% io:format("~p tests passed~n", [N-1]), + ok; +test_one([{E, J} | Rest], N) -> + %% io:format("[~p] ~p ~p~n", [N, E, J]), + true = equiv(E, decode(J)), + true = equiv(E, decode(encode(E))), + test_one(Rest, 1+N). + +e2j_test_vec(utf8) -> + [ + {1, "1"}, + {3.1416, "3.14160"}, % text representation may truncate, trail zeroes + {-1, "-1"}, + {-3.1416, "-3.14160"}, + {12.0e10, "1.20000e+11"}, + {1.234E+10, "1.23400e+10"}, + {-1.234E-10, "-1.23400e-10"}, + {10.0, "1.0e+01"}, + {123.456, "1.23456E+2"}, + {10.0, "1e1"}, + {"foo", "\"foo\""}, + {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""}, + {"", "\"\""}, + {"\"", "\"\\\"\""}, + {"\n\n\n", "\"\\n\\n\\n\""}, + {"\\", "\"\\\\\""}, + {"\" \b\f\r\n\t\"", "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, + {obj_new(), "{}"}, + {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"}, + {obj_from_list([{"foo", "bar"}, {"baz", 123}]), + "{\"foo\":\"bar\",\"baz\":123}"}, + {{array, []}, "[]"}, + {{array, [{array, []}]}, "[[]]"}, + {{array, [1, "foo"]}, "[1,\"foo\"]"}, + + % json array in a json object + {obj_from_list([{"foo", {array, [123]}}]), + "{\"foo\":[123]}"}, + + % json object in a json object + {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]), + "{\"foo\":{\"bar\":true}}"}, + + % fold evaluation order + {obj_from_list([{"foo", {array, []}}, + {"bar", obj_from_list([{"baz", true}])}, + {"alice", "bob"}]), + "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, + + % json object in a json array + {{array, [-123, "foo", obj_from_list([{"bar", {array, []}}]), null]}, + "[-123,\"foo\",{\"bar\":[]},null]"} + ]. diff --git a/apps/mochiweb/src/mochijson2.erl b/apps/mochiweb/src/mochijson2.erl new file mode 100644 index 00000000..111c37bd --- /dev/null +++ b/apps/mochiweb/src/mochijson2.erl @@ -0,0 +1,660 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works +%% with binaries as strings, arrays as lists (without an {array, _}) +%% wrapper and it only knows how to decode UTF-8 (and ASCII). + +-module(mochijson2). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1]). +-export([test/0]). + +% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, + column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, + column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, + column=1, + line=1+S#decoder.line}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). 
+-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). + +%% @type iolist() = [char() | binary() | iolist()] +%% @type iodata() = iolist() | binary() +%% @type json_string() = atom | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = [json_term()] +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() + +-record(encoder, {handler=null, + utf8=false}). + +-record(decoder, {object_hook=null, + offset=0, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +%% @type encoder_option() = handler_option() | utf8_option() +%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. +decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). + +test() -> + test_all(). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}); +parse_encoder_options([{utf8, Switch} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{utf8=Switch}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}). + +json_encode(true, _State) -> + <<"true">>; +json_encode(false, _State) -> + <<"false">>; +json_encode(null, _State) -> + <<"null">>; +json_encode(I, _State) when is_integer(I) -> + integer_to_list(I); +json_encode(F, _State) when is_float(F) -> + mochinum:digits(F); +json_encode(S, State) when is_binary(S); is_atom(S) -> + json_encode_string(S, State); +json_encode(Array, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({struct, Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode(Bad, #encoder{handler=null}) -> + exit({json_encode, {bad_term, Bad}}); +json_encode(Bad, State=#encoder{handler=Handler}) -> + json_encode(Handler(Bad), State). + +json_encode_array([], _State) -> + <<"[]">>; +json_encode_array(L, State) -> + F = fun (O, Acc) -> + [$,, json_encode(O, State) | Acc] + end, + [$, | Acc1] = lists:foldl(F, "[", L), + lists:reverse([$\] | Acc1]). + +json_encode_proplist([], _State) -> + <<"{}">>; +json_encode_proplist(Props, State) -> + F = fun ({K, V}, Acc) -> + KS = json_encode_string(K, State), + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = lists:foldl(F, "{", Props), + lists:reverse([$\} | Acc1]). 
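%% Editor's illustrative sketch (not part of the original module; the
%% function name roundtrip_example/0 is hypothetical): mochijson2 uses
%% binaries for strings and plain lists for arrays, so an encode/decode
%% round trip of a {struct, Proplist} term looks like this.
roundtrip_example() ->
    Term = {struct, [{<<"id">>, 1}, {<<"tags">>, [<<"a">>, <<"b">>]}]},
    %% encode/1 returns an iolist; flatten it to a binary for inspection
    Json = iolist_to_binary(mochijson2:encode(Term)),
    <<"{\"id\":1,\"tags\":[\"a\",\"b\"]}">> = Json,
    Term = mochijson2:decode(Json),
    ok.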
+ +json_encode_string(A, State) when is_atom(A) -> + L = atom_to_list(A), + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q]) + end; +json_encode_string(B, State) when is_binary(B) -> + case json_bin_is_safe(B) of + true -> + [?Q, B, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q]) + end; +json_encode_string(I, _State) when is_integer(I) -> + [?Q, integer_to_list(I), ?Q]; +json_encode_string(L, State) when is_list(L) -> + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(L, State, [?Q]) + end. + +json_string_is_safe([]) -> + true; +json_string_is_safe([C | Rest]) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + json_string_is_safe(Rest); + _ -> + false + end. + +json_bin_is_safe(<<>>) -> + true; +json_bin_is_safe(<<C, Rest/binary>>) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + json_bin_is_safe(Rest); + _ -> + false + end. + +json_encode_string_unicode([], _State, Acc) -> + lists:reverse([$\" | Acc]); +json_encode_string_unicode([C | Cs], State, Acc) -> + Acc1 = case C of + ?Q -> + [?Q, $\\ | Acc]; + %% Escaping solidus is only useful when trying to protect + %% against "</script>" injection attacks which are only + %% possible when JSON is inserted into a HTML document + %% in-line. mochijson2 does not protect you from this, so + %% if you do insert directly into HTML then you need to + %% uncomment the following case or escape the output of encode. + %% + %% $/ -> + %% [$/, $\\ | Acc]; + %% + $\\ -> + [$\\, $\\ | Acc]; + $\b -> + [$b, $\\ | Acc]; + $\f -> + [$f, $\\ | Acc]; + $\n -> + [$n, $\\ | Acc]; + $\r -> + [$r, $\\ | Acc]; + $\t -> + [$t, $\\ | Acc]; + C when C >= 0, C < $\s -> + [unihex(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> + [xmerl_ucs:to_utf8(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> + [unihex(C) | Acc]; + C when C < 16#7f -> + [C | Acc]; + _ -> + exit({json_encode, {bad_char, C}}) + end, + json_encode_string_unicode(Cs, State, Acc1). + +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +unihex(C) when C < 16#10000 -> + <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, + Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], + [$\\, $u | Digits]; +unihex(C) when C =< 16#10FFFF -> + N = C - 16#10000, + S1 = 16#d800 bor ((N bsr 10) band 16#3ff), + S2 = 16#dc00 bor (N band 16#3ff), + [unihex(S1), unihex(S2)]. + +json_decode(L, S) when is_list(L) -> + json_decode(iolist_to_binary(L), S); +json_decode(B, S) -> + {Res, S1} = decode1(B, S), + {eof, _} = tokenize(B, S1#decoder{state=trim}), + Res. + +decode1(B, S=#decoder{state=null}) -> + case tokenize(B, S#decoder{state=any}) of + {{const, C}, S1} -> + {C, S1}; + {start_array, S1} -> + decode_array(B, S1); + {start_object, S1} -> + decode_object(B, S1) + end. + +make_object(V, #decoder{object_hook=null}) -> + V; +make_object(V, #decoder{object_hook=Hook}) -> + Hook(V). + +decode_object(B, S) -> + decode_object(B, S#decoder{state=key}, []). 
+ +decode_object(B, S=#decoder{state=key}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {{const, K}, S1} -> + {colon, S2} = tokenize(B, S1), + {V, S3} = decode1(B, S2#decoder{state=null}), + decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) + end; +decode_object(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {comma, S1} -> + decode_object(B, S1#decoder{state=key}, Acc) + end. + +decode_array(B, S) -> + decode_array(B, S#decoder{state=any}, []). + +decode_array(B, S=#decoder{state=any}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {start_array, S1} -> + {Array, S2} = decode_array(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {start_object, S1} -> + {Array, S2} = decode_object(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {{const, Const}, S1} -> + decode_array(B, S1#decoder{state=comma}, [Const | Acc]) + end; +decode_array(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {comma, S1} -> + decode_array(B, S1#decoder{state=any}, Acc) + end. + +tokenize_string(B, S=#decoder{offset=O}) -> + case tokenize_string_fast(B, O) of + {escape, O1} -> + Length = O1 - O, + S1 = ?ADV_COL(S, Length), + <<_:O/binary, Head:Length/binary, _/binary>> = B, + tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); + O1 -> + Length = O1 - O, + <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, + {{const, String}, ?ADV_COL(S, Length + 1)} + end. + +tokenize_string_fast(B, O) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + O; + <<_:O/binary, $\\, _/binary>> -> + {escape, O}; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string_fast(B, 4 + O); + _ -> + throw(invalid_utf8) + end. 
+ +tokenize_string(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; + <<_:O/binary, "\\\"", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); + <<_:O/binary, "\\\\", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); + <<_:O/binary, "\\/", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); + <<_:O/binary, "\\b", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); + <<_:O/binary, "\\f", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); + <<_:O/binary, "\\n", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); + <<_:O/binary, "\\r", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); + <<_:O/binary, "\\t", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); + <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> + C = erlang:list_to_integer([C3, C2, C1, C0], 16), + if C > 16#D7FF, C < 16#DC00 -> + %% coalesce UTF-16 surrogate pair + <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, + D = erlang:list_to_integer([D3,D2,D1,D0], 16), + [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer, + D:16/big-unsigned-integer>>), + Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc), + tokenize_string(B, ?ADV_COL(S, 12), Acc1); + true -> + Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), + tokenize_string(B, ?ADV_COL(S, 6), Acc1) + end; + <<_:O/binary, C, _/binary>> -> + tokenize_string(B, ?INC_CHAR(S, C), [C | Acc]) + end. + +tokenize_number(B, S) -> + case tokenize_number(B, sign, S, []) of + {{int, Int}, S1} -> + {{const, list_to_integer(Int)}, S1}; + {{float, Float}, S1} -> + {{const, list_to_float(Float)}, S1} + end. + +tokenize_number(B, sign, S=#decoder{offset=O}, []) -> + case B of + <<_:O/binary, $-, _/binary>> -> + tokenize_number(B, int, ?INC_COL(S), [$-]); + _ -> + tokenize_number(B, int, S, []) + end; +tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $0, _/binary>> -> + tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); + <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, frac, S, Acc) + end; +tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> + tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. 
| Acc]); + _ -> + {{int, lists:reverse(Acc)}, S} + end; +tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end; +tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> + tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, eint, S, Acc) + end; +tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end. + +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize(B, ?INC_CHAR(S, C)); + <<_:O/binary, "{", _/binary>> -> + {start_object, ?INC_COL(S)}; + <<_:O/binary, "}", _/binary>> -> + {end_object, ?INC_COL(S)}; + <<_:O/binary, "[", _/binary>> -> + {start_array, ?INC_COL(S)}; + <<_:O/binary, "]", _/binary>> -> + {end_array, ?INC_COL(S)}; + <<_:O/binary, ",", _/binary>> -> + {comma, ?INC_COL(S)}; + <<_:O/binary, ":", _/binary>> -> + {colon, ?INC_COL(S)}; + <<_:O/binary, "null", _/binary>> -> + {{const, null}, ?ADV_COL(S, 4)}; + <<_:O/binary, "true", _/binary>> -> + {{const, true}, ?ADV_COL(S, 4)}; + <<_:O/binary, "false", _/binary>> -> + {{const, false}, ?ADV_COL(S, 5)}; + <<_:O/binary, "\"", _/binary>> -> + tokenize_string(B, ?INC_COL(S)); + <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) + orelse C =:= $- -> + tokenize_number(B, S); + <<_:O/binary>> -> + trim = S#decoder.state, + {eof, S} + end. + +%% testing constructs borrowed from the Yaws JSON implementation. + +%% Create an object from a list of Key/Value pairs. + +obj_new() -> + {struct, []}. + +is_obj({struct, Props}) -> + F = fun ({K, _}) when is_binary(K) -> + true; + (_) -> + false + end, + lists:all(F, Props). + +obj_from_list(Props) -> + Obj = {struct, Props}, + case is_obj(Obj) of + true -> Obj; + false -> exit({json_bad_object, Obj}) + end. + +%% Test for equivalence of Erlang terms. +%% Due to arbitrary order of construction, equivalent objects might +%% compare unequal as erlang terms, so we need to carefully recurse +%% through aggregates (tuples and objects). + +equiv({struct, Props1}, {struct, Props2}) -> + equiv_object(Props1, Props2); +equiv(L1, L2) when is_list(L1), is_list(L2) -> + equiv_list(L1, L2); +equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; +equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; +equiv(true, true) -> true; +equiv(false, false) -> true; +equiv(null, null) -> true. + +%% Object representation and traversal order is unknown. +%% Use the sledgehammer and sort property lists. + +equiv_object(Props1, Props2) -> + L1 = lists:keysort(1, Props1), + L2 = lists:keysort(1, Props2), + Pairs = lists:zip(L1, L2), + true = lists:all(fun({{K1, V1}, {K2, V2}}) -> + equiv(K1, K2) and equiv(V1, V2) + end, Pairs). + +%% Recursively compare tuple elements for equivalence. + +equiv_list([], []) -> + true; +equiv_list([V1 | L1], [V2 | L2]) -> + equiv(V1, V2) andalso equiv_list(L1, L2). 
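%% Editor's illustrative sketch (not part of the original module; the
%% function name object_hook_example/0 is hypothetical, and maps assume a
%% modern Erlang/OTP release, newer than this code): the object_hook
%% decoder option is applied to every decoded {struct, Props} term, so it
%% can be used to post-process objects into another representation.
object_hook_example() ->
    Decode = mochijson2:decoder([{object_hook,
                                  fun ({struct, Props}) ->
                                          maps:from_list(Props)
                                  end}]),
    #{<<"a">> := 1} = Decode(<<"{\"a\":1}">>),
    ok.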
+ +test_all() -> + [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]), + test_encoder_utf8(), + test_input_validation(), + test_one(e2j_test_vec(utf8), 1). + +test_one([], _N) -> + %% io:format("~p tests passed~n", [N-1]), + ok; +test_one([{E, J} | Rest], N) -> + %% io:format("[~p] ~p ~p~n", [N, E, J]), + true = equiv(E, decode(J)), + true = equiv(E, decode(encode(E))), + test_one(Rest, 1+N). + +e2j_test_vec(utf8) -> + [ + {1, "1"}, + {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes + {-1, "-1"}, + {-3.1416, "-3.14160"}, + {12.0e10, "1.20000e+11"}, + {1.234E+10, "1.23400e+10"}, + {-1.234E-10, "-1.23400e-10"}, + {10.0, "1.0e+01"}, + {123.456, "1.23456E+2"}, + {10.0, "1e1"}, + {<<"foo">>, "\"foo\""}, + {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, + {<<"">>, "\"\""}, + {<<"\n\n\n">>, "\"\\n\\n\\n\""}, + {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, + {obj_new(), "{}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), + "{\"foo\":\"bar\",\"baz\":123}"}, + {[], "[]"}, + {[[]], "[[]]"}, + {[1, <<"foo">>], "[1,\"foo\"]"}, + + %% json array in a json object + {obj_from_list([{<<"foo">>, [123]}]), + "{\"foo\":[123]}"}, + + %% json object in a json object + {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), + "{\"foo\":{\"bar\":true}}"}, + + %% fold evaluation order + {obj_from_list([{<<"foo">>, []}, + {<<"bar">>, obj_from_list([{<<"baz">>, true}])}, + {<<"alice">>, <<"bob">>}]), + "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, + + %% json object in a json array + {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], + "[-123,\"foo\",{\"bar\":[]},null]"} + ]. + +%% test utf8 encoding +test_encoder_utf8() -> + %% safe conversion case (default) + [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = + encode(<<1,"\321\202\320\265\321\201\321\202">>), + + %% raw utf8 output (optional) + Enc = mochijson2:encoder([{utf8, true}]), + [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = + Enc(<<1,"\321\202\320\265\321\201\321\202">>). + +test_input_validation() -> + Good = [ + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, % pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, % euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} % denarius + ], + lists:foreach(fun({CodePoint, UTF8}) -> + Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), + Expect = decode(UTF8) + end, Good), + + Bad = [ + % 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + <<?Q, 16#80, ?Q>>, + % missing continuations, last byte in each should be 80-BF + <<?Q, 16#C2, 16#7F, ?Q>>, + <<?Q, 16#E0, 16#80,16#7F, ?Q>>, + <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, + % we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>> + ], + lists:foreach(fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end + end, Bad). diff --git a/apps/mochiweb/src/mochinum.erl b/apps/mochiweb/src/mochinum.erl new file mode 100644 index 00000000..6a866042 --- /dev/null +++ b/apps/mochiweb/src/mochinum.erl @@ -0,0 +1,289 @@ +%% @copyright 2007 Mochi Media, Inc. +%% @author Bob Ippolito <bob@mochimedia.com> + +%% @doc Useful numeric algorithms for floats that cover some deficiencies +%% in the math module. 
More interesting is digits/1, which implements +%% the algorithm from: +%% http://www.cs.indiana.edu/~burger/fp/index.html +%% See also "Printing Floating-Point Numbers Quickly and Accurately" +%% in Proceedings of the SIGPLAN '96 Conference on Programming Language +%% Design and Implementation. + +-module(mochinum). +-author("Bob Ippolito <bob@mochimedia.com>"). +-export([digits/1, frexp/1, int_pow/2, int_ceil/1, test/0]). + +%% IEEE 754 Float exponent bias +-define(FLOAT_BIAS, 1022). +-define(MIN_EXP, -1074). +-define(BIG_POW, 4503599627370496). + +%% External API + +%% @spec digits(number()) -> string() +%% @doc Returns a string that accurately represents the given integer or float +%% using a conservative amount of digits. Great for generating +%% human-readable output, or compact ASCII serializations for floats. +digits(N) when is_integer(N) -> + integer_to_list(N); +digits(0.0) -> + "0.0"; +digits(Float) -> + {Frac, Exp} = frexp(Float), + Exp1 = Exp - 53, + Frac1 = trunc(abs(Frac) * (1 bsl 53)), + [Place | Digits] = digits1(Float, Exp1, Frac1), + R = insert_decimal(Place, [$0 + D || D <- Digits]), + case Float < 0 of + true -> + [$- | R]; + _ -> + R + end. + +%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()} +%% @doc Return the fractional and exponent part of an IEEE 754 double, +%% equivalent to the libc function of the same name. +%% F = Frac * pow(2, Exp). +frexp(F) -> + frexp1(unpack(F)). + +%% @spec int_pow(X::integer(), N::integer()) -> Y::integer() +%% @doc Moderately efficient way to exponentiate integers. +%% int_pow(10, 2) = 100. +int_pow(_X, 0) -> + 1; +int_pow(X, N) when N > 0 -> + int_pow(X, N, 1). + +%% @spec int_ceil(F::float()) -> integer() +%% @doc Return the ceiling of F as an integer. The ceiling is defined as +%% F when F == trunc(F); +%% trunc(F) when F < 0; +%% trunc(F) + 1 when F > 0. +int_ceil(X) -> + T = trunc(X), + case (X - T) of + Neg when Neg < 0 -> T; + Pos when Pos > 0 -> T + 1; + _ -> T + end. + + +%% Internal API + +int_pow(X, N, R) when N < 2 -> + R * X; +int_pow(X, N, R) -> + int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). + +insert_decimal(0, S) -> + "0." ++ S; +insert_decimal(Place, S) when Place > 0 -> + L = length(S), + case Place - L of + 0 -> + S ++ ".0"; + N when N < 0 -> + {S0, S1} = lists:split(L + N, S), + S0 ++ "." ++ S1; + N when N < 6 -> + %% More places than digits + S ++ lists:duplicate(N, $0) ++ ".0"; + _ -> + insert_decimal_exp(Place, S) + end; +insert_decimal(Place, S) when Place > -6 -> + "0." ++ lists:duplicate(abs(Place), $0) ++ S; +insert_decimal(Place, S) -> + insert_decimal_exp(Place, S). + +insert_decimal_exp(Place, S) -> + [C | S0] = S, + S1 = case S0 of + [] -> + "0"; + _ -> + S0 + end, + Exp = case Place < 0 of + true -> + "e-"; + false -> + "e+" + end, + [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)). + + +digits1(Float, Exp, Frac) -> + Round = ((Frac band 1) =:= 0), + case Exp >= 0 of + true -> + BExp = 1 bsl Exp, + case (Frac /= ?BIG_POW) of + true -> + scale((Frac * BExp * 2), 2, BExp, BExp, + Round, Round, Float); + false -> + scale((Frac * BExp * 4), 4, (BExp * 2), BExp, + Round, Round, Float) + end; + false -> + case (Exp == ?MIN_EXP) orelse (Frac /= ?BIG_POW) of + true -> + scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, + Round, Round, Float); + false -> + scale((Frac * 4), 1 bsl (2 - Exp), 2, 1, + Round, Round, Float) + end + end. 
+ +scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> + Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), + %% Note that the scheme implementation uses a 326 element look-up table + %% for int_pow(10, N) where we do not. + case Est >= 0 of + true -> + fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, + LowOk, HighOk); + false -> + Scale = int_pow(10, -Est), + fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, + LowOk, HighOk) + end. + +fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> + TooLow = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TooLow of + true -> + [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)]; + false -> + [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)] + end. + +generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> + D = R0 div S, + R = R0 rem S, + TC1 = case LowOk of + true -> + R =< MMinus; + false -> + R < MMinus + end, + TC2 = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TC1 of + false -> + case TC2 of + false -> + [D | generate(R * 10, S, MPlus * 10, MMinus * 10, + LowOk, HighOk)]; + true -> + [D + 1] + end; + true -> + case TC2 of + false -> + [D]; + true -> + case R * 2 < S of + true -> + [D]; + false -> + [D + 1] + end + end + end. + +unpack(Float) -> + <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>, + {Sign, Exp, Frac}. + +frexp1({_Sign, 0, 0}) -> + {0.0, 0}; +frexp1({Sign, 0, Frac}) -> + Exp = log2floor(Frac), + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, (Frac-1):52>>, + {Frac1, -(?FLOAT_BIAS) - 52 + Exp}; +frexp1({Sign, Exp, Frac}) -> + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>, + {Frac1, Exp - ?FLOAT_BIAS}. + +log2floor(Int) -> + log2floor(Int, 0). + +log2floor(0, N) -> + N; +log2floor(Int, N) -> + log2floor(Int bsr 1, 1 + N). + + +test() -> + ok = test_frexp(), + ok = test_int_ceil(), + ok = test_int_pow(), + ok = test_digits(), + ok. + +test_int_ceil() -> + 1 = int_ceil(0.0001), + 0 = int_ceil(0.0), + 1 = int_ceil(0.99), + 1 = int_ceil(1.0), + -1 = int_ceil(-1.5), + -2 = int_ceil(-2.0), + ok. + +test_int_pow() -> + 1 = int_pow(1, 1), + 1 = int_pow(1, 0), + 1 = int_pow(10, 0), + 10 = int_pow(10, 1), + 100 = int_pow(10, 2), + 1000 = int_pow(10, 3), + ok. + +test_digits() -> + "0" = digits(0), + "0.0" = digits(0.0), + "1.0" = digits(1.0), + "-1.0" = digits(-1.0), + "0.1" = digits(0.1), + "0.01" = digits(0.01), + "0.001" = digits(0.001), + ok. + +test_frexp() -> + %% zero + {0.0, 0} = frexp(0.0), + %% one + {0.5, 1} = frexp(1.0), + %% negative one + {-0.5, 1} = frexp(-1.0), + %% small denormalized number + %% 4.94065645841246544177e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + {0.5, -1073} = frexp(SmallDenorm), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + {0.99999999999999978, -1022} = frexp(BigDenorm), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + {0.5, -1021} = frexp(SmallNorm), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + {0.99999999999999989, 1024} = frexp(LargeNorm), + ok. 
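%% Editor's worked example (illustrative sketch, not part of the original
%% module; the function name frexp_digits_example/0 is hypothetical):
%% 0.625 is exactly 0.625 * 2^0, so frexp/1 returns {0.625, 0}, and
%% digits/1 emits only the digits needed to round-trip the float.
frexp_digits_example() ->
    {0.625, 0} = mochinum:frexp(0.625),
    "0.625" = mochinum:digits(0.625),
    1000 = mochinum:int_pow(10, 3),
    2 = mochinum:int_ceil(1.5),
    ok.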
diff --git a/apps/mochiweb/src/mochiweb.app.src b/apps/mochiweb/src/mochiweb.app.src new file mode 100644 index 00000000..b0f90144 --- /dev/null +++ b/apps/mochiweb/src/mochiweb.app.src @@ -0,0 +1,32 @@ +{application, mochiweb, + [{description, "MochiMedia Web Server"}, + {vsn, "113"}, + {modules, [ + mochihex, + mochijson, + mochijson2, + mochinum, + mochiweb, + mochiweb_app, + mochiweb_charref, + mochiweb_cookies, + mochiweb_echo, + mochiweb_headers, + mochiweb_html, + mochiweb_http, + mochiweb_multipart, + mochiweb_request, + mochiweb_response, + mochiweb_skel, + mochiweb_socket_server, + mochiweb_sup, + mochiweb_util, + reloader, + mochifmt, + mochifmt_std, + mochifmt_records + ]}, + {registered, []}, + {mod, {mochiweb_app, []}}, + {env, []}, + {applications, [kernel, stdlib]}]}. diff --git a/apps/mochiweb/src/mochiweb.erl b/apps/mochiweb/src/mochiweb.erl new file mode 100644 index 00000000..0f4d52a6 --- /dev/null +++ b/apps/mochiweb/src/mochiweb.erl @@ -0,0 +1,110 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Start and stop the MochiWeb server. + +-module(mochiweb). +-author('bob@mochimedia.com'). + +-export([start/0, stop/0]). +-export([new_request/1, new_response/1]). +-export([all_loaded/0, all_loaded/1, reload/0]). +-export([test/0]). + +%% @spec start() -> ok +%% @doc Start the MochiWeb server. +start() -> + ensure_started(crypto), + application:start(mochiweb). + +%% @spec stop() -> ok +%% @doc Stop the MochiWeb server. +stop() -> + Res = application:stop(mochiweb), + application:stop(crypto), + Res. + +%% @spec test() -> ok +%% @doc Run all of the tests for MochiWeb. +test() -> + mochiweb_util:test(), + mochiweb_headers:test(), + mochiweb_cookies:test(), + mochihex:test(), + mochinum:test(), + mochijson:test(), + mochiweb_charref:test(), + mochiweb_html:test(), + mochifmt:test(), + test_request(), + ok. + +reload() -> + [c:l(Module) || Module <- all_loaded()]. + +all_loaded() -> + all_loaded(filename:dirname(code:which(?MODULE))). + +all_loaded(Base) when is_atom(Base) -> + []; +all_loaded(Base) -> + FullBase = Base ++ "/", + F = fun ({_Module, Loaded}, Acc) when is_atom(Loaded) -> + Acc; + ({Module, Loaded}, Acc) -> + case lists:prefix(FullBase, Loaded) of + true -> + [Module | Acc]; + false -> + Acc + end + end, + lists:foldl(F, [], code:all_loaded()). + + +%% @spec new_request({Socket, Request, Headers}) -> MochiWebRequest +%% @doc Return a mochiweb_request data structure. +new_request({Socket, {Method, {abs_path, Uri}, Version}, Headers}) -> + mochiweb_request:new(Socket, + Method, + Uri, + Version, + mochiweb_headers:make(Headers)); +% this case probably doesn't "exist". +new_request({Socket, {Method, {absoluteURI, _Protocol, _Host, _Port, Uri}, + Version}, Headers}) -> + mochiweb_request:new(Socket, + Method, + Uri, + Version, + mochiweb_headers:make(Headers)); +%% Request-URI is "*" +%% From http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 +new_request({Socket, {Method, '*'=Uri, Version}, Headers}) -> + mochiweb_request:new(Socket, + Method, + Uri, + Version, + mochiweb_headers:make(Headers)). + +%% @spec new_response({Request, integer(), Headers}) -> MochiWebResponse +%% @doc Return a mochiweb_response data structure. +new_response({Request, Code, Headers}) -> + mochiweb_response:new(Request, + Code, + mochiweb_headers:make(Headers)). + +%% Internal API + +test_request() -> + R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []), + "/foo/bar/baz wibble quux" = R:get(path), + ok. 
+ +ensure_started(App) -> + case application:start(App) of + ok -> + ok; + {error, {already_started, App}} -> + ok + end. diff --git a/apps/mochiweb/src/mochiweb_app.erl b/apps/mochiweb/src/mochiweb_app.erl new file mode 100644 index 00000000..2b437f6c --- /dev/null +++ b/apps/mochiweb/src/mochiweb_app.erl @@ -0,0 +1,20 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Callbacks for the mochiweb application. + +-module(mochiweb_app). +-author('bob@mochimedia.com'). + +-behaviour(application). +-export([start/2,stop/1]). + +%% @spec start(_Type, _StartArgs) -> ServerRet +%% @doc application start callback for mochiweb. +start(_Type, _StartArgs) -> + mochiweb_sup:start_link(). + +%% @spec stop(_State) -> ServerRet +%% @doc application stop callback for mochiweb. +stop(_State) -> + ok. diff --git a/apps/mochiweb/src/mochiweb_charref.erl b/apps/mochiweb/src/mochiweb_charref.erl new file mode 100644 index 00000000..d037d2f8 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_charref.erl @@ -0,0 +1,295 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Converts HTML 4 charrefs and entities to codepoints. +-module(mochiweb_charref). +-export([charref/1, test/0]). + +%% External API. + +%% @spec charref(S) -> integer() | undefined +%% @doc Convert a decimal charref, hex charref, or html entity to a unicode +%% codepoint, or return undefined on failure. +%% The input should not include an ampersand or semicolon. +%% charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38. +charref(B) when is_binary(B) -> + charref(binary_to_list(B)); +charref([$#, C | L]) when C =:= $x orelse C =:= $X -> + try erlang:list_to_integer(L, 16) + catch + error:badarg -> undefined + end; +charref([$# | L]) -> + try list_to_integer(L) + catch + error:badarg -> undefined + end; +charref(L) -> + entity(L). + +%% @spec test() -> ok +%% @doc Run tests for mochiweb_charref. +test() -> + 1234 = charref("#1234"), + 255 = charref("#xfF"), + 255 = charref("#XFf"), + 38 = charref("amp"), + undefined = charref("not_an_entity"), + ok. + +%% Internal API. 
+ +entity("nbsp") -> 160; +entity("iexcl") -> 161; +entity("cent") -> 162; +entity("pound") -> 163; +entity("curren") -> 164; +entity("yen") -> 165; +entity("brvbar") -> 166; +entity("sect") -> 167; +entity("uml") -> 168; +entity("copy") -> 169; +entity("ordf") -> 170; +entity("laquo") -> 171; +entity("not") -> 172; +entity("shy") -> 173; +entity("reg") -> 174; +entity("macr") -> 175; +entity("deg") -> 176; +entity("plusmn") -> 177; +entity("sup2") -> 178; +entity("sup3") -> 179; +entity("acute") -> 180; +entity("micro") -> 181; +entity("para") -> 182; +entity("middot") -> 183; +entity("cedil") -> 184; +entity("sup1") -> 185; +entity("ordm") -> 186; +entity("raquo") -> 187; +entity("frac14") -> 188; +entity("frac12") -> 189; +entity("frac34") -> 190; +entity("iquest") -> 191; +entity("Agrave") -> 192; +entity("Aacute") -> 193; +entity("Acirc") -> 194; +entity("Atilde") -> 195; +entity("Auml") -> 196; +entity("Aring") -> 197; +entity("AElig") -> 198; +entity("Ccedil") -> 199; +entity("Egrave") -> 200; +entity("Eacute") -> 201; +entity("Ecirc") -> 202; +entity("Euml") -> 203; +entity("Igrave") -> 204; +entity("Iacute") -> 205; +entity("Icirc") -> 206; +entity("Iuml") -> 207; +entity("ETH") -> 208; +entity("Ntilde") -> 209; +entity("Ograve") -> 210; +entity("Oacute") -> 211; +entity("Ocirc") -> 212; +entity("Otilde") -> 213; +entity("Ouml") -> 214; +entity("times") -> 215; +entity("Oslash") -> 216; +entity("Ugrave") -> 217; +entity("Uacute") -> 218; +entity("Ucirc") -> 219; +entity("Uuml") -> 220; +entity("Yacute") -> 221; +entity("THORN") -> 222; +entity("szlig") -> 223; +entity("agrave") -> 224; +entity("aacute") -> 225; +entity("acirc") -> 226; +entity("atilde") -> 227; +entity("auml") -> 228; +entity("aring") -> 229; +entity("aelig") -> 230; +entity("ccedil") -> 231; +entity("egrave") -> 232; +entity("eacute") -> 233; +entity("ecirc") -> 234; +entity("euml") -> 235; +entity("igrave") -> 236; +entity("iacute") -> 237; +entity("icirc") -> 238; +entity("iuml") -> 239; +entity("eth") -> 240; +entity("ntilde") -> 241; +entity("ograve") -> 242; +entity("oacute") -> 243; +entity("ocirc") -> 244; +entity("otilde") -> 245; +entity("ouml") -> 246; +entity("divide") -> 247; +entity("oslash") -> 248; +entity("ugrave") -> 249; +entity("uacute") -> 250; +entity("ucirc") -> 251; +entity("uuml") -> 252; +entity("yacute") -> 253; +entity("thorn") -> 254; +entity("yuml") -> 255; +entity("fnof") -> 402; +entity("Alpha") -> 913; +entity("Beta") -> 914; +entity("Gamma") -> 915; +entity("Delta") -> 916; +entity("Epsilon") -> 917; +entity("Zeta") -> 918; +entity("Eta") -> 919; +entity("Theta") -> 920; +entity("Iota") -> 921; +entity("Kappa") -> 922; +entity("Lambda") -> 923; +entity("Mu") -> 924; +entity("Nu") -> 925; +entity("Xi") -> 926; +entity("Omicron") -> 927; +entity("Pi") -> 928; +entity("Rho") -> 929; +entity("Sigma") -> 931; +entity("Tau") -> 932; +entity("Upsilon") -> 933; +entity("Phi") -> 934; +entity("Chi") -> 935; +entity("Psi") -> 936; +entity("Omega") -> 937; +entity("alpha") -> 945; +entity("beta") -> 946; +entity("gamma") -> 947; +entity("delta") -> 948; +entity("epsilon") -> 949; +entity("zeta") -> 950; +entity("eta") -> 951; +entity("theta") -> 952; +entity("iota") -> 953; +entity("kappa") -> 954; +entity("lambda") -> 955; +entity("mu") -> 956; +entity("nu") -> 957; +entity("xi") -> 958; +entity("omicron") -> 959; +entity("pi") -> 960; +entity("rho") -> 961; +entity("sigmaf") -> 962; +entity("sigma") -> 963; +entity("tau") -> 964; +entity("upsilon") -> 965; +entity("phi") -> 966; 
+entity("chi") -> 967; +entity("psi") -> 968; +entity("omega") -> 969; +entity("thetasym") -> 977; +entity("upsih") -> 978; +entity("piv") -> 982; +entity("bull") -> 8226; +entity("hellip") -> 8230; +entity("prime") -> 8242; +entity("Prime") -> 8243; +entity("oline") -> 8254; +entity("frasl") -> 8260; +entity("weierp") -> 8472; +entity("image") -> 8465; +entity("real") -> 8476; +entity("trade") -> 8482; +entity("alefsym") -> 8501; +entity("larr") -> 8592; +entity("uarr") -> 8593; +entity("rarr") -> 8594; +entity("darr") -> 8595; +entity("harr") -> 8596; +entity("crarr") -> 8629; +entity("lArr") -> 8656; +entity("uArr") -> 8657; +entity("rArr") -> 8658; +entity("dArr") -> 8659; +entity("hArr") -> 8660; +entity("forall") -> 8704; +entity("part") -> 8706; +entity("exist") -> 8707; +entity("empty") -> 8709; +entity("nabla") -> 8711; +entity("isin") -> 8712; +entity("notin") -> 8713; +entity("ni") -> 8715; +entity("prod") -> 8719; +entity("sum") -> 8721; +entity("minus") -> 8722; +entity("lowast") -> 8727; +entity("radic") -> 8730; +entity("prop") -> 8733; +entity("infin") -> 8734; +entity("ang") -> 8736; +entity("and") -> 8743; +entity("or") -> 8744; +entity("cap") -> 8745; +entity("cup") -> 8746; +entity("int") -> 8747; +entity("there4") -> 8756; +entity("sim") -> 8764; +entity("cong") -> 8773; +entity("asymp") -> 8776; +entity("ne") -> 8800; +entity("equiv") -> 8801; +entity("le") -> 8804; +entity("ge") -> 8805; +entity("sub") -> 8834; +entity("sup") -> 8835; +entity("nsub") -> 8836; +entity("sube") -> 8838; +entity("supe") -> 8839; +entity("oplus") -> 8853; +entity("otimes") -> 8855; +entity("perp") -> 8869; +entity("sdot") -> 8901; +entity("lceil") -> 8968; +entity("rceil") -> 8969; +entity("lfloor") -> 8970; +entity("rfloor") -> 8971; +entity("lang") -> 9001; +entity("rang") -> 9002; +entity("loz") -> 9674; +entity("spades") -> 9824; +entity("clubs") -> 9827; +entity("hearts") -> 9829; +entity("diams") -> 9830; +entity("quot") -> 34; +entity("amp") -> 38; +entity("lt") -> 60; +entity("gt") -> 62; +entity("OElig") -> 338; +entity("oelig") -> 339; +entity("Scaron") -> 352; +entity("scaron") -> 353; +entity("Yuml") -> 376; +entity("circ") -> 710; +entity("tilde") -> 732; +entity("ensp") -> 8194; +entity("emsp") -> 8195; +entity("thinsp") -> 8201; +entity("zwnj") -> 8204; +entity("zwj") -> 8205; +entity("lrm") -> 8206; +entity("rlm") -> 8207; +entity("ndash") -> 8211; +entity("mdash") -> 8212; +entity("lsquo") -> 8216; +entity("rsquo") -> 8217; +entity("sbquo") -> 8218; +entity("ldquo") -> 8220; +entity("rdquo") -> 8221; +entity("bdquo") -> 8222; +entity("dagger") -> 8224; +entity("Dagger") -> 8225; +entity("permil") -> 8240; +entity("lsaquo") -> 8249; +entity("rsaquo") -> 8250; +entity("euro") -> 8364; +entity(_) -> undefined. + diff --git a/apps/mochiweb/src/mochiweb_cookies.erl b/apps/mochiweb/src/mochiweb_cookies.erl new file mode 100644 index 00000000..61711ff0 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_cookies.erl @@ -0,0 +1,257 @@ +%% @author Emad El-Haraty <emad@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc HTTP Cookie parsing and generating (RFC 2109, RFC 2965). + +-module(mochiweb_cookies). +-export([parse_cookie/1, cookie/3, cookie/2, test/0]). + +-define(QUOTE, $\"). + +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). 
+ +%% RFC 2616 separators (called tspecials in RFC 2068) +-define(IS_SEPARATOR(C), + (C < 32 orelse + C =:= $\s orelse C =:= $\t orelse + C =:= $( orelse C =:= $) orelse C =:= $< orelse C =:= $> orelse + C =:= $@ orelse C =:= $, orelse C =:= $; orelse C =:= $: orelse + C =:= $\\ orelse C =:= $\" orelse C =:= $/ orelse + C =:= $[ orelse C =:= $] orelse C =:= $? orelse C =:= $= orelse + C =:= ${ orelse C =:= $})). + +%% @type proplist() = [{Key::string(), Value::string()}]. +%% @type header() = {Name::string(), Value::string()}. + +%% @spec cookie(Key::string(), Value::string()) -> header() +%% @doc Short-hand for <code>cookie(Key, Value, [])</code>. +cookie(Key, Value) -> + cookie(Key, Value, []). + +%% @spec cookie(Key::string(), Value::string(), Options::[Option]) -> header() +%% where Option = {max_age, integer()} | {local_time, {date(), time()}} +%% | {domain, string()} | {path, string()} +%% | {secure, true | false} | {http_only, true | false} +%% +%% @doc Generate a Set-Cookie header field tuple. +cookie(Key, Value, Options) -> + Cookie = [any_to_list(Key), "=", quote(Value), "; Version=1"], + %% Set-Cookie: + %% Comment, Domain, Max-Age, Path, Secure, Version + %% Set-Cookie2: + %% Comment, CommentURL, Discard, Domain, Max-Age, Path, Port, Secure, + %% Version + ExpiresPart = + case proplists:get_value(max_age, Options) of + undefined -> + ""; + RawAge -> + When = case proplists:get_value(local_time, Options) of + undefined -> + calendar:local_time(); + LocalTime -> + LocalTime + end, + Age = case RawAge < 0 of + true -> + 0; + false -> + RawAge + end, + ["; Expires=", age_to_cookie_date(Age, When), + "; Max-Age=", quote(Age)] + end, + SecurePart = + case proplists:get_value(secure, Options) of + true -> + "; Secure"; + _ -> + "" + end, + DomainPart = + case proplists:get_value(domain, Options) of + undefined -> + ""; + Domain -> + ["; Domain=", quote(Domain)] + end, + PathPart = + case proplists:get_value(path, Options) of + undefined -> + ""; + Path -> + ["; Path=", quote(Path)] + end, + HttpOnlyPart = + case proplists:get_value(http_only, Options) of + true -> + "; HttpOnly"; + _ -> + "" + end, + CookieParts = [Cookie, ExpiresPart, SecurePart, DomainPart, PathPart, HttpOnlyPart], + {"Set-Cookie", lists:flatten(CookieParts)}. + + +%% Every major browser incorrectly handles quoted strings in a +%% different and (worse) incompatible manner. Instead of wasting time +%% writing redundant code for each browser, we restrict cookies to +%% only contain characters that browsers handle compatibly. +%% +%% By replacing the definition of quote with this, we generate +%% RFC-compliant cookies: +%% +%% quote(V) -> +%% Fun = fun(?QUOTE, Acc) -> [$\\, ?QUOTE | Acc]; +%% (Ch, Acc) -> [Ch | Acc] +%% end, +%% [?QUOTE | lists:foldr(Fun, [?QUOTE], V)]. + +%% Convert to a string and raise an error if quoting is required. +quote(V0) -> + V = any_to_list(V0), + lists:all(fun(Ch) -> Ch =:= $/ orelse not ?IS_SEPARATOR(Ch) end, V) + orelse erlang:error({cookie_quoting_required, V}), + V. + +add_seconds(Secs, LocalTime) -> + Greg = calendar:datetime_to_gregorian_seconds(LocalTime), + calendar:gregorian_seconds_to_datetime(Greg + Secs). + +age_to_cookie_date(Age, LocalTime) -> + httpd_util:rfc1123_date(add_seconds(Age, LocalTime)). + +%% @spec parse_cookie(string()) -> [{K::string(), V::string()}] +%% @doc Parse the contents of a Cookie header field, ignoring cookie +%% attributes, and return a simple property list. +parse_cookie("") -> + []; +parse_cookie(Cookie) -> + parse_cookie(Cookie, []). 
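%% Editor's illustrative sketch (not part of the original module; the
%% function name cookie_example/0 is hypothetical): cookie/3 builds a
%% Set-Cookie header tuple from options, and parse_cookie/1 turns a Cookie
%% header value back into a simple property list, dropping $-attributes.
cookie_example() ->
    {"Set-Cookie", "sid=abc123; Version=1; Path=/; HttpOnly"} =
        mochiweb_cookies:cookie("sid", "abc123",
                                [{path, "/"}, {http_only, true}]),
    [{"sid", "abc123"}, {"lang", "en"}] =
        mochiweb_cookies:parse_cookie("sid=abc123; lang=en"),
    ok.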
+ +%% @spec test() -> ok +%% @doc Run tests for mochiweb_cookies. +test() -> + parse_cookie_test(), + cookie_test(), + ok. + +%% Internal API + +parse_cookie([], Acc) -> + lists:reverse(Acc); +parse_cookie(String, Acc) -> + {{Token, Value}, Rest} = read_pair(String), + Acc1 = case Token of + "" -> + Acc; + "$" ++ _ -> + Acc; + _ -> + [{Token, Value} | Acc] + end, + parse_cookie(Rest, Acc1). + +read_pair(String) -> + {Token, Rest} = read_token(skip_whitespace(String)), + {Value, Rest1} = read_value(skip_whitespace(Rest)), + {{Token, Value}, skip_past_separator(Rest1)}. + +read_value([$= | Value]) -> + Value1 = skip_whitespace(Value), + case Value1 of + [?QUOTE | _] -> + read_quoted(Value1); + _ -> + read_token(Value1) + end; +read_value(String) -> + {"", String}. + +read_quoted([?QUOTE | String]) -> + read_quoted(String, []). + +read_quoted([], Acc) -> + {lists:reverse(Acc), []}; +read_quoted([?QUOTE | Rest], Acc) -> + {lists:reverse(Acc), Rest}; +read_quoted([$\\, Any | Rest], Acc) -> + read_quoted(Rest, [Any | Acc]); +read_quoted([C | Rest], Acc) -> + read_quoted(Rest, [C | Acc]). + +skip_whitespace(String) -> + F = fun (C) -> ?IS_WHITESPACE(C) end, + lists:dropwhile(F, String). + +read_token(String) -> + F = fun (C) -> not ?IS_SEPARATOR(C) end, + lists:splitwith(F, String). + +skip_past_separator([]) -> + []; +skip_past_separator([$; | Rest]) -> + Rest; +skip_past_separator([$, | Rest]) -> + Rest; +skip_past_separator([_ | Rest]) -> + skip_past_separator(Rest). + +parse_cookie_test() -> + %% RFC example + C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"; + Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\"; + Shipping=\"FedEx\"; $Path=\"/acme\"", + [ + {"Customer","WILE_E_COYOTE"}, + {"Part_Number","Rocket_Launcher_0001"}, + {"Shipping","FedEx"} + ] = parse_cookie(C1), + %% Potential edge cases + [{"foo", "x"}] = parse_cookie("foo=\"\\x\""), + [] = parse_cookie("="), + [{"foo", ""}, {"bar", ""}] = parse_cookie(" foo ; bar "), + [{"foo", ""}, {"bar", ""}] = parse_cookie("foo=;bar="), + [{"foo", "\";"}, {"bar", ""}] = parse_cookie("foo = \"\\\";\";bar "), + [{"foo", "\";bar"}] = parse_cookie("foo=\"\\\";bar"). + +any_to_list(V) when is_list(V) -> + V; +any_to_list(V) when is_atom(V) -> + atom_to_list(V); +any_to_list(V) when is_binary(V) -> + binary_to_list(V); +any_to_list(V) when is_integer(V) -> + integer_to_list(V). + + +cookie_test() -> + C1 = {"Set-Cookie", + "Customer=WILE_E_COYOTE; " + "Version=1; " + "Path=/acme"}, + C1 = cookie("Customer", "WILE_E_COYOTE", [{path, "/acme"}]), + C1 = cookie("Customer", "WILE_E_COYOTE", + [{path, "/acme"}, {badoption, "negatory"}]), + C1 = cookie('Customer', 'WILE_E_COYOTE', [{path, '/acme'}]), + C1 = cookie(<<"Customer">>, <<"WILE_E_COYOTE">>, [{path, <<"/acme">>}]), + + {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey", []), + + LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}), + C2 = {"Set-Cookie", + "Customer=WILE_E_COYOTE; " + "Version=1; " + "Expires=Tue, 15 May 2007 13:45:33 GMT; " + "Max-Age=0"}, + C2 = cookie("Customer", "WILE_E_COYOTE", + [{max_age, -111}, {local_time, LocalTime}]), + C3 = {"Set-Cookie", + "Customer=WILE_E_COYOTE; " + "Version=1; " + "Expires=Wed, 16 May 2007 13:45:50 GMT; " + "Max-Age=86417"}, + C3 = cookie("Customer", "WILE_E_COYOTE", + [{max_age, 86417}, {local_time, LocalTime}]), + ok. 
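For quick reference, an illustrative shell sketch of the two public entry points (not part of the patch; the results follow directly from cookie_test/0 and parse_cookie_test/0 above):

    1> mochiweb_cookies:cookie("Customer", "WILE_E_COYOTE", [{path, "/acme"}]).
    {"Set-Cookie","Customer=WILE_E_COYOTE; Version=1; Path=/acme"}
    2> mochiweb_cookies:parse_cookie("Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"").
    [{"Customer","WILE_E_COYOTE"}]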
diff --git a/apps/mochiweb/src/mochiweb_echo.erl b/apps/mochiweb/src/mochiweb_echo.erl new file mode 100644 index 00000000..f32d6803 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_echo.erl @@ -0,0 +1,31 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Simple and stupid echo server to demo mochiweb_socket_server. + +-module(mochiweb_echo). +-author('bob@mochimedia.com'). +-export([start/0, stop/0, loop/1]). + +stop() -> + mochiweb_socket_server:stop(?MODULE). + +start() -> + mochiweb_socket_server:start([{name, ?MODULE}, + {port, 6789}, + {ip, "127.0.0.1"}, + {max, 1}, + {loop, {?MODULE, loop}}]). + +loop(Socket) -> + case gen_tcp:recv(Socket, 0, 30000) of + {ok, Data} -> + case gen_tcp:send(Socket, Data) of + ok -> + loop(Socket); + _ -> + exit(normal) + end; + _Other -> + exit(normal) + end. diff --git a/apps/mochiweb/src/mochiweb_headers.erl b/apps/mochiweb/src/mochiweb_headers.erl new file mode 100644 index 00000000..d90fd679 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_headers.erl @@ -0,0 +1,251 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Case preserving (but case insensitive) HTTP Header dictionary. + +-module(mochiweb_headers). +-author('bob@mochimedia.com'). +-export([empty/0, from_list/1, insert/3, enter/3, get_value/2, lookup/2]). +-export([delete_any/2, get_primary_value/2]). +-export([default/3, enter_from_list/2, default_from_list/2]). +-export([to_list/1, make/1]). +-export([from_binary/1]). +-export([test/0]). + +%% @type headers(). +%% @type key() = atom() | binary() | string(). +%% @type value() = atom() | binary() | string() | integer(). + +%% @spec test() -> ok +%% @doc Run tests for this module. +test() -> + H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]), + [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H), + H1 = ?MODULE:insert(taco, grande, H), + [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1), + H2 = ?MODULE:make([{"Set-Cookie", "foo"}]), + [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2), + H3 = ?MODULE:insert("Set-Cookie", "bar", H2), + [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3), + "foo, bar" = ?MODULE:get_value("set-cookie", H3), + {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3), + undefined = ?MODULE:get_value("shibby", H3), + none = ?MODULE:lookup("shibby", H3), + H4 = ?MODULE:insert("content-type", + "application/x-www-form-urlencoded; charset=utf8", + H3), + "application/x-www-form-urlencoded" = ?MODULE:get_primary_value( + "content-type", H4), + H4 = ?MODULE:delete_any("nonexistent-header", H4), + H3 = ?MODULE:delete_any("content-type", H4), + HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>, + H_HB = ?MODULE:from_binary(HB), + H_HB = ?MODULE:from_binary(binary_to_list(HB)), + "47" = ?MODULE:get_value("Content-Length", H_HB), + "text/plain" = ?MODULE:get_value("Content-Type", H_HB), + L_H_HB = ?MODULE:to_list(H_HB), + 2 = length(L_H_HB), + true = lists:member({'Content-Length', "47"}, L_H_HB), + true = lists:member({'Content-Type', "text/plain"}, L_H_HB), + HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ], + HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ], + HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ], + H_HL = ?MODULE:from_binary(HL), + H_HL = ?MODULE:from_binary(HL2), + H_HL = ?MODULE:from_binary(HL3), + "47" = ?MODULE:get_value("Content-Length", H_HL), + "text/plain" = 
?MODULE:get_value("Content-Type", H_HL), + L_H_HL = ?MODULE:to_list(H_HL), + 2 = length(L_H_HL), + true = lists:member({'Content-Length', "47"}, L_H_HL), + true = lists:member({'Content-Type', "text/plain"}, L_H_HL), + [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)), + [] = ?MODULE:to_list(?MODULE:from_binary("")), + [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])), + [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])), + ok. + +%% @spec empty() -> headers() +%% @doc Create an empty headers structure. +empty() -> + gb_trees:empty(). + +%% @spec make(headers() | [{key(), value()}]) -> headers() +%% @doc Construct a headers() from the given list. +make(L) when is_list(L) -> + from_list(L); +%% assume a tuple is already mochiweb_headers. +make(T) when is_tuple(T) -> + T. + +%% @spec from_binary(RawHttpHeader()) -> headers() +%% @type RawHttpHeader() -> string() | binary() | [ string() | binary() ] +%% +%% @doc Transforms a raw HTTP header into a mochiweb headers structure. +%% +%% The given raw HTTP header can be one of the following: +%% +%% 1) A string or a binary representing a full HTTP header ending with +%% double CRLF. +%% Examples: +%% "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n" +%% <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">> +%% +%% 2) A list of binaries or strings where each element represents a raw +%% HTTP header line ending with a single CRLF. +%% Examples: +%% [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ] +%% [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ] +%% [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ] +%% +from_binary(RawHttpHeader) when is_binary(RawHttpHeader) -> + from_binary(RawHttpHeader, []); + +from_binary(RawHttpHeaderList) -> + from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])). + +from_binary(RawHttpHeader, Acc) -> + case erlang:decode_packet(httph, RawHttpHeader, []) of + { ok, {http_header, _, H, _, V}, Rest } -> + from_binary(Rest, [{H, V} | Acc]); + _ -> + make(Acc) + end. + +%% @spec from_list([{key(), value()}]) -> headers() +%% @doc Construct a headers() from the given list. +from_list(List) -> + lists:foldl(fun ({K, V}, T) -> insert(K, V, T) end, empty(), List). + +%% @spec enter_from_list([{key(), value()}], headers()) -> headers() +%% @doc Insert pairs into the headers, replace any values for existing keys. +enter_from_list(List, T) -> + lists:foldl(fun ({K, V}, T1) -> enter(K, V, T1) end, T, List). + +%% @spec default_from_list([{key(), value()}], headers()) -> headers() +%% @doc Insert pairs into the headers for keys that do not already exist. +default_from_list(List, T) -> + lists:foldl(fun ({K, V}, T1) -> default(K, V, T1) end, T, List). + +%% @spec to_list(headers()) -> [{key(), string()}] +%% @doc Return the contents of the headers. The keys will be the exact key +%% that was first inserted (e.g. may be an atom or binary, case is +%% preserved). +to_list(T) -> + F = fun ({K, {array, L}}, Acc) -> + L1 = lists:reverse(L), + lists:foldl(fun (V, Acc1) -> [{K, V} | Acc1] end, Acc, L1); + (Pair, Acc) -> + [Pair | Acc] + end, + lists:reverse(lists:foldl(F, [], gb_trees:values(T))). 
+ +%% @spec get_value(key(), headers()) -> string() | undefined +%% @doc Return the value of the given header using a case insensitive search. +%% undefined will be returned for keys that are not present. +get_value(K, T) -> + case lookup(K, T) of + {value, {_, V}} -> + expand(V); + none -> + undefined + end. + +%% @spec get_primary_value(key(), headers()) -> string() | undefined +%% @doc Return the value of the given header up to the first semicolon using +%% a case insensitive search. undefined will be returned for keys +%% that are not present. +get_primary_value(K, T) -> + case get_value(K, T) of + undefined -> + undefined; + V -> + lists:takewhile(fun (C) -> C =/= $; end, V) + end. + +%% @spec lookup(key(), headers()) -> {value, {key(), string()}} | none +%% @doc Return the case preserved key and value for the given header using +%% a case insensitive search. none will be returned for keys that are +%% not present. +lookup(K, T) -> + case gb_trees:lookup(normalize(K), T) of + {value, {K0, V}} -> + {value, {K0, expand(V)}}; + none -> + none + end. + +%% @spec default(key(), value(), headers()) -> headers() +%% @doc Insert the pair into the headers if it does not already exist. +default(K, V, T) -> + K1 = normalize(K), + V1 = any_to_list(V), + try gb_trees:insert(K1, {K, V1}, T) + catch + error:{key_exists, _} -> + T + end. + +%% @spec enter(key(), value(), headers()) -> headers() +%% @doc Insert the pair into the headers, replacing any pre-existing key. +enter(K, V, T) -> + K1 = normalize(K), + V1 = any_to_list(V), + gb_trees:enter(K1, {K, V1}, T). + +%% @spec insert(key(), value(), headers()) -> headers() +%% @doc Insert the pair into the headers, merging with any pre-existing key. +%% A merge is done with Value = V0 ++ ", " ++ V1. +insert(K, V, T) -> + K1 = normalize(K), + V1 = any_to_list(V), + try gb_trees:insert(K1, {K, V1}, T) + catch + error:{key_exists, _} -> + {K0, V0} = gb_trees:get(K1, T), + V2 = merge(K1, V1, V0), + gb_trees:update(K1, {K0, V2}, T) + end. + +%% @spec delete_any(key(), headers()) -> headers() +%% @doc Delete the header corresponding to key if it is present. +delete_any(K, T) -> + K1 = normalize(K), + gb_trees:delete_any(K1, T). + +%% Internal API + +expand({array, L}) -> + mochiweb_util:join(lists:reverse(L), ", "); +expand(V) -> + V. + +merge("set-cookie", V1, {array, L}) -> + {array, [V1 | L]}; +merge("set-cookie", V1, V0) -> + {array, [V1, V0]}; +merge(_, V1, V0) -> + V0 ++ ", " ++ V1. + +normalize(K) when is_list(K) -> + string:to_lower(K); +normalize(K) when is_atom(K) -> + normalize(atom_to_list(K)); +normalize(K) when is_binary(K) -> + normalize(binary_to_list(K)). + +any_to_list(V) when is_list(V) -> + V; +any_to_list(V) when is_atom(V) -> + atom_to_list(V); +any_to_list(V) when is_binary(V) -> + binary_to_list(V); +any_to_list(V) when is_integer(V) -> + integer_to_list(V). + + diff --git a/apps/mochiweb/src/mochiweb_html.erl b/apps/mochiweb/src/mochiweb_html.erl new file mode 100644 index 00000000..77100d50 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_html.erl @@ -0,0 +1,893 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Loosely tokenizes and generates parse trees for HTML 4. +-module(mochiweb_html). +-export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1, + escape_attr/1, to_html/1, test/0]). + +% This is a macro to placate syntax highlighters.. +-define(QUOTE, $\"). +-define(SQUOTE, $\'). +-define(ADV_COL(S, N), + S#decoder{column=N+S#decoder.column, + offset=N+S#decoder.offset}). 
+-define(INC_COL(S), + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset}). +-define(INC_LINE(S), + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). + +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). +-define(IS_LITERAL_SAFE(C), + ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z) + orelse (C >= $0 andalso C =< $9))). + +-record(decoder, {line=1, + column=1, + offset=0}). + +%% @type html_node() = {string(), [html_attr()], [html_node() | string()]} +%% @type html_attr() = {string(), string()} +%% @type html_token() = html_data() | start_tag() | end_tag() | inline_html() | html_comment() | html_doctype() +%% @type html_data() = {data, string(), Whitespace::boolean()} +%% @type start_tag() = {start_tag, Name, [html_attr()], Singleton::boolean()} +%% @type end_tag() = {end_tag, Name} +%% @type html_comment() = {comment, Comment} +%% @type html_doctype() = {doctype, [Doctype]} +%% @type inline_html() = {'=', iolist()} + +%% External API. + +%% @spec parse(string() | binary()) -> html_node() +%% @doc tokenize and then transform the token stream into a HTML tree. +parse(Input) -> + parse_tokens(tokens(Input)). + +%% @spec parse_tokens([html_token()]) -> html_node() +%% @doc Transform the output of tokens(Doc) into a HTML tree. +parse_tokens(Tokens) when is_list(Tokens) -> + %% Skip over doctype, processing instructions + F = fun (X) -> + case X of + {start_tag, _, _, false} -> + false; + _ -> + true + end + end, + [{start_tag, Tag, Attrs, false} | Rest] = lists:dropwhile(F, Tokens), + {Tree, _} = tree(Rest, [norm({Tag, Attrs})]), + Tree. + +%% @spec tokens(StringOrBinary) -> [html_token()] +%% @doc Transform the input UTF-8 HTML into a token stream. +tokens(Input) -> + tokens(iolist_to_binary(Input), #decoder{}, []). + +%% @spec to_tokens(html_node()) -> [html_token()] +%% @doc Convert a html_node() tree to a list of tokens. +to_tokens({Tag0}) -> + to_tokens({Tag0, [], []}); +to_tokens(T={'=', _}) -> + [T]; +to_tokens(T={doctype, _}) -> + [T]; +to_tokens(T={comment, _}) -> + [T]; +to_tokens({Tag0, Acc}) -> + to_tokens({Tag0, [], Acc}); +to_tokens({Tag0, Attrs, Acc}) -> + Tag = to_tag(Tag0), + to_tokens([{Tag, Acc}], [{start_tag, Tag, Attrs, is_singleton(Tag)}]). + +%% @spec to_html([html_token()] | html_node()) -> iolist() +%% @doc Convert a list of html_token() to a HTML document. +to_html(Node) when is_tuple(Node) -> + to_html(to_tokens(Node)); +to_html(Tokens) when is_list(Tokens) -> + to_html(Tokens, []). + +%% @spec escape(string() | atom() | binary()) -> binary() +%% @doc Escape a string such that it's safe for HTML (amp; lt; gt;). +escape(B) when is_binary(B) -> + escape(binary_to_list(B), []); +escape(A) when is_atom(A) -> + escape(atom_to_list(A), []); +escape(S) when is_list(S) -> + escape(S, []). + +%% @spec escape_attr(string() | binary() | atom() | integer() | float()) -> binary() +%% @doc Escape a string such that it's safe for HTML attrs +%% (amp; lt; gt; quot;). 
+escape_attr(B) when is_binary(B) ->
+    escape_attr(binary_to_list(B), []);
+escape_attr(A) when is_atom(A) ->
+    escape_attr(atom_to_list(A), []);
+escape_attr(S) when is_list(S) ->
+    escape_attr(S, []);
+escape_attr(I) when is_integer(I) ->
+    escape_attr(integer_to_list(I), []);
+escape_attr(F) when is_float(F) ->
+    escape_attr(mochinum:digits(F), []).
+
+%% @spec test() -> ok
+%% @doc Run tests for mochiweb_html.
+test() ->
+    test_destack(),
+    test_tokens(),
+    test_tokens2(),
+    test_parse(),
+    test_parse2(),
+    test_parse_tokens(),
+    test_escape(),
+    test_escape_attr(),
+    test_to_html(),
+    ok.
+
+
+%% Internal API
+
+test_to_html() ->
+    Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
+    Expect = iolist_to_binary(
+               to_html({html, [],
+                        [{<<"head">>, [],
+                          [{title, <<"hey!">>}]},
+                         {body, [],
+                          [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+                           {'div', <<"sucka">>},
+                           {comment, <<" comment! ">>}]}]})),
+    Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+    Expect1 = iolist_to_binary(
+                to_html({doctype,
+                         [<<"html">>, <<"PUBLIC">>,
+                          <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+                          <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
+    ok.
+to_html([], Acc) ->
+    lists:reverse(Acc);
+to_html([{'=', Content} | Rest], Acc) ->
+    to_html(Rest, [Content | Acc]);
+to_html([{pi, Tag, Attrs} | Rest], Acc) ->
+    Open = [<<"<?">>,
+            Tag,
+            attrs_to_html(Attrs, []),
+            <<"?>">>],
+    to_html(Rest, [Open | Acc]);
+to_html([{comment, Comment} | Rest], Acc) ->
+    to_html(Rest, [[<<"<!--">>, Comment, <<"-->">>] | Acc]);
+to_html([{doctype, Parts} | Rest], Acc) ->
+    Inside = doctype_to_html(Parts, Acc),
+    to_html(Rest, [[<<"<!DOCTYPE">>, Inside, <<">">>] | Acc]);
+to_html([{data, Data, _Whitespace} | Rest], Acc) ->
+    to_html(Rest, [escape(Data) | Acc]);
+to_html([{start_tag, Tag, Attrs, Singleton} | Rest], Acc) ->
+    Open = [<<"<">>,
+            Tag,
+            attrs_to_html(Attrs, []),
+            case Singleton of
+                true -> <<" />">>;
+                false -> <<">">>
+            end],
+    to_html(Rest, [Open | Acc]);
+to_html([{end_tag, Tag} | Rest], Acc) ->
+    to_html(Rest, [[<<"</">>, Tag, <<">">>] | Acc]).
+
+doctype_to_html([], Acc) ->
+    lists:reverse(Acc);
+doctype_to_html([Word | Rest], Acc) ->
+    case lists:all(fun (C) -> ?IS_LITERAL_SAFE(C) end,
+                   binary_to_list(iolist_to_binary(Word))) of
+        true ->
+            doctype_to_html(Rest, [[<<" ">>, Word] | Acc]);
+        false ->
+            doctype_to_html(Rest, [[<<" \"">>, escape_attr(Word), ?QUOTE] | Acc])
+    end.
+
+attrs_to_html([], Acc) ->
+    lists:reverse(Acc);
+attrs_to_html([{K, V} | Rest], Acc) ->
+    attrs_to_html(Rest,
+                  [[<<" ">>, escape(K), <<"=\"">>,
+                    escape_attr(V), <<"\"">>] | Acc]).
+
+test_escape() ->
+    <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
+        escape(<<"&quot;\"word <<up!&quot;">>),
+    ok.
+
+test_escape_attr() ->
+    <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
+        escape_attr(<<"&quot;\"word <<up!&quot;">>),
+    ok.
+
+escape([], Acc) ->
+    list_to_binary(lists:reverse(Acc));
+escape("<" ++ Rest, Acc) ->
+    escape(Rest, lists:reverse("&lt;", Acc));
+escape(">" ++ Rest, Acc) ->
+    escape(Rest, lists:reverse("&gt;", Acc));
+escape("&" ++ Rest, Acc) ->
+    escape(Rest, lists:reverse("&amp;", Acc));
+escape([C | Rest], Acc) ->
+    escape(Rest, [C | Acc]).
+
+escape_attr([], Acc) ->
+    list_to_binary(lists:reverse(Acc));
+escape_attr("<" ++ Rest, Acc) ->
+    escape_attr(Rest, lists:reverse("&lt;", Acc));
+escape_attr(">" ++ Rest, Acc) ->
+    escape_attr(Rest, lists:reverse("&gt;", Acc));
+escape_attr("&" ++ Rest, Acc) ->
+    escape_attr(Rest, lists:reverse("&amp;", Acc));
+escape_attr([?QUOTE | Rest], Acc) ->
+    escape_attr(Rest, lists:reverse("&quot;", Acc));
+escape_attr([C | Rest], Acc) ->
+    escape_attr(Rest, [C | Acc]).
+
+to_tag(A) when is_atom(A) ->
+    norm(atom_to_list(A));
+to_tag(L) ->
+    norm(L).
+
+to_tokens([], Acc) ->
+    lists:reverse(Acc);
+to_tokens([{Tag, []} | Rest], Acc) ->
+    to_tokens(Rest, [{end_tag, to_tag(Tag)} | Acc]);
+to_tokens([{Tag0, [{T0} | R1]} | Rest], Acc) ->
+    %% Allow {br}
+    to_tokens([{Tag0, [{T0, [], []} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) ->
+    %% Allow {'=', iolist()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
+    %% Allow {comment, iolist()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
+    %% Allow {p, [{"class", "foo"}]}
+    to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, C0} | R1]} | Rest], Acc) ->
+    %% Allow {p, "content"} and {p, <<"content">>}
+    to_tokens([{Tag0, [{T0, [], C0} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C0} | R1]} | Rest], Acc) when is_binary(C0) ->
+    %% Allow {"p", [{"class", "foo"}], <<"content">>}
+    to_tokens([{Tag0, [{T0, A1, binary_to_list(C0)} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C0=[C | _]} | R1]} | Rest], Acc)
+  when is_integer(C) ->
+    %% Allow {"p", [{"class", "foo"}], "content"}
+    to_tokens([{Tag0, [{T0, A1, [C0]} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C1} | R1]} | Rest], Acc) ->
+    %% Native {"p", [{"class", "foo"}], ["content"]}
+    Tag = to_tag(Tag0),
+    T1 = to_tag(T0),
+    case is_singleton(norm(T1)) of
+        true ->
+            to_tokens([{Tag, R1} | Rest], [{start_tag, T1, A1, true} | Acc]);
+        false ->
+            to_tokens([{T1, C1}, {Tag, R1} | Rest],
+                      [{start_tag, T1, A1, false} | Acc])
+    end;
+to_tokens([{Tag0, [L | R1]} | Rest], Acc) when is_list(L) ->
+    %% List text
+    Tag = to_tag(Tag0),
+    to_tokens([{Tag, R1} | Rest], [{data, iolist_to_binary(L), false} | Acc]);
+to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) ->
+    %% Binary text
+    Tag = to_tag(Tag0),
+    to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
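Taken together, the shorthand clauses above mean that to_html/1 accepts compact tuple trees such as {br}, {Tag, Content} and {Tag, Attrs, Content}, with atoms, strings or binaries for tag and attribute names. An illustrative one-liner, consistent with test_to_html/0 earlier in this module (not part of the patch):

    1> iolist_to_binary(mochiweb_html:to_html({p, [{class, foo}], [<<"hi">>, {br}]})).
    <<"<p class=\"foo\">hi<br /></p>">>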
+ +test_tokens() -> + [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, + {<<"wibble">>, <<"wibble">>}, + {<<"alice">>, <<"bob">>}], true}] = + tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>), + [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>}, + {<<"wibble">>, <<"wibble">>}, + {<<"alice">>, <<"bob">>}], true}] = + tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>), + [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] = + tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>), + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}] = + tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>), + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}] = + tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>), + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}] = + tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>), + [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false}, + {data, <<" A= B <= C ">>, false}, + {end_tag, <<"script">>}] = + tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>), + [{start_tag, <<"textarea">>, [], false}, + {data, <<"<html></body>">>, false}, + {end_tag, <<"textarea">>}] = + tokens(<<"<textarea><html></body></textarea>">>), + ok. + +tokens(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary>> -> + lists:reverse(Acc); + _ -> + {Tag, S1} = tokenize(B, S), + case parse_flag(Tag) of + script -> + {Tag2, S2} = tokenize_script(B, S1), + tokens(B, S2, [Tag2, Tag | Acc]); + textarea -> + {Tag2, S2} = tokenize_textarea(B, S1), + tokens(B, S2, [Tag2, Tag | Acc]); + none -> + tokens(B, S1, [Tag | Acc]) + end + end. + +parse_flag({start_tag, B, _, false}) -> + case string:to_lower(binary_to_list(B)) of + "script" -> + script; + "textarea" -> + textarea; + _ -> + none + end; +parse_flag(_) -> + none. + +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, "<!--", _/binary>> -> + tokenize_comment(B, ?ADV_COL(S, 4)); + <<_:O/binary, "<!DOCTYPE", _/binary>> -> + tokenize_doctype(B, ?ADV_COL(S, 10)); + <<_:O/binary, "<![CDATA[", _/binary>> -> + tokenize_cdata(B, ?ADV_COL(S, 9)); + <<_:O/binary, "<?", _/binary>> -> + {Tag, S1} = tokenize_literal(B, ?ADV_COL(S, 2)), + {Attrs, S2} = tokenize_attributes(B, S1), + S3 = find_qgt(B, S2), + {{pi, Tag, Attrs}, S3}; + <<_:O/binary, "&", _/binary>> -> + tokenize_charref(B, ?INC_COL(S)); + <<_:O/binary, "</", _/binary>> -> + {Tag, S1} = tokenize_literal(B, ?ADV_COL(S, 2)), + {S2, _} = find_gt(B, S1), + {{end_tag, Tag}, S2}; + <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) -> + %% This isn't really strict HTML + tokenize_data(B, ?INC_COL(S)); + <<_:O/binary, "<", _/binary>> -> + {Tag, S1} = tokenize_literal(B, ?INC_COL(S)), + {Attrs, S2} = tokenize_attributes(B, S1), + {S3, HasSlash} = find_gt(B, S2), + Singleton = HasSlash orelse is_singleton(norm(binary_to_list(Tag))), + {{start_tag, Tag, Attrs, Singleton}, S3}; + _ -> + tokenize_data(B, S) + end. 
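tokenize/2 above dispatches on the next bytes: comments, doctypes, CDATA sections, processing instructions, character references, end tags, start tags, and plain data. The "&" branch hands off to tokenize_charref/2 (further down), which resolves named and numeric references through mochiweb_charref, so entities come back as ordinary data tokens. An illustrative shell sketch (not part of the patch):

    1> mochiweb_html:tokens(<<"a &amp; b">>).
    [{data,<<"a ">>,false},{data,<<"&">>,false},{data,<<" b">>,false}]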
+ +test_parse() -> + D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\"> +<html> + <head> + <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"> + <title>Foo</title> + <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\"> + <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\"> + <!--[if lt IE 7]> + <style type=\"text/css\"> + .no_ie { display: none; } + </style> + <![endif]--> + <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\"> + <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\"> + </head> + <body id=\"home\" class=\"tundra\"><![CDATA[<<this<!-- is -->CDATA>>]]></body> +</html>">>, + Expect = {<<"html">>, [], + [{<<"head">>, [], + [{<<"meta">>, + [{<<"http-equiv">>,<<"Content-Type">>}, + {<<"content">>,<<"text/html; charset=UTF-8">>}], + []}, + {<<"title">>,[],[<<"Foo">>]}, + {<<"link">>, + [{<<"rel">>,<<"stylesheet">>}, + {<<"type">>,<<"text/css">>}, + {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>}, + {<<"media">>,<<"screen">>}], + []}, + {<<"link">>, + [{<<"rel">>,<<"stylesheet">>}, + {<<"type">>,<<"text/css">>}, + {<<"href">>,<<"/static/foo.css">>}, + {<<"media">>,<<"screen">>}], + []}, + {comment,<<"[if lt IE 7]>\n <style type=\"text/css\">\n .no_ie { display: none; }\n </style>\n <![endif]">>}, + {<<"link">>, + [{<<"rel">>,<<"icon">>}, + {<<"href">>,<<"/static/images/favicon.ico">>}, + {<<"type">>,<<"image/x-icon">>}], + []}, + {<<"link">>, + [{<<"rel">>,<<"shortcut icon">>}, + {<<"href">>,<<"/static/images/favicon.ico">>}, + {<<"type">>,<<"image/x-icon">>}], + []}]}, + {<<"body">>, + [{<<"id">>,<<"home">>}, + {<<"class">>,<<"tundra">>}], + [<<"<<this<!-- is -->CDATA>>">>]}]}, + Expect = parse(D0), + ok. + +test_tokens2() -> + D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>, + Expect = [{start_tag,<<"channel">>,[],false}, + {start_tag,<<"title">>,[],false}, + {data,<<"from __future__ import *">>,false}, + {end_tag,<<"title">>}, + {start_tag,<<"link">>,[],true}, + {data,<<"http://bob.pythonmac.org">>,false}, + {end_tag,<<"link">>}, + {start_tag,<<"description">>,[],false}, + {data,<<"Bob's Rants">>,false}, + {end_tag,<<"description">>}, + {end_tag,<<"channel">>}], + Expect = tokens(D0), + ok. + +test_parse2() -> + D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>, + Expect = {<<"channel">>,[], + [{<<"title">>,[],[<<"from __future__ import *">>]}, + {<<"link">>,[],[ + <<"http://bob.pythonmac.org">>, + {<<"br">>,[],[]}, + <<"foo">>]}, + {<<"description">>,[],[<<"Bob's Rants">>]}]}, + Expect = parse(D0), + ok. 
+ +test_parse_tokens() -> + D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]}, + {data,<<"\n">>,true}, + {start_tag,<<"html">>,[],false}], + {<<"html">>, [], []} = parse_tokens(D0), + D1 = D0 ++ [{end_tag, <<"html">>}], + {<<"html">>, [], []} = parse_tokens(D1), + D2 = D0 ++ [{start_tag, <<"body">>, [], false}], + {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2), + D3 = D0 ++ [{start_tag, <<"head">>, [], false}, + {end_tag, <<"head">>}, + {start_tag, <<"body">>, [], false}], + {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3), + D4 = D3 ++ [{data,<<"\n">>,true}, + {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false}, + {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false}, + {end_tag,<<"a">>}, + {end_tag,<<"div">>}, + {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false}, + {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false}, + {end_tag,<<"div">>}, + {end_tag,<<"div">>}], + {<<"html">>, [], + [{<<"head">>, [], []}, + {<<"body">>, [], + [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]}, + {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]} + ]}]} = parse_tokens(D4), + D5 = [{start_tag,<<"html">>,[],false}, + {data,<<"\n">>,true}, + {data,<<"boo">>,false}, + {data,<<"hoo">>,false}, + {data,<<"\n">>,true}, + {end_tag,<<"html">>}], + {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5), + D6 = [{start_tag,<<"html">>,[],false}, + {data,<<"\n">>,true}, + {data,<<"\n">>,true}, + {end_tag,<<"html">>}], + {<<"html">>, [], []} = parse_tokens(D6), + D7 = [{start_tag,<<"html">>,[],false}, + {start_tag,<<"ul">>,[],false}, + {start_tag,<<"li">>,[],false}, + {data,<<"word">>,false}, + {start_tag,<<"li">>,[],false}, + {data,<<"up">>,false}, + {end_tag,<<"li">>}, + {start_tag,<<"li">>,[],false}, + {data,<<"fdsa">>,false}, + {start_tag,<<"br">>,[],true}, + {data,<<"asdf">>,false}, + {end_tag,<<"ul">>}, + {end_tag,<<"html">>}], + {<<"html">>, [], + [{<<"ul">>, [], + [{<<"li">>, [], [<<"word">>]}, + {<<"li">>, [], [<<"up">>]}, + {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7), + ok. + +tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) -> + tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]); +tree_data(Rest, AllWhitespace, Acc) -> + {iolist_to_binary(lists:reverse(Acc)), AllWhitespace, Rest}. + +tree([], Stack) -> + {destack(Stack), []}; +tree([{end_tag, Tag} | Rest], Stack) -> + case destack(norm(Tag), Stack) of + S when is_list(S) -> + tree(Rest, S); + Result -> + {Result, []} + end; +tree([{start_tag, Tag, Attrs, true} | Rest], S) -> + tree(Rest, append_stack_child(norm({Tag, Attrs}), S)); +tree([{start_tag, Tag, Attrs, false} | Rest], S) -> + tree(Rest, stack(norm({Tag, Attrs}), S)); +tree([T={pi, _Tag, _Attrs} | Rest], S) -> + tree(Rest, append_stack_child(T, S)); +tree([T={comment, _Comment} | Rest], S) -> + tree(Rest, append_stack_child(T, S)); +tree(L=[{data, _Data, _Whitespace} | _], S) -> + case tree_data(L, true, []) of + {_, true, Rest} -> + tree(Rest, S); + {Data, false, Rest} -> + tree(Rest, append_stack_child(Data, S)) + end. + +norm({Tag, Attrs}) -> + {norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []}; +norm(Tag) when is_binary(Tag) -> + Tag; +norm(Tag) -> + list_to_binary(string:to_lower(Tag)). 
+ +test_destack() -> + {<<"a">>, [], []} = + destack([{<<"a">>, [], []}]), + {<<"a">>, [], [{<<"b">>, [], []}]} = + destack([{<<"b">>, [], []}, {<<"a">>, [], []}]), + {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} = + destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]), + [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] = + destack(<<"b">>, + [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]), + [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] = + destack(<<"c">>, + [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]), + ok. + +stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest]) + when TN =:= <<"li">> orelse TN =:= <<"option">> -> + [T1 | destack(TN, Stack)]; +stack(T1={TN0, _, _}, Stack=[{TN1, _, _} | _Rest]) + when (TN0 =:= <<"dd">> orelse TN0 =:= <<"dt">>) andalso + (TN1 =:= <<"dd">> orelse TN1 =:= <<"dt">>) -> + [T1 | destack(TN1, Stack)]; +stack(T1, Stack) -> + [T1 | Stack]. + +append_stack_child(StartTag, [{Name, Attrs, Acc} | Stack]) -> + [{Name, Attrs, [StartTag | Acc]} | Stack]. + +destack(TagName, Stack) when is_list(Stack) -> + F = fun (X) -> + case X of + {TagName, _, _} -> + false; + _ -> + true + end + end, + case lists:splitwith(F, Stack) of + {_, []} -> + %% If we're parsing something like XML we might find + %% a <link>tag</link> that is normally a singleton + %% in HTML but isn't here + case {is_singleton(TagName), Stack} of + {true, [{T0, A0, Acc0} | Post0]} -> + case lists:splitwith(F, Acc0) of + {_, []} -> + %% Actually was a singleton + Stack; + {Pre, [{T1, A1, []} | Post1]} -> + [{T0, A0, [{T1, A1, lists:reverse(Pre)} | Post1]} + | Post0] + end; + _ -> + %% No match, no state change + Stack + end; + {_Pre, [_T]} -> + %% Unfurl the whole stack, we're done + destack(Stack); + {Pre, [T, {T0, A0, Acc0} | Post]} -> + %% Unfurl up to the tag, then accumulate it + [{T0, A0, [destack(Pre ++ [T]) | Acc0]} | Post] + end. + +destack([{Tag, Attrs, Acc}]) -> + {Tag, Attrs, lists:reverse(Acc)}; +destack([{T1, A1, Acc1}, {T0, A0, Acc0} | Rest]) -> + destack([{T0, A0, [{T1, A1, lists:reverse(Acc1)} | Acc0]} | Rest]). + +is_singleton(<<"br">>) -> true; +is_singleton(<<"hr">>) -> true; +is_singleton(<<"img">>) -> true; +is_singleton(<<"input">>) -> true; +is_singleton(<<"base">>) -> true; +is_singleton(<<"meta">>) -> true; +is_singleton(<<"link">>) -> true; +is_singleton(<<"area">>) -> true; +is_singleton(<<"param">>) -> true; +is_singleton(<<"col">>) -> true; +is_singleton(_) -> false. + +tokenize_data(B, S=#decoder{offset=O}) -> + tokenize_data(B, S, O, true). + +tokenize_data(B, S=#decoder{offset=O}, Start, Whitespace) -> + case B of + <<_:O/binary, C, _/binary>> when (C =/= $< andalso C =/= $&) -> + tokenize_data(B, ?INC_CHAR(S, C), Start, + (Whitespace andalso ?IS_WHITESPACE(C))); + _ -> + Len = O - Start, + <<_:Start/binary, Data:Len/binary, _/binary>> = B, + {{data, Data, Whitespace}, S} + end. + +tokenize_attributes(B, S) -> + tokenize_attributes(B, S, []). + +tokenize_attributes(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary>> -> + {lists:reverse(Acc), S}; + <<_:O/binary, C, _/binary>> when (C =:= $> orelse C =:= $/) -> + {lists:reverse(Acc), S}; + <<_:O/binary, "?>", _/binary>> -> + {lists:reverse(Acc), S}; + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize_attributes(B, ?INC_CHAR(S, C), Acc); + _ -> + {Attr, S1} = tokenize_literal(B, S), + {Value, S2} = tokenize_attr_value(Attr, B, S1), + tokenize_attributes(B, S2, [{Attr, Value} | Acc]) + end. 
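stack/2 and destack/2 above are what give the parser its forgiving treatment of implied end tags: a new li or option (and dd/dt) closes a still-open sibling before being pushed. An illustrative sketch of the effect (not part of the patch):

    1> mochiweb_html:parse("<ul><li>one<li>two</ul>").
    {<<"ul">>,[],[{<<"li">>,[],[<<"one">>]},{<<"li">>,[],[<<"two">>]}]}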
+ +tokenize_attr_value(Attr, B, S) -> + S1 = skip_whitespace(B, S), + O = S1#decoder.offset, + case B of + <<_:O/binary, "=", _/binary>> -> + S2 = skip_whitespace(B, ?INC_COL(S1)), + tokenize_word_or_literal(B, S2); + _ -> + {Attr, S1} + end. + +skip_whitespace(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + skip_whitespace(B, ?INC_CHAR(S, C)); + _ -> + S + end. + +tokenize_literal(Bin, S) -> + tokenize_literal(Bin, S, []). + +tokenize_literal(Bin, S=#decoder{offset=O}, Acc) -> + case Bin of + <<_:O/binary, $&, _/binary>> -> + {{data, Data, false}, S1} = tokenize_charref(Bin, ?INC_COL(S)), + tokenize_literal(Bin, S1, [Data | Acc]); + <<_:O/binary, C, _/binary>> when not (?IS_WHITESPACE(C) + orelse C =:= $> + orelse C =:= $/ + orelse C =:= $=) -> + tokenize_literal(Bin, ?INC_COL(S), [C | Acc]); + _ -> + {iolist_to_binary(lists:reverse(Acc)), S} + end. + +find_qgt(Bin, S=#decoder{offset=O}) -> + case Bin of + <<_:O/binary, "?>", _/binary>> -> + ?ADV_COL(S, 2); + <<_:O/binary, C, _/binary>> -> + find_qgt(Bin, ?INC_CHAR(S, C)); + _ -> + S + end. + +find_gt(Bin, S) -> + find_gt(Bin, S, false). + +find_gt(Bin, S=#decoder{offset=O}, HasSlash) -> + case Bin of + <<_:O/binary, $/, _/binary>> -> + find_gt(Bin, ?INC_COL(S), true); + <<_:O/binary, $>, _/binary>> -> + {?INC_COL(S), HasSlash}; + <<_:O/binary, C, _/binary>> -> + find_gt(Bin, ?INC_CHAR(S, C), HasSlash); + _ -> + {S, HasSlash} + end. + +tokenize_charref(Bin, S=#decoder{offset=O}) -> + tokenize_charref(Bin, S, O). + +tokenize_charref(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + <<_:O/binary>> -> + <<_:Start/binary, Raw/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) + orelse C =:= ?SQUOTE + orelse C =:= ?QUOTE + orelse C =:= $/ + orelse C =:= $> -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, $;, _/binary>> -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + Data = case mochiweb_charref:charref(Raw) of + undefined -> + Start1 = Start - 1, + Len1 = Len + 2, + <<_:Start1/binary, R:Len1/binary, _/binary>> = Bin, + R; + Unichar -> + list_to_binary(xmerl_ucs:to_utf8(Unichar)) + end, + {{data, Data, false}, ?INC_COL(S)}; + _ -> + tokenize_charref(Bin, ?INC_COL(S), Start) + end. + +tokenize_doctype(Bin, S) -> + tokenize_doctype(Bin, S, []). + +tokenize_doctype(Bin, S=#decoder{offset=O}, Acc) -> + case Bin of + <<_:O/binary>> -> + {{doctype, lists:reverse(Acc)}, S}; + <<_:O/binary, $>, _/binary>> -> + {{doctype, lists:reverse(Acc)}, ?INC_COL(S)}; + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize_doctype(Bin, ?INC_CHAR(S, C), Acc); + _ -> + {Word, S1} = tokenize_word_or_literal(Bin, S), + tokenize_doctype(Bin, S1, [Word | Acc]) + end. + +tokenize_word_or_literal(Bin, S=#decoder{offset=O}) -> + case Bin of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + {error, {whitespace, [C], S}}; + <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE -> + tokenize_word(Bin, ?INC_COL(S), C); + _ -> + tokenize_literal(Bin, S, []) + end. + +tokenize_word(Bin, S, Quote) -> + tokenize_word(Bin, S, Quote, []). 
+ +tokenize_word(Bin, S=#decoder{offset=O}, Quote, Acc) -> + case Bin of + <<_:O/binary>> -> + {iolist_to_binary(lists:reverse(Acc)), S}; + <<_:O/binary, Quote, _/binary>> -> + {iolist_to_binary(lists:reverse(Acc)), ?INC_COL(S)}; + <<_:O/binary, $&, _/binary>> -> + {{data, Data, false}, S1} = tokenize_charref(Bin, ?INC_COL(S)), + tokenize_word(Bin, S1, Quote, [Data | Acc]); + <<_:O/binary, C, _/binary>> -> + tokenize_word(Bin, ?INC_CHAR(S, C), Quote, [C | Acc]) + end. + +tokenize_cdata(Bin, S=#decoder{offset=O}) -> + tokenize_cdata(Bin, S, O). + +tokenize_cdata(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + <<_:O/binary, "]]>", _/binary>> -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, ?ADV_COL(S, 3)}; + <<_:O/binary, C, _/binary>> -> + tokenize_cdata(Bin, ?INC_CHAR(S, C), Start); + _ -> + <<_:O/binary, Raw/binary>> = Bin, + {{data, Raw, false}, S} + end. + +tokenize_comment(Bin, S=#decoder{offset=O}) -> + tokenize_comment(Bin, S, O). + +tokenize_comment(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + <<_:O/binary, "-->", _/binary>> -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{comment, Raw}, ?ADV_COL(S, 3)}; + <<_:O/binary, C, _/binary>> -> + tokenize_comment(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{comment, Raw}, S} + end. + +tokenize_script(Bin, S=#decoder{offset=O}) -> + tokenize_script(Bin, S, O). + +tokenize_script(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + %% Just a look-ahead, we want the end_tag separately + <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>> + when (SS =:= $s orelse SS =:= $S) andalso + (CC =:= $c orelse CC =:= $C) andalso + (RR =:= $r orelse RR =:= $R) andalso + (II =:= $i orelse II =:= $I) andalso + (PP =:= $p orelse PP =:= $P) andalso + (TT=:= $t orelse TT =:= $T) -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> -> + tokenize_script(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{data, Raw, false}, S} + end. + +tokenize_textarea(Bin, S=#decoder{offset=O}) -> + tokenize_textarea(Bin, S, O). + +tokenize_textarea(Bin, S=#decoder{offset=O}, Start) -> + case Bin of + %% Just a look-ahead, we want the end_tag separately + <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>> + when (TT =:= $t orelse TT =:= $T) andalso + (EE =:= $e orelse EE =:= $E) andalso + (XX =:= $x orelse XX =:= $X) andalso + (TT2 =:= $t orelse TT2 =:= $T) andalso + (AA =:= $a orelse AA =:= $A) andalso + (RR =:= $r orelse RR =:= $R) andalso + (EE2 =:= $e orelse EE2 =:= $E) andalso + (AA2 =:= $a orelse AA2 =:= $A) -> + Len = O - Start, + <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin, + {{data, Raw, false}, S}; + <<_:O/binary, C, _/binary>> -> + tokenize_textarea(Bin, ?INC_CHAR(S, C), Start); + <<_:Start/binary, Raw/binary>> -> + {{data, Raw, false}, S} + end. diff --git a/apps/mochiweb/src/mochiweb_http.erl b/apps/mochiweb/src/mochiweb_http.erl new file mode 100644 index 00000000..f1821f40 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_http.erl @@ -0,0 +1,152 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc HTTP server. + +-module(mochiweb_http). +-author('bob@mochimedia.com'). +-export([start/0, start/1, stop/0, stop/1]). +-export([loop/2, default_body/1]). +-export([after_response/2, reentry/1]). + +-define(IDLE_TIMEOUT, 30000). + +-define(MAX_HEADERS, 1000). 
+-define(DEFAULTS, [{name, ?MODULE}, + {port, 8888}]). + +set_default({Prop, Value}, PropList) -> + case proplists:is_defined(Prop, PropList) of + true -> + PropList; + false -> + [{Prop, Value} | PropList] + end. + +set_defaults(Defaults, PropList) -> + lists:foldl(fun set_default/2, PropList, Defaults). + +parse_options(Options) -> + {loop, HttpLoop} = proplists:lookup(loop, Options), + Loop = fun (S) -> + ?MODULE:loop(S, HttpLoop) + end, + Options1 = [{loop, Loop} | proplists:delete(loop, Options)], + set_defaults(?DEFAULTS, Options1). + +stop() -> + mochiweb_socket_server:stop(?MODULE). + +stop(Name) -> + mochiweb_socket_server:stop(Name). + +start() -> + start([{ip, "127.0.0.1"}, + {loop, {?MODULE, default_body}}]). + +start(Options) -> + mochiweb_socket_server:start(parse_options(Options)). + +frm(Body) -> + ["<html><head></head><body>" + "<form method=\"POST\">" + "<input type=\"hidden\" value=\"message\" name=\"hidden\"/>" + "<input type=\"submit\" value=\"regular POST\">" + "</form>" + "<br />" + "<form method=\"POST\" enctype=\"multipart/form-data\"" + " action=\"/multipart\">" + "<input type=\"hidden\" value=\"multipart message\" name=\"hidden\"/>" + "<input type=\"file\" name=\"file\"/>" + "<input type=\"submit\" value=\"multipart POST\" />" + "</form>" + "<pre>", Body, "</pre>" + "</body></html>"]. + +default_body(Req, M, "/chunked") when M =:= 'GET'; M =:= 'HEAD' -> + Res = Req:ok({"text/plain", [], chunked}), + Res:write_chunk("First chunk\r\n"), + timer:sleep(5000), + Res:write_chunk("Last chunk\r\n"), + Res:write_chunk(""); +default_body(Req, M, _Path) when M =:= 'GET'; M =:= 'HEAD' -> + Body = io_lib:format("~p~n", [[{parse_qs, Req:parse_qs()}, + {parse_cookie, Req:parse_cookie()}, + Req:dump()]]), + Req:ok({"text/html", + [mochiweb_cookies:cookie("mochiweb_http", "test_cookie")], + frm(Body)}); +default_body(Req, 'POST', "/multipart") -> + Body = io_lib:format("~p~n", [[{parse_qs, Req:parse_qs()}, + {parse_cookie, Req:parse_cookie()}, + {body, Req:recv_body()}, + Req:dump()]]), + Req:ok({"text/html", [], frm(Body)}); +default_body(Req, 'POST', _Path) -> + Body = io_lib:format("~p~n", [[{parse_qs, Req:parse_qs()}, + {parse_cookie, Req:parse_cookie()}, + {parse_post, Req:parse_post()}, + Req:dump()]]), + Req:ok({"text/html", [], frm(Body)}); +default_body(Req, _Method, _Path) -> + Req:respond({501, [], []}). + +default_body(Req) -> + default_body(Req, Req:get(method), Req:get(path)). + +loop(Socket, Body) -> + inet:setopts(Socket, [{packet, http}]), + request(Socket, Body). + +request(Socket, Body) -> + case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + {ok, {http_request, Method, Path, Version}} -> + headers(Socket, {Method, Path, Version}, [], Body, 0); + {error, {http_error, "\r\n"}} -> + request(Socket, Body); + {error, {http_error, "\n"}} -> + request(Socket, Body); + _Other -> + gen_tcp:close(Socket), + exit(normal) + end. + +reentry(Body) -> + fun (Req) -> + ?MODULE:after_response(Body, Req) + end. + +headers(Socket, Request, Headers, _Body, ?MAX_HEADERS) -> + %% Too many headers sent, bad request. 
+ inet:setopts(Socket, [{packet, raw}]), + Req = mochiweb:new_request({Socket, Request, + lists:reverse(Headers)}), + Req:respond({400, [], []}), + gen_tcp:close(Socket), + exit(normal); +headers(Socket, Request, Headers, Body, HeaderCount) -> + case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + {ok, http_eoh} -> + inet:setopts(Socket, [{packet, raw}]), + Req = mochiweb:new_request({Socket, Request, + lists:reverse(Headers)}), + Body(Req), + ?MODULE:after_response(Body, Req); + {ok, {http_header, _, Name, _, Value}} -> + headers(Socket, Request, [{Name, Value} | Headers], Body, + 1 + HeaderCount); + _Other -> + gen_tcp:close(Socket), + exit(normal) + end. + +after_response(Body, Req) -> + Socket = Req:get(socket), + case Req:should_close() of + true -> + gen_tcp:close(Socket), + exit(normal); + false -> + Req:cleanup(), + ?MODULE:loop(Socket, Body) + end. diff --git a/apps/mochiweb/src/mochiweb_multipart.erl b/apps/mochiweb/src/mochiweb_multipart.erl new file mode 100644 index 00000000..0368a9a6 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_multipart.erl @@ -0,0 +1,530 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Utilities for parsing multipart/form-data. + +-module(mochiweb_multipart). +-author('bob@mochimedia.com'). + +-export([parse_form/1, parse_form/2]). +-export([parse_multipart_request/2]). +-export([test/0]). + +-define(CHUNKSIZE, 4096). + +-record(mp, {state, boundary, length, buffer, callback, req}). + +%% TODO: DOCUMENT THIS MODULE. + +parse_form(Req) -> + parse_form(Req, fun default_file_handler/2). + +parse_form(Req, FileHandler) -> + Callback = fun (Next) -> parse_form_outer(Next, FileHandler, []) end, + {_, _, Res} = parse_multipart_request(Req, Callback), + Res. + +parse_form_outer(eof, _, Acc) -> + lists:reverse(Acc); +parse_form_outer({headers, H}, FileHandler, State) -> + {"form-data", H1} = proplists:get_value("content-disposition", H), + Name = proplists:get_value("name", H1), + Filename = proplists:get_value("filename", H1), + case Filename of + undefined -> + fun (Next) -> + parse_form_value(Next, {Name, []}, FileHandler, State) + end; + _ -> + ContentType = proplists:get_value("content-type", H), + Handler = FileHandler(Filename, ContentType), + fun (Next) -> + parse_form_file(Next, {Name, Handler}, FileHandler, State) + end + end. + +parse_form_value(body_end, {Name, Acc}, FileHandler, State) -> + Value = binary_to_list(iolist_to_binary(lists:reverse(Acc))), + State1 = [{Name, Value} | State], + fun (Next) -> parse_form_outer(Next, FileHandler, State1) end; +parse_form_value({body, Data}, {Name, Acc}, FileHandler, State) -> + Acc1 = [Data | Acc], + fun (Next) -> parse_form_value(Next, {Name, Acc1}, FileHandler, State) end. + +parse_form_file(body_end, {Name, Handler}, FileHandler, State) -> + Value = Handler(eof), + State1 = [{Name, Value} | State], + fun (Next) -> parse_form_outer(Next, FileHandler, State1) end; +parse_form_file({body, Data}, {Name, Handler}, FileHandler, State) -> + H1 = Handler(Data), + fun (Next) -> parse_form_file(Next, {Name, H1}, FileHandler, State) end. + +default_file_handler(Filename, ContentType) -> + default_file_handler_1(Filename, ContentType, []). + +default_file_handler_1(Filename, ContentType, Acc) -> + fun(eof) -> + Value = iolist_to_binary(lists:reverse(Acc)), + {Filename, ContentType, Value}; + (Next) -> + default_file_handler_1(Filename, ContentType, [Next | Acc]) + end. + +parse_multipart_request(Req, Callback) -> + %% TODO: Support chunked? 
+ Length = list_to_integer(Req:get_header_value("content-length")), + Boundary = iolist_to_binary( + get_boundary(Req:get_header_value("content-type"))), + Prefix = <<"\r\n--", Boundary/binary>>, + BS = byte_size(Boundary), + Chunk = read_chunk(Req, Length), + Length1 = Length - byte_size(Chunk), + <<"--", Boundary:BS/binary, "\r\n", Rest/binary>> = Chunk, + feed_mp(headers, flash_multipart_hack(#mp{boundary=Prefix, + length=Length1, + buffer=Rest, + callback=Callback, + req=Req})). + +parse_headers(<<>>) -> + []; +parse_headers(Binary) -> + parse_headers(Binary, []). + +parse_headers(Binary, Acc) -> + case find_in_binary(<<"\r\n">>, Binary) of + {exact, N} -> + <<Line:N/binary, "\r\n", Rest/binary>> = Binary, + parse_headers(Rest, [split_header(Line) | Acc]); + not_found -> + lists:reverse([split_header(Binary) | Acc]) + end. + +split_header(Line) -> + {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end, + binary_to_list(Line)), + {string:to_lower(string:strip(Name)), + mochiweb_util:parse_header(Value)}. + +read_chunk(Req, Length) when Length > 0 -> + case Length of + Length when Length < ?CHUNKSIZE -> + Req:recv(Length); + _ -> + Req:recv(?CHUNKSIZE) + end. + +read_more(State=#mp{length=Length, buffer=Buffer, req=Req}) -> + Data = read_chunk(Req, Length), + Buffer1 = <<Buffer/binary, Data/binary>>, + flash_multipart_hack(State#mp{length=Length - byte_size(Data), + buffer=Buffer1}). + +flash_multipart_hack(State=#mp{length=0, buffer=Buffer, boundary=Prefix}) -> + %% http://code.google.com/p/mochiweb/issues/detail?id=22 + %% Flash doesn't terminate multipart with \r\n properly so we fix it up here + PrefixSize = size(Prefix), + case size(Buffer) - (2 + PrefixSize) of + Seek when Seek >= 0 -> + case Buffer of + <<_:Seek/binary, Prefix:PrefixSize/binary, "--">> -> + Buffer1 = <<Buffer/binary, "\r\n">>, + State#mp{buffer=Buffer1}; + _ -> + State + end; + _ -> + State + end; +flash_multipart_hack(State) -> + State. + +feed_mp(headers, State=#mp{buffer=Buffer, callback=Callback}) -> + {State1, P} = case find_in_binary(<<"\r\n\r\n">>, Buffer) of + {exact, N} -> + {State, N}; + _ -> + S1 = read_more(State), + %% Assume headers must be less than ?CHUNKSIZE + {exact, N} = find_in_binary(<<"\r\n\r\n">>, + S1#mp.buffer), + {S1, N} + end, + <<Headers:P/binary, "\r\n\r\n", Rest/binary>> = State1#mp.buffer, + NextCallback = Callback({headers, parse_headers(Headers)}), + feed_mp(body, State1#mp{buffer=Rest, + callback=NextCallback}); +feed_mp(body, State=#mp{boundary=Prefix, buffer=Buffer, callback=Callback}) -> + Boundary = find_boundary(Prefix, Buffer), + case Boundary of + {end_boundary, Start, Skip} -> + <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer, + C1 = Callback({body, Data}), + C2 = C1(body_end), + {State#mp.length, Rest, C2(eof)}; + {next_boundary, Start, Skip} -> + <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer, + C1 = Callback({body, Data}), + feed_mp(headers, State#mp{callback=C1(body_end), + buffer=Rest}); + {maybe, Start} -> + <<Data:Start/binary, Rest/binary>> = Buffer, + feed_mp(body, read_more(State#mp{callback=Callback({body, Data}), + buffer=Rest})); + not_found -> + {Data, Rest} = {Buffer, <<>>}, + feed_mp(body, read_more(State#mp{callback=Callback({body, Data}), + buffer=Rest})) + end. + +get_boundary(ContentType) -> + {"multipart/form-data", Opts} = mochiweb_util:parse_header(ContentType), + case proplists:get_value("boundary", Opts) of + S when is_list(S) -> + S + end. 
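feed_mp/2 above drives a continuation-style callback: the callback is invoked with {headers, Headers}, then one or more {body, Binary} chunks, then body_end, and each call must return the fun to use for the next event; whatever it returns for the final eof becomes the third element of parse_multipart_request/2's result tuple (the tests below expect {0, <<>>, ok}). A minimal event-collecting callback as a sketch; collect/1 and collect/2 are illustrative helpers, not part of this module, and parse_form/1 above remains the usual high-level entry point:

    collect(Event) ->
        collect(Event, []).

    collect(eof, Acc) ->
        lists:reverse(Acc);
    collect(Event, Acc) ->
        fun (Next) -> collect(Next, [Event | Acc]) end.

    %% Usage: {_BytesLeft, _Rest, Events} = parse_multipart_request(Req, fun collect/1).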
+ +find_in_binary(B, Data) when size(B) > 0 -> + case size(Data) - size(B) of + Last when Last < 0 -> + partial_find(B, Data, 0, size(Data)); + Last -> + find_in_binary(B, size(B), Data, 0, Last) + end. + +find_in_binary(B, BS, D, N, Last) when N =< Last-> + case D of + <<_:N/binary, B:BS/binary, _/binary>> -> + {exact, N}; + _ -> + find_in_binary(B, BS, D, 1 + N, Last) + end; +find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last -> + partial_find(B, D, N, BS - 1). + +partial_find(_B, _D, _N, 0) -> + not_found; +partial_find(B, D, N, K) -> + <<B1:K/binary, _/binary>> = B, + case D of + <<_Skip:N/binary, B1:K/binary>> -> + {partial, N, K}; + _ -> + partial_find(B, D, 1 + N, K - 1) + end. + +find_boundary(Prefix, Data) -> + case find_in_binary(Prefix, Data) of + {exact, Skip} -> + PrefixSkip = Skip + size(Prefix), + case Data of + <<_:PrefixSkip/binary, "\r\n", _/binary>> -> + {next_boundary, Skip, size(Prefix) + 2}; + <<_:PrefixSkip/binary, "--\r\n", _/binary>> -> + {end_boundary, Skip, size(Prefix) + 4}; + _ when size(Data) < PrefixSkip + 4 -> + %% Underflow + {maybe, Skip}; + _ -> + %% False positive + not_found + end; + {partial, Skip, Length} when (Skip + Length) =:= size(Data) -> + %% Underflow + {maybe, Skip}; + _ -> + not_found + end. + +with_socket_server(ServerFun, ClientFun) -> + {ok, Server} = mochiweb_socket_server:start([{ip, "127.0.0.1"}, + {port, 0}, + {loop, ServerFun}]), + Port = mochiweb_socket_server:get(Server, port), + {ok, Client} = gen_tcp:connect("127.0.0.1", Port, + [binary, {active, false}]), + Res = (catch ClientFun(Client)), + mochiweb_socket_server:stop(Server), + Res. + +fake_request(Socket, ContentType, Length) -> + mochiweb_request:new(Socket, + 'POST', + "/multipart", + {1,1}, + mochiweb_headers:make( + [{"content-type", ContentType}, + {"content-length", Length}])). + +test_callback(Expect, [Expect | Rest]) -> + case Rest of + [] -> + ok; + _ -> + fun (Next) -> test_callback(Next, Rest) end + end; +test_callback({body, Got}, [{body, Expect} | Rest]) -> + GotSize = size(Got), + <<Got:GotSize/binary, Expect1/binary>> = Expect, + fun (Next) -> test_callback(Next, [{body, Expect1} | Rest]) end. + +test_parse3() -> + ContentType = "multipart/form-data; boundary=---------------------------7386909285754635891697677882", + BinContent = <<"-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test_file.txt\"\r\nContent-Type: text/plain\r\n\r\nWoo multiline text file\n\nLa la la\r\n-----------------------------7386909285754635891697677882--\r\n">>, + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "hidden"}]}}]}, + {body, <<"multipart message">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "file"}, {"filename", "test_file.txt"}]}}, + {"content-type", {"text/plain", []}}]}, + {body, <<"Woo multiline text file\n\nLa la la">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = gen_tcp:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. 
+ + +test_parse2() -> + ContentType = "multipart/form-data; boundary=---------------------------6072231407570234361599764024", + BinContent = <<"-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\"\r\nContent-Type: application/octet-stream\r\n\r\n\r\n-----------------------------6072231407570234361599764024--\r\n">>, + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "hidden"}]}}]}, + {body, <<"multipart message">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "file"}, {"filename", ""}]}}, + {"content-type", {"application/octet-stream", []}}]}, + {body, <<>>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = gen_tcp:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + +test_parse_form() -> + ContentType = "multipart/form-data; boundary=AaB03x", + "AaB03x" = get_boundary(ContentType), + Content = mochiweb_util:join( + ["--AaB03x", + "Content-Disposition: form-data; name=\"submit-name\"", + "", + "Larry", + "--AaB03x", + "Content-Disposition: form-data; name=\"files\";" + ++ "filename=\"file1.txt\"", + "Content-Type: text/plain", + "", + "... contents of file1.txt ...", + "--AaB03x--", + ""], "\r\n"), + BinContent = iolist_to_binary(Content), + ServerFun = fun (Socket) -> + ok = gen_tcp:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_form(Req), + [{"submit-name", "Larry"}, + {"files", {"file1.txt", {"text/plain",[]}, + <<"... contents of file1.txt ...">>} + }] = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + +test_parse() -> + ContentType = "multipart/form-data; boundary=AaB03x", + "AaB03x" = get_boundary(ContentType), + Content = mochiweb_util:join( + ["--AaB03x", + "Content-Disposition: form-data; name=\"submit-name\"", + "", + "Larry", + "--AaB03x", + "Content-Disposition: form-data; name=\"files\";" + ++ "filename=\"file1.txt\"", + "Content-Type: text/plain", + "", + "... contents of file1.txt ...", + "--AaB03x--", + ""], "\r\n"), + BinContent = iolist_to_binary(Content), + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "submit-name"}]}}]}, + {body, <<"Larry">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}}, + {"content-type", {"text/plain", []}}]}, + {body, <<"... contents of file1.txt ...">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = gen_tcp:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. 
+ +test_find_boundary() -> + B = <<"\r\n--X">>, + {next_boundary, 0, 7} = find_boundary(B, <<"\r\n--X\r\nRest">>), + {next_boundary, 1, 7} = find_boundary(B, <<"!\r\n--X\r\nRest">>), + {end_boundary, 0, 9} = find_boundary(B, <<"\r\n--X--\r\nRest">>), + {end_boundary, 1, 9} = find_boundary(B, <<"!\r\n--X--\r\nRest">>), + not_found = find_boundary(B, <<"--X\r\nRest">>), + {maybe, 0} = find_boundary(B, <<"\r\n--X\r">>), + {maybe, 1} = find_boundary(B, <<"!\r\n--X\r">>), + P = <<"\r\n-----------------------------16037454351082272548568224146">>, + B0 = <<55,212,131,77,206,23,216,198,35,87,252,118,252,8,25,211,132,229, + 182,42,29,188,62,175,247,243,4,4,0,59, 13,10,45,45,45,45,45,45,45, + 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45, + 49,54,48,51,55,52,53,52,51,53,49>>, + {maybe, 30} = find_boundary(P, B0), + ok. + +test_find_in_binary() -> + {exact, 0} = find_in_binary(<<"foo">>, <<"foobarbaz">>), + {exact, 1} = find_in_binary(<<"oo">>, <<"foobarbaz">>), + {exact, 8} = find_in_binary(<<"z">>, <<"foobarbaz">>), + not_found = find_in_binary(<<"q">>, <<"foobarbaz">>), + {partial, 7, 2} = find_in_binary(<<"azul">>, <<"foobarbaz">>), + {exact, 0} = find_in_binary(<<"foobarbaz">>, <<"foobarbaz">>), + {partial, 0, 3} = find_in_binary(<<"foobar">>, <<"foo">>), + {partial, 1, 3} = find_in_binary(<<"foobar">>, <<"afoo">>), + ok. + +test_flash_parse() -> + ContentType = "multipart/form-data; boundary=----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5", + "----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5" = get_boundary(ContentType), + BinContent = <<"------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Filename\"\r\n\r\nhello.txt\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"success_action_status\"\r\n\r\n201\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\nContent-Type: application/octet-stream\r\n\r\nhello\n\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Upload\"\r\n\r\nSubmit Query\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5--">>, + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "Filename"}]}}]}, + {body, <<"hello.txt">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "success_action_status"}]}}]}, + {body, <<"201">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "file"}, {"filename", "hello.txt"}]}}, + {"content-type", {"application/octet-stream", []}}]}, + {body, <<"hello\n">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "Upload"}]}}]}, + {body, <<"Submit Query">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = gen_tcp:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. 
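+
+%% The {maybe, Skip} results exercised above are what make the parser safe on
+%% partial reads: a caller that sees {maybe, Skip} keeps everything from
+%% offset Skip onwards and retries once more data has arrived. For example,
+%% with the prefix <<"\r\n--X">>:
+%%
+%%   find_boundary(<<"\r\n--X">>, <<"abc\r\n--">>)          -> {maybe, 3}
+%%   %% keep the tail from offset 3 and retry with more data:
+%%   find_boundary(<<"\r\n--X">>, <<"abc\r\n--X\r\nrest">>) -> {next_boundary, 3, 7}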
+ +test_flash_parse2() -> + ContentType = "multipart/form-data; boundary=----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5", + "----------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5" = get_boundary(ContentType), + Chunk = iolist_to_binary(string:copies("%", 4096)), + BinContent = <<"------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Filename\"\r\n\r\nhello.txt\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"success_action_status\"\r\n\r\n201\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\nContent-Type: application/octet-stream\r\n\r\n", Chunk/binary, "\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5\r\nContent-Disposition: form-data; name=\"Upload\"\r\n\r\nSubmit Query\r\n------------ei4GI3GI3Ij5Ef1ae0KM7Ij5ei4Ij5--">>, + Expect = [{headers, + [{"content-disposition", + {"form-data", [{"name", "Filename"}]}}]}, + {body, <<"hello.txt">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "success_action_status"}]}}]}, + {body, <<"201">>}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "file"}, {"filename", "hello.txt"}]}}, + {"content-type", {"application/octet-stream", []}}]}, + {body, Chunk}, + body_end, + {headers, + [{"content-disposition", + {"form-data", [{"name", "Upload"}]}}]}, + {body, <<"Submit Query">>}, + body_end, + eof], + TestCallback = fun (Next) -> test_callback(Next, Expect) end, + ServerFun = fun (Socket) -> + ok = gen_tcp:send(Socket, BinContent), + exit(normal) + end, + ClientFun = fun (Socket) -> + Req = fake_request(Socket, ContentType, + byte_size(BinContent)), + Res = parse_multipart_request(Req, TestCallback), + {0, <<>>, ok} = Res, + ok + end, + ok = with_socket_server(ServerFun, ClientFun), + ok. + +test() -> + test_find_in_binary(), + test_find_boundary(), + test_parse(), + test_parse2(), + test_parse3(), + test_parse_form(), + test_flash_parse(), + test_flash_parse2(), + ok. diff --git a/apps/mochiweb/src/mochiweb_request.erl b/apps/mochiweb/src/mochiweb_request.erl new file mode 100644 index 00000000..5d7af26b --- /dev/null +++ b/apps/mochiweb/src/mochiweb_request.erl @@ -0,0 +1,867 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc MochiWeb HTTP Request abstraction. + +-module(mochiweb_request, [Socket, Method, RawPath, Version, Headers]). +-author('bob@mochimedia.com'). + +-include_lib("kernel/include/file.hrl"). + +-define(QUIP, "Any of you quaids got a smint?"). +-define(READ_SIZE, 8192). + +-export([get_header_value/1, get_primary_header_value/1, get/1, dump/0]). +-export([send/1, recv/1, recv/2, recv_body/0, recv_body/1, stream_body/3]). +-export([start_response/1, start_response_length/1, start_raw_response/1]). +-export([respond/1, ok/1]). +-export([not_found/0, not_found/1]). +-export([parse_post/0, parse_qs/0]). +-export([should_close/0, cleanup/0]). +-export([parse_cookie/0, get_cookie_value/1]). +-export([serve_file/2, serve_file/3]). +-export([accepted_encodings/1]). +-export([test/0]). + +-define(SAVE_QS, mochiweb_request_qs). +-define(SAVE_PATH, mochiweb_request_path). +-define(SAVE_RECV, mochiweb_request_recv). +-define(SAVE_BODY, mochiweb_request_body). +-define(SAVE_BODY_LENGTH, mochiweb_request_body_length). +-define(SAVE_POST, mochiweb_request_post). +-define(SAVE_COOKIE, mochiweb_request_cookie). +-define(SAVE_FORCE_CLOSE, mochiweb_request_force_close). + +%% @type iolist() = [iolist() | binary() | char()]. 
+%% @type iodata() = binary() | iolist(). +%% @type key() = atom() | string() | binary() +%% @type value() = atom() | string() | binary() | integer() +%% @type headers(). A mochiweb_headers structure. +%% @type response(). A mochiweb_response parameterized module instance. +%% @type ioheaders() = headers() | [{key(), value()}]. + +% 5 minute default idle timeout +-define(IDLE_TIMEOUT, 300000). + +% Maximum recv_body() length of 1MB +-define(MAX_RECV_BODY, (1024*1024)). + +%% @spec get_header_value(K) -> undefined | Value +%% @doc Get the value of a given request header. +get_header_value(K) -> + mochiweb_headers:get_value(K, Headers). + +get_primary_header_value(K) -> + mochiweb_headers:get_primary_value(K, Headers). + +%% @type field() = socket | method | raw_path | version | headers | peer | path | body_length | range + +%% @spec get(field()) -> term() +%% @doc Return the internal representation of the given field. +get(socket) -> + Socket; +get(method) -> + Method; +get(raw_path) -> + RawPath; +get(version) -> + Version; +get(headers) -> + Headers; +get(peer) -> + case inet:peername(Socket) of + {ok, {Addr={10, _, _, _}, _Port}} -> + case get_header_value("x-forwarded-for") of + undefined -> + inet_parse:ntoa(Addr); + Hosts -> + string:strip(lists:last(string:tokens(Hosts, ","))) + end; + {ok, {{127, 0, 0, 1}, _Port}} -> + case get_header_value("x-forwarded-for") of + undefined -> + "127.0.0.1"; + Hosts -> + string:strip(lists:last(string:tokens(Hosts, ","))) + end; + {ok, {Addr, _Port}} -> + inet_parse:ntoa(Addr) + end; +get(path) -> + case erlang:get(?SAVE_PATH) of + undefined -> + {Path0, _, _} = mochiweb_util:urlsplit_path(RawPath), + Path = mochiweb_util:unquote(Path0), + put(?SAVE_PATH, Path), + Path; + Cached -> + Cached + end; +get(body_length) -> + erlang:get(?SAVE_BODY_LENGTH); +get(range) -> + case get_header_value(range) of + undefined -> + undefined; + RawRange -> + parse_range_request(RawRange) + end. + +%% @spec dump() -> {mochiweb_request, [{atom(), term()}]} +%% @doc Dump the internal representation to a "human readable" set of terms +%% for debugging/inspection purposes. +dump() -> + {?MODULE, [{method, Method}, + {version, Version}, + {raw_path, RawPath}, + {headers, mochiweb_headers:to_list(Headers)}]}. + +%% @spec send(iodata()) -> ok +%% @doc Send data over the socket. +send(Data) -> + case gen_tcp:send(Socket, Data) of + ok -> + ok; + _ -> + exit(normal) + end. + +%% @spec recv(integer()) -> binary() +%% @doc Receive Length bytes from the client as a binary, with the default +%% idle timeout. +recv(Length) -> + recv(Length, ?IDLE_TIMEOUT). + +%% @spec recv(integer(), integer()) -> binary() +%% @doc Receive Length bytes from the client as a binary, with the given +%% Timeout in msec. +recv(Length, Timeout) -> + case gen_tcp:recv(Socket, Length, Timeout) of + {ok, Data} -> + put(?SAVE_RECV, true), + Data; + _ -> + exit(normal) + end. + +%% @spec body_length() -> undefined | chunked | unknown_transfer_encoding | integer() +%% @doc Infer body length from transfer-encoding and content-length headers. +body_length() -> + case get_header_value("transfer-encoding") of + undefined -> + case get_header_value("content-length") of + undefined -> + undefined; + Length -> + list_to_integer(Length) + end; + "chunked" -> + chunked; + Unknown -> + {unknown_transfer_encoding, Unknown} + end. + + +%% @spec recv_body() -> binary() +%% @doc Receive the body of the HTTP request (defined by Content-Length). +%% Will only receive up to the default max-body length of 1MB. 
+recv_body() -> + recv_body(?MAX_RECV_BODY). + +%% @spec recv_body(integer()) -> binary() +%% @doc Receive the body of the HTTP request (defined by Content-Length). +%% Will receive up to MaxBody bytes. +recv_body(MaxBody) -> + % we could use a sane constant for max chunk size + Body = stream_body(?MAX_RECV_BODY, fun + ({0, _ChunkedFooter}, {_LengthAcc, BinAcc}) -> + iolist_to_binary(lists:reverse(BinAcc)); + ({Length, Bin}, {LengthAcc, BinAcc}) -> + NewLength = Length + LengthAcc, + if NewLength > MaxBody -> + exit({body_too_large, chunked}); + true -> + {NewLength, [Bin | BinAcc]} + end + end, {0, []}, MaxBody), + put(?SAVE_BODY, Body), + Body. + +stream_body(MaxChunkSize, ChunkFun, FunState) -> + stream_body(MaxChunkSize, ChunkFun, FunState, undefined). + +stream_body(MaxChunkSize, ChunkFun, FunState, MaxBodyLength) -> + Expect = case get_header_value("expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end, + case body_length() of + undefined -> + undefined; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + % In this case the MaxBody is actually used to + % determine the maximum allowed size of a single + % chunk. + stream_chunked_body(MaxChunkSize, ChunkFun, FunState); + 0 -> + <<>>; + Length when is_integer(Length) -> + case MaxBodyLength of + MaxBodyLength when is_integer(MaxBodyLength), MaxBodyLength < Length -> + exit({body_too_large, content_length}); + _ -> + stream_unchunked_body(Length, ChunkFun, FunState) + end; + Length -> + exit({length_not_integer, Length}) + end. + + +%% @spec start_response({integer(), ioheaders()}) -> response() +%% @doc Start the HTTP response by sending the Code HTTP response and +%% ResponseHeaders. The server will set header defaults such as Server +%% and Date if not present in ResponseHeaders. +start_response({Code, ResponseHeaders}) -> + HResponse = mochiweb_headers:make(ResponseHeaders), + HResponse1 = mochiweb_headers:default_from_list(server_headers(), + HResponse), + start_raw_response({Code, HResponse1}). + +%% @spec start_raw_response({integer(), headers()}) -> response() +%% @doc Start the HTTP response by sending the Code HTTP response and +%% ResponseHeaders. +start_raw_response({Code, ResponseHeaders}) -> + F = fun ({K, V}, Acc) -> + [make_io(K), <<": ">>, V, <<"\r\n">> | Acc] + end, + End = lists:foldl(F, [<<"\r\n">>], + mochiweb_headers:to_list(ResponseHeaders)), + send([make_version(Version), make_code(Code), <<"\r\n">> | End]), + mochiweb:new_response({THIS, Code, ResponseHeaders}). + + +%% @spec start_response_length({integer(), ioheaders(), integer()}) -> response() +%% @doc Start the HTTP response by sending the Code HTTP response and +%% ResponseHeaders including a Content-Length of Length. The server +%% will set header defaults such as Server +%% and Date if not present in ResponseHeaders. +start_response_length({Code, ResponseHeaders, Length}) -> + HResponse = mochiweb_headers:make(ResponseHeaders), + HResponse1 = mochiweb_headers:enter("Content-Length", Length, HResponse), + start_response({Code, HResponse1}). + +%% @spec respond({integer(), ioheaders(), iodata() | chunked | {file, IoDevice}}) -> response() +%% @doc Start the HTTP response with start_response, and send Body to the +%% client (if the get(method) /= 'HEAD'). 
The Content-Length header +%% will be set by the Body length, and the server will insert header +%% defaults. +respond({Code, ResponseHeaders, {file, IoDevice}}) -> + Length = iodevice_size(IoDevice), + Response = start_response_length({Code, ResponseHeaders, Length}), + case Method of + 'HEAD' -> + ok; + _ -> + iodevice_stream(IoDevice) + end, + Response; +respond({Code, ResponseHeaders, chunked}) -> + HResponse = mochiweb_headers:make(ResponseHeaders), + HResponse1 = case Method of + 'HEAD' -> + %% This is what Google does, http://www.google.com/ + %% is chunked but HEAD gets Content-Length: 0. + %% The RFC is ambiguous so emulating Google is smart. + mochiweb_headers:enter("Content-Length", "0", + HResponse); + _ when Version >= {1, 1} -> + %% Only use chunked encoding for HTTP/1.1 + mochiweb_headers:enter("Transfer-Encoding", "chunked", + HResponse); + _ -> + %% For pre-1.1 clients we send the data as-is + %% without a Content-Length header and without + %% chunk delimiters. Since the end of the document + %% is now ambiguous we must force a close. + put(?SAVE_FORCE_CLOSE, true), + HResponse + end, + start_response({Code, HResponse1}); +respond({Code, ResponseHeaders, Body}) -> + Response = start_response_length({Code, ResponseHeaders, iolist_size(Body)}), + case Method of + 'HEAD' -> + ok; + _ -> + send(Body) + end, + Response. + +%% @spec not_found() -> response() +%% @doc Alias for <code>not_found([])</code>. +not_found() -> + not_found([]). + +%% @spec not_found(ExtraHeaders) -> response() +%% @doc Alias for <code>respond({404, [{"Content-Type", "text/plain"} +%% | ExtraHeaders], <<"Not found.">>})</code>. +not_found(ExtraHeaders) -> + respond({404, [{"Content-Type", "text/plain"} | ExtraHeaders], + <<"Not found.">>}). + +%% @spec ok({value(), iodata()} | {value(), ioheaders(), iodata() | {file, IoDevice}}) -> +%% response() +%% @doc respond({200, [{"Content-Type", ContentType} | Headers], Body}). +ok({ContentType, Body}) -> + ok({ContentType, [], Body}); +ok({ContentType, ResponseHeaders, Body}) -> + HResponse = mochiweb_headers:make(ResponseHeaders), + case THIS:get(range) of + X when X =:= undefined; X =:= fail -> + HResponse1 = mochiweb_headers:enter("Content-Type", ContentType, HResponse), + respond({200, HResponse1, Body}); + Ranges -> + {PartList, Size} = range_parts(Body, Ranges), + case PartList of + [] -> %% no valid ranges + HResponse1 = mochiweb_headers:enter("Content-Type", + ContentType, + HResponse), + %% could be 416, for now we'll just return 200 + respond({200, HResponse1, Body}); + PartList -> + {RangeHeaders, RangeBody} = + parts_to_body(PartList, ContentType, Size), + HResponse1 = mochiweb_headers:enter_from_list( + [{"Accept-Ranges", "bytes"} | + RangeHeaders], + HResponse), + respond({206, HResponse1, RangeBody}) + end + end. + +%% @spec should_close() -> bool() +%% @doc Return true if the connection must be closed. If false, using +%% Keep-Alive should be safe. 
+should_close() -> + ForceClose = erlang:get(mochiweb_request_force_close) =/= undefined, + DidNotRecv = erlang:get(mochiweb_request_recv) =:= undefined, + ForceClose orelse Version < {1, 0} + %% Connection: close + orelse get_header_value("connection") =:= "close" + %% HTTP 1.0 requires Connection: Keep-Alive + orelse (Version =:= {1, 0} + andalso get_header_value("connection") =/= "Keep-Alive") + %% unread data left on the socket, can't safely continue + orelse (DidNotRecv + andalso get_header_value("content-length") =/= undefined + andalso list_to_integer(get_header_value("content-length")) > 0) + orelse (DidNotRecv + andalso get_header_value("transfer-encoding") =:= "chunked"). + +%% @spec cleanup() -> ok +%% @doc Clean up any junk in the process dictionary, required before continuing +%% a Keep-Alive request. +cleanup() -> + [erase(K) || K <- [?SAVE_QS, + ?SAVE_PATH, + ?SAVE_RECV, + ?SAVE_BODY, + ?SAVE_POST, + ?SAVE_COOKIE, + ?SAVE_FORCE_CLOSE]], + ok. + +%% @spec parse_qs() -> [{Key::string(), Value::string()}] +%% @doc Parse the query string of the URL. +parse_qs() -> + case erlang:get(?SAVE_QS) of + undefined -> + {_, QueryString, _} = mochiweb_util:urlsplit_path(RawPath), + Parsed = mochiweb_util:parse_qs(QueryString), + put(?SAVE_QS, Parsed), + Parsed; + Cached -> + Cached + end. + +%% @spec get_cookie_value(Key::string) -> string() | undefined +%% @doc Get the value of the given cookie. +get_cookie_value(Key) -> + proplists:get_value(Key, parse_cookie()). + +%% @spec parse_cookie() -> [{Key::string(), Value::string()}] +%% @doc Parse the cookie header. +parse_cookie() -> + case erlang:get(?SAVE_COOKIE) of + undefined -> + Cookies = case get_header_value("cookie") of + undefined -> + []; + Value -> + mochiweb_cookies:parse_cookie(Value) + end, + put(?SAVE_COOKIE, Cookies), + Cookies; + Cached -> + Cached + end. + +%% @spec parse_post() -> [{Key::string(), Value::string()}] +%% @doc Parse an application/x-www-form-urlencoded form POST. This +%% has the side-effect of calling recv_body(). +parse_post() -> + case erlang:get(?SAVE_POST) of + undefined -> + Parsed = case recv_body() of + undefined -> + []; + Binary -> + case get_primary_header_value("content-type") of + "application/x-www-form-urlencoded" ++ _ -> + mochiweb_util:parse_qs(Binary); + _ -> + [] + end + end, + put(?SAVE_POST, Parsed), + Parsed; + Cached -> + Cached + end. + +%% @spec stream_chunked_body(integer(), fun(), term()) -> term() +%% @doc The function is called for each chunk. +%% Used internally by read_chunked_body. +stream_chunked_body(MaxChunkSize, Fun, FunState) -> + case read_chunk_length() of + 0 -> + Fun({0, read_chunk(0)}, FunState); + Length when Length > MaxChunkSize -> + NewState = read_sub_chunks(Length, MaxChunkSize, Fun, FunState), + stream_chunked_body(MaxChunkSize, Fun, NewState); + Length -> + NewState = Fun({Length, read_chunk(Length)}, FunState), + stream_chunked_body(MaxChunkSize, Fun, NewState) + end. + +stream_unchunked_body(0, Fun, FunState) -> + Fun({0, <<>>}, FunState); +stream_unchunked_body(Length, Fun, FunState) when Length > 0 -> + Bin = recv(0), + BinSize = byte_size(Bin), + if BinSize > Length -> + <<OurBody:Length/binary, Extra/binary>> = Bin, + gen_tcp:unrecv(Socket, Extra), + NewState = Fun({Length, OurBody}, FunState), + stream_unchunked_body(0, Fun, NewState); + true -> + NewState = Fun({BinSize, Bin}, FunState), + stream_unchunked_body(Length - BinSize, Fun, NewState) + end. + + +%% @spec read_chunk_length() -> integer() +%% @doc Read the length of the next HTTP chunk. 
+read_chunk_length() -> + inet:setopts(Socket, [{packet, line}]), + case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + {ok, Header} -> + inet:setopts(Socket, [{packet, raw}]), + Splitter = fun (C) -> + C =/= $\r andalso C =/= $\n andalso C =/= $ + end, + {Hex, _Rest} = lists:splitwith(Splitter, binary_to_list(Header)), + mochihex:to_int(Hex); + _ -> + exit(normal) + end. + +%% @spec read_chunk(integer()) -> Chunk::binary() | [Footer::binary()] +%% @doc Read in a HTTP chunk of the given length. If Length is 0, then read the +%% HTTP footers (as a list of binaries, since they're nominal). +read_chunk(0) -> + inet:setopts(Socket, [{packet, line}]), + F = fun (F1, Acc) -> + case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of + {ok, <<"\r\n">>} -> + Acc; + {ok, Footer} -> + F1(F1, [Footer | Acc]); + _ -> + exit(normal) + end + end, + Footers = F(F, []), + inet:setopts(Socket, [{packet, raw}]), + Footers; +read_chunk(Length) -> + case gen_tcp:recv(Socket, 2 + Length, ?IDLE_TIMEOUT) of + {ok, <<Chunk:Length/binary, "\r\n">>} -> + Chunk; + _ -> + exit(normal) + end. + +read_sub_chunks(Length, MaxChunkSize, Fun, FunState) when Length > MaxChunkSize -> + Bin = recv(MaxChunkSize), + NewState = Fun({size(Bin), Bin}, FunState), + read_sub_chunks(Length - MaxChunkSize, MaxChunkSize, Fun, NewState); + +read_sub_chunks(Length, _MaxChunkSize, Fun, FunState) -> + Fun({Length, read_chunk(Length)}, FunState). + +%% @spec serve_file(Path, DocRoot) -> Response +%% @doc Serve a file relative to DocRoot. +serve_file(Path, DocRoot) -> + serve_file(Path, DocRoot, []). + +%% @spec serve_file(Path, DocRoot, ExtraHeaders) -> Response +%% @doc Serve a file relative to DocRoot. +serve_file(Path, DocRoot, ExtraHeaders) -> + case mochiweb_util:safe_relative_path(Path) of + undefined -> + not_found(ExtraHeaders); + RelPath -> + FullPath = filename:join([DocRoot, RelPath]), + case filelib:is_dir(FullPath) of + true -> + maybe_redirect(RelPath, FullPath, ExtraHeaders); + false -> + maybe_serve_file(FullPath, ExtraHeaders) + end + end. + +%% Internal API + +%% This has the same effect as the DirectoryIndex directive in httpd +directory_index(FullPath) -> + filename:join([FullPath, "index.html"]). + +maybe_redirect([], FullPath, ExtraHeaders) -> + maybe_serve_file(directory_index(FullPath), ExtraHeaders); + +maybe_redirect(RelPath, FullPath, ExtraHeaders) -> + case string:right(RelPath, 1) of + "/" -> + maybe_serve_file(directory_index(FullPath), ExtraHeaders); + _ -> + Host = mochiweb_headers:get_value("host", Headers), + Location = "http://" ++ Host ++ "/" ++ RelPath ++ "/", + LocationBin = list_to_binary(Location), + MoreHeaders = [{"Location", Location}, + {"Content-Type", "text/html"} | ExtraHeaders], + Top = <<"<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">" + "<html><head>" + "<title>301 Moved Permanently</title>" + "</head><body>" + "<h1>Moved Permanently</h1>" + "<p>The document has moved <a href=\"">>, + Bottom = <<">here</a>.</p></body></html>\n">>, + Body = <<Top/binary, LocationBin/binary, Bottom/binary>>, + respond({301, MoreHeaders, Body}) + end. 
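+
+%% serve_file/2,3 above (guarded by mochiweb_util:safe_relative_path/1) is
+%% enough for a small static-file handler. A hedged sketch, where handle/2
+%% and DocRoot are assumptions about the calling application rather than part
+%% of this module:
+%%
+%%   handle(Req, DocRoot) ->
+%%       "/" ++ Path = Req:get(path),
+%%       case Req:get(method) of
+%%           M when M =:= 'GET'; M =:= 'HEAD' ->
+%%               Req:serve_file(Path, DocRoot,
+%%                              [{"Cache-Control", "max-age=3600"}]);
+%%           _ ->
+%%               Req:respond({405, [{"Allow", "GET, HEAD"}], <<>>})
+%%       end.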
+ +maybe_serve_file(File, ExtraHeaders) -> + case file:read_file_info(File) of + {ok, FileInfo} -> + LastModified = httpd_util:rfc1123_date(FileInfo#file_info.mtime), + case get_header_value("if-modified-since") of + LastModified -> + respond({304, ExtraHeaders, ""}); + _ -> + case file:open(File, [raw, binary]) of + {ok, IoDevice} -> + ContentType = mochiweb_util:guess_mime(File), + Res = ok({ContentType, + [{"last-modified", LastModified} + | ExtraHeaders], + {file, IoDevice}}), + file:close(IoDevice), + Res; + _ -> + not_found(ExtraHeaders) + end + end; + {error, _} -> + not_found(ExtraHeaders) + end. + +server_headers() -> + [{"Server", "MochiWeb/1.0 (" ++ ?QUIP ++ ")"}, + {"Date", httpd_util:rfc1123_date()}]. + +make_io(Atom) when is_atom(Atom) -> + atom_to_list(Atom); +make_io(Integer) when is_integer(Integer) -> + integer_to_list(Integer); +make_io(Io) when is_list(Io); is_binary(Io) -> + Io. + +make_code(X) when is_integer(X) -> + [integer_to_list(X), [" " | httpd_util:reason_phrase(X)]]; +make_code(Io) when is_list(Io); is_binary(Io) -> + Io. + +make_version({1, 0}) -> + <<"HTTP/1.0 ">>; +make_version(_) -> + <<"HTTP/1.1 ">>. + +iodevice_stream(IoDevice) -> + case file:read(IoDevice, ?READ_SIZE) of + eof -> + ok; + {ok, Data} -> + ok = send(Data), + iodevice_stream(IoDevice) + end. + + +parts_to_body([{Start, End, Body}], ContentType, Size) -> + %% return body for a range reponse with a single body + HeaderList = [{"Content-Type", ContentType}, + {"Content-Range", + ["bytes ", + make_io(Start), "-", make_io(End), + "/", make_io(Size)]}], + {HeaderList, Body}; +parts_to_body(BodyList, ContentType, Size) when is_list(BodyList) -> + %% return + %% header Content-Type: multipart/byteranges; boundary=441934886133bdee4 + %% and multipart body + Boundary = mochihex:to_hex(crypto:rand_bytes(8)), + HeaderList = [{"Content-Type", + ["multipart/byteranges; ", + "boundary=", Boundary]}], + MultiPartBody = multipart_body(BodyList, ContentType, Boundary, Size), + + {HeaderList, MultiPartBody}. + +multipart_body([], _ContentType, Boundary, _Size) -> + ["--", Boundary, "--\r\n"]; +multipart_body([{Start, End, Body} | BodyList], ContentType, Boundary, Size) -> + ["--", Boundary, "\r\n", + "Content-Type: ", ContentType, "\r\n", + "Content-Range: ", + "bytes ", make_io(Start), "-", make_io(End), + "/", make_io(Size), "\r\n\r\n", + Body, "\r\n" + | multipart_body(BodyList, ContentType, Boundary, Size)]. + +iodevice_size(IoDevice) -> + {ok, Size} = file:position(IoDevice, eof), + {ok, 0} = file:position(IoDevice, bof), + Size. + +range_parts({file, IoDevice}, Ranges) -> + Size = iodevice_size(IoDevice), + F = fun (Spec, Acc) -> + case range_skip_length(Spec, Size) of + invalid_range -> + Acc; + V -> + [V | Acc] + end + end, + LocNums = lists:foldr(F, [], Ranges), + {ok, Data} = file:pread(IoDevice, LocNums), + Bodies = lists:zipwith(fun ({Skip, Length}, PartialBody) -> + {Skip, Skip + Length - 1, PartialBody} + end, + LocNums, Data), + {Bodies, Size}; +range_parts(Body0, Ranges) -> + Body = iolist_to_binary(Body0), + Size = size(Body), + F = fun(Spec, Acc) -> + case range_skip_length(Spec, Size) of + invalid_range -> + Acc; + {Skip, Length} -> + <<_:Skip/binary, PartialBody:Length/binary, _/binary>> = Body, + [{Skip, Skip + Length - 1, PartialBody} | Acc] + end + end, + {lists:foldr(F, [], Ranges), Size}. 
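+
+%% range_parts/2 above and range_skip_length/2 below combine as follows for a
+%% ten byte in-memory body and a "last three bytes" suffix range:
+%%
+%%   range_skip_length({none, 3}, 10)           -> {7, 3}
+%%   range_parts(<<"0123456789">>, [{none, 3}]) -> {[{7, 9, <<"789">>}], 10}
+%%
+%% parts_to_body/3 then labels that single part with
+%% "Content-Range: bytes 7-9/10" when the 206 response is built.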
+ +range_skip_length(Spec, Size) -> + case Spec of + {none, R} when R =< Size, R >= 0 -> + {Size - R, R}; + {none, _OutOfRange} -> + {0, Size}; + {R, none} when R >= 0, R < Size -> + {R, Size - R}; + {_OutOfRange, none} -> + invalid_range; + {Start, End} when 0 =< Start, Start =< End, End < Size -> + {Start, End - Start + 1}; + {_OutOfRange, _End} -> + invalid_range + end. + +parse_range_request(RawRange) when is_list(RawRange) -> + try + "bytes=" ++ RangeString = RawRange, + Ranges = string:tokens(RangeString, ","), + lists:map(fun ("-" ++ V) -> + {none, list_to_integer(V)}; + (R) -> + case string:tokens(R, "-") of + [S1, S2] -> + {list_to_integer(S1), list_to_integer(S2)}; + [S] -> + {list_to_integer(S), none} + end + end, + Ranges) + catch + _:_ -> + fail + end. + +%% @spec accepted_encodings([encoding()]) -> [encoding()] | error() +%% @type encoding() -> string() +%% @type error() -> bad_accept_encoding_value +%% +%% @doc Returns a list of encodings accepted by a request. Encodings that are +%% not supported by the server will not be included in the return list. +%% This list is computed from the "Accept-Encoding" header and +%% its elements are ordered, descendingly, according to their Q values. +%% +%% Section 14.3 of the RFC 2616 (HTTP 1.1) describes the "Accept-Encoding" +%% header and the process of determining which server supported encodings +%% can be used for encoding the body for the request's response. +%% +%% Examples +%% +%% 1) For a missing "Accept-Encoding" header: +%% accepted_encodings(["gzip", "identity"]) -> ["identity"] +%% +%% 2) For an "Accept-Encoding" header with value "gzip, deflate": +%% accepted_encodings(["gzip", "identity"]) -> ["gzip", "identity"] +%% +%% 3) For an "Accept-Encoding" header with value "gzip;q=0.5, deflate": +%% accepted_encodings(["gzip", "deflate", "identity"]) -> +%% ["deflate", "gzip", "identity"] +%% +accepted_encodings(SupportedEncodings) -> + AcceptEncodingHeader = case get_header_value("Accept-Encoding") of + undefined -> + ""; + Value -> + Value + end, + case mochiweb_util:parse_qvalues(AcceptEncodingHeader) of + invalid_qvalue_string -> + bad_accept_encoding_value; + QList -> + mochiweb_util:pick_accepted_encodings( + QList, SupportedEncodings, "identity" + ) + end. + +test() -> + ok = test_range(), + ok. + +test_range() -> + %% valid, single ranges + io:format("Testing parse_range_request with valid single ranges~n"), + io:format("1"), + [{20, 30}] = parse_range_request("bytes=20-30"), + io:format("2"), + [{20, none}] = parse_range_request("bytes=20-"), + io:format("3"), + [{none, 20}] = parse_range_request("bytes=-20"), + io:format(".. ok ~n"), + + %% invalid, single ranges + io:format("Testing parse_range_request with invalid ranges~n"), + io:format("1"), + fail = parse_range_request(""), + io:format("2"), + fail = parse_range_request("garbage"), + io:format("3"), + fail = parse_range_request("bytes=-20-30"), + io:format(".. ok ~n"), + + %% valid, multiple range + io:format("Testing parse_range_request with valid multiple ranges~n"), + io:format("1"), + [{20, 30}, {50, 100}, {110, 200}] = + parse_range_request("bytes=20-30,50-100,110-200"), + io:format("2"), + [{20, none}, {50, 100}, {none, 200}] = + parse_range_request("bytes=20-,50-100,-200"), + io:format(".. ok~n"), + + %% no ranges + io:format("Testing out parse_range_request with no ranges~n"), + io:format("1"), + [] = parse_range_request("bytes="), + io:format(".. 
ok~n"), + + Body = <<"012345678901234567890123456789012345678901234567890123456789">>, + BodySize = byte_size(Body), %% 60 + BodySize = 60, + + %% these values assume BodySize =:= 60 + io:format("Testing out range_skip_length on valid ranges~n"), + io:format("1"), + {1,9} = range_skip_length({1,9}, BodySize), %% 1-9 + io:format("2"), + {10,10} = range_skip_length({10,19}, BodySize), %% 10-19 + io:format("3"), + {40, 20} = range_skip_length({none, 20}, BodySize), %% -20 + io:format("4"), + {30, 30} = range_skip_length({30, none}, BodySize), %% 30- + io:format(".. ok ~n"), + + %% valid edge cases for range_skip_length + io:format("Testing out range_skip_length on valid edge case ranges~n"), + io:format("1"), + {BodySize, 0} = range_skip_length({none, 0}, BodySize), + io:format("2"), + {0, BodySize} = range_skip_length({none, BodySize}, BodySize), + io:format("3"), + {0, BodySize} = range_skip_length({0, none}, BodySize), + BodySizeLess1 = BodySize - 1, + io:format("4"), + {BodySizeLess1, 1} = range_skip_length({BodySize - 1, none}, BodySize), + + %% out of range, return whole thing + io:format("5"), + {0, BodySize} = range_skip_length({none, BodySize + 1}, BodySize), + io:format("6"), + {0, BodySize} = range_skip_length({none, -1}, BodySize), + io:format(".. ok ~n"), + + %% invalid ranges + io:format("Testing out range_skip_length on invalid ranges~n"), + io:format("1"), + invalid_range = range_skip_length({-1, 30}, BodySize), + io:format("2"), + invalid_range = range_skip_length({0, BodySize + 1}, BodySize), + io:format("3"), + invalid_range = range_skip_length({-1, BodySize + 1}, BodySize), + io:format("4"), + invalid_range = range_skip_length({BodySize, 40}, BodySize), + io:format("5"), + invalid_range = range_skip_length({-1, none}, BodySize), + io:format("6"), + invalid_range = range_skip_length({BodySize, none}, BodySize), + io:format(".. ok ~n"), + ok. + diff --git a/apps/mochiweb/src/mochiweb_response.erl b/apps/mochiweb/src/mochiweb_response.erl new file mode 100644 index 00000000..6285c4c4 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_response.erl @@ -0,0 +1,56 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Response abstraction. + +-module(mochiweb_response, [Request, Code, Headers]). +-author('bob@mochimedia.com'). + +-define(QUIP, "Any of you quaids got a smint?"). + +-export([get_header_value/1, get/1, dump/0]). +-export([send/1, write_chunk/1]). + +%% @spec get_header_value(string() | atom() | binary()) -> string() | undefined +%% @doc Get the value of the given response header. +get_header_value(K) -> + mochiweb_headers:get_value(K, Headers). + +%% @spec get(request | code | headers) -> term() +%% @doc Return the internal representation of the given field. +get(request) -> + Request; +get(code) -> + Code; +get(headers) -> + Headers. + +%% @spec dump() -> {mochiweb_request, [{atom(), term()}]} +%% @doc Dump the internal representation to a "human readable" set of terms +%% for debugging/inspection purposes. +dump() -> + [{request, Request:dump()}, + {code, Code}, + {headers, mochiweb_headers:to_list(Headers)}]. + +%% @spec send(iodata()) -> ok +%% @doc Send data over the socket if the method is not HEAD. +send(Data) -> + case Request:get(method) of + 'HEAD' -> + ok; + _ -> + Request:send(Data) + end. + +%% @spec write_chunk(iodata()) -> ok +%% @doc Write a chunk of a HTTP chunked response. If Data is zero length, +%% then the chunked response will be finished. 
+write_chunk(Data) -> + case Request:get(version) of + Version when Version >= {1, 1} -> + Length = iolist_size(Data), + send([io_lib:format("~.16b\r\n", [Length]), Data, <<"\r\n">>]); + _ -> + send(Data) + end. diff --git a/apps/mochiweb/src/mochiweb_skel.erl b/apps/mochiweb/src/mochiweb_skel.erl new file mode 100644 index 00000000..36b48be5 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_skel.erl @@ -0,0 +1,73 @@ +-module(mochiweb_skel). +-export([skelcopy/2]). + +-include_lib("kernel/include/file.hrl"). + +%% External API + +skelcopy(DestDir, Name) -> + ok = ensuredir(DestDir), + LDst = case length(filename:dirname(DestDir)) of + 1 -> %% handle case when dirname returns "/" + 0; + N -> + N + 1 + end, + skelcopy(src(), DestDir, Name, LDst), + ok = file:make_symlink( + filename:join(filename:dirname(code:which(?MODULE)), ".."), + filename:join([DestDir, Name, "deps", "mochiweb-src"])). + + +%% Internal API + +src() -> + Dir = filename:dirname(code:which(?MODULE)), + filename:join(Dir, "../priv/skel"). + +skel() -> + "skel". + +skelcopy(Src, DestDir, Name, LDst) -> + Dest = re:replace(filename:basename(Src), skel(), Name, + [global, {return, list}]), + case file:read_file_info(Src) of + {ok, #file_info{type=directory, mode=Mode}} -> + Dir = DestDir ++ "/" ++ Dest, + EDst = lists:nthtail(LDst, Dir), + ok = ensuredir(Dir), + ok = file:write_file_info(Dir, #file_info{mode=Mode}), + {ok, Files} = file:list_dir(Src), + io:format("~s/~n", [EDst]), + lists:foreach(fun ("." ++ _) -> ok; + (F) -> + skelcopy(filename:join(Src, F), + Dir, + Name, + LDst) + end, + Files), + ok; + {ok, #file_info{type=regular, mode=Mode}} -> + OutFile = filename:join(DestDir, Dest), + {ok, B} = file:read_file(Src), + S = re:replace(binary_to_list(B), skel(), Name, + [{return, list}, global]), + ok = file:write_file(OutFile, list_to_binary(S)), + ok = file:write_file_info(OutFile, #file_info{mode=Mode}), + io:format(" ~s~n", [filename:basename(Src)]), + ok; + {ok, _} -> + io:format("ignored source file: ~p~n", [Src]), + ok + end. + +ensuredir(Dir) -> + case file:make_dir(Dir) of + ok -> + ok; + {error, eexist} -> + ok; + E -> + E + end. diff --git a/apps/mochiweb/src/mochiweb_socket_server.erl b/apps/mochiweb/src/mochiweb_socket_server.erl new file mode 100644 index 00000000..7aafe290 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_socket_server.erl @@ -0,0 +1,248 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc MochiWeb socket server. + +-module(mochiweb_socket_server). +-author('bob@mochimedia.com'). +-behaviour(gen_server). + +-export([start/1, stop/1]). +-export([init/1, handle_call/3, handle_cast/2, terminate/2, code_change/3, + handle_info/2]). +-export([get/2]). + +-export([acceptor_loop/1]). + +-record(mochiweb_socket_server, + {port, + loop, + name=undefined, + max=2048, + ip=any, + listen=null, + acceptor=null, + backlog=128}). + +start(State=#mochiweb_socket_server{}) -> + start_server(State); +start(Options) -> + start(parse_options(Options)). + +get(Name, Property) -> + gen_server:call(Name, {get, Property}). + +stop(Name) when is_atom(Name) -> + gen_server:cast(Name, stop); +stop(Pid) when is_pid(Pid) -> + gen_server:cast(Pid, stop); +stop({local, Name}) -> + stop(Name); +stop({global, Name}) -> + stop(Name); +stop(Options) -> + State = parse_options(Options), + stop(State#mochiweb_socket_server.name). + +%% Internal API + +parse_options(Options) -> + parse_options(Options, #mochiweb_socket_server{}). 
+ +parse_options([], State) -> + State; +parse_options([{name, L} | Rest], State) when is_list(L) -> + Name = {local, list_to_atom(L)}, + parse_options(Rest, State#mochiweb_socket_server{name=Name}); +parse_options([{name, A} | Rest], State) when is_atom(A) -> + Name = {local, A}, + parse_options(Rest, State#mochiweb_socket_server{name=Name}); +parse_options([{name, Name} | Rest], State) -> + parse_options(Rest, State#mochiweb_socket_server{name=Name}); +parse_options([{port, L} | Rest], State) when is_list(L) -> + Port = list_to_integer(L), + parse_options(Rest, State#mochiweb_socket_server{port=Port}); +parse_options([{port, Port} | Rest], State) -> + parse_options(Rest, State#mochiweb_socket_server{port=Port}); +parse_options([{ip, Ip} | Rest], State) -> + ParsedIp = case Ip of + any -> + any; + Ip when is_tuple(Ip) -> + Ip; + Ip when is_list(Ip) -> + {ok, IpTuple} = inet_parse:address(Ip), + IpTuple + end, + parse_options(Rest, State#mochiweb_socket_server{ip=ParsedIp}); +parse_options([{loop, Loop} | Rest], State) -> + parse_options(Rest, State#mochiweb_socket_server{loop=Loop}); +parse_options([{backlog, Backlog} | Rest], State) -> + parse_options(Rest, State#mochiweb_socket_server{backlog=Backlog}); +parse_options([{max, Max} | Rest], State) -> + MaxInt = case Max of + Max when is_list(Max) -> + list_to_integer(Max); + Max when is_integer(Max) -> + Max + end, + parse_options(Rest, State#mochiweb_socket_server{max=MaxInt}). + +start_server(State=#mochiweb_socket_server{name=Name}) -> + case Name of + undefined -> + gen_server:start_link(?MODULE, State, []); + _ -> + gen_server:start_link(Name, ?MODULE, State, []) + end. + +ipv6_supported() -> + case (catch inet:getaddr("localhost", inet6)) of + {ok, _Addr} -> + true; + {error, _} -> + false + end. + +init(State=#mochiweb_socket_server{ip=Ip, port=Port, backlog=Backlog}) -> + process_flag(trap_exit, true), + BaseOpts = [binary, + {reuseaddr, true}, + {packet, 0}, + {backlog, Backlog}, + {recbuf, 8192}, + {active, false}, + {nodelay, true}], + Opts = case Ip of + any -> + case ipv6_supported() of % IPv4, and IPv6 if supported + true -> [inet, inet6 | BaseOpts]; + _ -> BaseOpts + end; + {_, _, _, _} -> % IPv4 + [inet, {ip, Ip} | BaseOpts]; + {_, _, _, _, _, _, _, _} -> % IPv6 + [inet6, {ip, Ip} | BaseOpts] + end, + case gen_tcp_listen(Port, Opts, State) of + {stop, eacces} -> + case Port < 1024 of + true -> + case fdsrv:start() of + {ok, _} -> + case fdsrv:bind_socket(tcp, Port) of + {ok, Fd} -> + gen_tcp_listen(Port, [{fd, Fd} | Opts], State); + _ -> + {stop, fdsrv_bind_failed} + end; + _ -> + {stop, fdsrv_start_failed} + end; + false -> + {stop, eacces} + end; + Other -> + Other + end. + +gen_tcp_listen(Port, Opts, State) -> + case gen_tcp:listen(Port, Opts) of + {ok, Listen} -> + {ok, ListenPort} = inet:port(Listen), + {ok, new_acceptor(State#mochiweb_socket_server{listen=Listen, + port=ListenPort})}; + {error, Reason} -> + {stop, Reason} + end. + +new_acceptor(State=#mochiweb_socket_server{max=0}) -> + io:format("Not accepting new connections~n"), + State#mochiweb_socket_server{acceptor=null}; +new_acceptor(State=#mochiweb_socket_server{listen=Listen,loop=Loop}) -> + Pid = proc_lib:spawn_link(?MODULE, acceptor_loop, + [{self(), Listen, Loop}]), + State#mochiweb_socket_server{acceptor=Pid}. + +call_loop({M, F}, Socket) -> + M:F(Socket); +call_loop(Loop, Socket) -> + Loop(Socket). 
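+
+%% call_loop/2 above accepts the loop either as a fun or as an {M, F} pair.
+%% The multipart tests earlier in this change start a throwaway server this
+%% way; a condensed version of that usage (the loop fun here is illustrative):
+%%
+%%   {ok, Server} = mochiweb_socket_server:start(
+%%                      [{ip, "127.0.0.1"}, {port, 0},
+%%                       {loop, fun (Socket) -> gen_tcp:close(Socket) end}]),
+%%   Port = mochiweb_socket_server:get(Server, port),
+%%   mochiweb_socket_server:stop(Server).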
+ +acceptor_loop({Server, Listen, Loop}) -> + case catch gen_tcp:accept(Listen) of + {ok, Socket} -> + gen_server:cast(Server, {accepted, self()}), + call_loop(Loop, Socket); + {error, closed} -> + exit({error, closed}); + Other -> + error_logger:error_report( + [{application, mochiweb}, + "Accept failed error", + lists:flatten(io_lib:format("~p", [Other]))]), + exit({error, accept_failed}) + end. + + +do_get(port, #mochiweb_socket_server{port=Port}) -> + Port. + +handle_call({get, Property}, _From, State) -> + Res = do_get(Property, State), + {reply, Res, State}; +handle_call(_Message, _From, State) -> + Res = error, + {reply, Res, State}. + +handle_cast({accepted, Pid}, + State=#mochiweb_socket_server{acceptor=Pid, max=Max}) -> + % io:format("accepted ~p~n", [Pid]), + State1 = State#mochiweb_socket_server{max=Max - 1}, + {noreply, new_acceptor(State1)}; +handle_cast(stop, State) -> + {stop, normal, State}. + +terminate(_Reason, #mochiweb_socket_server{listen=Listen, port=Port}) -> + gen_tcp:close(Listen), + case Port < 1024 of + true -> + catch fdsrv:stop(), + ok; + false -> + ok + end. + +code_change(_OldVsn, State, _Extra) -> + State. + +handle_info({'EXIT', Pid, normal}, + State=#mochiweb_socket_server{acceptor=Pid}) -> + % io:format("normal acceptor down~n"), + {noreply, new_acceptor(State)}; +handle_info({'EXIT', Pid, Reason}, + State=#mochiweb_socket_server{acceptor=Pid}) -> + error_logger:error_report({?MODULE, ?LINE, + {acceptor_error, Reason}}), + timer:sleep(100), + {noreply, new_acceptor(State)}; +handle_info({'EXIT', _LoopPid, Reason}, + State=#mochiweb_socket_server{acceptor=Pid, max=Max}) -> + case Reason of + normal -> + ok; + _ -> + error_logger:error_report({?MODULE, ?LINE, + {child_error, Reason}}) + end, + State1 = State#mochiweb_socket_server{max=Max + 1}, + State2 = case Pid of + null -> + new_acceptor(State1); + _ -> + State1 + end, + {noreply, State2}; +handle_info(Info, State) -> + error_logger:info_report([{'INFO', Info}, {'State', State}]), + {noreply, State}. diff --git a/apps/mochiweb/src/mochiweb_sup.erl b/apps/mochiweb/src/mochiweb_sup.erl new file mode 100644 index 00000000..5cb525b5 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_sup.erl @@ -0,0 +1,34 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Supervisor for the mochiweb application. + +-module(mochiweb_sup). +-author('bob@mochimedia.com'). + +-behaviour(supervisor). + +%% External exports +-export([start_link/0, upgrade/0]). + +%% supervisor callbacks +-export([init/1]). + +%% @spec start_link() -> ServerRet +%% @doc API for starting the supervisor. +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +%% @spec upgrade() -> ok +%% @doc Add processes if necessary. +upgrade() -> + {ok, {_, Specs}} = init([]), + [supervisor:start_child(?MODULE, Spec) || Spec <- Specs], + ok. + +%% @spec init([]) -> SupervisorTree +%% @doc supervisor callback, ensures yaws is in embedded mode and then +%% returns the supervisor tree. +init([]) -> + Processes = [], + {ok, {{one_for_one, 10, 10}, Processes}}. diff --git a/apps/mochiweb/src/mochiweb_util.erl b/apps/mochiweb/src/mochiweb_util.erl new file mode 100644 index 00000000..d8fc89d5 --- /dev/null +++ b/apps/mochiweb/src/mochiweb_util.erl @@ -0,0 +1,859 @@ +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Utilities for parsing and quoting. + +-module(mochiweb_util). +-author('bob@mochimedia.com'). 
+-export([join/2, quote_plus/1, urlencode/1, parse_qs/1, unquote/1]). +-export([path_split/1]). +-export([urlsplit/1, urlsplit_path/1, urlunsplit/1, urlunsplit_path/1]). +-export([guess_mime/1, parse_header/1]). +-export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2]). +-export([record_to_proplist/2, record_to_proplist/3]). +-export([safe_relative_path/1, partition/2]). +-export([parse_qvalues/1, pick_accepted_encodings/3]). +-export([test/0]). + +-define(PERCENT, 37). % $\% +-define(FULLSTOP, 46). % $\. +-define(IS_HEX(C), ((C >= $0 andalso C =< $9) orelse + (C >= $a andalso C =< $f) orelse + (C >= $A andalso C =< $F))). +-define(QS_SAFE(C), ((C >= $a andalso C =< $z) orelse + (C >= $A andalso C =< $Z) orelse + (C >= $0 andalso C =< $9) orelse + (C =:= ?FULLSTOP orelse C =:= $- orelse C =:= $~ orelse + C =:= $_))). + +hexdigit(C) when C < 10 -> $0 + C; +hexdigit(C) when C < 16 -> $A + (C - 10). + +unhexdigit(C) when C >= $0, C =< $9 -> C - $0; +unhexdigit(C) when C >= $a, C =< $f -> C - $a + 10; +unhexdigit(C) when C >= $A, C =< $F -> C - $A + 10. + +%% @spec partition(String, Sep) -> {String, [], []} | {Prefix, Sep, Postfix} +%% @doc Inspired by Python 2.5's str.partition: +%% partition("foo/bar", "/") = {"foo", "/", "bar"}, +%% partition("foo", "/") = {"foo", "", ""}. +partition(String, Sep) -> + case partition(String, Sep, []) of + undefined -> + {String, "", ""}; + Result -> + Result + end. + +partition("", _Sep, _Acc) -> + undefined; +partition(S, Sep, Acc) -> + case partition2(S, Sep) of + undefined -> + [C | Rest] = S, + partition(Rest, Sep, [C | Acc]); + Rest -> + {lists:reverse(Acc), Sep, Rest} + end. + +partition2(Rest, "") -> + Rest; +partition2([C | R1], [C | R2]) -> + partition2(R1, R2); +partition2(_S, _Sep) -> + undefined. + + + +%% @spec safe_relative_path(string()) -> string() | undefined +%% @doc Return the reduced version of a relative path or undefined if it +%% is not safe. safe relative paths can be joined with an absolute path +%% and will result in a subdirectory of the absolute path. +safe_relative_path("/" ++ _) -> + undefined; +safe_relative_path(P) -> + safe_relative_path(P, []). + +safe_relative_path("", Acc) -> + case Acc of + [] -> + ""; + _ -> + string:join(lists:reverse(Acc), "/") + end; +safe_relative_path(P, Acc) -> + case partition(P, "/") of + {"", "/", _} -> + %% /foo or foo//bar + undefined; + {"..", _, _} when Acc =:= [] -> + undefined; + {"..", _, Rest} -> + safe_relative_path(Rest, tl(Acc)); + {Part, "/", ""} -> + safe_relative_path("", ["", Part | Acc]); + {Part, _, Rest} -> + safe_relative_path(Rest, [Part | Acc]) + end. + +%% @spec shell_quote(string()) -> string() +%% @doc Quote a string according to UNIX shell quoting rules, returns a string +%% surrounded by double quotes. +shell_quote(L) -> + shell_quote(L, [$\"]). + +%% @spec cmd_port([string()], Options) -> port() +%% @doc open_port({spawn, mochiweb_util:cmd_string(Argv)}, Options). +cmd_port(Argv, Options) -> + open_port({spawn, cmd_string(Argv)}, Options). + +%% @spec cmd([string()]) -> string() +%% @doc os:cmd(cmd_string(Argv)). +cmd(Argv) -> + os:cmd(cmd_string(Argv)). + +%% @spec cmd_string([string()]) -> string() +%% @doc Create a shell quoted command string from a list of arguments. +cmd_string(Argv) -> + join([shell_quote(X) || X <- Argv], " "). + +%% @spec join([string()], Separator) -> string() +%% @doc Join a list of strings together with the given separator +%% string or char. 
+join([], _Separator) -> + []; +join([S], _Separator) -> + lists:flatten(S); +join(Strings, Separator) -> + lists:flatten(revjoin(lists:reverse(Strings), Separator, [])). + +revjoin([], _Separator, Acc) -> + Acc; +revjoin([S | Rest], Separator, []) -> + revjoin(Rest, Separator, [S]); +revjoin([S | Rest], Separator, Acc) -> + revjoin(Rest, Separator, [S, Separator | Acc]). + +%% @spec quote_plus(atom() | integer() | float() | string() | binary()) -> string() +%% @doc URL safe encoding of the given term. +quote_plus(Atom) when is_atom(Atom) -> + quote_plus(atom_to_list(Atom)); +quote_plus(Int) when is_integer(Int) -> + quote_plus(integer_to_list(Int)); +quote_plus(Binary) when is_binary(Binary) -> + quote_plus(binary_to_list(Binary)); +quote_plus(Float) when is_float(Float) -> + quote_plus(mochinum:digits(Float)); +quote_plus(String) -> + quote_plus(String, []). + +quote_plus([], Acc) -> + lists:reverse(Acc); +quote_plus([C | Rest], Acc) when ?QS_SAFE(C) -> + quote_plus(Rest, [C | Acc]); +quote_plus([$\s | Rest], Acc) -> + quote_plus(Rest, [$+ | Acc]); +quote_plus([C | Rest], Acc) -> + <<Hi:4, Lo:4>> = <<C>>, + quote_plus(Rest, [hexdigit(Lo), hexdigit(Hi), ?PERCENT | Acc]). + +%% @spec urlencode([{Key, Value}]) -> string() +%% @doc URL encode the property list. +urlencode(Props) -> + RevPairs = lists:foldl(fun ({K, V}, Acc) -> + [[quote_plus(K), $=, quote_plus(V)] | Acc] + end, [], Props), + lists:flatten(revjoin(RevPairs, $&, [])). + +%% @spec parse_qs(string() | binary()) -> [{Key, Value}] +%% @doc Parse a query string or application/x-www-form-urlencoded. +parse_qs(Binary) when is_binary(Binary) -> + parse_qs(binary_to_list(Binary)); +parse_qs(String) -> + parse_qs(String, []). + +parse_qs([], Acc) -> + lists:reverse(Acc); +parse_qs(String, Acc) -> + {Key, Rest} = parse_qs_key(String), + {Value, Rest1} = parse_qs_value(Rest), + parse_qs(Rest1, [{Key, Value} | Acc]). + +parse_qs_key(String) -> + parse_qs_key(String, []). + +parse_qs_key([], Acc) -> + {qs_revdecode(Acc), ""}; +parse_qs_key([$= | Rest], Acc) -> + {qs_revdecode(Acc), Rest}; +parse_qs_key(Rest=[$; | _], Acc) -> + {qs_revdecode(Acc), Rest}; +parse_qs_key(Rest=[$& | _], Acc) -> + {qs_revdecode(Acc), Rest}; +parse_qs_key([C | Rest], Acc) -> + parse_qs_key(Rest, [C | Acc]). + +parse_qs_value(String) -> + parse_qs_value(String, []). + +parse_qs_value([], Acc) -> + {qs_revdecode(Acc), ""}; +parse_qs_value([$; | Rest], Acc) -> + {qs_revdecode(Acc), Rest}; +parse_qs_value([$& | Rest], Acc) -> + {qs_revdecode(Acc), Rest}; +parse_qs_value([C | Rest], Acc) -> + parse_qs_value(Rest, [C | Acc]). + +%% @spec unquote(string() | binary()) -> string() +%% @doc Unquote a URL encoded string. +unquote(Binary) when is_binary(Binary) -> + unquote(binary_to_list(Binary)); +unquote(String) -> + qs_revdecode(lists:reverse(String)). + +qs_revdecode(S) -> + qs_revdecode(S, []). + +qs_revdecode([], Acc) -> + Acc; +qs_revdecode([$+ | Rest], Acc) -> + qs_revdecode(Rest, [$\s | Acc]); +qs_revdecode([Lo, Hi, ?PERCENT | Rest], Acc) when ?IS_HEX(Lo), ?IS_HEX(Hi) -> + qs_revdecode(Rest, [(unhexdigit(Lo) bor (unhexdigit(Hi) bsl 4)) | Acc]); +qs_revdecode([C | Rest], Acc) -> + qs_revdecode(Rest, [C | Acc]). + +%% @spec urlsplit(Url) -> {Scheme, Netloc, Path, Query, Fragment} +%% @doc Return a 5-tuple, does not expand % escapes. Only supports HTTP style +%% URLs. +urlsplit(Url) -> + {Scheme, Url1} = urlsplit_scheme(Url), + {Netloc, Url2} = urlsplit_netloc(Url1), + {Path, Query, Fragment} = urlsplit_path(Url2), + {Scheme, Netloc, Path, Query, Fragment}. 
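+
+%% The quoting and query-string functions above round-trip one another; a few
+%% illustrative evaluations (consistent with the tests at the end of this
+%% module):
+%%
+%%   quote_plus("foo bar")                   -> "foo+bar"
+%%   urlencode([{foo, "bar"}, {baz, "a b"}]) -> "foo=bar&baz=a+b"
+%%   parse_qs("foo=bar&baz=a+b")             -> [{"foo", "bar"}, {"baz", "a b"}]
+%%   urlsplit("http://host:80/foo?q=1#frag") -> {"http", "host:80", "/foo", "q=1", "frag"}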
+ +urlsplit_scheme(Url) -> + urlsplit_scheme(Url, []). + +urlsplit_scheme([], Acc) -> + {"", lists:reverse(Acc)}; +urlsplit_scheme(":" ++ Rest, Acc) -> + {string:to_lower(lists:reverse(Acc)), Rest}; +urlsplit_scheme([C | Rest], Acc) -> + urlsplit_scheme(Rest, [C | Acc]). + +urlsplit_netloc("//" ++ Rest) -> + urlsplit_netloc(Rest, []); +urlsplit_netloc(Path) -> + {"", Path}. + +urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# -> + {lists:reverse(Acc), Rest}; +urlsplit_netloc([C | Rest], Acc) -> + urlsplit_netloc(Rest, [C | Acc]). + + +%% @spec path_split(string()) -> {Part, Rest} +%% @doc Split a path starting from the left, as in URL traversal. +%% path_split("foo/bar") = {"foo", "bar"}, +%% path_split("/foo/bar") = {"", "foo/bar"}. +path_split(S) -> + path_split(S, []). + +path_split("", Acc) -> + {lists:reverse(Acc), ""}; +path_split("/" ++ Rest, Acc) -> + {lists:reverse(Acc), Rest}; +path_split([C | Rest], Acc) -> + path_split(Rest, [C | Acc]). + + +%% @spec urlunsplit({Scheme, Netloc, Path, Query, Fragment}) -> string() +%% @doc Assemble a URL from the 5-tuple. Path must be absolute. +urlunsplit({Scheme, Netloc, Path, Query, Fragment}) -> + lists:flatten([case Scheme of "" -> ""; _ -> [Scheme, "://"] end, + Netloc, + urlunsplit_path({Path, Query, Fragment})]). + +%% @spec urlunsplit_path({Path, Query, Fragment}) -> string() +%% @doc Assemble a URL path from the 3-tuple. +urlunsplit_path({Path, Query, Fragment}) -> + lists:flatten([Path, + case Query of "" -> ""; _ -> [$? | Query] end, + case Fragment of "" -> ""; _ -> [$# | Fragment] end]). + +%% @spec urlsplit_path(Url) -> {Path, Query, Fragment} +%% @doc Return a 3-tuple, does not expand % escapes. Only supports HTTP style +%% paths. +urlsplit_path(Path) -> + urlsplit_path(Path, []). + +urlsplit_path("", Acc) -> + {lists:reverse(Acc), "", ""}; +urlsplit_path("?" ++ Rest, Acc) -> + {Query, Fragment} = urlsplit_query(Rest), + {lists:reverse(Acc), Query, Fragment}; +urlsplit_path("#" ++ Rest, Acc) -> + {lists:reverse(Acc), "", Rest}; +urlsplit_path([C | Rest], Acc) -> + urlsplit_path(Rest, [C | Acc]). + +urlsplit_query(Query) -> + urlsplit_query(Query, []). + +urlsplit_query("", Acc) -> + {lists:reverse(Acc), ""}; +urlsplit_query("#" ++ Rest, Acc) -> + {lists:reverse(Acc), Rest}; +urlsplit_query([C | Rest], Acc) -> + urlsplit_query(Rest, [C | Acc]). + +%% @spec guess_mime(string()) -> string() +%% @doc Guess the mime type of a file by the extension of its filename. +guess_mime(File) -> + case filename:extension(File) of + ".html" -> + "text/html"; + ".xhtml" -> + "application/xhtml+xml"; + ".xml" -> + "application/xml"; + ".css" -> + "text/css"; + ".js" -> + "application/x-javascript"; + ".jpg" -> + "image/jpeg"; + ".gif" -> + "image/gif"; + ".png" -> + "image/png"; + ".swf" -> + "application/x-shockwave-flash"; + ".zip" -> + "application/zip"; + ".bz2" -> + "application/x-bzip2"; + ".gz" -> + "application/x-gzip"; + ".tar" -> + "application/x-tar"; + ".tgz" -> + "application/x-gzip"; + ".txt" -> + "text/plain"; + ".doc" -> + "application/msword"; + ".pdf" -> + "application/pdf"; + ".xls" -> + "application/vnd.ms-excel"; + ".rtf" -> + "application/rtf"; + ".mov" -> + "video/quicktime"; + ".mp3" -> + "audio/mpeg"; + ".z" -> + "application/x-compress"; + ".wav" -> + "audio/x-wav"; + ".ico" -> + "image/x-icon"; + ".bmp" -> + "image/bmp"; + ".m4a" -> + "audio/mpeg"; + ".m3u" -> + "audio/x-mpegurl"; + ".exe" -> + "application/octet-stream"; + ".csv" -> + "text/csv"; + _ -> + "text/plain" + end. 
+ +%% @spec parse_header(string()) -> {Type, [{K, V}]} +%% @doc Parse a Content-Type like header, return the main Content-Type +%% and a property list of options. +parse_header(String) -> + %% TODO: This is exactly as broken as Python's cgi module. + %% Should parse properly like mochiweb_cookies. + [Type | Parts] = [string:strip(S) || S <- string:tokens(String, ";")], + F = fun (S, Acc) -> + case lists:splitwith(fun (C) -> C =/= $= end, S) of + {"", _} -> + %% Skip anything with no name + Acc; + {_, ""} -> + %% Skip anything with no value + Acc; + {Name, [$\= | Value]} -> + [{string:to_lower(string:strip(Name)), + unquote_header(string:strip(Value))} | Acc] + end + end, + {string:to_lower(Type), + lists:foldr(F, [], Parts)}. + +unquote_header("\"" ++ Rest) -> + unquote_header(Rest, []); +unquote_header(S) -> + S. + +unquote_header("", Acc) -> + lists:reverse(Acc); +unquote_header("\"", Acc) -> + lists:reverse(Acc); +unquote_header([$\\, C | Rest], Acc) -> + unquote_header(Rest, [C | Acc]); +unquote_header([C | Rest], Acc) -> + unquote_header(Rest, [C | Acc]). + +%% @spec record_to_proplist(Record, Fields) -> proplist() +%% @doc calls record_to_proplist/3 with a default TypeKey of '__record' +record_to_proplist(Record, Fields) -> + record_to_proplist(Record, Fields, '__record'). + +%% @spec record_to_proplist(Record, Fields, TypeKey) -> proplist() +%% @doc Return a proplist of the given Record with each field in the +%% Fields list set as a key with the corresponding value in the Record. +%% TypeKey is the key that is used to store the record type +%% Fields should be obtained by calling record_info(fields, record_type) +%% where record_type is the record type of Record +record_to_proplist(Record, Fields, TypeKey) + when tuple_size(Record) - 1 =:= length(Fields) -> + lists:zip([TypeKey | Fields], tuple_to_list(Record)). + + +shell_quote([], Acc) -> + lists:reverse([$\" | Acc]); +shell_quote([C | Rest], Acc) when C =:= $\" orelse C =:= $\` orelse + C =:= $\\ orelse C =:= $\$ -> + shell_quote(Rest, [C, $\\ | Acc]); +shell_quote([C | Rest], Acc) -> + shell_quote(Rest, [C | Acc]). + +%% @spec parse_qvalues(string()) -> [qvalue()] | error() +%% @type qvalue() -> {element(), q()} +%% @type element() -> string() +%% @type q() -> 0.0 .. 1.0 +%% @type error() -> invalid_qvalue_string +%% +%% @doc Parses a list (given as a string) of elements with Q values associated +%% to them. Elements are separated by commas and each element is separated +%% from its Q value by a semicolon. Q values are optional but when missing +%% the value of an element is considered as 1.0. A Q value is always in the +%% range [0.0, 1.0]. A Q value list is used for example as the value of the +%% HTTP "Accept-Encoding" header. +%% +%% Q values are described in section 2.9 of the RFC 2616 (HTTP 1.1). 
+%% +%% Example: +%% +%% parse_qvalues("gzip; q=0.5, deflate, identity;q=0.0") -> +%% [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] +%% +parse_qvalues(QValuesStr) -> + try + {ok, Re} = re:compile("^\\s*q\\s*=\\s*((?:0|1)(?:\\.\\d{1,3})?)\\s*$"), + lists:map( + fun(Pair) -> + case string:tokens(Pair, ";") of + [Enc] -> + {string:strip(Enc), 1.0}; + [Enc, QStr] -> + case re:run(QStr, Re, [{capture, [1], list}]) of + {match, [Q]} -> + QVal = case Q of + "0" -> + 0.0; + "1" -> + 1.0; + Else -> + list_to_float(Else) + end, + case QVal < 0.0 orelse QVal > 1.0 of + false -> + {string:strip(Enc), QVal} + end + end + end + end, + string:tokens(string:to_lower(QValuesStr), ",") + ) + catch + _Type:_Error -> + invalid_qvalue_string + end. + +%% @spec pick_accepted_encodings(qvalues(), [encoding()], encoding()) -> +%% [encoding()] +%% @type qvalues() -> [ {encoding(), q()} ] +%% @type encoding() -> string() +%% @type q() -> 0.0 .. 1.0 +%% +%% @doc Determines which encodings specified in the given Q values list are +%% valid according to a list of supported encodings and a default encoding. +%% +%% The returned list of encodings is sorted, descendingly, according to the +%% Q values of the given list. The last element of this list is the given +%% default encoding unless this encoding is explicitily or implicitily +%% marked with a Q value of 0.0 in the given Q values list. +%% Note: encodings with the same Q value are kept in the same order as +%% found in the input Q values list. +%% +%% This encoding picking process is described in section 14.3 of the +%% RFC 2616 (HTTP 1.1). +%% +%% Example: +%% +%% pick_accepted_encodings( +%% [{"gzip", 0.5}, {"deflate", 1.0}], +%% ["gzip", "identity"], +%% "identity" +%% ) -> +%% ["gzip", "identity"] +%% +pick_accepted_encodings(AcceptedEncs, SupportedEncs, DefaultEnc) -> + SortedQList = lists:reverse( + lists:sort(fun({_, Q1}, {_, Q2}) -> Q1 < Q2 end, AcceptedEncs) + ), + {Accepted, Refused} = lists:foldr( + fun({E, Q}, {A, R}) -> + case Q > 0.0 of + true -> + {[E | A], R}; + false -> + {A, [E | R]} + end + end, + {[], []}, + SortedQList + ), + Refused1 = lists:foldr( + fun(Enc, Acc) -> + case Enc of + "*" -> + lists:subtract(SupportedEncs, Accepted) ++ Acc; + _ -> + [Enc | Acc] + end + end, + [], + Refused + ), + Accepted1 = lists:foldr( + fun(Enc, Acc) -> + case Enc of + "*" -> + lists:subtract(SupportedEncs, Accepted ++ Refused1) ++ Acc; + _ -> + [Enc | Acc] + end + end, + [], + Accepted + ), + Accepted2 = case lists:member(DefaultEnc, Accepted1) of + true -> + Accepted1; + false -> + Accepted1 ++ [DefaultEnc] + end, + [E || E <- Accepted2, lists:member(E, SupportedEncs), + not lists:member(E, Refused1)]. + +test() -> + test_join(), + test_quote_plus(), + test_unquote(), + test_urlencode(), + test_parse_qs(), + test_urlsplit_path(), + test_urlunsplit_path(), + test_urlsplit(), + test_urlunsplit(), + test_path_split(), + test_guess_mime(), + test_parse_header(), + test_shell_quote(), + test_cmd(), + test_cmd_string(), + test_partition(), + test_safe_relative_path(), + test_parse_qvalues(), + test_pick_accepted_encodings(), + ok. + +test_shell_quote() -> + "\"foo \\$bar\\\"\\`' baz\"" = shell_quote("foo $bar\"`' baz"), + ok. + +test_cmd() -> + "$bling$ `word`!\n" = cmd(["echo", "$bling$ `word`!"]), + ok. + +test_cmd_string() -> + "\"echo\" \"\\$bling\\$ \\`word\\`!\"" = cmd_string(["echo", "$bling$ `word`!"]), + ok. 
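parse_qvalues/1 and pick_accepted_encodings/3 above are the two halves of Accept-Encoding negotiation: the first turns the header value into {Encoding, Q} pairs (Q values themselves are defined in RFC 2616, section 3.9), the second intersects those preferences with the encodings the server supports, honouring q=0.0 refusals and "*" wildcards. A minimal sketch combining the two, using the functions exactly as defined above; the header string is an arbitrary example:

    AcceptedEncs = parse_qvalues("gzip;q=0.5, deflate, identity;q=0.0"),
    %% AcceptedEncs =:= [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}]
    %% "deflate" is then dropped because the server does not support it, and
    %% "identity" because the client refused it with q=0.0:
    ["gzip"] = pick_accepted_encodings(AcceptedEncs, ["gzip", "identity"], "identity").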
+ +test_parse_header() -> + {"multipart/form-data", [{"boundary", "AaB03x"}]} = + parse_header("multipart/form-data; boundary=AaB03x"), + ok. + +test_guess_mime() -> + "text/plain" = guess_mime(""), + "text/plain" = guess_mime(".text"), + "application/zip" = guess_mime(".zip"), + "application/zip" = guess_mime("x.zip"), + "text/html" = guess_mime("x.html"), + "application/xhtml+xml" = guess_mime("x.xhtml"), + ok. + +test_path_split() -> + {"", "foo/bar"} = path_split("/foo/bar"), + {"foo", "bar"} = path_split("foo/bar"), + {"bar", ""} = path_split("bar"), + ok. + +test_urlsplit() -> + {"", "", "/foo", "", "bar?baz"} = urlsplit("/foo#bar?baz"), + {"http", "host:port", "/foo", "", "bar?baz"} = + urlsplit("http://host:port/foo#bar?baz"), + ok. + +test_urlsplit_path() -> + {"/foo/bar", "", ""} = urlsplit_path("/foo/bar"), + {"/foo", "baz", ""} = urlsplit_path("/foo?baz"), + {"/foo", "", "bar?baz"} = urlsplit_path("/foo#bar?baz"), + {"/foo", "", "bar?baz#wibble"} = urlsplit_path("/foo#bar?baz#wibble"), + {"/foo", "bar", "baz"} = urlsplit_path("/foo?bar#baz"), + {"/foo", "bar?baz", "baz"} = urlsplit_path("/foo?bar?baz#baz"), + ok. + +test_urlunsplit() -> + "/foo#bar?baz" = urlunsplit({"", "", "/foo", "", "bar?baz"}), + "http://host:port/foo#bar?baz" = + urlunsplit({"http", "host:port", "/foo", "", "bar?baz"}), + ok. + +test_urlunsplit_path() -> + "/foo/bar" = urlunsplit_path({"/foo/bar", "", ""}), + "/foo?baz" = urlunsplit_path({"/foo", "baz", ""}), + "/foo#bar?baz" = urlunsplit_path({"/foo", "", "bar?baz"}), + "/foo#bar?baz#wibble" = urlunsplit_path({"/foo", "", "bar?baz#wibble"}), + "/foo?bar#baz" = urlunsplit_path({"/foo", "bar", "baz"}), + "/foo?bar?baz#baz" = urlunsplit_path({"/foo", "bar?baz", "baz"}), + ok. + +test_join() -> + "foo,bar,baz" = join(["foo", "bar", "baz"], $,), + "foo,bar,baz" = join(["foo", "bar", "baz"], ","), + "foo bar" = join([["foo", " bar"]], ","), + "foo bar,baz" = join([["foo", " bar"], "baz"], ","), + "foo" = join(["foo"], ","), + "foobarbaz" = join(["foo", "bar", "baz"], ""), + ok. + +test_quote_plus() -> + "foo" = quote_plus(foo), + "1" = quote_plus(1), + "1.1" = quote_plus(1.1), + "foo" = quote_plus("foo"), + "foo+bar" = quote_plus("foo bar"), + "foo%0A" = quote_plus("foo\n"), + "foo%0A" = quote_plus("foo\n"), + "foo%3B%26%3D" = quote_plus("foo;&="), + ok. + +test_unquote() -> + "foo bar" = unquote("foo+bar"), + "foo bar" = unquote("foo%20bar"), + "foo\r\n" = unquote("foo%0D%0A"), + ok. + +test_urlencode() -> + "foo=bar&baz=wibble+%0D%0A&z=1" = urlencode([{foo, "bar"}, + {"baz", "wibble \r\n"}, + {z, 1}]), + ok. + +test_parse_qs() -> + [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}] = + parse_qs("foo=bar&baz=wibble+%0D%0A&z=1"), + ok. + +test_partition() -> + {"foo", "", ""} = partition("foo", "/"), + {"foo", "/", "bar"} = partition("foo/bar", "/"), + {"foo", "/", ""} = partition("foo/", "/"), + {"", "/", "bar"} = partition("/bar", "/"), + {"f", "oo/ba", "r"} = partition("foo/bar", "oo/ba"), + ok. + +test_safe_relative_path() -> + "foo" = safe_relative_path("foo"), + "foo/" = safe_relative_path("foo/"), + "foo" = safe_relative_path("foo/bar/.."), + "bar" = safe_relative_path("foo/../bar"), + "bar/" = safe_relative_path("foo/../bar/"), + "" = safe_relative_path("foo/.."), + "" = safe_relative_path("foo/../"), + undefined = safe_relative_path("/foo"), + undefined = safe_relative_path("../foo"), + undefined = safe_relative_path("foo/../.."), + undefined = safe_relative_path("foo//"), + ok. 
+ +test_parse_qvalues() -> + [] = parse_qvalues(""), + [{"identity", 0.0}] = parse_qvalues("identity;q=0"), + [{"identity", 0.0}] = parse_qvalues("identity ;q=0"), + [{"identity", 0.0}] = parse_qvalues(" identity; q =0 "), + [{"identity", 0.0}] = parse_qvalues("identity ; q = 0"), + [{"identity", 0.0}] = parse_qvalues("identity ; q= 0.0"), + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip,deflate,identity;q=0.0" + ), + [{"deflate", 1.0}, {"gzip", 1.0}, {"identity", 0.0}] = parse_qvalues( + "deflate,gzip,identity;q=0.0" + ), + [{"gzip", 1.0}, {"deflate", 1.0}, {"gzip", 1.0}, {"identity", 0.0}] = + parse_qvalues("gzip,deflate,gzip,identity;q=0"), + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip, deflate , identity; q=0.0" + ), + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=1, deflate;q=1.0, identity;q=0.0" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=0.5, deflate;q=1.0, identity;q=0" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=0.5, deflate , identity;q=0.0" + ), + [{"gzip", 0.5}, {"deflate", 0.8}, {"identity", 0.0}] = parse_qvalues( + "gzip; q=0.5, deflate;q=0.8, identity;q=0.0" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 1.0}] = parse_qvalues( + "gzip; q=0.5,deflate,identity" + ), + [{"gzip", 0.5}, {"deflate", 1.0}, {"identity", 1.0}, {"identity", 1.0}] = + parse_qvalues("gzip; q=0.5,deflate,identity, identity "), + invalid_qvalue_string = parse_qvalues("gzip; q=1.1, deflate"), + invalid_qvalue_string = parse_qvalues("gzip; q=0.5, deflate;q=2"), + invalid_qvalue_string = parse_qvalues("gzip, deflate;q=AB"), + invalid_qvalue_string = parse_qvalues("gzip; q=2.1, deflate"), + ok. 
+ +test_pick_accepted_encodings() -> + ["identity"] = pick_accepted_encodings( + [], + ["gzip", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 1.0}], + ["gzip", "identity"], + "identity" + ), + ["identity"] = pick_accepted_encodings( + [{"gzip", 0.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.5}, {"deflate", 1.0}], + ["gzip", "identity"], + "identity" + ), + ["identity"] = pick_accepted_encodings( + [{"gzip", 0.0}, {"deflate", 0.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}], + ["gzip", "identity"], + "identity" + ), + ["gzip", "deflate", "identity"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 1.0}, {"identity", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "deflate", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 1.0}, {"deflate", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "gzip", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 1.0}, {"gzip", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate", "gzip", "identity"] = pick_accepted_encodings( + [{"gzip", 0.2}, {"deflate", 0.9}, {"gzip", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + [] = pick_accepted_encodings( + [{"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate", "identity"] = pick_accepted_encodings( + [{"*", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate", "identity"] = pick_accepted_encodings( + [{"*", 0.6}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "deflate"] = pick_accepted_encodings( + [{"gzip", 1.0}, {"deflate", 0.6}, {"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["deflate", "gzip"] = pick_accepted_encodings( + [{"gzip", 0.5}, {"deflate", 1.0}, {"*", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"deflate", 0.0}, {"*", 1.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ["gzip", "identity"] = pick_accepted_encodings( + [{"*", 1.0}, {"deflate", 0.0}], + ["gzip", "deflate", "identity"], + "identity" + ), + ok. diff --git a/apps/mochiweb/src/reloader.erl b/apps/mochiweb/src/reloader.erl new file mode 100644 index 00000000..6835f8f9 --- /dev/null +++ b/apps/mochiweb/src/reloader.erl @@ -0,0 +1,123 @@ +%% @copyright 2007 Mochi Media, Inc. +%% @author Matthew Dempsky <matthew@mochimedia.com> +%% +%% @doc Erlang module for automatically reloading modified modules +%% during development. + +-module(reloader). +-author("Matthew Dempsky <matthew@mochimedia.com>"). + +-include_lib("kernel/include/file.hrl"). + +-behaviour(gen_server). +-export([start/0, start_link/0]). +-export([stop/0]). 
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). + +-record(state, {last, tref}). + +%% External API + +%% @spec start() -> ServerRet +%% @doc Start the reloader. +start() -> + gen_server:start({local, ?MODULE}, ?MODULE, [], []). + +%% @spec start_link() -> ServerRet +%% @doc Start the reloader. +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +%% @spec stop() -> ok +%% @doc Stop the reloader. +stop() -> + gen_server:call(?MODULE, stop). + +%% gen_server callbacks + +%% @spec init([]) -> {ok, State} +%% @doc gen_server init, opens the server in an initial state. +init([]) -> + {ok, TRef} = timer:send_interval(timer:seconds(1), doit), + {ok, #state{last = stamp(), tref = TRef}}. + +%% @spec handle_call(Args, From, State) -> tuple() +%% @doc gen_server callback. +handle_call(stop, _From, State) -> + {stop, shutdown, stopped, State}; +handle_call(_Req, _From, State) -> + {reply, {error, badrequest}, State}. + +%% @spec handle_cast(Cast, State) -> tuple() +%% @doc gen_server callback. +handle_cast(_Req, State) -> + {noreply, State}. + +%% @spec handle_info(Info, State) -> tuple() +%% @doc gen_server callback. +handle_info(doit, State) -> + Now = stamp(), + doit(State#state.last, Now), + {noreply, State#state{last = Now}}; +handle_info(_Info, State) -> + {noreply, State}. + +%% @spec terminate(Reason, State) -> ok +%% @doc gen_server termination callback. +terminate(_Reason, State) -> + {ok, cancel} = timer:cancel(State#state.tref), + ok. + + +%% @spec code_change(_OldVsn, State, _Extra) -> State +%% @doc gen_server code_change callback (trivial). +code_change(_Vsn, State, _Extra) -> + {ok, State}. + +%% Internal API + +doit(From, To) -> + [case file:read_file_info(Filename) of + {ok, #file_info{mtime = Mtime}} when Mtime >= From, Mtime < To -> + reload(Module); + {ok, _} -> + unmodified; + {error, enoent} -> + %% The Erlang compiler deletes existing .beam files if + %% recompiling fails. Maybe it's worth spitting out a + %% warning here, but I'd want to limit it to just once. + gone; + {error, Reason} -> + io:format("Error reading ~s's file info: ~p~n", + [Filename, Reason]), + error + end || {Module, Filename} <- code:all_loaded(), is_list(Filename)]. + +reload(Module) -> + io:format("Reloading ~p ...", [Module]), + code:purge(Module), + case code:load_file(Module) of + {module, Module} -> + io:format(" ok.~n"), + case erlang:function_exported(Module, test, 0) of + true -> + io:format(" - Calling ~p:test() ...", [Module]), + case catch Module:test() of + ok -> + io:format(" ok.~n"), + reload; + Reason -> + io:format(" fail: ~p.~n", [Reason]), + reload_but_test_failed + end; + false -> + reload + end; + {error, Reason} -> + io:format(" fail: ~p.~n", [Reason]), + error + end. + + +stamp() -> + erlang:localtime(). diff --git a/apps/oauth/src/oauth.app.src b/apps/oauth/src/oauth.app.src new file mode 100644 index 00000000..6357b9b0 --- /dev/null +++ b/apps/oauth/src/oauth.app.src @@ -0,0 +1,20 @@ +{application, oauth, [ + {description, "Erlang OAuth implementation"}, + {vsn, "dev"}, + {modules, [ + oauth, + oauth_hmac_sha1, + oauth_http, + oauth_plaintext, + oauth_rsa_sha1, + oauth_unix, + oauth_uri + ]}, + {registered, []}, + {applications, [ + kernel, + stdlib, + crypto, + inets + ]} +]}. diff --git a/apps/oauth/src/oauth.erl b/apps/oauth/src/oauth.erl new file mode 100644 index 00000000..866655c9 --- /dev/null +++ b/apps/oauth/src/oauth.erl @@ -0,0 +1,107 @@ +-module(oauth). 
+ +-export( + [ get/5 + , header/1 + , post/5 + , signature/5 + , signature_base_string/3 + , signed_params/6 + , token/1 + , token_secret/1 + , uri/2 + , verify/6 + ]). + + +get(URL, ExtraParams, Consumer, Token, TokenSecret) -> + SignedParams = signed_params("GET", URL, ExtraParams, Consumer, Token, TokenSecret), + oauth_http:get(uri(URL, SignedParams)). + +post(URL, ExtraParams, Consumer, Token, TokenSecret) -> + SignedParams = signed_params("POST", URL, ExtraParams, Consumer, Token, TokenSecret), + oauth_http:post(URL, oauth_uri:params_to_string(SignedParams)). + +uri(Base, []) -> + Base; +uri(Base, Params) -> + lists:concat([Base, "?", oauth_uri:params_to_string(Params)]). + +header(Params) -> + {"Authorization", "OAuth " ++ oauth_uri:params_to_header_string(Params)}. + +token(Params) -> + proplists:get_value("oauth_token", Params). + +token_secret(Params) -> + proplists:get_value("oauth_token_secret", Params). + +verify(Signature, HttpMethod, URL, Params, Consumer, TokenSecret) -> + case signature_method(Consumer) of + plaintext -> + oauth_plaintext:verify(Signature, consumer_secret(Consumer), TokenSecret); + hmac_sha1 -> + BaseString = signature_base_string(HttpMethod, URL, Params), + oauth_hmac_sha1:verify(Signature, BaseString, consumer_secret(Consumer), TokenSecret); + rsa_sha1 -> + BaseString = signature_base_string(HttpMethod, URL, Params), + oauth_rsa_sha1:verify(Signature, BaseString, consumer_secret(Consumer)) + end. + +signed_params(HttpMethod, URL, ExtraParams, Consumer, Token, TokenSecret) -> + Params = token_param(Token, params(Consumer, ExtraParams)), + [{"oauth_signature", signature(HttpMethod, URL, Params, Consumer, TokenSecret)}|Params]. + +signature(HttpMethod, URL, Params, Consumer, TokenSecret) -> + case signature_method(Consumer) of + plaintext -> + oauth_plaintext:signature(consumer_secret(Consumer), TokenSecret); + hmac_sha1 -> + BaseString = signature_base_string(HttpMethod, URL, Params), + oauth_hmac_sha1:signature(BaseString, consumer_secret(Consumer), TokenSecret); + rsa_sha1 -> + BaseString = signature_base_string(HttpMethod, URL, Params), + oauth_rsa_sha1:signature(BaseString, consumer_secret(Consumer)) + end. + +signature_base_string(HttpMethod, URL, Params) -> + NormalizedURL = oauth_uri:normalize(URL), + NormalizedParams = oauth_uri:params_to_string(lists:sort(Params)), + oauth_uri:calate("&", [HttpMethod, NormalizedURL, NormalizedParams]). + +token_param("", Params) -> + Params; +token_param(Token, Params) -> + [{"oauth_token", Token}|Params]. + +params(Consumer, Params) -> + Nonce = base64:encode_to_string(crypto:rand_bytes(32)), % cf. ruby-oauth + params(Consumer, oauth_unix:timestamp(), Nonce, Params). + +params(Consumer, Timestamp, Nonce, Params) -> + [ {"oauth_version", "1.0"} + , {"oauth_nonce", Nonce} + , {"oauth_timestamp", integer_to_list(Timestamp)} + , {"oauth_signature_method", signature_method_string(Consumer)} + , {"oauth_consumer_key", consumer_key(Consumer)} + | Params + ]. + +signature_method_string(Consumer) -> + case signature_method(Consumer) of + plaintext -> + "PLAINTEXT"; + hmac_sha1 -> + "HMAC-SHA1"; + rsa_sha1 -> + "RSA-SHA1" + end. + +signature_method(_Consumer={_, _, Method}) -> + Method. + +consumer_secret(_Consumer={_, Secret, _}) -> + Secret. + +consumer_key(_Consumer={Key, _, _}) -> + Key. diff --git a/apps/oauth/src/oauth_hmac_sha1.erl b/apps/oauth/src/oauth_hmac_sha1.erl new file mode 100644 index 00000000..79d59f37 --- /dev/null +++ b/apps/oauth/src/oauth_hmac_sha1.erl @@ -0,0 +1,11 @@ +-module(oauth_hmac_sha1). 
+ +-export([signature/3, verify/4]). + + +signature(BaseString, CS, TS) -> + Key = oauth_uri:calate("&", [CS, TS]), + base64:encode_to_string(crypto:sha_mac(Key, BaseString)). + +verify(Signature, BaseString, CS, TS) -> + couch_util:verify(signature(BaseString, CS, TS), Signature). diff --git a/apps/oauth/src/oauth_http.erl b/apps/oauth/src/oauth_http.erl new file mode 100644 index 00000000..bf5a4bac --- /dev/null +++ b/apps/oauth/src/oauth_http.erl @@ -0,0 +1,22 @@ +-module(oauth_http). + +-export([get/1, post/2, response_params/1, response_body/1, response_code/1]). + + +get(URL) -> + request(get, {URL, []}). + +post(URL, Data) -> + request(post, {URL, [], "application/x-www-form-urlencoded", Data}). + +request(Method, Request) -> + http:request(Method, Request, [{autoredirect, false}], []). + +response_params(Response) -> + oauth_uri:params_from_string(response_body(Response)). + +response_body({{_, _, _}, _, Body}) -> + Body. + +response_code({{_, Code, _}, _, _}) -> + Code. diff --git a/apps/oauth/src/oauth_plaintext.erl b/apps/oauth/src/oauth_plaintext.erl new file mode 100644 index 00000000..41a1e9b2 --- /dev/null +++ b/apps/oauth/src/oauth_plaintext.erl @@ -0,0 +1,10 @@ +-module(oauth_plaintext). + +-export([signature/2, verify/3]). + + +signature(CS, TS) -> + oauth_uri:calate("&", [CS, TS]). + +verify(Signature, CS, TS) -> + couch_util:verify(signature(CS, TS), Signature). diff --git a/apps/oauth/src/oauth_rsa_sha1.erl b/apps/oauth/src/oauth_rsa_sha1.erl new file mode 100644 index 00000000..6f4828e0 --- /dev/null +++ b/apps/oauth/src/oauth_rsa_sha1.erl @@ -0,0 +1,30 @@ +-module(oauth_rsa_sha1). + +-export([signature/2, verify/3]). + +-include_lib("public_key/include/public_key.hrl"). + + +signature(BaseString, PrivateKeyPath) -> + {ok, [Info]} = public_key:pem_to_der(PrivateKeyPath), + {ok, PrivateKey} = public_key:decode_private_key(Info), + base64:encode_to_string(public_key:sign(list_to_binary(BaseString), PrivateKey)). + +verify(Signature, BaseString, PublicKey) -> + public_key:verify_signature(to_binary(BaseString), sha, base64:decode(Signature), public_key(PublicKey)). + +to_binary(Term) when is_list(Term) -> + list_to_binary(Term); +to_binary(Term) when is_binary(Term) -> + Term. + +public_key(Path) when is_list(Path) -> + {ok, [{cert, DerCert, not_encrypted}]} = public_key:pem_to_der(Path), + {ok, Cert} = public_key:pkix_decode_cert(DerCert, otp), + public_key(Cert); +public_key(#'OTPCertificate'{tbsCertificate=Cert}) -> + public_key(Cert); +public_key(#'OTPTBSCertificate'{subjectPublicKeyInfo=Info}) -> + public_key(Info); +public_key(#'OTPSubjectPublicKeyInfo'{subjectPublicKey=Key}) -> + Key. diff --git a/apps/oauth/src/oauth_unix.erl b/apps/oauth/src/oauth_unix.erl new file mode 100644 index 00000000..73ca3143 --- /dev/null +++ b/apps/oauth/src/oauth_unix.erl @@ -0,0 +1,16 @@ +-module(oauth_unix). + +-export([timestamp/0]). + + +timestamp() -> + timestamp(calendar:universal_time()). + +timestamp(DateTime) -> + seconds(DateTime) - epoch(). + +epoch() -> + seconds({{1970,1,1},{00,00,00}}). + +seconds(DateTime) -> + calendar:datetime_to_gregorian_seconds(DateTime). diff --git a/apps/oauth/src/oauth_uri.erl b/apps/oauth/src/oauth_uri.erl new file mode 100644 index 00000000..fb27ae72 --- /dev/null +++ b/apps/oauth/src/oauth_uri.erl @@ -0,0 +1,88 @@ +-module(oauth_uri). + +-export([normalize/1, calate/2, encode/1]). +-export([params_from_string/1, params_to_string/1, + params_from_header_string/1, params_to_header_string/1]). + +-import(lists, [concat/1]). 
+ +-define(is_uppercase_alpha(C), C >= $A, C =< $Z). +-define(is_lowercase_alpha(C), C >= $a, C =< $z). +-define(is_alpha(C), ?is_uppercase_alpha(C); ?is_lowercase_alpha(C)). +-define(is_digit(C), C >= $0, C =< $9). +-define(is_alphanumeric(C), ?is_alpha(C); ?is_digit(C)). +-define(is_unreserved(C), ?is_alphanumeric(C); C =:= $-; C =:= $_; C =:= $.; C =:= $~). +-define(is_hex(C), ?is_digit(C); C >= $A, C =< $F). + + +normalize(URI) -> + case http_uri:parse(URI) of + {Scheme, UserInfo, Host, Port, Path, _Query} -> + normalize(Scheme, UserInfo, string:to_lower(Host), Port, [Path]); + Else -> + Else + end. + +normalize(http, UserInfo, Host, 80, Acc) -> + normalize(http, UserInfo, [Host|Acc]); +normalize(https, UserInfo, Host, 443, Acc) -> + normalize(https, UserInfo, [Host|Acc]); +normalize(Scheme, UserInfo, Host, Port, Acc) -> + normalize(Scheme, UserInfo, [Host, ":", Port|Acc]). + +normalize(Scheme, [], Acc) -> + concat([Scheme, "://"|Acc]); +normalize(Scheme, UserInfo, Acc) -> + concat([Scheme, "://", UserInfo, "@"|Acc]). + +params_to_header_string(Params) -> + intercalate(", ", [concat([encode(K), "=\"", encode(V), "\""]) || {K, V} <- Params]). + +params_from_header_string(String) -> + [param_from_header_string(Param) || Param <- re:split(String, ",\\s*", [{return, list}]), Param =/= ""]. + +param_from_header_string(Param) -> + [Key, QuotedValue] = string:tokens(Param, "="), + Value = string:substr(QuotedValue, 2, length(QuotedValue) - 2), + {decode(Key), decode(Value)}. + +params_from_string(Params) -> + [param_from_string(Param) || Param <- string:tokens(Params, "&")]. + +param_from_string(Param) -> + list_to_tuple([decode(Value) || Value <- string:tokens(Param, "=")]). + +params_to_string(Params) -> + intercalate("&", [calate("=", [K, V]) || {K, V} <- Params]). + +calate(Sep, Xs) -> + intercalate(Sep, [encode(X) || X <- Xs]). + +intercalate(Sep, Xs) -> + concat(intersperse(Sep, Xs)). + +intersperse(_, []) -> []; +intersperse(_, [X]) -> [X]; +intersperse(Sep, [X|Xs]) -> + [X, Sep|intersperse(Sep, Xs)]. + +decode(Chars) -> + decode(Chars, []). + +decode([], Decoded) -> + lists:reverse(Decoded); +decode([$%,A,B|Etc], Decoded) when ?is_hex(A), ?is_hex(B) -> + decode(Etc, [erlang:list_to_integer([A,B], 16)|Decoded]); +decode([C|Etc], Decoded) when ?is_unreserved(C) -> + decode(Etc, [C|Decoded]). + +encode(Chars) -> + encode(Chars, []). + +encode([], Encoded) -> + lists:flatten(lists:reverse(Encoded)); +encode([C|Etc], Encoded) when ?is_unreserved(C) -> + encode(Etc, [C|Encoded]); +encode([C|Etc], Encoded) -> + Value = io_lib:format("%~2.1.0s", [erlang:integer_to_list(C, 16)]), + encode(Etc, [Value|Encoded]). diff --git a/apps/rexi/ebin/rexi.app b/apps/rexi/ebin/rexi.app new file mode 100644 index 00000000..620c8863 --- /dev/null +++ b/apps/rexi/ebin/rexi.app @@ -0,0 +1,8 @@ +{application, rexi, [ + {description, "Lightweight RPC server"}, + {vsn, "1.2"}, + {modules, [rexi, rexi_app, rexi_sup, rexi_monitor, rexi_server]}, + {registered, [rexi_sup, rexi_server]}, + {applications, [kernel, stdlib]}, + {mod, {rexi_app,[]}} +]}. diff --git a/apps/rexi/ebin/rexi.appup b/apps/rexi/ebin/rexi.appup new file mode 100644 index 00000000..7ed8ad73 --- /dev/null +++ b/apps/rexi/ebin/rexi.appup @@ -0,0 +1,5 @@ +{"1.1",[{"1.0",[ + {load_module, rexi}, + {add_module, rexi_monitor}, + {load_module, rexi_server} +]}],[{"1.0",[]}]}. 
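Taken together, the oauth modules above implement client-side OAuth 1.0 signing: a Consumer is a {Key, Secret, Method} tuple where Method is plaintext, hmac_sha1 or rsa_sha1 (see signature_method/1, consumer_secret/1 and consumer_key/1 in oauth.erl), oauth:signed_params/6 adds the oauth_* protocol parameters and the signature, oauth_uri handles percent-encoding and parameter serialisation, and oauth_http issues the request through inets. A minimal, hypothetical request-token call assuming an HMAC-SHA1 consumer; the URL, key and secret are placeholders, not values from this patch:

    Consumer = {"consumer_key", "consumer_secret", hmac_sha1},
    {ok, Response} = oauth:get("http://example.org/oauth/request_token", [],
                               Consumer, "", ""),
    Params = oauth_http:response_params(Response),
    Token = oauth:token(Params),
    TokenSecret = oauth:token_secret(Params).

Passing "" for Token and TokenSecret makes token_param/2 skip the oauth_token parameter, which is what the request-token step requires.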
diff --git a/apps/rexi/src/rexi.erl b/apps/rexi/src/rexi.erl new file mode 100644 index 00000000..8ab1f05e --- /dev/null +++ b/apps/rexi/src/rexi.erl @@ -0,0 +1,91 @@ +-module(rexi). +-export([start/0, stop/0, restart/0]). +-export([cast/2, cast/3, kill/2]). +-export([reply/1, sync_reply/1, sync_reply/2]). +-export([async_server_call/2, async_server_call/3]). + +-define(SERVER, rexi_server). + +start() -> + application:start(rexi). + +stop() -> + application:stop(rexi). + +restart() -> + stop(), start(). + +%% @equiv cast(Node, self(), MFA) +-spec cast(node(), {atom(), atom(), list()}) -> reference(). +cast(Node, MFA) -> + cast(Node, self(), MFA). + +%% @doc Executes apply(M, F, A) on Node. +%% You might want to use this instead of rpc:cast/4 for two reasons. First, +%% the Caller pid and the returned reference are inserted into the remote +%% process' dictionary as `rexi_from', so it has a way to communicate with you. +%% Second, the remote process is monitored. If it exits with a Reason other +%% than normal, Caller will receive a message of the form +%% `{Ref, {rexi_EXIT, Reason}}' where Ref is the returned reference. +-spec cast(node(), pid(), {atom(), atom(), list()}) -> reference(). +cast(Node, Caller, MFA) -> + Ref = make_ref(), + ok = gen_server:cast({?SERVER, Node}, {doit, {Caller,Ref}, MFA}), + Ref. + +%% @doc Sends an async kill signal to the remote process associated with Ref. +%% No rexi_EXIT message will be sent. +-spec kill(node(), reference()) -> ok. +kill(Node, Ref) -> + ok = gen_server:cast({?SERVER, Node}, {kill, Ref}). + +%% @equiv async_server_call(Server, self(), Request) +-spec async_server_call(pid() | {atom(),node()}, any()) -> reference(). +async_server_call(Server, Request) -> + async_server_call(Server, self(), Request). + +%% @doc Sends a properly formatted gen_server:call Request to the Server and +%% returns the reference which the Server will include in its reply. The +%% function acts more like cast() than call() in that the server process +%% is not monitored. Clients who want to know if the server is alive should +%% monitor it themselves before calling this function. +-spec async_server_call(pid() | {atom(),node()}, pid(), any()) -> reference(). +async_server_call(Server, Caller, Request) -> + Ref = make_ref(), + do_send(Server, {'$gen_call', {Caller,Ref}, Request}), + Ref. + +%% @doc convenience function to reply to the original rexi Caller. +-spec reply(any()) -> any(). +reply(Reply) -> + {Caller, Ref} = get(rexi_from), + erlang:send(Caller, {Ref,Reply}). + +%% @equiv sync_reply(Reply, 300000) +sync_reply(Reply) -> + sync_reply(Reply, 300000). + +%% @doc convenience function to reply to caller and wait for response. Message +%% is of the form {OriginalRef, {self(),reference()}, Reply}, which enables the +%% original caller to respond back. +-spec sync_reply(any(), pos_integer() | infinity) -> any(). +sync_reply(Reply, Timeout) -> + {Caller, Ref} = get(rexi_from), + Tag = make_ref(), + erlang:send(Caller, {Ref, {self(),Tag}, Reply}), + receive {Tag, Response} -> + Response + after Timeout -> + timeout + end. + +%% internal functions %% + +% send a message as quickly as possible +do_send(Dest, Msg) -> + case erlang:send(Dest, Msg, [noconnect]) of + noconnect -> + spawn(erlang, send, [Dest, Msg]); + ok -> + ok + end. diff --git a/apps/rexi/src/rexi_app.erl b/apps/rexi/src/rexi_app.erl new file mode 100644 index 00000000..dda57752 --- /dev/null +++ b/apps/rexi/src/rexi_app.erl @@ -0,0 +1,11 @@ +-module(rexi_app). +-behaviour(application). 
+-export([start/2, stop/1]). + +-include_lib("eunit/include/eunit.hrl"). + +start(_Type, StartArgs) -> + rexi_sup:start_link(StartArgs). + +stop(_State) -> + ok. diff --git a/apps/rexi/src/rexi_monitor.erl b/apps/rexi/src/rexi_monitor.erl new file mode 100644 index 00000000..bbff22b3 --- /dev/null +++ b/apps/rexi/src/rexi_monitor.erl @@ -0,0 +1,40 @@ +-module(rexi_monitor). +-export([start/1, stop/1]). + +-include_lib("eunit/include/eunit.hrl"). + +%% @doc spawn_links a process which monitors the supplied list of items and +%% returns the process ID. If a monitored process exits, the caller will +%% receive a {rexi_DOWN, MonitoringPid, DeadPid, Reason} message. +-spec start([pid() | atom() | {atom(),node()}]) -> pid(). +start(Procs) -> + Parent = self(), + spawn_link(fun() -> + [erlang:monitor(process, P) || P <- Procs], + wait_monitors(Parent) + end). + +%% @doc Cleanly shut down the monitoring process and flush all rexi_DOWN +%% messages from our mailbox. +-spec stop(pid()) -> ok. +stop(MonitoringPid) -> + MonitoringPid ! {self(), shutdown}, + flush_down_messages(). + +%% internal functions %% + +wait_monitors(Parent) -> + receive + {'DOWN', _, process, Pid, Reason} -> + Parent ! {rexi_DOWN, self(), Pid, Reason}, + wait_monitors(Parent); + {Parent, shutdown} -> + ok + end. + +flush_down_messages() -> + receive {rexi_DOWN, _, _, _} -> + flush_down_messages() + after 0 -> + ok + end. diff --git a/apps/rexi/src/rexi_server.erl b/apps/rexi/src/rexi_server.erl new file mode 100644 index 00000000..c4dc740d --- /dev/null +++ b/apps/rexi/src/rexi_server.erl @@ -0,0 +1,86 @@ +-module(rexi_server). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0, init_p/2]). + +-include_lib("eunit/include/eunit.hrl"). + +-record(st, { + workers = ets:new(workers, [private, {keypos,2}]) +}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + {ok, #st{}}. + +handle_call(_Request, _From, St) -> + {reply, ignored, St}. + +handle_cast({doit, From, MFA}, #st{workers=Workers} = St) -> + {LocalPid, Ref} = spawn_monitor(?MODULE, init_p, [From, MFA]), + {noreply, St#st{workers = add_worker({LocalPid, Ref, From}, Workers)}}; + +handle_cast({kill, FromRef}, #st{workers=Workers} = St) -> + case find_worker_from(FromRef, Workers) of + {Pid, KeyRef, {_, FromRef}} -> + erlang:demonitor(KeyRef), + exit(Pid, kill), + {noreply, St#st{workers = remove_worker(KeyRef, Workers)}}; + false -> + {noreply, St} + end. + +handle_info({'DOWN', Ref, process, _, normal}, #st{workers=Workers} = St) -> + {noreply, St#st{workers = remove_worker(Ref, Workers)}}; + +handle_info({'DOWN', Ref, process, Pid, Reason}, #st{workers=Workers} = St) -> + case find_worker(Ref, Workers) of + {Pid, Ref, From} -> + notify_caller(From, Reason), + {noreply, St#st{workers = remove_worker(Ref, Workers)}}; + false -> + {noreply, St} + end; + +handle_info(_Info, St) -> + {noreply, St}. + +terminate(_Reason, St) -> + ets:foldl(fun({Pid, _, _}, _) -> exit(Pid,kill) end, nil, St#st.workers), + ok. + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + +%% @doc initializes a process started by rexi_server. +-spec init_p({pid(), reference()}, {atom(), atom(), list()}) -> any(). +init_p(From, {M,F,A}) -> + put(rexi_from, From), + put(initial_call, {M,F,length(A)}), + try apply(M, F, A) catch _:Reason -> exit(Reason) end. + +%% internal + +add_worker(Worker, Tab) -> + ets:insert(Tab, Worker), Tab. 
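One detail worth noting: the workers table created in #st{} above uses {keypos, 2}, so each {Pid, Ref, From} entry is keyed on the monitor reference returned by spawn_monitor/3, not on the pid. That is why remove_worker/2 and find_worker/2 below take a Ref, and why the 'DOWN' handlers can go straight from the reference in the message to the worker entry. A standalone illustration of that keying (an editorial sketch, not part of this module):

    T = ets:new(workers, [private, {keypos, 2}]),
    Ref = make_ref(),
    true = ets:insert(T, {self(), Ref, {self(), make_ref()}}),
    [{_Pid, Ref, _From}] = ets:lookup(T, Ref).   % lookup is by reference, not pid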
+ +remove_worker(Ref, Tab) -> + ets:delete(Tab, Ref), Tab. + +find_worker(Ref, Tab) -> + case ets:lookup(Tab, Ref) of [] -> false; [Worker] -> Worker end. + +find_worker_from(Ref, Tab) -> + case ets:match_object(Tab, {'_', '_', {'_', Ref}}) of + [] -> + false; + [Worker] -> + Worker + end. + +notify_caller({Caller, Ref}, Reason) -> + Caller ! {Ref, {rexi_EXIT, Reason}}. diff --git a/apps/rexi/src/rexi_sup.erl b/apps/rexi/src/rexi_sup.erl new file mode 100644 index 00000000..3a518e7b --- /dev/null +++ b/apps/rexi/src/rexi_sup.erl @@ -0,0 +1,15 @@ +-module(rexi_sup). +-behaviour(supervisor). +-export([init/1]). + +-export([start_link/1]). + +-include_lib("eunit/include/eunit.hrl"). + +start_link(Args) -> + supervisor:start_link({local,?MODULE}, ?MODULE, Args). + +init([]) -> + Mod = rexi_server, + Spec = {Mod, {Mod,start_link,[]}, permanent, 100, worker, [Mod]}, + {ok, {{one_for_one, 3, 10}, [Spec]}}. |
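End-to-end, the rexi application above provides fire-and-forget RPC with explicit reply plumbing: rexi:cast/2,3 returns a reference and arranges for {CallerPid, Ref} to be stored in the remote worker's process dictionary, rexi:reply/1 sends {Ref, Reply} back, rexi_server turns abnormal worker exits into {Ref, {rexi_EXIT, Reason}}, and rexi_monitor watches the remote servers themselves. A hypothetical caller/worker pair using only the functions defined above; my_module, remote_work/1, Node and the 5000 ms timeout are placeholders, not part of this patch:

    %% Worker side: runs on the remote node under rexi_server; rexi:cast/3 put
    %% {CallerPid, Ref} into this process' dictionary, so rexi:reply/1 can
    %% route the answer back.
    remote_work(Arg) ->
        rexi:reply({done, Arg}).

    %% Caller side: cast the MFA, monitor the remote rexi_server, then wait
    %% for the reply, a rexi_EXIT, or a rexi_DOWN.
    call_remote(Node, Arg) ->
        Monitor = rexi_monitor:start([{rexi_server, Node}]),
        Ref = rexi:cast(Node, {my_module, remote_work, [Arg]}),
        Result = receive
            {Ref, {rexi_EXIT, Reason}} -> {error, Reason};
            {Ref, Reply} -> {ok, Reply};
            {rexi_DOWN, Monitor, _Server, Reason} -> {error, {server_down, Reason}}
        after 5000 ->
            rexi:kill(Node, Ref),
            {error, timeout}
        end,
        rexi_monitor:stop(Monitor),
        Result.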