From 9b78e1555d73c888fedaa0b9d256abaeaadbe41a Mon Sep 17 00:00:00 2001 From: Adam Kocoloski Date: Wed, 9 Sep 2009 16:53:49 +0000 Subject: choice of uuid algos for better insert perf. Closes COUCHDB-465. Thanks rnewson, bitdiddle git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@813051 13f79535-47bb-0310-9956-ffa450edef68 --- etc/couchdb/default.ini.tpl.in | 13 ++++ share/www/script/test/uuids.js | 41 +++++++++++ src/couchdb/Makefile.am | 2 + src/couchdb/couch_httpd_auth.erl | 4 +- src/couchdb/couch_httpd_db.erl | 4 +- src/couchdb/couch_httpd_misc_handlers.erl | 7 +- src/couchdb/couch_rep.erl | 2 +- src/couchdb/couch_server.erl | 2 +- src/couchdb/couch_util.erl | 7 +- test/etap/040-util.t | 6 +- test/etap/041-uuid-gen-seq.ini | 2 + test/etap/041-uuid-gen-utc.ini | 2 + test/etap/041-uuid-gen.t | 118 ++++++++++++++++++++++++++++++ test/etap/111-replication-changes-feed.t | 4 +- test/etap/112-replication-missing-revs.t | 4 +- 15 files changed, 194 insertions(+), 24 deletions(-) create mode 100644 test/etap/041-uuid-gen-seq.ini create mode 100644 test/etap/041-uuid-gen-utc.ini create mode 100644 test/etap/041-uuid-gen.t diff --git a/etc/couchdb/default.ini.tpl.in b/etc/couchdb/default.ini.tpl.in index 3260449b..cb7b93dd 100644 --- a/etc/couchdb/default.ini.tpl.in +++ b/etc/couchdb/default.ini.tpl.in @@ -53,6 +53,7 @@ query_servers={couch_query_servers, start_link, []} httpd={couch_httpd, start_link, []} stats_aggregator={couch_stats_aggregator, start, []} stats_collector={couch_stats_collector, start, []} +uuids={couch_uuids, start, []} [httpd_global_handlers] / = {couch_httpd_misc_handlers, handle_welcome_req, <<"Welcome">>} @@ -92,3 +93,15 @@ _show = {couch_httpd_show, handle_doc_show_req} _list = {couch_httpd_show, handle_view_list_req} _info = {couch_httpd_db, handle_design_info_req} _update = {couch_httpd_show, handle_doc_update_req} + +[uuids] +; Known algorithms: +; random - 128 bits of random awesome +; All awesome, all the time. +; sequential - monotonically increasing ids with random increments +; First 26 hex characters are random. Last 6 increment in +; random amounts until an overflow occurs. On overflow, the +; random prefix is regenerated and the process starts over. +; utc_random - Time since Jan 1, 1970 UTC with microseconds +; First 14 characters are the time in hex. Last 18 are random. +algorithm = random diff --git a/share/www/script/test/uuids.js b/share/www/script/test/uuids.js index 3b7533be..50621678 100644 --- a/share/www/script/test/uuids.js +++ b/share/www/script/test/uuids.js @@ -59,4 +59,45 @@ couchTests.uuids = function(debug) { // ensure we return a 405 on POST xhr = CouchDB.request("POST", "/_uuids?count=1000"); T(xhr.status == 405); + + // Test sequential uuids + var seq_testfun = function() { + xhr = CouchDB.request("GET", "/_uuids?count=1000"); + T(xhr.status == 200); + result = JSON.parse(xhr.responseText); + for(var i = 1; i < result.uuids.length; i++) { + T(result.uuids[i].length == 32); + T(result.uuids[i-1] < result.uuids[i], "Sequential uuids are ordered."); + } + }; + + run_on_modified_server([{ + "section": "uuids", + "key": "algorithm", + "value": "sequential", + }], + seq_testfun + ); + + // Test utc_random uuids + var utc_testfun = function() { + xhr = CouchDB.request("GET", "/_uuids?count=1000"); + T(xhr.status == 200); + result = JSON.parse(xhr.responseText); + for(var i = 1; i < result.uuids.length; i++) { + T(result.uuids[i].length == 32); + var u1 = result.uuids[i-1].substr(0, 13); + var u2 = result.uuids[i].substr(0, 13); + T(u1 < u2, "UTC uuids are roughly ordered."); + } + }; + + run_on_modified_server([{ + "section": "uuids", + "key": "algorithm", + "value": "utc_random" + }], + utc_testfun + ); + }; diff --git a/src/couchdb/Makefile.am b/src/couchdb/Makefile.am index 017dc169..97d8a3b0 100644 --- a/src/couchdb/Makefile.am +++ b/src/couchdb/Makefile.am @@ -101,6 +101,7 @@ source_files = \ couch_stream.erl \ couch_task_status.erl \ couch_util.erl \ + couch_uuids.erl \ couch_view.erl \ couch_view_compactor.erl \ couch_view_updater.erl \ @@ -157,6 +158,7 @@ compiled_files = \ couch_stream.beam \ couch_task_status.beam \ couch_util.beam \ + couch_uuids.beam \ couch_view.beam \ couch_view_compactor.beam \ couch_view_updater.beam \ diff --git a/src/couchdb/couch_httpd_auth.erl b/src/couchdb/couch_httpd_auth.erl index c77376b2..1c1ad0a9 100644 --- a/src/couchdb/couch_httpd_auth.erl +++ b/src/couchdb/couch_httpd_auth.erl @@ -381,9 +381,9 @@ create_user_req(#httpd{method='POST', mochi_req=MochiReq}=Req, Db) -> [?l2b(R) || R <- Roles] end, - UserSalt = couch_util:new_uuid(), + UserSalt = couch_uuids:random(), PasswordHash = hash_password(Password, UserSalt), - DocId = couch_util:new_uuid(), + DocId = couch_uuids:random(), {ok, UserDoc} = user_doc(DocId, UserName, UserSalt, PasswordHash, Email, Active, Roles1), {ok, _Rev} = couch_db:update_doc(Db, UserDoc, []), ?LOG_DEBUG("User ~s (~s) with password, ~s created.", [?b2l(UserName), ?b2l(DocId), ?b2l(Password)]), diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl index 1a18c4ba..0f3835ae 100644 --- a/src/couchdb/couch_httpd_db.erl +++ b/src/couchdb/couch_httpd_db.erl @@ -308,7 +308,7 @@ db_req(#httpd{method='POST',path_parts=[DbName]}=Req, Db) -> Doc = couch_doc:from_json_obj(couch_httpd:json_body(Req)), Doc2 = case Doc#doc.id of <<"">> -> - Doc#doc{id=couch_util:new_uuid(), revs={0, []}}; + Doc#doc{id=couch_uuids:new(), revs={0, []}}; _ -> Doc end, @@ -385,7 +385,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> Doc = couch_doc:from_json_obj(JsonObj), validate_attachment_names(Doc), Id = case Doc#doc.id of - <<>> -> couch_util:new_uuid(); + <<>> -> couch_uuids:new(); Id0 -> Id0 end, case proplists:get_value(<<"_rev">>, ObjProps) of diff --git a/src/couchdb/couch_httpd_misc_handlers.erl b/src/couchdb/couch_httpd_misc_handlers.erl index c3c5b37a..7f5573ff 100644 --- a/src/couchdb/couch_httpd_misc_handlers.erl +++ b/src/couchdb/couch_httpd_misc_handlers.erl @@ -108,11 +108,10 @@ handle_uuids_req(#httpd{method='GET'}=Req) -> Count = list_to_integer(couch_httpd:qs_value(Req, "count", "1")), CacheBustingHeaders = [{"Date", httpd_util:rfc1123_date()}, {"Cache-Control", "no-cache"}, - {"Expires", "Fri, 01 Jan 1990 00:00:00 GMT"}, % Past date, ON PURPOSE! + % Past date, ON PURPOSE! + {"Expires", "Fri, 01 Jan 1990 00:00:00 GMT"}, {"Pragma", "no-cache"}], - % generate the uuids - UUIDs = [ couch_util:new_uuid() || _ <- lists:seq(1,Count)], - % send a JSON response + UUIDs = [couch_uuids:new() || _ <- lists:seq(1, Count)], send_json(Req, 200, CacheBustingHeaders, {[{<<"uuids">>, UUIDs}]}); handle_uuids_req(Req) -> send_method_not_allowed(Req, "GET"). diff --git a/src/couchdb/couch_rep.erl b/src/couchdb/couch_rep.erl index bcd1807c..c2f691eb 100644 --- a/src/couchdb/couch_rep.erl +++ b/src/couchdb/couch_rep.erl @@ -470,7 +470,7 @@ do_checkpoint(State) -> "replication is redone and documents reexamined.", []), StartSeqNum end, - SessionId = couch_util:new_uuid(), + SessionId = couch_uuids:random(), NewHistoryEntry = {[ {<<"session_id">>, SessionId}, {<<"start_time">>, list_to_binary(ReplicationStartTime)}, diff --git a/src/couchdb/couch_server.erl b/src/couchdb/couch_server.erl index f407acf0..8fab42f1 100644 --- a/src/couchdb/couch_server.erl +++ b/src/couchdb/couch_server.erl @@ -99,7 +99,7 @@ hash_admin_passwords() -> fun({_User, "-hashed-" ++ _}) -> ok; % already hashed ({User, ClearPassword}) -> - Salt = ?b2l(couch_util:new_uuid()), + Salt = ?b2l(couch_uuids:random()), Hashed = couch_util:to_hex(crypto:sha(ClearPassword ++ Salt)), couch_config:set("admins", User, "-hashed-" ++ Hashed ++ "," ++ Salt) end, couch_config:get("admins")). diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl index d967b433..088fde85 100644 --- a/src/couchdb/couch_util.erl +++ b/src/couchdb/couch_util.erl @@ -14,7 +14,7 @@ -export([start_driver/1,terminate_linked/1]). -export([should_flush/0, should_flush/1, to_existing_atom/1]). --export([new_uuid/0, rand32/0, implode/2, collate/2, collate/3]). +-export([rand32/0, implode/2, collate/2, collate/3]). -export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1]). -export([encodeBase64/1, decodeBase64/1, encodeBase64Url/1, decodeBase64Url/1, to_hex/1,parse_term/1, dict_find/3]). @@ -55,9 +55,6 @@ terminate_linked(Reason) -> ok. -new_uuid() -> - list_to_binary(to_hex(crypto:rand_bytes(16))). - to_hex([]) -> []; to_hex(Bin) when is_binary(Bin) -> @@ -394,4 +391,4 @@ url_encode([H|T]) -> end end; url_encode([]) -> - []. \ No newline at end of file + []. diff --git a/test/etap/040-util.t b/test/etap/040-util.t index c23dc97f..35fae20d 100755 --- a/test/etap/040-util.t +++ b/test/etap/040-util.t @@ -17,7 +17,7 @@ main(_) -> code:add_pathz("src/couchdb"), application:start(crypto), - etap:plan(11), + etap:plan(10), case (catch test()) of ok -> etap:end_tests(); @@ -47,10 +47,6 @@ test() -> etap:ok(not is_process_alive(Pid), "why wont this work?") end, - % new_uuid - etap:isnt(couch_util:new_uuid(), couch_util:new_uuid(), - "A guid ought to be unique."), - % implode etap:is([1, 38, 2, 38, 3], couch_util:implode([1,2,3],"&"), "use & as separator in list."), diff --git a/test/etap/041-uuid-gen-seq.ini b/test/etap/041-uuid-gen-seq.ini new file mode 100644 index 00000000..005048cc --- /dev/null +++ b/test/etap/041-uuid-gen-seq.ini @@ -0,0 +1,2 @@ +[uuids] +algorithm = sequential diff --git a/test/etap/041-uuid-gen-utc.ini b/test/etap/041-uuid-gen-utc.ini new file mode 100644 index 00000000..2d1ce1b1 --- /dev/null +++ b/test/etap/041-uuid-gen-utc.ini @@ -0,0 +1,2 @@ +[uuids] +algorithm = utc_random diff --git a/test/etap/041-uuid-gen.t b/test/etap/041-uuid-gen.t new file mode 100644 index 00000000..45cc7f59 --- /dev/null +++ b/test/etap/041-uuid-gen.t @@ -0,0 +1,118 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +default_config() -> + "etc/couchdb/default_dev.ini". + +seq_alg_config() -> + "test/etap/041-uuid-gen-seq.ini". + +utc_alg_config() -> + "test/etap/041-uuid-gen-utc.ini". + +% Run tests and wait for the gen_servers to shutdown +run_test(IniFiles, Test) -> + {ok, Pid} = couch_config:start_link(IniFiles), + erlang:monitor(process, Pid), + couch_uuids:start(), + Test(), + couch_uuids:stop(), + couch_config:stop(), + receive + {'DOWN', _, _, Pid, _} -> ok; + _Other -> etap:diag("OTHER: ~p~n", [_Other]) + after + 1000 -> throw({timeout_error, config_stop}) + end. + +main(_) -> + code:add_pathz("src/couchdb"), + application:start(crypto), + etap:plan(unknown), + + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + + TestUnique = fun() -> + etap:is( + test_unique(10000, couch_uuids:new()), + true, + "Can generate 10K unique IDs" + ) + end, + run_test([default_config()], TestUnique), + run_test([default_config(), seq_alg_config()], TestUnique), + run_test([default_config(), utc_alg_config()], TestUnique), + + TestMonotonic = fun () -> + etap:is( + couch_uuids:new() < couch_uuids:new(), + true, + "should produce monotonically increasing ids" + ) + end, + run_test([default_config(), seq_alg_config()], TestMonotonic), + run_test([default_config(), utc_alg_config()], TestMonotonic), + + % Pretty sure that the average of a uniform distribution is the + % midpoint of the range. Thus, to exceed a threshold, we need + % approximately Total / (Range/2 + RangeMin) samples. + % + % In our case this works out to be 8194. (0xFFF000 / 0x7FF) + % These tests just fudge the limits for a good generator at 25% + % in either direction. Technically it should be possible to generate + % bounds that will show if your random number generator is not + % sufficiently random but I hated statistics in school. + TestRollOver = fun() -> + UUID = binary_to_list(couch_uuids:new()), + Prefix = element(1, lists:split(26, UUID)), + N = gen_until_pref_change(Prefix,0), + etap:diag("N is: ~p~n",[N]), + etap:is( + N >= 5000 andalso N =< 11000, + true, + "should roll over every so often." + ) + end, + run_test([default_config(), seq_alg_config()], TestRollOver). + +test_unique(0, _) -> + true; +test_unique(N, UUID) -> + case couch_uuids:new() of + UUID -> + etap:diag("N: ~p~n", [N]), + false; + Else -> test_unique(N-1, Else) + end. + +get_prefix(UUID) -> + element(1, lists:split(26, binary_to_list(UUID))). + +gen_until_pref_change(_, Count) when Count > 8251 -> + Count; +gen_until_pref_change(Prefix, N) -> + case get_prefix(couch_uuids:new()) of + Prefix -> gen_until_pref_change(Prefix, N+1); + _ -> N + end. diff --git a/test/etap/111-replication-changes-feed.t b/test/etap/111-replication-changes-feed.t index dcbd500e..4045a28d 100755 --- a/test/etap/111-replication-changes-feed.t +++ b/test/etap/111-replication-changes-feed.t @@ -195,7 +195,7 @@ get_all_changes(Pid, Acc) -> end. generate_change() -> - generate_change(couch_util:new_uuid()). + generate_change(couch_uuids:random()). generate_change(Id) -> generate_change(Id, {[]}). @@ -212,7 +212,7 @@ generate_change(Id, EJson) -> ]}. generate_conflict() -> - Id = couch_util:new_uuid(), + Id = couch_uuids:random(), Db = get_db(), Doc1 = (couch_doc:from_json_obj({[<<"foo">>, <<"bar">>]}))#doc{id = Id}, Doc2 = (couch_doc:from_json_obj({[<<"foo">>, <<"baz">>]}))#doc{id = Id}, diff --git a/test/etap/112-replication-missing-revs.t b/test/etap/112-replication-missing-revs.t index 07b9a8de..e42c185e 100755 --- a/test/etap/112-replication-missing-revs.t +++ b/test/etap/112-replication-missing-revs.t @@ -112,7 +112,7 @@ test_multiple_changes(SrcType, TgtType) -> test_changes_not_missing(SrcType, TgtType) -> %% put identical changes on source and target - Id = couch_util:new_uuid(), + Id = couch_uuids:random(), {Id, _Seq, [Rev]} = Expect = generate_change(Id, {[]}, get_db(source)), {Id, _, [Rev]} = generate_change(Id, {[]}, get_db(target)), @@ -131,7 +131,7 @@ test_changes_not_missing(SrcType, TgtType) -> ). generate_change() -> - generate_change(couch_util:new_uuid()). + generate_change(couch_uuids:random()). generate_change(Id) -> generate_change(Id, {[]}). -- cgit v1.2.3