summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Kocoloski <kocolosk@apache.org>2009-09-09 16:53:49 +0000
committerAdam Kocoloski <kocolosk@apache.org>2009-09-09 16:53:49 +0000
commit9b78e1555d73c888fedaa0b9d256abaeaadbe41a (patch)
tree921f620daaeb7116e5511db830e5ecb6d0498f98
parent85d1cbfc79eb0a04ec7f16624d6290920b4355ac (diff)
choice of uuid algos for better insert perf. Closes COUCHDB-465. Thanks rnewson, bitdiddle
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@813051 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--etc/couchdb/default.ini.tpl.in13
-rw-r--r--share/www/script/test/uuids.js41
-rw-r--r--src/couchdb/Makefile.am2
-rw-r--r--src/couchdb/couch_httpd_auth.erl4
-rw-r--r--src/couchdb/couch_httpd_db.erl4
-rw-r--r--src/couchdb/couch_httpd_misc_handlers.erl7
-rw-r--r--src/couchdb/couch_rep.erl2
-rw-r--r--src/couchdb/couch_server.erl2
-rw-r--r--src/couchdb/couch_util.erl7
-rwxr-xr-xtest/etap/040-util.t6
-rw-r--r--test/etap/041-uuid-gen-seq.ini2
-rw-r--r--test/etap/041-uuid-gen-utc.ini2
-rw-r--r--test/etap/041-uuid-gen.t118
-rwxr-xr-xtest/etap/111-replication-changes-feed.t4
-rwxr-xr-xtest/etap/112-replication-missing-revs.t4
15 files changed, 194 insertions, 24 deletions
diff --git a/etc/couchdb/default.ini.tpl.in b/etc/couchdb/default.ini.tpl.in
index 3260449b..cb7b93dd 100644
--- a/etc/couchdb/default.ini.tpl.in
+++ b/etc/couchdb/default.ini.tpl.in
@@ -53,6 +53,7 @@ query_servers={couch_query_servers, start_link, []}
httpd={couch_httpd, start_link, []}
stats_aggregator={couch_stats_aggregator, start, []}
stats_collector={couch_stats_collector, start, []}
+uuids={couch_uuids, start, []}
[httpd_global_handlers]
/ = {couch_httpd_misc_handlers, handle_welcome_req, <<"Welcome">>}
@@ -92,3 +93,15 @@ _show = {couch_httpd_show, handle_doc_show_req}
_list = {couch_httpd_show, handle_view_list_req}
_info = {couch_httpd_db, handle_design_info_req}
_update = {couch_httpd_show, handle_doc_update_req}
+
+[uuids]
+; Known algorithms:
+; random - 128 bits of random awesome
+; All awesome, all the time.
+; sequential - monotonically increasing ids with random increments
+; First 26 hex characters are random. Last 6 increment in
+; random amounts until an overflow occurs. On overflow, the
+; random prefix is regenerated and the process starts over.
+; utc_random - Time since Jan 1, 1970 UTC with microseconds
+; First 14 characters are the time in hex. Last 18 are random.
+algorithm = random
diff --git a/share/www/script/test/uuids.js b/share/www/script/test/uuids.js
index 3b7533be..50621678 100644
--- a/share/www/script/test/uuids.js
+++ b/share/www/script/test/uuids.js
@@ -59,4 +59,45 @@ couchTests.uuids = function(debug) {
// ensure we return a 405 on POST
xhr = CouchDB.request("POST", "/_uuids?count=1000");
T(xhr.status == 405);
+
+ // Test sequential uuids
+ var seq_testfun = function() {
+ xhr = CouchDB.request("GET", "/_uuids?count=1000");
+ T(xhr.status == 200);
+ result = JSON.parse(xhr.responseText);
+ for(var i = 1; i < result.uuids.length; i++) {
+ T(result.uuids[i].length == 32);
+ T(result.uuids[i-1] < result.uuids[i], "Sequential uuids are ordered.");
+ }
+ };
+
+ run_on_modified_server([{
+ "section": "uuids",
+ "key": "algorithm",
+ "value": "sequential",
+ }],
+ seq_testfun
+ );
+
+ // Test utc_random uuids
+ var utc_testfun = function() {
+ xhr = CouchDB.request("GET", "/_uuids?count=1000");
+ T(xhr.status == 200);
+ result = JSON.parse(xhr.responseText);
+ for(var i = 1; i < result.uuids.length; i++) {
+ T(result.uuids[i].length == 32);
+ var u1 = result.uuids[i-1].substr(0, 13);
+ var u2 = result.uuids[i].substr(0, 13);
+ T(u1 < u2, "UTC uuids are roughly ordered.");
+ }
+ };
+
+ run_on_modified_server([{
+ "section": "uuids",
+ "key": "algorithm",
+ "value": "utc_random"
+ }],
+ utc_testfun
+ );
+
};
diff --git a/src/couchdb/Makefile.am b/src/couchdb/Makefile.am
index 017dc169..97d8a3b0 100644
--- a/src/couchdb/Makefile.am
+++ b/src/couchdb/Makefile.am
@@ -101,6 +101,7 @@ source_files = \
couch_stream.erl \
couch_task_status.erl \
couch_util.erl \
+ couch_uuids.erl \
couch_view.erl \
couch_view_compactor.erl \
couch_view_updater.erl \
@@ -157,6 +158,7 @@ compiled_files = \
couch_stream.beam \
couch_task_status.beam \
couch_util.beam \
+ couch_uuids.beam \
couch_view.beam \
couch_view_compactor.beam \
couch_view_updater.beam \
diff --git a/src/couchdb/couch_httpd_auth.erl b/src/couchdb/couch_httpd_auth.erl
index c77376b2..1c1ad0a9 100644
--- a/src/couchdb/couch_httpd_auth.erl
+++ b/src/couchdb/couch_httpd_auth.erl
@@ -381,9 +381,9 @@ create_user_req(#httpd{method='POST', mochi_req=MochiReq}=Req, Db) ->
[?l2b(R) || R <- Roles]
end,
- UserSalt = couch_util:new_uuid(),
+ UserSalt = couch_uuids:random(),
PasswordHash = hash_password(Password, UserSalt),
- DocId = couch_util:new_uuid(),
+ DocId = couch_uuids:random(),
{ok, UserDoc} = user_doc(DocId, UserName, UserSalt, PasswordHash, Email, Active, Roles1),
{ok, _Rev} = couch_db:update_doc(Db, UserDoc, []),
?LOG_DEBUG("User ~s (~s) with password, ~s created.", [?b2l(UserName), ?b2l(DocId), ?b2l(Password)]),
diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl
index 1a18c4ba..0f3835ae 100644
--- a/src/couchdb/couch_httpd_db.erl
+++ b/src/couchdb/couch_httpd_db.erl
@@ -308,7 +308,7 @@ db_req(#httpd{method='POST',path_parts=[DbName]}=Req, Db) ->
Doc = couch_doc:from_json_obj(couch_httpd:json_body(Req)),
Doc2 = case Doc#doc.id of
<<"">> ->
- Doc#doc{id=couch_util:new_uuid(), revs={0, []}};
+ Doc#doc{id=couch_uuids:new(), revs={0, []}};
_ ->
Doc
end,
@@ -385,7 +385,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) ->
Doc = couch_doc:from_json_obj(JsonObj),
validate_attachment_names(Doc),
Id = case Doc#doc.id of
- <<>> -> couch_util:new_uuid();
+ <<>> -> couch_uuids:new();
Id0 -> Id0
end,
case proplists:get_value(<<"_rev">>, ObjProps) of
diff --git a/src/couchdb/couch_httpd_misc_handlers.erl b/src/couchdb/couch_httpd_misc_handlers.erl
index c3c5b37a..7f5573ff 100644
--- a/src/couchdb/couch_httpd_misc_handlers.erl
+++ b/src/couchdb/couch_httpd_misc_handlers.erl
@@ -108,11 +108,10 @@ handle_uuids_req(#httpd{method='GET'}=Req) ->
Count = list_to_integer(couch_httpd:qs_value(Req, "count", "1")),
CacheBustingHeaders = [{"Date", httpd_util:rfc1123_date()},
{"Cache-Control", "no-cache"},
- {"Expires", "Fri, 01 Jan 1990 00:00:00 GMT"}, % Past date, ON PURPOSE!
+ % Past date, ON PURPOSE!
+ {"Expires", "Fri, 01 Jan 1990 00:00:00 GMT"},
{"Pragma", "no-cache"}],
- % generate the uuids
- UUIDs = [ couch_util:new_uuid() || _ <- lists:seq(1,Count)],
- % send a JSON response
+ UUIDs = [couch_uuids:new() || _ <- lists:seq(1, Count)],
send_json(Req, 200, CacheBustingHeaders, {[{<<"uuids">>, UUIDs}]});
handle_uuids_req(Req) ->
send_method_not_allowed(Req, "GET").
diff --git a/src/couchdb/couch_rep.erl b/src/couchdb/couch_rep.erl
index bcd1807c..c2f691eb 100644
--- a/src/couchdb/couch_rep.erl
+++ b/src/couchdb/couch_rep.erl
@@ -470,7 +470,7 @@ do_checkpoint(State) ->
"replication is redone and documents reexamined.", []),
StartSeqNum
end,
- SessionId = couch_util:new_uuid(),
+ SessionId = couch_uuids:random(),
NewHistoryEntry = {[
{<<"session_id">>, SessionId},
{<<"start_time">>, list_to_binary(ReplicationStartTime)},
diff --git a/src/couchdb/couch_server.erl b/src/couchdb/couch_server.erl
index f407acf0..8fab42f1 100644
--- a/src/couchdb/couch_server.erl
+++ b/src/couchdb/couch_server.erl
@@ -99,7 +99,7 @@ hash_admin_passwords() ->
fun({_User, "-hashed-" ++ _}) ->
ok; % already hashed
({User, ClearPassword}) ->
- Salt = ?b2l(couch_util:new_uuid()),
+ Salt = ?b2l(couch_uuids:random()),
Hashed = couch_util:to_hex(crypto:sha(ClearPassword ++ Salt)),
couch_config:set("admins", User, "-hashed-" ++ Hashed ++ "," ++ Salt)
end, couch_config:get("admins")).
diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl
index d967b433..088fde85 100644
--- a/src/couchdb/couch_util.erl
+++ b/src/couchdb/couch_util.erl
@@ -14,7 +14,7 @@
-export([start_driver/1,terminate_linked/1]).
-export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([new_uuid/0, rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2, collate/2, collate/3]).
-export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1]).
-export([encodeBase64/1, decodeBase64/1, encodeBase64Url/1, decodeBase64Url/1,
to_hex/1,parse_term/1, dict_find/3]).
@@ -55,9 +55,6 @@ terminate_linked(Reason) ->
ok.
-new_uuid() ->
- list_to_binary(to_hex(crypto:rand_bytes(16))).
-
to_hex([]) ->
[];
to_hex(Bin) when is_binary(Bin) ->
@@ -394,4 +391,4 @@ url_encode([H|T]) ->
end
end;
url_encode([]) ->
- []. \ No newline at end of file
+ [].
diff --git a/test/etap/040-util.t b/test/etap/040-util.t
index c23dc97f..35fae20d 100755
--- a/test/etap/040-util.t
+++ b/test/etap/040-util.t
@@ -17,7 +17,7 @@ main(_) ->
code:add_pathz("src/couchdb"),
application:start(crypto),
- etap:plan(11),
+ etap:plan(10),
case (catch test()) of
ok ->
etap:end_tests();
@@ -47,10 +47,6 @@ test() ->
etap:ok(not is_process_alive(Pid), "why wont this work?")
end,
- % new_uuid
- etap:isnt(couch_util:new_uuid(), couch_util:new_uuid(),
- "A guid ought to be unique."),
-
% implode
etap:is([1, 38, 2, 38, 3], couch_util:implode([1,2,3],"&"),
"use & as separator in list."),
diff --git a/test/etap/041-uuid-gen-seq.ini b/test/etap/041-uuid-gen-seq.ini
new file mode 100644
index 00000000..005048cc
--- /dev/null
+++ b/test/etap/041-uuid-gen-seq.ini
@@ -0,0 +1,2 @@
+[uuids]
+algorithm = sequential
diff --git a/test/etap/041-uuid-gen-utc.ini b/test/etap/041-uuid-gen-utc.ini
new file mode 100644
index 00000000..2d1ce1b1
--- /dev/null
+++ b/test/etap/041-uuid-gen-utc.ini
@@ -0,0 +1,2 @@
+[uuids]
+algorithm = utc_random
diff --git a/test/etap/041-uuid-gen.t b/test/etap/041-uuid-gen.t
new file mode 100644
index 00000000..45cc7f59
--- /dev/null
+++ b/test/etap/041-uuid-gen.t
@@ -0,0 +1,118 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+default_config() ->
+ "etc/couchdb/default_dev.ini".
+
+seq_alg_config() ->
+ "test/etap/041-uuid-gen-seq.ini".
+
+utc_alg_config() ->
+ "test/etap/041-uuid-gen-utc.ini".
+
+% Run tests and wait for the gen_servers to shutdown
+run_test(IniFiles, Test) ->
+ {ok, Pid} = couch_config:start_link(IniFiles),
+ erlang:monitor(process, Pid),
+ couch_uuids:start(),
+ Test(),
+ couch_uuids:stop(),
+ couch_config:stop(),
+ receive
+ {'DOWN', _, _, Pid, _} -> ok;
+ _Other -> etap:diag("OTHER: ~p~n", [_Other])
+ after
+ 1000 -> throw({timeout_error, config_stop})
+ end.
+
+main(_) ->
+ code:add_pathz("src/couchdb"),
+ application:start(crypto),
+ etap:plan(unknown),
+
+ case (catch test()) of
+ ok ->
+ etap:end_tests();
+ Other ->
+ etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
+ etap:bail(Other)
+ end,
+ ok.
+
+test() ->
+
+ TestUnique = fun() ->
+ etap:is(
+ test_unique(10000, couch_uuids:new()),
+ true,
+ "Can generate 10K unique IDs"
+ )
+ end,
+ run_test([default_config()], TestUnique),
+ run_test([default_config(), seq_alg_config()], TestUnique),
+ run_test([default_config(), utc_alg_config()], TestUnique),
+
+ TestMonotonic = fun () ->
+ etap:is(
+ couch_uuids:new() < couch_uuids:new(),
+ true,
+ "should produce monotonically increasing ids"
+ )
+ end,
+ run_test([default_config(), seq_alg_config()], TestMonotonic),
+ run_test([default_config(), utc_alg_config()], TestMonotonic),
+
+ % Pretty sure that the average of a uniform distribution is the
+ % midpoint of the range. Thus, to exceed a threshold, we need
+ % approximately Total / (Range/2 + RangeMin) samples.
+ %
+ % In our case this works out to be 8194. (0xFFF000 / 0x7FF)
+ % These tests just fudge the limits for a good generator at 25%
+ % in either direction. Technically it should be possible to generate
+ % bounds that will show if your random number generator is not
+ % sufficiently random but I hated statistics in school.
+ TestRollOver = fun() ->
+ UUID = binary_to_list(couch_uuids:new()),
+ Prefix = element(1, lists:split(26, UUID)),
+ N = gen_until_pref_change(Prefix,0),
+ etap:diag("N is: ~p~n",[N]),
+ etap:is(
+ N >= 5000 andalso N =< 11000,
+ true,
+ "should roll over every so often."
+ )
+ end,
+ run_test([default_config(), seq_alg_config()], TestRollOver).
+
+test_unique(0, _) ->
+ true;
+test_unique(N, UUID) ->
+ case couch_uuids:new() of
+ UUID ->
+ etap:diag("N: ~p~n", [N]),
+ false;
+ Else -> test_unique(N-1, Else)
+ end.
+
+get_prefix(UUID) ->
+ element(1, lists:split(26, binary_to_list(UUID))).
+
+gen_until_pref_change(_, Count) when Count > 8251 ->
+ Count;
+gen_until_pref_change(Prefix, N) ->
+ case get_prefix(couch_uuids:new()) of
+ Prefix -> gen_until_pref_change(Prefix, N+1);
+ _ -> N
+ end.
diff --git a/test/etap/111-replication-changes-feed.t b/test/etap/111-replication-changes-feed.t
index dcbd500e..4045a28d 100755
--- a/test/etap/111-replication-changes-feed.t
+++ b/test/etap/111-replication-changes-feed.t
@@ -195,7 +195,7 @@ get_all_changes(Pid, Acc) ->
end.
generate_change() ->
- generate_change(couch_util:new_uuid()).
+ generate_change(couch_uuids:random()).
generate_change(Id) ->
generate_change(Id, {[]}).
@@ -212,7 +212,7 @@ generate_change(Id, EJson) ->
]}.
generate_conflict() ->
- Id = couch_util:new_uuid(),
+ Id = couch_uuids:random(),
Db = get_db(),
Doc1 = (couch_doc:from_json_obj({[<<"foo">>, <<"bar">>]}))#doc{id = Id},
Doc2 = (couch_doc:from_json_obj({[<<"foo">>, <<"baz">>]}))#doc{id = Id},
diff --git a/test/etap/112-replication-missing-revs.t b/test/etap/112-replication-missing-revs.t
index 07b9a8de..e42c185e 100755
--- a/test/etap/112-replication-missing-revs.t
+++ b/test/etap/112-replication-missing-revs.t
@@ -112,7 +112,7 @@ test_multiple_changes(SrcType, TgtType) ->
test_changes_not_missing(SrcType, TgtType) ->
%% put identical changes on source and target
- Id = couch_util:new_uuid(),
+ Id = couch_uuids:random(),
{Id, _Seq, [Rev]} = Expect = generate_change(Id, {[]}, get_db(source)),
{Id, _, [Rev]} = generate_change(Id, {[]}, get_db(target)),
@@ -131,7 +131,7 @@ test_changes_not_missing(SrcType, TgtType) ->
).
generate_change() ->
- generate_change(couch_util:new_uuid()).
+ generate_change(couch_uuids:random()).
generate_change(Id) ->
generate_change(Id, {[]}).