summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Christopher Anderson <jchris@apache.org> 2009-05-12 21:38:43 +0000
committer John Christopher Anderson <jchris@apache.org> 2009-05-12 21:38:43 +0000
commit c596db1079f5a062a89c4c70dbb1e2929f7b0d84 (patch)
tree a275c6cfc9d8eeae6928828ee02330ce1eef6353
parent 9418d9814779dd27e15036fc6b9d9355a4b70578 (diff)
You can now specify either "_sum" or "_count" as the reduce source code to select one of two built-in Erlang reduce functions. The framework is ready for YOU to add more built-in reductions. The short list of candidates includes _avg, _stddev, _min, and _max. We could also have a single function that computes all of those at once, but it might not be as fun to use.
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@774101 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- share/www/script/couch_tests.js 1
-rw-r--r-- share/www/script/test/reduce_builtin.js 119
-rw-r--r-- src/couchdb/couch_query_servers.erl 52
3 files changed, 167 insertions, 5 deletions
diff --git a/share/www/script/couch_tests.js b/share/www/script/couch_tests.js
index 62e877d8..6528835d 100644
--- a/share/www/script/couch_tests.js
+++ b/share/www/script/couch_tests.js
@@ -37,6 +37,7 @@ loadTest("bulk_docs.js");
loadTest("lots_of_docs.js");
loadTest("reduce.js");
loadTest("reduce_false.js");
+loadTest("reduce_builtin.js");
loadTest("design_options.js");
loadTest("multiple_rows.js");
loadTest("large_docs.js");
diff --git a/share/www/script/test/reduce_builtin.js b/share/www/script/test/reduce_builtin.js
new file mode 100644
index 00000000..c3d00339
--- /dev/null
+++ b/share/www/script/test/reduce_builtin.js
@@ -0,0 +1,119 @@
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+// Exercises the built-in reduce functions "_sum" and "_count", which are
+// selected by passing their name (instead of JavaScript source) as the
+// reduce argument of db.query().
+couchTests.reduce_builtin = function(debug) {
+  var db = new CouchDB("test_suite_db");
+  db.deleteDb();
+  db.createDb();
+  if (debug) debugger;
+
+  var numDocs = 500;
+  var docs = makeDocs(1, numDocs + 1);
+  db.bulkSave(docs);
+
+  // closed form for 1 + 2 + ... + N
+  var summate = function(N) {return (N+1)*N/2;};
+
+  // this is the same test as the reduce.js test
+  // only we'll let CouchDB run reduce in Erlang
+  // (every doc is emitted twice, so each expected sum/count below is doubled)
+  var map = function (doc) {
+    emit(doc.integer, doc.integer);
+    emit(doc.integer, doc.integer)};
+
+  var result = db.query(map, "_sum");
+  T(result.rows[0].value == 2*summate(numDocs));
+  result = db.query(map, "_count");
+  T(result.rows[0].value == 2*numDocs);
+
+  result = db.query(map, "_sum", {startkey: 4, endkey: 4});
+  T(result.rows[0].value == 8);
+  result = db.query(map, "_count", {startkey: 4, endkey: 4});
+  T(result.rows[0].value == 2);
+
+  result = db.query(map, "_sum", {startkey: 4, endkey: 5});
+  T(result.rows[0].value == 18);
+  result = db.query(map, "_count", {startkey: 4, endkey: 5});
+  T(result.rows[0].value == 4);
+
+  result = db.query(map, "_sum", {startkey: 4, endkey: 6});
+  T(result.rows[0].value == 30);
+  result = db.query(map, "_count", {startkey: 4, endkey: 6});
+  T(result.rows[0].value == 6);
+
+  // grouped by key: each key k appears twice with value k, so the sum is 2*k
+  result = db.query(map, "_sum", {group:true, limit:3});
+  T(result.rows[0].value == 2);
+  T(result.rows[1].value == 4);
+  T(result.rows[2].value == 6);
+
+  // progressively narrower key ranges [i, numDocs - i]
+  for(var i=1; i<numDocs/2; i+=30) {
+    result = db.query(map, "_sum", {startkey: i, endkey: numDocs - i});
+    T(result.rows[0].value == 2*(summate(numDocs-i) - summate(i-1)));
+  }
+
+  db.deleteDb();
+  db.createDb();
+
+  // with emitted values being 1, count should be the same as sum
+  var builtins = ["_sum", "_count"];
+  map = function (doc) {emit(doc.keys, 1)};
+  var results;
+
+  for(i=1; i <= 5; i++) {
+
+    // each pass of the outer loop adds 10 batches of the same 11 docs,
+    // so every expected value below scales linearly with i
+    for(var j=0; j < 10; j++) {
+      // these docs are in the order of the keys collation, for clarity
+      docs = [];
+      docs.push({keys:["a"]});
+      docs.push({keys:["a"]});
+      docs.push({keys:["a", "b"]});
+      docs.push({keys:["a", "b"]});
+      docs.push({keys:["a", "b", "c"]});
+      docs.push({keys:["a", "b", "d"]});
+      docs.push({keys:["a", "c", "d"]});
+      docs.push({keys:["d"]});
+      docs.push({keys:["d", "a"]});
+      docs.push({keys:["d", "b"]});
+      docs.push({keys:["d", "c"]});
+      db.bulkSave(docs);
+      T(db.info().doc_count == ((i - 1) * 10 * 11) + ((j + 1) * 11));
+    }
+
+    for (var b=0; b < builtins.length; b++) {
+      var fun = builtins[b];
+      results = db.query(map, fun, {group:true});
+
+      //group by exact key match
+      T(equals(results.rows[0], {key:["a"],value:20*i}));
+      T(equals(results.rows[1], {key:["a","b"],value:20*i}));
+      T(equals(results.rows[2], {key:["a", "b", "c"],value:10*i}));
+      T(equals(results.rows[3], {key:["a", "b", "d"],value:10*i}));
+
+      // test to make sure group reduce and limit params provide valid json
+      results = db.query(map, fun, {group: true, limit: 2});
+      T(equals(results.rows[0], {key: ["a"], value: 20*i}));
+      T(equals(results.rows.length, 2));
+
+      //group by the first element in the key array
+      results = db.query(map, fun, {group_level:1});
+      T(equals(results.rows[0], {key:["a"],value:70*i}));
+      T(equals(results.rows[1], {key:["d"],value:40*i}));
+
+      //group by the first 2 elements in the key array
+      results = db.query(map, fun, {group_level:2});
+      T(equals(results.rows[0], {key:["a"],value:20*i}));
+      T(equals(results.rows[1], {key:["a","b"],value:40*i}));
+      T(equals(results.rows[2], {key:["a","c"],value:10*i}));
+      T(equals(results.rows[3], {key:["d"],value:10*i}));
+      T(equals(results.rows[4], {key:["d","a"],value:10*i}));
+      T(equals(results.rows[5], {key:["d","b"],value:10*i}));
+      T(equals(results.rows[6], {key:["d","c"],value:10*i}));
+    }
+  }
+};
\ No newline at end of file
diff --git a/src/couchdb/couch_query_servers.erl b/src/couchdb/couch_query_servers.erl
index a27943a1..734baade 100644
--- a/src/couchdb/couch_query_servers.erl
+++ b/src/couchdb/couch_query_servers.erl
@@ -86,7 +86,11 @@ rereduce(Lang, RedSrcs, ReducedValues) ->
Pid = get_os_process(Lang),
Grouped = group_reductions_results(ReducedValues),
Results = try lists:zipwith(
- fun(FunSrc, Values) ->
+ fun
+ (<<"_", _/binary>> = FunSrc, Values) ->
+ {ok, [Result]} = builtin_reduce(rereduce, [FunSrc], [[[], V] || V <- Values], []),
+ Result;
+ (FunSrc, Values) ->
[true, [Result]] =
couch_os_process:prompt(Pid, [<<"rereduce">>, [FunSrc], Values]),
Result
@@ -99,15 +103,53 @@ rereduce(Lang, RedSrcs, ReducedValues) ->
+%% Evaluates every reduce source in RedSrcs over KVs.
+%% Sources whose text starts with "_" are handled by builtin_reduce/4; all
+%% others are passed to os_reduce/3. The two result lists are then merged
+%% back into the order of RedSrcs by recombine_reduce_results/4.
 reduce(_Lang, [], _KVs) ->
     {ok, []};
 reduce(Lang, RedSrcs, KVs) ->
+    % true for sources that must go to the external query server
+    IsOsFun = fun
+        (<<"_", _/binary>>) -> false;
+        (_OsFun) -> true
+    end,
+    {OsRedSrcs, BuiltinReds} = lists:partition(IsOsFun, RedSrcs),
+    {ok, OsResults} = os_reduce(Lang, OsRedSrcs, KVs),
+    {ok, BuiltinResults} = builtin_reduce(reduce, BuiltinReds, KVs, []),
+    recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, []).
+
+%% Interleaves OS-process results and built-in results so that the returned
+%% list follows the order of the original reduce sources: for each source
+%% starting with "_" the next built-in result is taken, otherwise the next
+%% OS result. Returns {ok, Results} once all three input lists are exhausted;
+%% any length mismatch fails with function_clause.
+recombine_reduce_results([<<"_", _/binary>>|Srcs], OsRs, [Builtin|BuiltinRs], Merged) ->
+    recombine_reduce_results(Srcs, OsRs, BuiltinRs, [Builtin|Merged]);
+recombine_reduce_results([_OsFun|Srcs], [Os|OsRs], BuiltinRs, Merged) ->
+    recombine_reduce_results(Srcs, OsRs, BuiltinRs, [Os|Merged]);
+recombine_reduce_results([], [], [], Merged) ->
+    % results were accumulated in reverse
+    {ok, lists:reverse(Merged)}.
+
+%% Runs the reduce sources OsRedSrcs over KVs by sending a "reduce" prompt to
+%% an OS process obtained for Lang, and returns {ok, Reductions}.
+%% With no sources there is nothing to ask, so no process is acquired.
+%% The process is handed back via ret_os_process/2 even if the prompt fails.
+os_reduce(_Lang, [], _KVs) ->
+    {ok, []};
+os_reduce(Lang, OsRedSrcs, KVs) ->
     Pid = get_os_process(Lang),
-    Results = try couch_os_process:prompt(Pid,
-            [<<"reduce">>, RedSrcs, KVs]) of
+    OsResults = try couch_os_process:prompt(Pid,
+            [<<"reduce">>, OsRedSrcs, KVs]) of
         [true, Reductions] -> Reductions
     after
         ok = ret_os_process(Lang, Pid)
     end,
-    {ok, Results}.
-
+    {ok, OsResults}.
+
+%% Evaluates the built-in reduce functions named in the second argument over
+%% KVs and returns {ok, Results} in the same order as the names.
+%%   Re   - reduce | rereduce
+%%   KVs  - rows shaped [Key, Value]
+%%   Acc  - results so far, most recent first (callers pass [])
+%% "_sum" adds the row values in both modes. "_count" counts rows on reduce
+%% and adds up the previously computed counts on rereduce.
+%% Any other name matches no clause and fails with function_clause.
+builtin_reduce(_Re, [], _KVs, Acc) ->
+    {ok, lists:reverse(Acc)};
+builtin_reduce(Re, [<<"_sum">>|BuiltinReds], KVs, Acc) ->
+    Sum = builtin_sum_rows(KVs),
+    builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
+builtin_reduce(reduce, [<<"_count">>|BuiltinReds], KVs, Acc) ->
+    Count = length(KVs),
+    builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
+builtin_reduce(rereduce, [<<"_count">>|BuiltinReds], KVs, Acc) ->
+    % on rereduce the values are partial counts, so they are summed
+    Count = builtin_sum_rows(KVs),
+    builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]).
+
+%% Adds up the Value of every [Key, Value] row in KVs, starting from 0.
+%% Throws {invalid_value, Message} as soon as a row is not a two-element
+%% list or its value is not a number.
+builtin_sum_rows(KVs) ->
+    AddRow = fun
+        ([_Key, Value], Total) when is_number(Value) ->
+            Total + Value;
+        (_Row, _Total) ->
+            throw({invalid_value, <<"builtin _sum function requires map values to be numbers">>})
+    end,
+    lists:foldl(AddRow, 0, KVs).
+
validate_doc_update(Lang, FunSrc, EditDoc, DiskDoc, Ctx) ->
Pid = get_os_process(Lang),
JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]),