From 712830243c89a0863831ead8e983f089fd37fd42 Mon Sep 17 00:00:00 2001 From: "Damien F. Katz" Date: Thu, 5 Jun 2008 23:49:48 +0000 Subject: Added reduce/combine example. Fixed broken node chunking with very large keys/reduction values git-svn-id: https://svn.apache.org/repos/asf/incubator/couchdb/trunk@663786 13f79535-47bb-0310-9956-ffa450edef68 --- share/www/script/couch_tests.js | 65 +++++++++++++++++++++++++++++++++++++++++ src/couchdb/couch_btree.erl | 5 +++- src/couchdb/couch_httpd.erl | 7 +++-- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/share/www/script/couch_tests.js b/share/www/script/couch_tests.js index 97e91f1e..effab90c 100644 --- a/share/www/script/couch_tests.js +++ b/share/www/script/couch_tests.js @@ -294,6 +294,9 @@ var tests = { result = db.query(map, reduce, {startkey: i, endkey: numDocs - i}); T(result.rows[0].value == summate(numDocs-i) - summate(i-1)); } + + db.deleteDb(); + db.createDb(); for(var i=1; i <= 5; i++) { @@ -340,6 +343,68 @@ var tests = { T(equals(results.rows[5], {key:["d","b"],value:10*i})); T(equals(results.rows[6], {key:["d","c"],value:10*i})); } + + // now test out more complex reductions that need to use the combine option. 
+ + db.deleteDb(); + db.createDb(); + + + var map = function (doc) {emit(null, doc.val)}; + var reduceCombine = function (keys, values, combine) { + // this computes the population standard deviation; when combine is true, values holds earlier results of this function to merge + + var stdDeviation=0; + var count=0; + var total=0; + var sqrTotal=0; + + if (combine) { + for(var i in values) { + count = count + values[i].count; + total = total + values[i].total; + sqrTotal = sqrTotal + values[i].sqrTotal; + } + var variance = (sqrTotal - ((total * total)/count)) / count; + stdDeviation = Math.sqrt(variance); + + return {"stdDeviation":stdDeviation,"count":count, + "total":total,"sqrTotal":sqrTotal}; + } + else { + for(var i in values) { + total = total + values[i]; + sqrTotal = sqrTotal + (values[i] * values[i]); + } + count = values.length; + var variance = (sqrTotal - ((total * total)/count)) / count; + stdDeviation = Math.sqrt(variance); + } + + return {"stdDeviation":stdDeviation,"count":count, + "total":total,"sqrTotal":sqrTotal}; + }; + + for(var j=0; j < 10; j++) { + // these docs are in the order of the keys collation, for clarity + var docs = []; + docs.push({val:10}); + docs.push({val:20}); + docs.push({val:30}); + docs.push({val:40}); + docs.push({val:50}); + docs.push({val:60}); + docs.push({val:70}); + docs.push({val:80}); + docs.push({val:90}); + docs.push({val:100}); + T(db.bulkSave(docs).ok); + } + + var results = db.query(map, reduceCombine); + // account for floating point error by comparing within a small tolerance + T(Math.abs(results.rows[0].value.stdDeviation - 28.722813232690143) < 0.0000000001); + }, multiple_rows: function(debug) { diff --git a/src/couchdb/couch_btree.erl b/src/couchdb/couch_btree.erl index f5111c28..f01234c1 100644 --- a/src/couchdb/couch_btree.erl +++ b/src/couchdb/couch_btree.erl @@ -255,7 +255,7 @@ chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> lists:reverse([lists:reverse(OutList) | OutputChunks]); chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> case 
size(term_to_binary(InElement)) of - Size when (Size + OutListSize) > ChunkThreshold -> + Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> chunkify(Bt, RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); Size -> chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) @@ -398,6 +398,9 @@ modify_kvnode(Bt, [{Key, Value} | RestKVs], [{ActionType, ActionKey, ActionValue end. +reduce_stream_node(_Bt, _Dir, nil, _KeyStart, _KeyEnd, GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, _KeyGroupFun, _Fun, Acc) -> + {ok, Acc, GroupedRedsAcc, GroupedKVsAcc, GroupedKey}; reduce_stream_node(Bt, Dir, {P, _R}, KeyStart, KeyEnd, GroupedKey, GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> case get_node(Bt, P) of diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl index b9244b1b..da3d7555 100644 --- a/src/couchdb/couch_httpd.erl +++ b/src/couchdb/couch_httpd.erl @@ -469,7 +469,9 @@ output_reduce_view(Req, View) -> Resp:write_chunk(AccSeparator ++ Json), {ok, {",",0,AccCount-1}}; (Key, Red, {AccSeparator,0,AccCount}) - when is_tuple(Key) and is_integer(GroupLevel) -> + when is_integer(GroupLevel) + andalso is_tuple(Key) + andalso element(1, Key) /= obj -> Json = lists:flatten(cjson:encode( {obj, [{key, list_to_tuple(lists:sublist(tuple_to_list(Key), GroupLevel))}, {value, Red}]})), @@ -557,8 +559,7 @@ handle_doc_request(Req, 'GET', _DbName, Db, DocId) -> Json = lists:flatten(cjson:encode({obj, [{ok, JsonDoc}]})), Resp:write_chunk(AccSeparator ++ Json); {{not_found, missing}, RevId} -> - Json = {obj, [{"missing", RevId}]}, - Json = lists:flatten(cjson:encode(Json)), + Json = lists:flatten(cjson:encode({obj, [{"missing", RevId}]})), Resp:write_chunk(AccSeparator ++ Json) end, "," % AccSeparator now has a comma -- cgit v1.2.3