From a684f95cbcee7f2568a2ce04e7dc2bbb605a27b3 Mon Sep 17 00:00:00 2001 From: "Damien F. Katz" Date: Thu, 15 May 2008 21:51:22 +0000 Subject: Incremental reduce first checkin. Warning! Disk format change. git-svn-id: https://svn.apache.org/repos/asf/incubator/couchdb/trunk@656861 13f79535-47bb-0310-9956-ffa450edef68 --- share/server/main.js | 218 +++++++++++++++++++++++++--------------- share/www/script/couch.js | 18 +++- share/www/script/couch_tests.js | 77 ++++++++++++-- 3 files changed, 225 insertions(+), 88 deletions(-) (limited to 'share') diff --git a/share/server/main.js b/share/server/main.js index 5cc854aa..e7184682 100644 --- a/share/server/main.js +++ b/share/server/main.js @@ -11,21 +11,30 @@ // the License. var cmd; -var map_funs = []; // The map functions to compute against documents -var map_results = []; +var funs = []; // holds functions used for computation +var map_results = []; // holds temporary emitted values during doc map -try { - var sandbox = evalcx(''); - sandbox.map = function(key, value) { +var sandbox = null; + +map = function(key, value) { map_results.push([key, value]); } -} catch (e) { - // fallback for older versions of spidermonkey that don't have evalcx - var sandbox = null; - map = function(key, value) { - map_results.push([key, value]); + +sum = function(values) { + var values_sum=0; + for(var i in values) { + values_sum += values[i]; + } + return values_sum; } -} + + +try { + // if possible, use evalcx (not always available) + sandbox = evalcx(''); + sandbox.map = map; + sandbox.sum = sum; +} catch (e) {} // Commands are in the form of json arrays: // ["commandname",..optional args...]\n @@ -33,83 +42,132 @@ try { // Responses are json values followed by a new line ("\n") while (cmd = eval(readline())) { - switch (cmd[0]) { - case "reset": - // clear the map_functions and run gc - map_funs = []; - gc(); - print("true"); // indicates success - break; - case "add_fun": - // The second arg is a string that will compile to a function. - // and then we add it to map_functions array - try { - var functionObject = sandbox ? evalcx(cmd[1], sandbox) : eval(cmd[1]); - } catch (err) { - print(toJSON({error: {id: "map_compilation_error", - reason: err.toString() + " (" + toJSON(cmd[1]) + ")"}})); + try { + switch (cmd[0]) { + case "reset": + // clear the globals and run gc + funs = []; + gc(); + print("true"); // indicates success break; - } - if (typeof(functionObject) == "function") { - map_funs.push(functionObject); - print("true"); - } else { - print(toJSON({error: "map_compilation_error", - reason: "expression does not eval to a function. (" + cmd[1] + ")"})); - } - break; - case "map_doc": - // The second arg is a document. We compute all the map functions against - // it. - // - // Each function can output multiple keys value, pairs for each document - // - // Example output of map_doc after three functions set by add_fun cmds: - // [ - // [["Key","Value"]], <- fun 1 returned 1 key value - // [], <- fun 2 returned 0 key values - // [["Key1","Value1"],["Key2","Value2"]] <- fun 3 returned 2 key values - // ] - // - var doc = cmd[1]; - seal(doc); // seal to prevent map functions from changing doc - var buf = []; - for (var i = 0; i < map_funs.length; i++) { - map_results = []; - try { - map_funs[i](doc); - buf.push(map_results.filter(function(pair) { - return pair[0] !== undefined && pair[1] !== undefined; - })); - } catch (err) { - if (err == "fatal_error") { - // Only if it's a "fatal_error" do we exit. What's a fatal error? - // That's for the query to decide. - // - // This will make it possible for queries to completely error out, - // by catching their own local exception and rethrowing a - // fatal_error. But by default if they don't do error handling we - // just eat the exception and carry on. - print(toJSON({error: "map_runtime_error", - reason: "function raised fatal exception"})); - quit(); + case "add_fun": + // The second arg is a string that will compile to a function. + // and then we add it to funs array + funs.push(safe_compile_function(cmd[1])); + print("true"); + break; + case "map_doc": + // The second arg is a document. We compute all the map functions against + // it. + // + // Each function can output multiple keys value, pairs for each document + // + // Example output of map_doc after three functions set by add_fun cmds: + // [ + // [["Key","Value"]], <- fun 1 returned 1 key value + // [], <- fun 2 returned 0 key values + // [["Key1","Value1"],["Key2","Value2"]] <- fun 3 returned 2 key values + // ] + // + var doc = cmd[1]; + seal(doc); // seal to prevent map functions from changing doc + var buf = []; + for (var i = 0; i < funs.length; i++) { + map_results = []; + try { + funs[i](doc); + buf.push(map_results.filter(function(pair) { + return pair[0] !== undefined && pair[1] !== undefined; + })); + } catch (err) { + if (err == "fatal_error") { + // Only if it's a "fatal_error" do we exit. What's a fatal error? + // That's for the query to decide. + // + // This will make it possible for queries to completely error out, + // by catching their own local exception and rethrowing a + // fatal_error. But by default if they don't do error handling we + // just eat the exception and carry on. + throw {error: "map_runtime_error", + reason: "function raised fatal exception"}; + } + print(toJSON({log: "function raised exception (" + err + ")"})); + buf.push([]); } - print(toJSON({log: "function raised exception (" + err + ")"})); - buf.push([]); } - } - print(toJSON(buf)); - break; - default: - print(toJSON({error: "query_server_error", - reason: "unknown command '" + cmd[0] + "'"})); - quit(); + print(toJSON(buf)); + break; + + case "combine": + case "reduce": + { + var keys = null; + var values = null; + var reduceFuns = cmd[1]; + var is_combine = false; + if (cmd[0] == "reduce") { + var kvs = cmd[2]; + keys = new Array(kvs.length); + values = new Array(kvs.length); + for (var i = 0; i < kvs.length; i++) { + keys[i] = kvs[i][0]; + values[i] = kvs[i][1]; + } + } else { + values = cmd[2]; + is_combine = true; + } + + for(var i in reduceFuns) { + reduceFuns[i] = safe_compile_function(reduceFuns[i]); + } + + var reductions = new Array(funs.length); + for (var i = 0; i < reduceFuns.length; i++) { + try { + reductions[i] = reduceFuns[i](keys, values, is_combine); + } catch (err) { + if (err == "fatal_error") { + throw {error: "reduce_runtime_error", + reason: "function raised fatal exception"}; + } + print(toJSON({log: "function raised exception (" + err + ")"})); + reductions[i] = null; + } + } + print("[true," + toJSON(reductions) + "]"); + } + break; + + default: + print(toJSON({error: "query_server_error", + reason: "unknown command '" + cmd[0] + "'"})); + quit(); + } + } catch(exception) { + print(toJSON(exception)); + } +} + + +function safe_compile_function(Src) { + try { + var functionObject = sandbox ? evalcx(Src, sandbox) : eval(Src); + } catch (err) { + throw {error: "compilation_error", + reason: err.toString() + " (" + Src + ")"}; + } + if (typeof(functionObject) == "function") { + return functionObject; + } else { + throw {error: "compilation_error", + reason: "expression does not eval to a function. (" + Src + ")"}; } } function toJSON(val) { if (typeof(val) == "undefined") { - throw new TypeError("Cannot encode undefined value as JSON"); + throw {error:"bad_value", reason:"Cannot encode 'undefined' value as JSON"}; } var subs = {'\b': '\\b', '\t': '\\t', '\n': '\\n', '\f': '\\f', '\r': '\\r', '"' : '\\"', '\\': '\\\\'}; diff --git a/share/www/script/couch.js b/share/www/script/couch.js index 5f42ac38..f72fb712 100644 --- a/share/www/script/couch.js +++ b/share/www/script/couch.js @@ -99,7 +99,23 @@ function CouchDB(name) { mapFun = mapFun.toSource ? mapFun.toSource() : "(" + mapFun.toString() + ")"; var req = request("POST", this.uri + "_temp_view" + encodeOptions(options), { headers: {"Content-Type": "text/javascript"}, - body: mapFun + body: JSON.stringify(mapFun) + }); + var result = JSON.parse(req.responseText); + if (req.status != 200) + throw result; + return result; + } + + // Applies the map function to the contents of database and returns the results. + this.reduce_query = function(mapFun, reduceFun, options) { + if (typeof(mapFun) != "string") + mapFun = mapFun.toSource ? mapFun.toSource() : "(" + mapFun.toString() + ")"; + if (typeof(reduceFun) != "string") + reduceFun = reduceFun.toSource ? reduceFun.toSource() : "(" + reduceFun.toString() + ")"; + var req = request("POST", this.uri + "_temp_view" + encodeOptions(options), { + headers: {"Content-Type": "text/javascript"}, + body: JSON.stringify({map:mapFun, reduce:reduceFun}) }); var result = JSON.parse(req.responseText); if (req.status != 200) diff --git a/share/www/script/couch_tests.js b/share/www/script/couch_tests.js index 5f9cb5ae..72ed9f58 100644 --- a/share/www/script/couch_tests.js +++ b/share/www/script/couch_tests.js @@ -91,7 +91,15 @@ var tests = { // 1 more document should now be in the result. T(results.total_rows == 3); T(db.info().doc_count == 6); + + var reduceFunction = function(keys, values){ + return sum(values); + }; + + result = db.reduce_query(mapFunction, reduceFunction); + T(result.result == 33); + // delete a document T(db.deleteDoc(existingDoc).ok); @@ -219,6 +227,39 @@ var tests = { T(results.rows[numDocsToCreate-1-i].key==i); } }, + + reduce: function(debug) { + var db = new CouchDB("test_suite_db"); + db.deleteDb(); + db.createDb(); + if (debug) debugger; + var numDocs = 500 + var docs = makeDocs(1,numDocs + 1); + T(db.bulkSave(docs).ok); + var summate = function(N) {return (N+1)*N/2;}; + + var map = function (doc) {map(doc.integer, doc.integer)}; + var reduce = function (keys, values) { return sum(values); }; + var result = db.reduce_query(map, reduce).result; + T(result == summate(numDocs)); + + result = db.reduce_query(map, reduce, {startkey:4,endkey:4}).result; + + T(result == 4); + + result = db.reduce_query(map, reduce, {startkey:4,endkey:5}).result; + + T(result == 9); + + result = db.reduce_query(map, reduce, {startkey:4,endkey:6}).result; + + T(result == 15); + + for(var i=1; i