diff options
Diffstat (limited to 'apps')
130 files changed, 28775 insertions, 0 deletions
diff --git a/apps/couch/AUTHORS b/apps/couch/AUTHORS new file mode 100644 index 00000000..e0181c1d --- /dev/null +++ b/apps/couch/AUTHORS @@ -0,0 +1,20 @@ +Apache CouchDB AUTHORS +====================== + +A number of people have contributed directly to Apache CouchDB by writing +documentation or developing software. Some of these people are: + + * Damien Katz <damien@apache.org> + * Jan Lehnardt <jan@apache.org> + * Noah Slater <nslater@apache.org> + * Christopher Lenz <cmlenz@apache.org> + * J. Chris Anderson <jchris@apache.org> + * Paul Joseph Davis <davisp@apache.org> + * Adam Kocoloski <kocolosk@apache.org> + * Jason Davies <jasondavies@apache.org> + * Mark Hammond <mhammond@skippinet.com.au> + * Benoît Chesneau <benoitc@apache.org> + * Filipe Manana <fdmanana@apache.org> + * Robert Newson <rnewson@apache.org> + +For a list of other credits see the `THANKS` file. diff --git a/apps/couch/BUGS b/apps/couch/BUGS new file mode 100644 index 00000000..8cd1d161 --- /dev/null +++ b/apps/couch/BUGS @@ -0,0 +1,6 @@ +Apache CouchDB BUGS +=================== + +Please see the [documentation][1] on how to report bugs with Apache CouchDB. + +[1] http://couchdb.apache.org/community/issues.html diff --git a/apps/couch/CHANGES b/apps/couch/CHANGES new file mode 100644 index 00000000..8e89305f --- /dev/null +++ b/apps/couch/CHANGES @@ -0,0 +1,725 @@ +Apache CouchDB CHANGES +====================== + +Version 1.1.1 +------------- + +* Support SpiderMonkey 1.8.5 +* Add configurable maximum to the number of bytes returned by _log. +* Allow CommonJS modules to be an empty string. +* Bump minimum Erlang version to R13B02. +* Do not run deleted validate_doc_update functions. +* ETags for views include current sequence if include_docs=true. +* Fix bug where duplicates can appear in _changes feed. +* Fix bug where update handlers break after conflict resolution. +* Fix bug with _replicator where include "filter" could crash couch. +* Fix crashes when compacting large views. +* Fix file descriptor leak in _log +* Fix missing revisions in _changes?style=all_docs. +* Improve handling of compaction at max_dbs_open limit. +* JSONP responses now send "text/javascript" for Content-Type. +* Link to ICU 4.2 on Windows. +* Permit forward slashes in path to update functions. +* Reap couchjs processes that hit reduce_overflow error. +* Status code can be specified in update handlers. +* Support provides() in show functions. +* _view_cleanup when ddoc has no views now removes all index files. +* max_replication_retry_count now supports "infinity". +* Fix replication crash when source database has a document with empty ID. +* Fix deadlock when assigning couchjs processes to serve requests. +* Fixes to the document multipart PUT API. +* Fixes regarding file descriptor leaks for databases with views. + +Version 1.1.0 +------------- + +All CHANGES for 1.0.2 and 1.0.3 also apply to 1.1.0. + +HTTP Interface: + + * Native SSL support. + * Added support for HTTP range requests for attachments. + * Added built-in filters for `_changes`: `_doc_ids` and `_design`. + * Added configuration option for TCP_NODELAY aka "Nagle". + * Allow POSTing arguments to `_changes`. + * Allow `keys` parameter for GET requests to views. + * Allow wildcards in vhosts definitions. + * More granular ETag support for views. + * More flexible URL rewriter. + * Added support for recognizing "Q values" and media parameters in + HTTP Accept headers. + * Validate doc ids that come from a PUT to a URL. + +Externals: + + * Added OS Process module to manage daemons outside of CouchDB. + * Added HTTP Proxy handler for more scalable externals. + +Replicator: + + * Added `_replicator` database to manage replications. + * Fixed issues when an endpoint is a remote database accessible via SSL. + * Added support for continuous by-doc-IDs replication. + * Fix issue where revision info was omitted when replicating attachments. + * Integrity of attachment replication is now verified by MD5. + +Storage System: + + * Multiple micro-optimizations when reading data. + +View Server: + + * Added CommonJS support to map functions. + * Added `stale=update_after` query option that triggers a view update after + returning a `stale=ok` response. + * Warn about empty result caused by `startkey` and `endkey` limiting. + * Built-in reduce function `_sum` now accepts lists of integers as input. + * Added view query aliases start_key, end_key, start_key_doc_id and + end_key_doc_id. + +Futon: + + * Added a "change password"-feature to Futon. + +URL Rewriter & Vhosts: + + * Fix for variable substituion + +Version 1.0.2 +------------- + +Futon: + + * Make test suite work with Safari and Chrome. + * Fixed animated progress spinner. + * Fix raw view document link due to overzealous URI encoding. + * Spell javascript correctly in loadScript(uri). + +Storage System: + + * Fix leaking file handles after compacting databases and views. + * Fix databases forgetting their validation function after compaction. + * Fix occasional timeout errors after successfully compacting large databases. + * Fix ocassional error when writing to a database that has just been compacted. + * Fix occasional timeout errors on systems with slow or heavily loaded IO. + * Fix for OOME when compactions include documents with many conflicts. + * Fix for missing attachment compression when MIME types included parameters. + * Preserve purge metadata during compaction to avoid spurious view rebuilds. + * Fix spurious conflicts introduced when uploading an attachment after + a doc has been in a conflict. See COUCHDB-902 for details. + * Fix for frequently edited documents in multi-master deployments being + duplicated in _changes and _all_docs. See COUCHDDB-968 for details on how + to repair. + * Significantly higher read and write throughput against database and + view index files. + +Log System: + + * Reduce lengthy stack traces. + * Allow logging of native <xml> types. + +HTTP Interface: + + * Allow reduce=false parameter in map-only views. + * Fix parsing of Accept headers. + * Fix for multipart GET APIs when an attachment was created during a + local-local replication. See COUCHDB-1022 for details. + +Replicator: + + * Updated ibrowse library to 2.1.2 fixing numerous replication issues. + * Make sure that the replicator respects HTTP settings defined in the config. + * Fix error when the ibrowse connection closes unexpectedly. + * Fix authenticated replication (with HTTP basic auth) of design documents + with attachments. + * Various fixes to make replication more resilient for edge-cases. + +View Server: + + * Don't trigger view updates when requesting `_design/doc/_info`. + * Fix for circular references in CommonJS requires. + * Made isArray() function available to functions executed in the query server. + * Documents are now sealed before being passed to map functions. + * Force view compaction failure when duplicated document data exists. When + this error is seen in the logs users should rebuild their views from + scratch to fix the issue. See COUCHDB-999 for details. + +Version 1.0.1 +------------- + +Storage System: + + * Fix data corruption bug COUCHDB-844. Please see + http://couchdb.apache.org/notice/1.0.1.html for details. + +Replicator: + + * Added support for replication via an HTTP/HTTPS proxy. + * Fix pull replication of attachments from 0.11 to 1.0.x. + * Make the _changes feed work with non-integer seqnums. + +HTTP Interface: + + * Expose `committed_update_seq` for monitoring purposes. + * Show fields saved along with _deleted=true. Allows for auditing of deletes. + * More robust Accept-header detection. + +Authentication: + + * Enable basic-auth popup when required to access the server, to prevent + people from getting locked out. + +Futon: + + * User interface element for querying stale (cached) views. + +Build and System Integration: + + * Included additional source files for distribution. + +Version 1.0.0 +------------- + +Security: + + * Added authentication caching, to avoid repeated opening and closing of the + users database for each request requiring authentication. + +Storage System: + + * Small optimization for reordering result lists. + * More efficient header commits. + * Use O_APPEND to save lseeks. + * Faster implementation of pread_iolist(). Further improves performance on + concurrent reads. + +View Server: + + * Faster default view collation. + * Added option to include update_seq in view responses. + +Version 0.11.2 +-------------- + +Replicator: + + * Fix bug when pushing design docs by non-admins, which was hanging the + replicator for no good reason. + * Fix bug when pulling design documents from a source that requires + basic-auth. + +HTTP Interface: + + * Better error messages on invalid URL requests. + +Authentication: + + * User documents can now be deleted by admins or the user. + +Security: + + * Avoid potential DOS attack by guarding all creation of atoms. + +Futon: + + * Add some Futon files that were missing from the Makefile. + +Version 0.11.1 +-------------- + +HTTP Interface: + + * Mask passwords in active tasks and logging. + * Update mochijson2 to allow output of BigNums not in float form. + * Added support for X-HTTP-METHOD-OVERRIDE. + * Better error message for database names. + * Disable jsonp by default. + * Accept gzip encoded standalone attachments. + * Made max_concurrent_connections configurable. + * Made changes API more robust. + * Send newly generated document rev to callers of an update function. + +Futon: + + * Use "expando links" for over-long document values in Futon. + * Added continuous replication option. + * Added option to replicating test results anonymously to a community + CouchDB instance. + * Allow creation and deletion of config entries. + * Fixed display issues with doc ids that have escaped characters. + * Fixed various UI issues. + +Build and System Integration: + + * Output of `couchdb --help` has been improved. + * Fixed compatibility with the Erlang R14 series. + * Fixed warnings on Linux builds. + * Fixed build error when aclocal needs to be called during the build. + * Require ICU 4.3.1. + * Fixed compatibility with Solaris. + +Security: + + * Added authentication redirect URL to log in clients. + * Fixed query parameter encoding issue in oauth.js. + * Made authentication timeout configurable. + * Temporary views are now admin-only resources. + +Storage System: + + * Don't require a revpos for attachment stubs. + * Added checking to ensure when a revpos is sent with an attachment stub, + it's correct. + * Make file deletions async to avoid pauses during compaction and db + deletion. + * Fixed for wrong offset when writing headers and converting them to blocks, + only triggered when header is larger than 4k. + * Preserve _revs_limit and instance_start_time after compaction. + +Configuration System: + + * Fixed timeout with large .ini files. + +JavaScript Clients: + + * Added tests for couch.js and jquery.couch.js + * Added changes handler to jquery.couch.js. + * Added cache busting to jquery.couch.js if the user agent is msie. + * Added support for multi-document-fetch (via _all_docs) to jquery.couch.js. + * Added attachment versioning to jquery.couch.js. + * Added option to control ensure_full_commit to jquery.couch.js. + * Added list functionality to jquery.couch.js. + * Fixed issues where bulkSave() wasn't sending a POST body. + +View Server: + + * Provide a UUID to update functions (and all other functions) that they can + use to create new docs. + * Upgrade CommonJS modules support to 1.1.1. + * Fixed erlang filter funs and normalize filter fun API. + * Fixed hang in view shutdown. + +Log System: + + * Log HEAD requests as HEAD, not GET. + * Keep massive JSON blobs out of the error log. + * Fixed a timeout issue. + +Replication System: + + * Refactored various internal APIs related to attachment streaming. + * Fixed hanging replication. + * Fixed keepalive issue. + +URL Rewriter & Vhosts: + + * Allow more complex keys in rewriter. + * Allow global rewrites so system defaults are available in vhosts. + * Allow isolation of databases with vhosts. + * Fix issue with passing variables to query parameters. + +Test Suite: + + * Made the test suite overall more reliable. + +Version 0.11.0 +-------------- + +Security: + + * Fixed CVE-2010-0009: Apache CouchDB Timing Attack Vulnerability. + * Added default cookie-authentication and users database. + * Added Futon user interface for user signup and login. + * Added per-database reader access control lists. + * Added per-database security object for configuration data in validation + functions. + * Added proxy authentication handler + +HTTP Interface: + + * Provide Content-MD5 header support for attachments. + * Added URL Rewriter handler. + * Added virtual host handling. + +View Server: + + * Added optional 'raw' binary collation for faster view builds where Unicode + collation is not important. + * Improved view index build time by reducing ICU collation callouts. + * Improved view information objects. + * Bug fix for partial updates during view builds. + * Move query server to a design-doc based protocol. + * Use json2.js for JSON serialization for compatiblity with native JSON. + * Major refactoring of couchjs to lay the groundwork for disabling cURL + support. The new HTTP interaction acts like a synchronous XHR. Example usage + of the new system is in the JavaScript CLI test runner. + +Replication: + + * Added option to implicitly create replication target databases. + * Avoid leaking file descriptors on automatic replication restarts. + * Added option to replicate a list of documents by id. + * Allow continuous replication to be cancelled. + +Storage System: + + * Adds batching of multiple updating requests, to improve throughput with many + writers. Removed the now redundant couch_batch_save module. + * Adds configurable compression of attachments. + +Runtime Statistics: + + * Statistics are now calculated for a moving window instead of non-overlapping + timeframes. + * Fixed a problem with statistics timers and system sleep. + * Moved statistic names to a term file in the priv directory. + +Futon: + + * Added a button for view compaction. + * JSON strings are now displayed as-is in the document view, without the escaping of + new-lines and quotes. That dramatically improves readability of multi-line + strings. + * Same goes for editing of JSON string values. When a change to a field value is + submitted, and the value is not valid JSON it is assumed to be a string. This + improves editing of multi-line strings a lot. + * Hitting tab in textareas no longer moves focus to the next form field, but simply + inserts a tab character at the current caret position. + * Fixed some font declarations. + +Build and System Integration: + + * Updated and improved source documentation. + * Fixed distribution preparation for building on Mac OS X. + * Added support for building a Windows installer as part of 'make dist'. + * Bug fix for building couch.app's module list. + * ETap tests are now run during make distcheck. This included a number of + updates to the build system to properly support VPATH builds. + * Gavin McDonald setup a build-bot instance. More info can be found at + http://ci.apache.org/buildbot.html + +Version 0.10.1 +-------------- + +Replicator: + + * Stability enhancements regarding redirects, timeouts, OAuth. + +Query Server: + + * Avoid process leaks + * Allow list and view to span languages + +Stats: + + * Eliminate new process flood on system wake + +Build and System Integration: + + * Test suite now works with the distcheck target. + +Version 0.10.0 +-------------- + +Storage Format: + + * Add move headers with checksums to the end of database files for extra robust + storage and faster storage. + +View Server: + + * Added native Erlang views for high-performance applications. + +HTTP Interface: + + * Added optional cookie-based authentication handler. + * Added optional two-legged OAuth authentication handler. + +Build and System Integration: + + * Changed `couchdb` script configuration options. + * Added default.d and local.d configuration directories to load sequence. + + +Version 0.9.2 +------------- + +Replication: + + * Fix replication with 0.10 servers initiated by an 0.9 server (COUCHDB-559). + +Build and System Integration: + + * Remove branch callbacks to allow building couchjs against newer versions of + Spidermonkey. + +Version 0.9.1 +------------- + +Build and System Integration: + + * PID file directory is now created by the SysV/BSD daemon scripts. + * Fixed the environment variables shown by the configure script. + * Fixed the build instructions shown by the configure script. + * Updated ownership and permission advice in `README` for better security. + +Configuration and stats system: + + * Corrected missing configuration file error message. + * Fixed incorrect recording of request time. + +Database Core: + + * Document validation for underscore prefixed variables. + * Made attachment storage less sparse. + * Fixed problems when a database with delayed commits pending is considered + idle, and subject to losing changes when shutdown. (COUCHDB-334) + +External Handlers: + + * Fix POST requests. + +Futon: + + * Redirect when loading a deleted view URI from the cookie. + +HTTP Interface: + + * Attachment requests respect the "rev" query-string parameter. + +JavaScript View Server: + + * Useful JavaScript Error messages. + +Replication: + + * Added support for Unicode characters transmitted as UTF-16 surrogate pairs. + * URL-encode attachment names when necessary. + * Pull specific revisions of an attachment, instead of just the latest one. + * Work around a rare chunk-merging problem in ibrowse. + * Work with documents containing Unicode characters outside the Basic + Multilingual Plane. + +Version 0.9.0 +------------- + +Futon Utility Client: + + * Added pagination to the database listing page. + * Implemented attachment uploading from the document page. + * Added page that shows the current configuration, and allows modification of + option values. + * Added a JSON "source view" for document display. + * JSON data in view rows is now syntax highlighted. + * Removed the use of an iframe for better integration with browser history and + bookmarking. + * Full database listing in the sidebar has been replaced by a short list of + recent databases. + * The view editor now allows selection of the view language if there is more + than one configured. + * Added links to go to the raw view or document URI. + * Added status page to display currently running tasks in CouchDB. + * JavaScript test suite split into multiple files. + * Pagination for reduce views. + +Design Document Resource Paths: + + * Added httpd_design_handlers config section. + * Moved _view to httpd_design_handlers. + * Added ability to render documents as non-JSON content-types with _show and + _list functions, which are also httpd_design_handlers. + +HTTP Interface: + + * Added client side UUIDs for idempotent document creation + * HTTP COPY for documents + * Streaming of chunked attachment PUTs to disk + * Remove negative count feature + * Add include_docs option for view queries + * Add multi-key view post for views + * Query parameter validation + * Use stale=ok to request potentially cached view index + * External query handler module for full-text or other indexers. + * Etags for attachments, views, shows and lists + * Show and list functions for rendering documents and views as developer + controlled content-types. + * Attachment names may use slashes to allow uploading of nested directories + (useful for static web hosting). + * Option for a view to run over design documents. + * Added newline to JSON responses. Closes bike-shed. + +Replication: + + * Using ibrowse. + * Checkpoint replications so failures are less expensive. + * Automatically retry of failed replications. + * Stream attachments in pull-replication. + +Database Core: + + * Faster B-tree implementation. + * Changed internal JSON term format. + * Improvements to Erlang VM interactions under heavy load. + * User context and administrator role. + * Update validations with design document validation functions. + * Document purge functionality. + * Ref-counting for database file handles. + +Build and System Integration: + + * The `couchdb` script now supports system chainable configuration files. + * The Mac OS X daemon script now redirects STDOUT and STDERR like SysV/BSD. + * The build and system integration have been improved for portability. + * Added COUCHDB_OPTIONS to etc/default/couchdb file. + * Remove COUCHDB_INI_FILE and COUCHDB_PID_FILE from etc/default/couchdb file. + * Updated `configure.ac` to manually link `libm` for portability. + * Updated `configure.ac` to extended default library paths. + * Removed inets configuration files. + * Added command line test runner. + * Created dev target for make. + +Configuration and stats system: + + * Separate default and local configuration files. + * HTTP interface for configuration changes. + * Statistics framework with HTTP query API. + +Version 0.8.1-incubating +------------------------ + +Database Core: + + * Fix for replication problems where the write queues can get backed up if the + writes aren't happening fast enough to keep up with the reads. For a large + replication, this can exhaust memory and crash, or slow down the machine + dramatically. The fix keeps only one document in the write queue at a time. + * Fix for databases sometimes incorrectly reporting that they contain 0 + documents after compaction. + * CouchDB now uses ibrowse instead of inets for its internal HTTP client + implementation. This means better replication stability. + +HTTP Interface: + + * Fix for chunked responses where chunks were always being split into multiple + TCP packets, which caused problems with the test suite under Safari, and in + some other cases. + * Fix for an invalid JSON response body being returned for some kinds of + views. (COUCHDB-84) + * Fix for connections not getting closed after rejecting a chunked request. + (COUCHDB-55) + * CouchDB can now be bound to IPv6 addresses. + * The HTTP `Server` header now contains the versions of CouchDB and Erlang. + +JavaScript View Server: + + * Sealing of documents has been disabled due to an incompatibility with + SpiderMonkey 1.9. + * Improve error handling for undefined values emitted by map functions. + (COUCHDB-83) + +Build and System Integration: + + * The `couchdb` script no longer uses `awk` for configuration checks as this + was causing portability problems. + * Updated `sudo` example in `README` to use the `-i` option, this fixes + problems when invoking from a directory the `couchdb` user cannot access. + +Futon: + + * The view selector dropdown should now work in Opera and Internet Explorer + even when it includes optgroups for design documents. (COUCHDB-81) + +Version 0.8.0-incubating +------------------------ + +Database Core: + + * The view engine has been completely decoupled from the storage engine. Index + data is now stored in separate files, and the format of the main database + file has changed. + * Databases can now be compacted to reclaim space used for deleted documents + and old document revisions. + * Support for incremental map/reduce views has been added. + * To support map/reduce, the structure of design documents has changed. View + values are now JSON objects containing at least a `map` member, and + optionally a `reduce` member. + * View servers are now identified by name (for example `javascript`) instead of + by media type. + * Automatically generated document IDs are now based on proper UUID generation + using the crypto module. + * The field `content-type` in the JSON representation of attachments has been + renamed to `content_type` (underscore). + +HTTP Interface: + + * CouchDB now uses MochiWeb instead of inets for the HTTP server + implementation. Among other things, this means that the extra configuration + files needed for inets (such as `couch_httpd.conf`) are no longer used. + * The HTTP interface now completely supports the `HEAD` method. (COUCHDB-3) + * Improved compliance of `Etag` handling with the HTTP specification. + (COUCHDB-13) + * Etags are no longer included in responses to document `GET` requests that + include query string parameters causing the JSON response to change without + the revision or the URI having changed. + * The bulk document update API has changed slightly on both the request and the + response side. In addition, bulk updates are now atomic. + * CouchDB now uses `TCP_NODELAY` to fix performance problems with persistent + connections on some platforms due to nagling. + * Including a `?descending=false` query string parameter in requests to views + no longer raises an error. + * Requests to unknown top-level reserved URLs (anything with a leading + underscore) now return a `unknown_private_path` error instead of the + confusing `illegal_database_name`. + * The Temporary view handling now expects a JSON request body, where the JSON + is an object with at least a `map` member, and optional `reduce` and + `language` members. + * Temporary views no longer determine the view server based on the Content-Type + header of the `POST` request, but rather by looking for a `language` member + in the JSON body of the request. + * The status code of responses to `DELETE` requests is now 200 to reflect that + that the deletion is performed synchronously. + +JavaScript View Server: + + * SpiderMonkey is no longer included with CouchDB, but rather treated as a + normal external dependency. A simple C program (`_couchjs`) is provided that + links against an existing SpiderMonkey installation and uses the interpreter + embedding API. + * View functions using the default JavaScript view server can now do logging + using the global `log(message)` function. Log messages are directed into the + CouchDB log at `INFO` level. (COUCHDB-59) + * The global `map(key, value)` function made available to view code has been + renamed to `emit(key, value)`. + * Fixed handling of exceptions raised by view functions. + +Build and System Integration: + + * CouchDB can automatically respawn following a server crash. + * Database server no longer refuses to start with a stale PID file. + * System logrotate configuration provided. + * Improved handling of ICU shared libraries. + * The `couchdb` script now automatically enables SMP support in Erlang. + * The `couchdb` and `couchjs` scripts have been improved for portability. + * The build and system integration have been improved for portability. + +Futon: + + * When adding a field to a document, Futon now just adds a field with an + autogenerated name instead of prompting for the name with a dialog. The name + is automatically put into edit mode so that it can be changed immediately. + * Fields are now sorted alphabetically by name when a document is displayed. + * Futon can be used to create and update permanent views. + * The maximum number of rows to display per page on the database page can now + be adjusted. + * Futon now uses the XMLHTTPRequest API asynchronously to communicate with the + CouchDB HTTP server, so that most operations no longer block the browser. + * View results sorting can now be switched between ascending and descending by + clicking on the `Key` column header. + * Fixed a bug where documents that contained a `@` character could not be + viewed. (COUCHDB-12) + * The database page now provides a `Compact` button to trigger database + compaction. (COUCHDB-38) + * Fixed portential double encoding of document IDs and other URI segments in + many instances. (COUCHDB-39) + * Improved display of attachments. + * The JavaScript Shell has been removed due to unresolved licensing issues. diff --git a/apps/couch/DEVELOPERS b/apps/couch/DEVELOPERS new file mode 100644 index 00000000..a7a6926e --- /dev/null +++ b/apps/couch/DEVELOPERS @@ -0,0 +1,95 @@ +Apache CouchDB DEVELOPERS +========================= + +Only follow these instructions if you are building from a source checkout. + +If you're unsure what this means, ignore this document. + +Dependencies +------------ + +You will need the following installed: + + * GNU Automake (>=1.6.3) (http://www.gnu.org/software/automake/) + * GNU Autoconf (>=2.59) (http://www.gnu.org/software/autoconf/) + * GNU Libtool (http://www.gnu.org/software/libtool/) + * GNU help2man (http://www.gnu.org/software/help2man/) + +The `help2man` tool is optional, but will generate `man` pages for you. + +Debian-based (inc. Ubuntu) Systems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can install the dependencies by running: + + apt-get install automake autoconf libtool help2man + +Be sure to update the version numbers to match your system's available packages. + +Mac OS X +~~~~~~~~ + +You can install the dependencies by running: + + port install automake autoconf libtool help2man + +You will need MacPorts installed to use the `port` command. + +Bootstrapping +------------- + +Bootstrap the pristine source by running: + + ./bootstrap + +You must repeat this step every time you update your source checkout. + +Testing +------- + +Check the test suite by running: + + make check + +Generate a coverage report by running: + + make cover + +Please report any problems to the developer's mailing list. + +Releasing +--------- + +Unix-like Systems +~~~~~~~~~~~~~~~~~ + +Configure the source by running: + + ./configure + +Prepare the release artefacts by running: + + make distcheck + +You can prepare signed release artefacts by running: + + make distsign + +The release artefacts can be found in the root source directory. + +Microsoft Windows +~~~~~~~~~~~~~~~~~ + +Configure the source by running: + + ./configure + +Prepare the release artefacts by running: + + make dist + +The release artefacts can be found in the `etc/windows` directory. + +Until the build system has been improved, you must make sure that you run this +command from a clean source checkout. If you do not, your test database and log +files will be bundled up in the release artefact. diff --git a/apps/couch/INSTALL.Unix b/apps/couch/INSTALL.Unix new file mode 100644 index 00000000..720134d4 --- /dev/null +++ b/apps/couch/INSTALL.Unix @@ -0,0 +1,231 @@ +Apache CouchDB README.Unix +========================== + +A high-level guide to Unix-like systems, inc. Mac OS X and Ubuntu. + +Dependencies +------------ + +You will need the following installed: + + * Erlang OTP (>=R13B2) (http://erlang.org/) + * ICU (http://icu.sourceforge.net/) + * OpenSSL (http://www.openssl.org/) + * Mozilla SpiderMonkey (1.8) (http://www.mozilla.org/js/spidermonkey/) + * libcurl (http://curl.haxx.se/libcurl/) + * GNU Make (http://www.gnu.org/software/make/) + * GNU Compiler Collection (http://gcc.gnu.org/) + +It is recommended that you install Erlang OTP R12B-5 or above where possible. + +Ubuntu +~~~~~~ + +See + + http://wiki.apache.org/couchdb/Installing_on_Ubuntu + +for updated instructions on how to install on Ubuntu. + +Debian-based Systems +~~~~~~~~~~~~~~~~~~~~ + +You can install the build tools by running: + + sudo apt-get install build-essential + +You can install the other dependencies by running: + + sudo apt-get install erlang libicu-dev libmozjs-dev libcurl4-openssl-dev + +Be sure to update the version numbers to match your system's available packages. + +Mac OS X +~~~~~~~~ + +You can install the build tools by running: + + open /Applications/Installers/Xcode\ Tools/XcodeTools.mpkg + +You can install the other dependencies by running: + + sudo port install icu erlang spidermonkey curl + +You will need MacPorts installed to use the `port` command. + +Installing +---------- + +Once you have satisfied the dependencies you should run: + + ./configure + +This script will configure CouchDB to be installed into `/usr/local` by default. + +If you wish to customise the installation, pass `--help` to this script. + +If everything was successful you should see the following message: + + You have configured Apache CouchDB, time to relax. + +Relax. + +To install CouchDB you should run: + + make && sudo make install + +You only need to use `sudo` if you're installing into a system directory. + +Try `gmake` if `make` is giving you any problems. + +If everything was successful you should see the following message: + + You have installed Apache CouchDB, time to relax. + +Relax. + +First Run +--------- + +You can start the CouchDB server by running: + + sudo -i -u couchdb couchdb + +This uses the `sudo` command to run the `couchdb` command as the `couchdb` user. + +When CouchDB starts it should eventually display the following message: + + Apache CouchDB has started, time to relax. + +Relax. + +To check that everything has worked, point your web browser to: + + http://127.0.0.1:5984/_utils/index.html + +From here you should run the test suite. + +Security Considerations +----------------------- + +You should create a special `couchdb` user for CouchDB. + +On many Unix-like systems you can run: + + adduser --system \ + --home /usr/local/var/lib/couchdb \ + --no-create-home \ + --shell /bin/bash \ + --group --gecos \ + "CouchDB Administrator" couchdb + +On Mac OS X you can use the Workgroup Manager to create users: + + http://www.apple.com/support/downloads/serveradmintools1047.html + +You must make sure that: + + * The user has a working POSIX shell + + * The user's home directory is `/usr/local/var/lib/couchdb` + +You can test this by: + + * Trying to log in as the `couchdb` user + + * Running `pwd` and checking the present working directory + +Change the ownership of the CouchDB directories by running: + + chown -R couchdb:couchdb /usr/local/etc/couchdb + chown -R couchdb:couchdb /usr/local/var/lib/couchdb + chown -R couchdb:couchdb /usr/local/var/log/couchdb + chown -R couchdb:couchdb /usr/local/var/run/couchdb + +Change the permission of the CouchDB directories by running: + + chmod 0770 /usr/local/etc/couchdb + chmod 0770 /usr/local/var/lib/couchdb + chmod 0770 /usr/local/var/log/couchdb + chmod 0770 /usr/local/var/run/couchdb + +Running as a Daemon +------------------- + +SysV/BSD-style Systems +~~~~~~~~~~~~~~~~~~~~~~ + +You can use the `couchdb` init script to control the CouchDB daemon. + +On SysV-style systems, the init script will be installed into: + + /usr/local/etc/init.d + +On BSD-style systems, the init script will be installed into: + + /usr/local/etc/rc.d + +We use the `[init.d|rc.d]` notation to refer to both of these directories. + +You can control the CouchDB daemon by running: + + /usr/local/etc/[init.d|rc.d]/couchdb [start|stop|restart|status] + +If you wish to configure how the init script works, you can edit: + + /usr/local/etc/default/couchdb + +Comment out the `COUCHDB_USER` setting if you're running as a non-superuser. + +To start the daemon on boot, copy the init script to: + + /etc/[init.d|rc.d] + +You should then configure your system to run the init script automatically. + +You may be able to run: + + sudo update-rc.d couchdb defaults + +If this fails, consult your system documentation for more information. + +A `logrotate` configuration is installed into: + + /usr/local/etc/logrotate.d/couchdb + +Consult your `logrotate` documentation for more information. + +It is critical that the CouchDB logs are rotated so as not to fill your disk. + +Mac OS X +~~~~~~~~ + +You can use the `launchctl` command to control the CouchDB daemon. + +You can load the configuration by running: + + sudo launchctl load \ + /usr/local/Library/LaunchDaemons/org.apache.couchdb.plist + +You can stop the CouchDB daemon by running: + + sudo launchctl unload \ + /usr/local/Library/LaunchDaemons/org.apache.couchdb.plist + +You can start CouchDB by running: + + sudo launchctl start org.apache.couchdb + +You can restart CouchDB by running: + + sudo launchctl stop org.apache.couchdb + +You can edit the launchd configuration by running: + + open /usr/local/Library/LaunchDaemons/org.apache.couchdb.plist + +To start the daemon on boot, copy the configuration file to: + + /Library/LaunchDaemons + +Consult your system documentation for more information. diff --git a/apps/couch/INSTALL.Windows b/apps/couch/INSTALL.Windows new file mode 100644 index 00000000..d661f1da --- /dev/null +++ b/apps/couch/INSTALL.Windows @@ -0,0 +1,153 @@ +Apache CouchDB README.Windows +============================== + +For a high-level guide to Microsoft Windows. + +Dependencies +------------ + +You will need the following installed: + + * Erlang OTP (>=14B03) (http://erlang.org/) + * ICU (=4.4.*) (http://icu.sourceforge.net/) + * OpenSSL (http://www.openssl.org/) + * Mozilla SpiderMonkey (1.8) (http://www.mozilla.org/js/spidermonkey/) + * libcurl (http://curl.haxx.se/libcurl/) + * Cygwin (http://www.cygwin.com/) + * Visual Studio 2008 (http://msdn.microsoft.com/en-gb/vstudio/default.aspx) + +General Notes +------------- + + * When installing Erlang, you must build it from source. + +The CouchDB build requires a number of the Erlang build scripts. + + * When installing ICU, select the binaries built with Visual Studio 2008. + + * When installing Cygwin, be sure to select all the `development` tools. + + * When installing libcurl, be sure to install by hand. + + The Cygwin binaries are incompatible and will not work with Erlang. + +Setting Up Cygwin +----------------- + +Before starting any Cygwin terminals, run: + + set CYGWIN=nontsec + +To set up your environment, run: + + [VS_BIN]/vcvars32.bat + +Replace [VS_BIN] with the path to your Visual Studio `bin` directory. + +You must check that: + + * The `which link` command points to the Microsoft linker. + + * The `which cl` command points to the Microsoft compiler. + + * The `which mc` command points to the Microsoft message compiler. + + * The `which mt` command points to the Microsoft manifest tool. + +If you do not do this, the build may fail due to Cygwin ones found in `/usr/bin` +being used instead. + +Building Erlang +--------------- + +You must include Win32 OpenSSL. + +However, you can skip the GUI tools by running: + + echo "skipping gs" > lib/gs/SKIP + + echo "skipping ic" > lib/ic/SKIP + +Follow the rest of the Erlang instructions as described. + +After running: + + ./otp_build release -a + +You should run: + + ./release/win32/Install.exe + +This will set up the release/win32/bin directory correctly. + +To set up your environment for building CouchDB, run: + + eval `./otp_build env_win32` + +To set up the `ERL_TOP` environment variable, run: + + export ERL_TOP=[ERL_TOP] + +Replace `[ERL_TOP]` with the Erlang source directory name. + +Remember to use `/cygdrive/c/` instead of `c:/` as the directory prefix. + +To set up your path, run: + + export PATH=$ERL_TOP/release/win32/erts-5.8.2/bin:$PATH + +If everything was successful, you should be ready to build CouchDB. + +Relax. + +Building CouchDB +---------------- + +Once you have satisfied the dependencies you should run: + + ./configure \ + --with-js-include=/cygdrive/c/path_to_spidermonkey_include \ + --with-js-lib=/cygdrive/c/path_to_spidermonkey_lib \ + --with-win32-icu-binaries=/cygdrive/c/path_to_icu_binaries_root \ + --with-erlang=$ERL_TOP/release/win32/usr/include \ + --with-win32-curl=/cygdrive/c/path/to/curl/root/directory \ + --with-openssl-bin-dir=/cygdrive/c/openssl/bin \ + --with-msvc-redist-dir=/cygdrive/c/dir/with/vcredist_platform_executable \ + --prefix=$ERL_TOP/release/win32 + +This command could take a while to complete. + +If everything was successful you should see the following message: + + You have configured Apache CouchDB, time to relax. + +Relax. + +To install CouchDB you should run: + + make install + +If everything was successful you should see the following message: + + You have installed Apache CouchDB, time to relax. + +Relax. + +First Run +--------- + +You can start the CouchDB server by running: + + $ERL_TOP/release/win32/bin/couchdb.bat + +When CouchDB starts it should eventually display the following message: + + Apache CouchDB has started, time to relax. + +Relax. + +To check that everything has worked, point your web browser to: + + http://127.0.0.1:5984/_utils/index.html + +From here you should run the test suite in either Firefox 3.6+ or Safari 4+. diff --git a/apps/couch/LICENSE b/apps/couch/LICENSE new file mode 100644 index 00000000..20a425b1 --- /dev/null +++ b/apps/couch/LICENSE @@ -0,0 +1,371 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +Apache CouchDB Subcomponents + +The Apache CouchDB project includes a number of subcomponents with separate +copyright notices and license terms. Your use of the code for the these +subcomponents is subject to the terms and conditions of the following licenses. + +For the m4/ac_check_icu.m4 component: + + Copyright (c) 2005 Akos Maroy <darkeye@tyrell.hu> + + Copying and distribution of this file, with or without modification, are + permitted in any medium without royalty provided the copyright notice + and this notice are preserved. + +For the share/www/script/jquery.js component: + + Copyright (c) 2009 John Resig, http://jquery.com/ + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For the share/www/script/jquery.form.js component: + + http://malsup.com/jquery/form/ + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For the share/www/script/json2.js component: + + Public Domain + + No warranty expressed or implied. Use at your own risk. + +For the src/mochiweb component: + + Copyright (c) 2007 Mochi Media, Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For the src/ibrowse component: + + Copyright (c) 2006, Chandrashekhar Mullaparthi + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the T-Mobile nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +For the src/erlang-oauth component: + + Copyright (c) 2008-2009 Tim Fletcher <http://tfletcher.com/> + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + +For the src/etap component: + + Copyright (c) 2008-2009 Nick Gerakines <nick@gerakines.net> + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + diff --git a/apps/couch/NEWS b/apps/couch/NEWS new file mode 100644 index 00000000..e62289ed --- /dev/null +++ b/apps/couch/NEWS @@ -0,0 +1,353 @@ +Apache CouchDB NEWS +=================== + +For details about backwards incompatible changes, see: + + http://wiki.apache.org/couchdb/Breaking_changes + +Each release section notes when backwards incompatible changes have been made. + +Version 1.1.1 +------------- + +* Support SpiderMonkey 1.8.5 +* Add configurable maximum to the number of bytes returned by _log. +* Allow CommonJS modules to be an empty string. +* Bump minimum Erlang version to R13B02. +* Do not run deleted validate_doc_update functions. +* ETags for views include current sequence if include_docs=true. +* Fix bug where duplicates can appear in _changes feed. +* Fix bug where update handlers break after conflict resolution. +* Fix bug with _replicator where include "filter" could crash couch. +* Fix crashes when compacting large views. +* Fix file descriptor leak in _log +* Fix missing revisions in _changes?style=all_docs. +* Improve handling of compaction at max_dbs_open limit. +* JSONP responses now send "text/javascript" for Content-Type. +* Link to ICU 4.2 on Windows. +* Permit forward slashes in path to update functions. +* Reap couchjs processes that hit reduce_overflow error. +* Status code can be specified in update handlers. +* Support provides() in show functions. +* _view_cleanup when ddoc has no views now removes all index files. +* max_replication_retry_count now supports "infinity". +* Fix replication crash when source database has a document with empty ID. +* Fix deadlock when assigning couchjs processes to serve requests. +* Fixes to the document multipart PUT API. +* Fixes regarding file descriptor leaks for databases with views. + +Version 1.1.0 +------------- + +All NEWS for 1.0.2 also apply to 1.1.0. + +This release contains backwards incompatible changes. + + * Native SSL support. + * Added support for HTTP range requests for attachments. + * Added built-in filters for `_changes`: `_doc_ids` and `_design`. + * Added configuration option for TCP_NODELAY aka "Nagle". + * Allow wildcards in vhosts definitions. + * More granular ETag support for views. + * More flexible URL rewriter. + * Added OS Process module to manage daemons outside of CouchDB. + * Added HTTP Proxy handler for more scalable externals. + * Added `_replicator` database to manage replications. + * Multiple micro-optimizations when reading data. + * Added CommonJS support to map functions. + * Added `stale=update_after` query option that triggers a view update after + returning a `stale=ok` response. + * More explicit error messages when it's not possible to access a file due + to lack of permissions. + * Added a "change password"-feature to Futon. + +Version 1.0.2 +------------- + + * Make test suite work with Safari and Chrome. + * Fix leaking file handles after compacting databases and views. + * Fix databases forgetting their validation function after compaction. + * Fix occasional timeout errors. + * Reduce lengthy stack traces. + * Allow logging of native <xml> types. + * Updated ibrowse library to 2.1.2 fixing numerous replication issues. + * Fix authenticated replication of design documents with attachments. + * Fix multipart GET APIs by always sending attachments in compressed + form when the source attachment is compressed on disk. Fixes a possible + edge case when an attachment underwent local-local replication. + * Various fixes to make replicated more resilient for edge-cases. + * Don't trigger a view update when requesting `_design/doc/_info`. + * Fix for circular references in CommonJS requires. + * Fix for frequently edited documents in multi-master deployments being + duplicated in _changes and _all_docs. + * Fix spurious conflict generation during attachment uploads. + * Fix for various bugs in Futon. + +Version 1.0.1 +------------- + + * Fix data corruption bug COUCHDB-844. Please see + http://couchdb.apache.org/notice/1.0.1.html for details. + * Added support for replication via an HTTP/HTTPS proxy. + * Fixed various replicator bugs for interop with older CouchDB versions. + * Show fields saved along with _deleted=true. Allows for auditing of deletes. + * Enable basic-auth popup when required to access the server, to prevent + people from getting locked out. + * User interface element for querying stale (cached) views. + +Version 1.0.0 +------------- + + * More efficient header commits. + * Use O_APPEND to save lseeks. + * Faster implementation of pread_iolist(). Further improves performance on + concurrent reads. + * Added authentication caching + * Faster default view collation. + * Added option to include update_seq in view responses. + +Version 0.11.2 +-------------- + + * Replicator buxfixes for replicating design documents from secured databases. + * Better error messages on invalid URL requests. + * User documents can now be deleted by admins or the user. + * Avoid potential DOS attack by guarding all creation of atoms. + * Some Futon and JavaScript library bugfixes. + * Fixed CVE-2010-2234: Apache CouchDB Cross Site Request Forgery Attack + +Version 0.11.1 +-------------- + + * Mask passwords in active tasks and logging. + * Update mochijson2 to allow output of BigNums not in float form. + * Added support for X-HTTP-METHOD-OVERRIDE. + * Disable jsonp by default. + * Accept gzip encoded standalone attachments. + * Made max_concurrent_connections configurable. + * Added continuous replication option to Futon. + * Added option to replicating test results anonymously to a community + CouchDB instance. + * Allow creation and deletion of config entries in Futon. + * Fixed various UI issues in Futon. + * Fixed compatibility with the Erlang R14 series. + * Fixed warnings on Linux builds. + * Fixed build error when aclocal needs to be called during the build. + * Require ICU 4.3.1. + * Fixed compatibility with Solaris. + * Added authentication redirect URL to log in clients. + * Added authentication caching, to avoid repeated opening and closing of the + users database for each request requiring authentication. + * Made authentication timeout configurable. + * Temporary views are now admin-only resources. + * Don't require a revpos for attachment stubs. + * Make file deletions async to avoid pauses during compaction and db + deletion. + * Fixed for wrong offset when writing headers and converting them to blocks, + only triggered when header is larger than 4k. + * Preserve _revs_limit and instance_start_time after compaction. + * Fixed timeout with large .ini files. + * Added tests for couch.js and jquery.couch.js + * Added various API features to jquery.couch.js + * Faster default view collation. + * Upgrade CommonJS modules support to 1.1.1. + * Added option to include update_seq in view responses. + * Fixed erlang filter funs and normalize filter fun API. + * Fixed hang in view shutdown. + * Refactored various internal APIs related to attachment streaming. + * Fixed hanging replication. + * Fixed keepalive issue. + * Allow global rewrites so system defaults are available in vhosts. + * Allow isolation of databases with vhosts. + * Made the test suite overall more reliable. + +Version 0.11.0 +-------------- + +This version is a feature-freeze release candidate for Apache CouchDB 1.0. + + * Fixed CVE-2010-0009: Apache CouchDB Timing Attack Vulnerability. + * Added support for building a Windows installer as part of 'make dist'. + * Added optional 'raw' binary collation for faster view builds where Unicode + collation is not important. + * Improved view index build time by reducing ICU collation callouts. + * Added option to implicitly create replication target databases. + * Improved view information objects. + * Bug fix for partial updates during view builds. + * Bug fix for building couch.app's module list. + * Fixed a problem with statistics timers and system sleep. + * Improved the statistics calculations to use an online moving window + algorithm. + * Adds batching of multiple updating requests, to improve throughput with many + writers. + * Removed the now redundant couch_batch_save module. + * Bug fix for premature termination of chunked responses. + * Improved speed and concurrency of config lookups. + * Fixed an edge case for HTTP redirects during replication. + * Fixed HTTP timeout handling for replication. + * Fixed query parameter handling in OAuth'd replication. + * Fixed a bug preventing mixing languages with lists and views. + * Avoid OS process leaks in lists. + * Avoid leaking file descriptors on automatic replication restarts. + * Various improvements to the Futon UI. + * Provide Content-MD5 header support for attachments. + * Added default cookie-authentication and users db. + * Added per-db reader access control lists. + * Added per-db security object for configuration data in validation functions. + * Added URL Rewriter handler. + * Added proxy authentication handler. + * Added ability to replicate documents by id. + * Added virtual host handling. + * Uses json2.js for JSON serialization compatiblity with native JSON. + * Fixed CVE-2010-0009: Apache CouchDB Timing Attack Vulnerability. + +Version 0.10.2 +-------------- + + * Fixed CVE-2010-0009: Apache CouchDB Timing Attack Vulnerability. + +Version 0.10.1 +-------------- + + * Fixed test suite to work with build system. + * Fixed a problem with statistics timers and system sleep. + * Fixed an edge case for HTTP redirects during replication. + * Fixed HTTP timeout handling for replication. + * Fixed query parameter handling in OAuth'd replication. + * Fixed a bug preventing mixing languages with lists and views. + * Avoid OS process leaks in lists. + +Version 0.10.0 +-------------- + +This release contains backwards incompatible changes. + + * General performance improvements. + * View index generation speedups. + * Even more robust storage format. + * Native Erlang Views for high-performance applications. + * More robust push and pull replication. + * Two-legged OAuth support for applications and replication (three-legged in + preparation). + * Cookie authentication. + * API detail improvements. + * Better RFC 2616 (HTTP 1.1) compliance. + * Added modular configuration file directories. + * Miscellaneous improvements to build, system integration, and portability. + +Version 0.9.2 +------------- + + * Remove branch callbacks to allow building couchjs against newer versions of + Spidermonkey. + * Fix replication with 0.10 servers initiated by an 0.9 server. + +Version 0.9.1 +------------- + + * Various bug fixes for the build system, configuration, statistics reporting, + database core, external handlers, Futon interface, HTTP interface, + JavaScript View Server and replicator. + +Version 0.9.0 +------------- + +This release contains backwards incompatible changes. + + * Modular configuration. + * Performance enhancements for document and view access. + * More resilient replication process. + * Replication streams binary attachments. + * Administrator role and basic authentication. + * Document validation functions in design documents. + * Show and list functions for rendering documents and views as developer + controlled content-types. + * External process server module. + * Attachment uploading from Futon. + * Etags for views, lists, shows, document and attachment requests. + * Miscellaneous improvements to build, system integration, and portability. + +Version 0.8.1-incubating +------------------------ + + * Various bug fixes for replication, compaction, the HTTP interface and the + JavaScript View Server. + +Version 0.8.0-incubating +------------------------ + +This release contains backwards incompatible changes. + + * Changed core licensing to the Apache Software License 2.0. + * Refactoring of the core view and storage engines. + * Added support for incremental map/reduce views. + * Changed database file format. + * Many improvements to Futon, the web administration interface. + * Miscellaneous improvements to build, system integration, and portability. + * Swapped out Erlang's inets HTTP server for the Mochiweb HTTP server. + * SpiderMonkey is no longer included with CouchDB, but rather treated as an + external dependency. + * Added bits of awesome. + +Version 0.7.2 +------------- + + * Small changes to build process and `couchdb` command. + * Database server official port is now 5984 TCP/UDP instead of 8888. + +Version 0.7.1 +------------- + + * Small compatibility issue with Firefox 3 fixed. + +Version 0.7.0 +------------- + + * Infrastructure rewritten to use the GNU build system for portability. + * The built-in database browsing tool has been rewritten to provide a much + nicer interface for interacting directly with CouchDB from your web browser. + * XML and Fabric have been replaced with JSON and JavaScript for data + transport and View definitions. + +Version 0.6.0 +------------- + + * A replication facility is now available. + * CouchPeek can now create, delete and view documents. + * Building from source is easier and less error prone. + +Version 0.5.0 +------------- + + * A built-in CouchPeek utility. + * A full install kit buildable from a single command. + * A new GNU/Linux version is available. An OS X version is coming soon. + +Version 0.4.0 +------------- + + * Non-existent variables are now nil lists. + * Couch error codes and messages are no longer sent in the HTTP fields, + instead they are exclusively returned in the XML body. This is to avoid HTTP + header parsing problems with oddly formed error messages. + * Returned error messages are now logged at the server at the `info` level to + make general debugging easier. + * Fixed a problem where big table builds caused timeout errors. + * Lots of changes in the low level machinery. Most formulas will continue to + function the same. + * Added full compiler support for extended characters in formula source. + * Support for Perl/Ruby like regular expressions. + * Added `total_rows` and `result_start` attributes to tables. + +Version 0.3.0 +------------- + + * CouchDB now fully supports Unicode and locale specific collation via the ICU + library, both in the Fabric engine and computed tables. + * The `in` operator has been added to Fabric. + * The `startdoc` query string variable specifies the starting document to use + if there are multiple rows with identical startkeys. + * The `skip` query string variable specifies the number of rows to skip before + returning results. The `skip` value must be a positive integer. If used with + a `count` variable the skipped rows aren't counted as output. + * Various changes to the output XML format. diff --git a/apps/couch/NOTICE b/apps/couch/NOTICE new file mode 100644 index 00000000..4daa496f --- /dev/null +++ b/apps/couch/NOTICE @@ -0,0 +1,55 @@ +Apache CouchDB +Copyright 2009 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product also includes the following third-party components: + + * ac_check_icu.m4 (http://autoconf-archive.cryp.to/ac_check_icu.html) + + Copyright 2008, Akos Maroy <darkeye@tyrell.hu> + + * ac_check_curl.m4 (http://autoconf-archive.cryp.to/ac_check_curl.html) + + Copyright 2008, Akos Maroy <darkeye@tyrell.hu> + + * jQuery (http://jquery.com/) + + Copyright 2010, John Resig + + * jQuery UI (http://jqueryui.com) + + Copyright 2011, Paul Bakaus + + * json2.js (http://www.json.org/) + + In the public domain + + * MochiWeb (http://code.google.com/p/mochiweb/) + + Copyright 2007, Mochi Media Coporation + + * ibrowse (http://github.com/cmullaparthi/ibrowse/tree/master) + + Copyright 2009, Chandrashekhar Mullaparthi + + * Erlang OAuth (http://github.com/tim/erlang-oauth/tree/master) + + Copyright 2009, Tim Fletcher <http://tfletcher.com/> + + * ETap (http://github.com/ngerakines/etap/) + + Copyright 2009, Nick Gerakines <nick@gerakines.net> + + * mimeparse.js (http://code.google.com/p/mimeparse/) + + Copyright 2009, Chris Anderson <jchris@apache.org> + + * base64.js + + Copyright 1999, Masanao Izumo <iz@onicos.co.jp> + + * jspec.js (http://visionmedia.github.com/jspec/) + + Copyright 2010 TJ Holowaychuk <tj@vision-media.ca> diff --git a/apps/couch/README b/apps/couch/README new file mode 100644 index 00000000..540226d3 --- /dev/null +++ b/apps/couch/README @@ -0,0 +1,81 @@ +Apache CouchDB README +===================== + +Installation +------------ + +For a low-level guide, see: + + INSTALL + +For a high-level guide to Unix-like systems, inc. Mac OS X and Ubuntu, see: + + INSTALL.Unix + +For a high-level guide to Microsoft Windows, see: + + INSTALL.Windows + +Follow the proper instructions to get CouchDB installed on your system. + +If you're having problems, skip to the next section. + +Troubleshooting +---------------- + +For troubleshooting, see: + + http://wiki.apache.org/couchdb/Troubleshooting + +If you're getting a cryptic error message, see: + + http://wiki.apache.org/couchdb/Error_messages + +For general help, see: + + http://couchdb.apache.org/community/lists.html + +The mailing lists provide a wealth of support and knowledge for you to tap into. +Feel free to drop by with your questions or discussion. See the official CouchDB +website for more information about our community resources. + + +Running the Testsuite +--------------------- + +Run the testsuite for couch.js and jquery.couch.js by browsing to this site: http://127.0.0.1:5984/_utils/spec/run.html +It should work in at least Firefox >= 3.6 and Safari >= 4.0.4. + +Read more about JSpec here: http://jspec.info/ + +Trouble shooting +~~~~~~~~~~~~~~~~ + + * When you change the specs, but your changes have no effect, manually reload the changed spec file in the browser. + + * When the spec that tests erlang views fails, make sure you have enabled erlang views as described here: <http://wiki.apache.org/couchdb/EnableErlangViews> + + +Cryptographic Software Notice +----------------------------- + +This distribution includes cryptographic software. The country in which you +currently reside may have restrictions on the import, possession, use, and/or +re-export to another country, of encryption software. BEFORE using any +encryption software, please check your country's laws, regulations and policies +concerning the import, possession, or use, and re-export of encryption software, +to see if this is permitted. See <http://www.wassenaar.org/> for more +information. + +The U.S. Government Department of Commerce, Bureau of Industry and Security +(BIS), has classified this software as Export Commodity Control Number (ECCN) +5D002.C.1, which includes information security software using or performing +cryptographic functions with asymmetric algorithms. The form and manner of this +Apache Software Foundation distribution makes it eligible for export under the +License Exception ENC Technology Software Unrestricted (TSU) exception (see the +BIS Export Administration Regulations, Section 740.13) for both object code and +source code. + +The following provides more details on the included cryptographic software: + +CouchDB includes a HTTP client (ibrowse) with SSL functionality. diff --git a/apps/couch/THANKS b/apps/couch/THANKS new file mode 100644 index 00000000..470c3937 --- /dev/null +++ b/apps/couch/THANKS @@ -0,0 +1,87 @@ +Apache CouchDB THANKS +===================== + +A number of people have contributed to Apache CouchDB by reporting problems, +suggesting improvements or submitting changes. Some of these people are: + + * William Beh <willbeh@gmail.com> + * Dirk Schalge <dirk@epd-me.net> + * Roger Leigh <rleigh@debian.org> + * Sam Ruby <rubys@intertwingly.net> + * Carlos Valiente <superdupont@gmail.com> + * Till Klampaeckel <till@klampaeckel.de> + * Jim Lindley <web@jimlindley.com> + * Yoan Blanc <yoan.blanc@gmail.com> + * Michael Gottesman <gottesmm@reed.edu> + * Mark Baran <mebaran@gmail.com> + * Michael Hendricks <michael@ndrix.org> + * Antony Blakey <antony.blakey@gmail.com> + * Paul Carey <paul.p.carey@gmail.com> + * Hunter Morris <huntermorris@gmail.com> + * Brian Palmer <jira@brian.codekitchen.net> + * Maximillian Dornseif <md@hudora.de> + * Eric Casteleijn <eric.casteleijn@canonical.com> + * Maarten Thibaut <mthibaut@cisco.com> + * Florian Ebeling <florian.ebeling@gmail.com> + * Volker Mische <volker.mische@gmail.com> + * Brian Candler <B.Candler@pobox.com> + * Brad Anderson <brad@sankatygroup.com> + * Nick Gerakines <nick@gerakines.net> + * Bob Dionne <dionne@member.fsf.org> + * Kevin Ilchmann Jørgensen <kijmail@gmail.com> + * Dirkjan Ochtman <dirkjan@ochtman.nl> + * Sebastian Cohnen <sebastian.cohnen@gmx.net> + * Sven Helmberger <sven.helmberger@gmx.de> + * Dan Walters <dan@danwalters.net> + * Curt Arnold <carnold@apache.org> + * Gustavo Niemeyer + * Joshua Bronson <jabronson@gmail.com> + * Kostis Sagonas <kostis@cs.ntua.gr> + * Matthew Hooker <mwhooker@gmail.com> + * Ilia Cheishvili <ilia.cheishvili@gmail.com> + * Lena Herrmann <lena@zeromail.org> + * Jack Moffit <metajack@gmail.com> + * Damjan Georgievski <gdamjan@gmail.com> + * Jan Kassens <jan@kassens.net> + * James Marca <jmarca@translab.its.uci.edu> + * Matt Goodall <matt.goodall@gmail.com> + * Joel Clark <unsigned_char@yahoo.com> + * Matt Lyon <matt@flowerpowered.com> + * mikeal <mikeal.rogers@gmail.com> + * Randall Leeds <randall.leeds@gmail.com> + * Joscha Feth <joscha@feth.com> + * Jarrod Roberson <jarrod@vertigrated.com> + * Jae Kwon <jkwon.work@gmail.com> + * Gavin Sherry <swm@alcove.com.au> + * Timothy Smith <tim@couch.io> + * Martin Haaß <MartinHaass@gmx.net> + * Hans Ulrich Niedermann <hun@n-dimensional.de> + * Jason Smith <jhs@proven-corporation.com> + * Dmitry Unkovsky <oil.crayons@gmail.com> + * Zachary Zolton <zachary.zolton@gmail.com> + * Brian Jenkins <bonkydog@bonkydog.com> + * Paul Bonser <pib@paulbonser.com> + * Caleb Land <caleb.land@gmail.com> + * Juhani Ränkimies <juhani@juranki.com> + * Kev Jackson <foamdino@gmail.com> + * Jonathan D. Knezek <jdknezek@gmail.com> + * David Rose <doppler@gmail.com> + * Lim Yue Chuan <shasderias@gmail.com> + * David Davis <xantus@xantus.org> + * Klaus Trainer <klaus.trainer@web.de> + * Dale Harvey <dale@arandomurl.com> + * Juuso Väänänen <juuso@vaananen.org> + * Jeff Zellner <jeff.zellner@gmail.com> + * Benjamin Young <byoung@bigbluehat.com> + * Gabriel Farrell <gsf747@gmail.com> + * Mike Leddy <mike@loop.com.br> + * Felix Hummel <apache@felixhummel.de> + * Tim Smith <tim@couchbase.com> + * Sam Bisbee <sam@sbisbee.com> + * Nathan Vander Wilt <natevw@yahoo.com> + * Caolan McMahon <caolan.mcmahon@googlemail.com> + * Alexander Shorin <kxepal@gmail.com> + * Christopher Bonhage <queezey@me.com> + * Christian Carter <cdcarter@gmail.com> + +For a list of authors see the `AUTHORS` file. diff --git a/apps/couch/c_src/couch_icu_driver.c b/apps/couch/c_src/couch_icu_driver.c new file mode 100644 index 00000000..1afe8eac --- /dev/null +++ b/apps/couch/c_src/couch_icu_driver.c @@ -0,0 +1,177 @@ +/* + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +*/ + +// This file is the C port driver for Erlang. It provides a low overhead +// means of calling into C code, however coding errors in this module can +// crash the entire Erlang server. + +#ifdef DARWIN +#define U_HIDE_DRAFT_API 1 +#define U_DISABLE_RENAMING 1 +#endif + +#include "erl_driver.h" +#include "unicode/ucol.h" +#include "unicode/ucasemap.h" +#ifndef WIN32 +#include <string.h> // for memcpy +#endif + +typedef struct { + ErlDrvPort port; + UCollator* collNoCase; + UCollator* coll; +} couch_drv_data; + +static void couch_drv_stop(ErlDrvData data) +{ + couch_drv_data* pData = (couch_drv_data*)data; + if (pData->coll) { + ucol_close(pData->coll); + } + if (pData->collNoCase) { + ucol_close(pData->collNoCase); + } + driver_free((char*)pData); +} + +static ErlDrvData couch_drv_start(ErlDrvPort port, char *buff) +{ + UErrorCode status = U_ZERO_ERROR; + couch_drv_data* pData = (couch_drv_data*)driver_alloc(sizeof(couch_drv_data)); + + if (pData == NULL) + return ERL_DRV_ERROR_GENERAL; + + pData->port = port; + + pData->coll = ucol_open("", &status); + if (U_FAILURE(status)) { + couch_drv_stop((ErlDrvData)pData); + return ERL_DRV_ERROR_GENERAL; + } + + pData->collNoCase = ucol_open("", &status); + if (U_FAILURE(status)) { + couch_drv_stop((ErlDrvData)pData); + return ERL_DRV_ERROR_GENERAL; + } + + ucol_setAttribute(pData->collNoCase, UCOL_STRENGTH, UCOL_PRIMARY, &status); + if (U_FAILURE(status)) { + couch_drv_stop((ErlDrvData)pData); + return ERL_DRV_ERROR_GENERAL; + } + + return (ErlDrvData)pData; +} + +static int return_control_result(void* pLocalResult, int localLen, char **ppRetBuf, int returnLen) +{ + if (*ppRetBuf == NULL || localLen > returnLen) { + *ppRetBuf = (char*)driver_alloc_binary(localLen); + if(*ppRetBuf == NULL) { + return -1; + } + } + memcpy(*ppRetBuf, pLocalResult, localLen); + return localLen; +} + +static int couch_drv_control(ErlDrvData drv_data, unsigned int command, char *pBuf, + int bufLen, char **rbuf, int rlen) +{ + + couch_drv_data* pData = (couch_drv_data*)drv_data; + switch(command) { + case 0: // COLLATE + case 1: // COLLATE_NO_CASE: + { + UErrorCode status = U_ZERO_ERROR; + int collResult; + char response; + UCharIterator iterA; + UCharIterator iterB; + int32_t length; + + // 2 strings are in the buffer, consecutively + // The strings begin first with a 32 bit integer byte length, then the actual + // string bytes follow. + + // first 32bits are the length + memcpy(&length, pBuf, sizeof(length)); + pBuf += sizeof(length); + + // point the iterator at it. + uiter_setUTF8(&iterA, pBuf, length); + + pBuf += length; // now on to string b + + // first 32bits are the length + memcpy(&length, pBuf, sizeof(length)); + pBuf += sizeof(length); + + // point the iterator at it. + uiter_setUTF8(&iterB, pBuf, length); + + if (command == 0) // COLLATE + collResult = ucol_strcollIter(pData->coll, &iterA, &iterB, &status); + else // COLLATE_NO_CASE + collResult = ucol_strcollIter(pData->collNoCase, &iterA, &iterB, &status); + + if (collResult < 0) + response = 0; //lt + else if (collResult > 0) + response = 2; //gt + else + response = 1; //eq + + return return_control_result(&response, sizeof(response), rbuf, rlen); + } + + default: + return -1; + } +} + +ErlDrvEntry couch_driver_entry = { + NULL, /* F_PTR init, N/A */ + couch_drv_start, /* L_PTR start, called when port is opened */ + couch_drv_stop, /* F_PTR stop, called when port is closed */ + NULL, /* F_PTR output, called when erlang has sent */ + NULL, /* F_PTR ready_input, called when input descriptor ready */ + NULL, /* F_PTR ready_output, called when output descriptor ready */ + "couch_icu_driver", /* char *driver_name, the argument to open_port */ + NULL, /* F_PTR finish, called when unloaded */ + NULL, /* Not used */ + couch_drv_control, /* F_PTR control, port_command callback */ + NULL, /* F_PTR timeout, reserved */ + NULL, /* F_PTR outputv, reserved */ + NULL, /* F_PTR ready_async */ + NULL, /* F_PTR flush */ + NULL, /* F_PTR call */ + NULL, /* F_PTR event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, /* Reserved -- Used by emulator internally */ + NULL, /* F_PTR process_exit */ +}; + +DRIVER_INIT(couch_icu_driver) /* must match name in driver_entry */ +{ + return &couch_driver_entry; +} diff --git a/apps/couch/c_src/spawnkillable/couchspawnkillable_win.c b/apps/couch/c_src/spawnkillable/couchspawnkillable_win.c new file mode 100644 index 00000000..06782315 --- /dev/null +++ b/apps/couch/c_src/spawnkillable/couchspawnkillable_win.c @@ -0,0 +1,145 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +// Do what 2 lines of shell script in couchspawnkillable does... +// * Create a new suspended process with the same (duplicated) standard +// handles as us. +// * Write a line to stdout, consisting of the path to ourselves, plus +// '--kill {pid}' where {pid} is the PID of the newly created process. +// * Un-suspend the new process. +// * Wait for the process to terminate. +// * Terminate with the child's exit-code. + +// Later, couch will call us with --kill and the PID, so we dutifully +// terminate the specified PID. + +#include <stdlib.h> +#include "windows.h" + +char *get_child_cmdline(int argc, char **argv) +{ + // make a new command-line, but skipping me. + // XXX - todo - spaces etc in args??? + int i; + char *p, *cmdline; + int nchars = 0; + int nthis = 1; + for (i=1;i<argc;i++) + nchars += strlen(argv[i])+1; + cmdline = p = malloc(nchars+1); + if (!cmdline) + return NULL; + for (i=1;i<argc;i++) { + nthis = strlen(argv[i]); + strncpy(p, argv[i], nthis); + p[nthis] = ' '; + p += nthis+1; + } + // Replace the last space we added above with a '\0' + cmdline[nchars-1] = '\0'; + return cmdline; +} + +// create the child process, returning 0, or the exit-code we will +// terminate with. +int create_child(int argc, char **argv, PROCESS_INFORMATION *pi) +{ + char buf[1024]; + DWORD dwcreate; + STARTUPINFO si; + char *cmdline; + if (argc < 2) + return 1; + cmdline = get_child_cmdline(argc, argv); + if (!cmdline) + return 2; + + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + // depending on how *our* parent is started, we may or may not have + // a valid stderr stream - so although we try and duplicate it, only + // failing to duplicate stdin and stdout are considered fatal. + if (!DuplicateHandle(GetCurrentProcess(), + GetStdHandle(STD_INPUT_HANDLE), + GetCurrentProcess(), + &si.hStdInput, + 0, + TRUE, // inheritable + DUPLICATE_SAME_ACCESS) || + !DuplicateHandle(GetCurrentProcess(), + GetStdHandle(STD_OUTPUT_HANDLE), + GetCurrentProcess(), + &si.hStdOutput, + 0, + TRUE, // inheritable + DUPLICATE_SAME_ACCESS)) { + return 3; + } + DuplicateHandle(GetCurrentProcess(), + GetStdHandle(STD_ERROR_HANDLE), + GetCurrentProcess(), + &si.hStdError, + 0, + TRUE, // inheritable + DUPLICATE_SAME_ACCESS); + + si.dwFlags = STARTF_USESTDHANDLES; + dwcreate = CREATE_SUSPENDED; + if (!CreateProcess( NULL, cmdline, + NULL, + NULL, + TRUE, // inherit handles + dwcreate, + NULL, // environ + NULL, // cwd + &si, + pi)) + return 4; + return 0; +} + +// and here we go... +int main(int argc, char **argv) +{ + char out_buf[1024]; + int rc; + DWORD cbwritten; + DWORD exitcode; + PROCESS_INFORMATION pi; + if (argc==3 && strcmp(argv[1], "--kill")==0) { + HANDLE h = OpenProcess(PROCESS_TERMINATE, 0, atoi(argv[2])); + if (!h) + return 1; + if (!TerminateProcess(h, 0)) + return 2; + CloseHandle(h); + return 0; + } + // spawn the new suspended process + rc = create_child(argc, argv, &pi); + if (rc) + return rc; + // Write the 'terminate' command, which includes this PID, back to couch. + // *sob* - what about spaces etc? + sprintf_s(out_buf, sizeof(out_buf), "%s --kill %d\n", + argv[0], pi.dwProcessId); + WriteFile(GetStdHandle(STD_OUTPUT_HANDLE), out_buf, strlen(out_buf), + &cbwritten, NULL); + // Let the child process go... + ResumeThread(pi.hThread); + // Wait for the process to terminate so we can reflect the exit code + // back to couch. + WaitForSingleObject(pi.hProcess, INFINITE); + if (!GetExitCodeProcess(pi.hProcess, &exitcode)) + return 6; + return exitcode; +} diff --git a/apps/couch/include/couch_db.hrl b/apps/couch/include/couch_db.hrl new file mode 100644 index 00000000..1c425e8d --- /dev/null +++ b/apps/couch/include/couch_db.hrl @@ -0,0 +1,310 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(LOCAL_DOC_PREFIX, "_local/"). +-define(DESIGN_DOC_PREFIX0, "_design"). +-define(DESIGN_DOC_PREFIX, "_design/"). + +-define(MIN_STR, <<"">>). +-define(MAX_STR, <<255>>). % illegal utf string + +-define(JSON_ENCODE(V), couch_util:json_encode(V)). +-define(JSON_DECODE(V), couch_util:json_decode(V)). + +-define(b2l(V), binary_to_list(V)). +-define(l2b(V), list_to_binary(V)). + +-define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>). + +-define(LOG_DEBUG(Format, Args), + case couch_log:debug_on() of + true -> + couch_log:debug(Format, Args); + false -> ok + end). + +-define(LOG_INFO(Format, Args), + case couch_log:info_on() of + true -> + couch_log:info(Format, Args); + false -> ok + end). + +-define(LOG_ERROR(Format, Args), couch_log:error(Format, Args)). + +-record(rev_info, + { + rev, + seq = 0, + deleted = false, + body_sp = nil % stream pointer + }). + +-record(doc_info, + { + id = <<"">>, + high_seq = 0, + revs = [] % rev_info + }). + +-record(full_doc_info, + {id = <<"">>, + update_seq = 0, + deleted = false, + data_size = 0, + rev_tree = [] + }). + +-record(httpd, + {mochi_req, + peer, + method, + requested_path_parts, + path_parts, + db_url_handlers, + user_ctx, + req_body = undefined, + design_url_handlers, + auth, + default_fun, + url_handlers + }). + + +-record(doc, + { + id = <<"">>, + revs = {0, []}, + + % the json body object. + body = {[]}, + + atts = [], % attachments + + deleted = false, + + % key/value tuple of meta information, provided when using special options: + % couch_db:open_doc(Db, Id, Options). + meta = [] + }). + + +-record(att, + { + name, + type, + att_len, + disk_len, % length of the attachment in its identity form + % (that is, without a content encoding applied to it) + % differs from att_len when encoding /= identity + md5= <<>>, + revpos=0, + data, + encoding=identity % currently supported values are: + % identity, gzip + % additional values to support in the future: + % deflate, compress + }). + + +-record(user_ctx, + { + name=null, + roles=[], + handler + }). + +% This should be updated anytime a header change happens that requires more +% than filling in new defaults. +% +% As long the changes are limited to new header fields (with inline +% defaults) added to the end of the record, then there is no need to increment +% the disk revision number. +% +% if the disk revision is incremented, then new upgrade logic will need to be +% added to couch_db_updater:init_db. + +-define(LATEST_DISK_VERSION, 5). + +-record(db_header, + {disk_version = ?LATEST_DISK_VERSION, + update_seq = 0, + unused = 0, + id_tree_state = nil, + seq_tree_state = nil, + local_tree_state = nil, + purge_seq = 0, + purged_docs = nil, + security_ptr = nil, + revs_limit = 1000 + }). + +-record(db, + {main_pid = nil, + update_pid = nil, + compactor_pid = nil, + instance_start_time, % number of microsecs since jan 1 1970 as a binary string + fd, + fd_monitor, + header = #db_header{}, + committed_update_seq, + id_tree, + seq_tree, + local_tree, + update_seq, + name, + filepath, + validate_doc_funs = undefined, + security = [], + security_ptr = nil, + user_ctx = #user_ctx{}, + waiting_delayed_commit = nil, + revs_limit = 1000, + fsync_options = [], + is_sys_db = false + }). + + +-record(view_query_args, { + start_key, + end_key, + start_docid = ?MIN_STR, + end_docid = ?MAX_STR, + + direction = fwd, + inclusive_end=true, % aka a closed-interval + + limit = 10000000000, % Huge number to simplify logic + skip = 0, + + group_level = 0, + + view_type = nil, + include_docs = false, + conflicts = false, + stale = false, + multi_get = false, + callback = nil, + list = nil, + keys = nil, + sorted = true, + extra = [] +}). + +-record(view_fold_helper_funs, { + reduce_count, + passed_end, + start_response, + send_row +}). + +-record(reduce_fold_helper_funs, { + start_response, + send_row +}). + +-record(extern_resp_args, { + code = 200, + stop = false, + data = <<>>, + ctype = "application/json", + headers = [] +}). + +-record(group, { + sig=nil, + fd=nil, + name, + def_lang, + design_options=[], + views, + lib, + id_btree=nil, + current_seq=0, + purge_seq=0, + query_server=nil, + waiting_delayed_commit=nil + }). + +-record(view, + {id_num, + update_seq=0, + purge_seq=0, + map_names=[], + def, + btree=nil, + reduce_funs=[], + options=[] + }). + +-record(index_header, + {seq=0, + purge_seq=0, + id_btree_state=nil, + view_states=nil + }). + +-record(http_db, { + url, + auth = [], + resource = "", + headers = [ + {"User-Agent", "CouchDB/"++couch:version()}, + {"Accept", "application/json"}, + {"Accept-Encoding", "gzip"} + ], + qs = [], + method = get, + body = nil, + options = [ + {response_format,binary}, + {inactivity_timeout, 30000} + ], + retries = 10, + pause = 500, + conn = nil +}). + +% small value used in revision trees to indicate the revision isn't stored +-define(REV_MISSING, []). + +-record(changes_args, { + feed = "normal", + dir = fwd, + since = 0, + limit = 1000000000000000, + style = main_only, + heartbeat, + timeout, + filter = "", + include_docs = false, + conflicts = false, + db_open_options = [] +}). + +-record(proc, { + pid, + lang, + client = nil, + ddoc_keys = [], + prompt_fun, + set_timeout_fun, + stop_fun +}). + +-record(leaf, { + deleted, + ptr, + seq, + size = 0, + atts = [] +}). diff --git a/apps/couch/include/couch_js_functions.hrl b/apps/couch/include/couch_js_functions.hrl new file mode 100644 index 00000000..d07eead5 --- /dev/null +++ b/apps/couch/include/couch_js_functions.hrl @@ -0,0 +1,238 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(AUTH_DB_DOC_VALIDATE_FUNCTION, <<" + function(newDoc, oldDoc, userCtx) { + if (newDoc._deleted === true) { + // allow deletes by admins and matching users + // without checking the other fields + if ((userCtx.roles.indexOf('_admin') !== -1) || + (userCtx.name == oldDoc.name)) { + return; + } else { + throw({forbidden: 'Only admins may delete other user docs.'}); + } + } + + if ((oldDoc && oldDoc.type !== 'user') || newDoc.type !== 'user') { + throw({forbidden : 'doc.type must be user'}); + } // we only allow user docs for now + + if (!newDoc.name) { + throw({forbidden: 'doc.name is required'}); + } + + if (newDoc.roles && !isArray(newDoc.roles)) { + throw({forbidden: 'doc.roles must be an array'}); + } + + if (newDoc._id !== ('org.couchdb.user:' + newDoc.name)) { + throw({ + forbidden: 'Doc ID must be of the form org.couchdb.user:name' + }); + } + + if (oldDoc) { // validate all updates + if (oldDoc.name !== newDoc.name) { + throw({forbidden: 'Usernames can not be changed.'}); + } + } + + if (newDoc.password_sha && !newDoc.salt) { + throw({ + forbidden: 'Users with password_sha must have a salt.' + + 'See /_utils/script/couch.js for example code.' + }); + } + + if (userCtx.roles.indexOf('_admin') === -1) { + if (oldDoc) { // validate non-admin updates + if (userCtx.name !== newDoc.name) { + throw({ + forbidden: 'You may only update your own user document.' + }); + } + // validate role updates + var oldRoles = oldDoc.roles.sort(); + var newRoles = newDoc.roles.sort(); + + if (oldRoles.length !== newRoles.length) { + throw({forbidden: 'Only _admin may edit roles'}); + } + + for (var i = 0; i < oldRoles.length; i++) { + if (oldRoles[i] !== newRoles[i]) { + throw({forbidden: 'Only _admin may edit roles'}); + } + } + } else if (newDoc.roles.length > 0) { + throw({forbidden: 'Only _admin may set roles'}); + } + } + + // no system roles in users db + for (var i = 0; i < newDoc.roles.length; i++) { + if (newDoc.roles[i][0] === '_') { + throw({ + forbidden: + 'No system roles (starting with underscore) in users db.' + }); + } + } + + // no system names as names + if (newDoc.name[0] === '_') { + throw({forbidden: 'Username may not start with underscore.'}); + } + } +">>). + + +-define(REP_DB_DOC_VALIDATE_FUN, <<" + function(newDoc, oldDoc, userCtx) { + function reportError(error_msg) { + log('Error writing document `' + newDoc._id + + '\\' to the replicator database: ' + error_msg); + throw({forbidden: error_msg}); + } + + function validateEndpoint(endpoint, fieldName) { + if ((typeof endpoint !== 'string') && + ((typeof endpoint !== 'object') || (endpoint === null))) { + + reportError('The `' + fieldName + '\\' property must exist' + + ' and be either a string or an object.'); + } + + if (typeof endpoint === 'object') { + if ((typeof endpoint.url !== 'string') || !endpoint.url) { + reportError('The url property must exist in the `' + + fieldName + '\\' field and must be a non-empty string.'); + } + + if ((typeof endpoint.auth !== 'undefined') && + ((typeof endpoint.auth !== 'object') || + endpoint.auth === null)) { + + reportError('`' + fieldName + + '.auth\\' must be a non-null object.'); + } + + if ((typeof endpoint.headers !== 'undefined') && + ((typeof endpoint.headers !== 'object') || + endpoint.headers === null)) { + + reportError('`' + fieldName + + '.headers\\' must be a non-null object.'); + } + } + } + + var isReplicator = (userCtx.roles.indexOf('_replicator') >= 0); + var isAdmin = (userCtx.roles.indexOf('_admin') >= 0); + + if (oldDoc && !newDoc._deleted && !isReplicator && + (oldDoc._replication_state === 'triggered')) { + reportError('Only the replicator can edit replication documents ' + + 'that are in the triggered state.'); + } + + if (!newDoc._deleted) { + validateEndpoint(newDoc.source, 'source'); + validateEndpoint(newDoc.target, 'target'); + + if ((typeof newDoc.create_target !== 'undefined') && + (typeof newDoc.create_target !== 'boolean')) { + + reportError('The `create_target\\' field must be a boolean.'); + } + + if ((typeof newDoc.continuous !== 'undefined') && + (typeof newDoc.continuous !== 'boolean')) { + + reportError('The `continuous\\' field must be a boolean.'); + } + + if ((typeof newDoc.doc_ids !== 'undefined') && + !isArray(newDoc.doc_ids)) { + + reportError('The `doc_ids\\' field must be an array of strings.'); + } + + if ((typeof newDoc.filter !== 'undefined') && + ((typeof newDoc.filter !== 'string') || !newDoc.filter)) { + + reportError('The `filter\\' field must be a non-empty string.'); + } + + if ((typeof newDoc.query_params !== 'undefined') && + ((typeof newDoc.query_params !== 'object') || + newDoc.query_params === null)) { + + reportError('The `query_params\\' field must be an object.'); + } + + if (newDoc.user_ctx) { + var user_ctx = newDoc.user_ctx; + + if ((typeof user_ctx !== 'object') || (user_ctx === null)) { + reportError('The `user_ctx\\' property must be a ' + + 'non-null object.'); + } + + if (!(user_ctx.name === null || + (typeof user_ctx.name === 'undefined') || + ((typeof user_ctx.name === 'string') && + user_ctx.name.length > 0))) { + + reportError('The `user_ctx.name\\' property must be a ' + + 'non-empty string or null.'); + } + + if (!isAdmin && (user_ctx.name !== userCtx.name)) { + reportError('The given `user_ctx.name\\' is not valid'); + } + + if (user_ctx.roles && !isArray(user_ctx.roles)) { + reportError('The `user_ctx.roles\\' property must be ' + + 'an array of strings.'); + } + + if (!isAdmin && user_ctx.roles) { + for (var i = 0; i < user_ctx.roles.length; i++) { + var role = user_ctx.roles[i]; + + if (typeof role !== 'string' || role.length === 0) { + reportError('Roles must be non-empty strings.'); + } + if (userCtx.roles.indexOf(role) === -1) { + reportError('Invalid role (`' + role + + '\\') in the `user_ctx\\''); + } + } + } + } else { + if (!isAdmin) { + reportError('The `user_ctx\\' property is missing (it is ' + + 'optional for admins only).'); + } + } + } else { + if (!isAdmin) { + if (!oldDoc.user_ctx || (oldDoc.user_ctx.name !== userCtx.name)) { + reportError('Replication documents can only be deleted by ' + + 'admins or by the users who created them.'); + } + } + } + } +">>). diff --git a/apps/couch/license.skip b/apps/couch/license.skip new file mode 100644 index 00000000..151ccc08 --- /dev/null +++ b/apps/couch/license.skip @@ -0,0 +1,111 @@ +\.svn +^AUTHORS +^BUGS +^CHANGES +^DEVELOPERS +^DEVELOPERS.gz +^INSTALL +^INSTALL.Unix +^INSTALL.Unix.gz +^INSTALL.Windows +^INSTALL.Windows.gz +^INSTALL.gz +^LICENSE.gz +^Makefile +^Makefile.in +^NEWS +^NOTICE +^README +^THANKS +^aclocal.m4 +^apache-couchdb-* +^autom4te.cache/* +^bin/Makefile +^bin/Makefile.in +^bin/couchdb.1 +^bin/couchjs.1 +^build-aux/* +^config.* +^configure +^couchdb.stderr +^couchdb.stdout +^cover/.*\.coverdata +^cover/.*\.html +^erl_crash.dump +^etc/Makefile +^etc/Makefile.in +^etc/couchdb/Makefile +^etc/couchdb/Makefile.in +^etc/couchdb/default* +^etc/couchdb/local* +^etc/default/Makefile +^etc/default/Makefile.in +^etc/default/couchdb +^etc/init/Makefile +^etc/init/Makefile.in +^etc/launchd/Makefile +^etc/launchd/Makefile.in +^etc/launchd/org.apache.couchdb.plist.* +^etc/logrotate.d/Makefile +^etc/logrotate.d/Makefile.in +^etc/logrotate.d/couchdb* +^etc/windows/Makefile +^etc/windows/README.txt.tpl +^libtool +^license.skip +^m4/* +^share/Makefile +^share/Makefile.in +^share/server/json2.js +^share/server/mimeparse.js +^share/www/favicon.ico +^share/www/image/* +^share/www/script/jquery.* +^share/www/script/json2.js +^share/www/script/jspec/* +^share/www/script/sha1.js +^share/www/script/base64.js +^share/www/script/test/lorem* +^share/www/style/jquery-ui-1.8.11.custom.css +^src/Makefile +^src/Makefile.in +^src/couchdb/.*beam +^src/couchdb/.deps/* +^src/couchdb/Makefile +^src/couchdb/Makefile.in +^src/couchdb/couch.app* +^src/couchdb/couch.app.tpl.in +^src/couchdb/priv/.*o +^src/couchdb/priv/.deps/* +^src/couchdb/priv/Makefile +^src/couchdb/priv/Makefile.in +^src/couchdb/priv/couch_icu_driver.la +^src/couchdb/priv/couchjs +^src/couchdb/priv/couchspawnkillable +^src/couchdb/priv/stat_descriptions.cfg +^src/erlang-oauth/* +^src/etap/* +^src/ibrowse/* +^src/mochiweb/* +^stamp-h1 +^test/Makefile +^test/Makefile.in +^test/bench/Makefile +^test/bench/Makefile.in +^test/etap/.*beam +^test/etap/.*\.o +^test/etap/.deps/* +^test/etap/test_cfg_register +^test/etap/Makefile +^test/etap/Makefile.in +^test/etap/temp.* +^test/javascript/Makefile +^test/javascript/Makefile.in +^test/local.ini +^test/view_server/Makefile +^test/view_server/Makefile.in +^tmp/* +^utils/Makefile +^utils/Makefile.in +^var/Makefile +^var/Makefile.in diff --git a/apps/couch/priv/couchspawnkillable.sh b/apps/couch/priv/couchspawnkillable.sh new file mode 100755 index 00000000..f8d042e3 --- /dev/null +++ b/apps/couch/priv/couchspawnkillable.sh @@ -0,0 +1,20 @@ +#! /bin/sh -e + +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# The purpose of this script is to echo an OS specific command before launching +# the actual process. This provides a way for Erlang to hard-kill its external +# processes. + +echo "kill -9 $$" +exec $* diff --git a/apps/couch/priv/stat_descriptions.cfg b/apps/couch/priv/stat_descriptions.cfg new file mode 100644 index 00000000..a6796263 --- /dev/null +++ b/apps/couch/priv/stat_descriptions.cfg @@ -0,0 +1,51 @@ +%% Licensed under the Apache License, Version 2.0 (the "License"); you may not +%% use this file except in compliance with the License. You may obtain a copy of +%% the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations under +%% the License. + +% Style guide for descriptions: Start with a lowercase letter & do not add +% a trailing full-stop / period +% Please keep this in alphabetical order + +{couchdb, database_writes, "number of times a database was changed"}. +{couchdb, database_reads, "number of times a document was read from a database"}. +{couchdb, open_databases, "number of open databases"}. +{couchdb, open_os_files, "number of file descriptors CouchDB has open"}. +{couchdb, request_time, "length of a request inside CouchDB without MochiWeb"}. +{couchdb, auth_cache_hits, "number of authentication cache hits"}. +{couchdb, auth_cache_misses, "number of authentication cache misses"}. + +{httpd, bulk_requests, "number of bulk requests"}. +{httpd, requests, "number of HTTP requests"}. +{httpd, temporary_view_reads, "number of temporary view reads"}. +{httpd, view_reads, "number of view reads"}. +{httpd, clients_requesting_changes, "number of clients for continuous _changes"}. +{httpd, aborted_requests, "number of requests aborted during response"}. + +{httpd_request_methods, 'COPY', "number of HTTP COPY requests"}. +{httpd_request_methods, 'DELETE', "number of HTTP DELETE requests"}. +{httpd_request_methods, 'GET', "number of HTTP GET requests"}. +{httpd_request_methods, 'HEAD', "number of HTTP HEAD requests"}. +{httpd_request_methods, 'POST', "number of HTTP POST requests"}. +{httpd_request_methods, 'PUT', "number of HTTP PUT requests"}. + +{httpd_status_codes, '200', "number of HTTP 200 OK responses"}. +{httpd_status_codes, '201', "number of HTTP 201 Created responses"}. +{httpd_status_codes, '202', "number of HTTP 202 Accepted responses"}. +{httpd_status_codes, '301', "number of HTTP 301 Moved Permanently responses"}. +{httpd_status_codes, '304', "number of HTTP 304 Not Modified responses"}. +{httpd_status_codes, '400', "number of HTTP 400 Bad Request responses"}. +{httpd_status_codes, '401', "number of HTTP 401 Unauthorized responses"}. +{httpd_status_codes, '403', "number of HTTP 403 Forbidden responses"}. +{httpd_status_codes, '404', "number of HTTP 404 Not Found responses"}. +{httpd_status_codes, '405', "number of HTTP 405 Method Not Allowed responses"}. +{httpd_status_codes, '409', "number of HTTP 409 Conflict responses"}. +{httpd_status_codes, '412', "number of HTTP 412 Precondition Failed responses"}. +{httpd_status_codes, '500', "number of HTTP 500 Internal Server Error responses"}. diff --git a/apps/couch/rebar.config b/apps/couch/rebar.config new file mode 100644 index 00000000..25b7e569 --- /dev/null +++ b/apps/couch/rebar.config @@ -0,0 +1,10 @@ +{so_name, "couch_icu_driver.so"}. +{port_envs, [ + {"DRV_CFLAGS", "$DRV_CFLAGS -DPIC -O2 -fno-common"}, + {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}, + {"linux", "DRV_LDFLAGS", "$DRV_LDFLAGS -lcrypt"}, + {"freebsd", "DRV_CFLAGS", "$DRV_CFLAGS -I/usr/local/include"}, + {"freebsd", "DRV_LDFLAGS", "$DRV_LDFLAGS -L/usr/local/lib"}, + {"solaris", "CC", "/usr/sfw/bin/gcc"}, + {"solaris", "DRV_LDFLAGS", "$DRV_LDFLAGS -L/opt/local/lib"} +]}. diff --git a/apps/couch/src/couch.app.src b/apps/couch/src/couch.app.src new file mode 100644 index 00000000..3b94feb7 --- /dev/null +++ b/apps/couch/src/couch.app.src @@ -0,0 +1,25 @@ +{application, couch, [ + {description, "Apache CouchDB"}, + {vsn, "%VSN%"}, + {registered, [ + couch_config, + couch_db_update, + couch_db_update_notifier_sup, + couch_external_manager, + couch_httpd, + couch_log, + couch_primary_services, + couch_proc_manager, + couch_rep_sup, + couch_secondary_services, + couch_server, + couch_server_sup, + couch_stats_aggregator, + couch_stats_collector, + couch_task_status, + couch_view + ]}, + {mod, {couch_app, []}}, + {applications, [kernel, stdlib, crypto, sasl, inets, oauth, ibrowse, + mochiweb, ssl]} +]}. diff --git a/apps/couch/src/couch.erl b/apps/couch/src/couch.erl new file mode 100644 index 00000000..f6b048a5 --- /dev/null +++ b/apps/couch/src/couch.erl @@ -0,0 +1,43 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch). + +-compile(export_all). + +start() -> + application:start(couch). + +stop() -> + application:stop(couch). + +restart() -> + case stop() of + ok -> + start(); + {error, {not_started,couch}} -> + start(); + {error, Reason} -> + {error, Reason} + end. + +reload() -> + case supervisor:terminate_child(couch_server_sup, couch_config) of + ok -> + supervisor:restart_child(couch_server_sup, couch_config); + {error, Reason} -> + {error, Reason} + end. + +version() -> + {ok, FullVersion} = application:get_key(couch, vsn), + hd(string:tokens(FullVersion, "-")). diff --git a/apps/couch/src/couch_app.erl b/apps/couch/src/couch_app.erl new file mode 100644 index 00000000..70e1b7e0 --- /dev/null +++ b/apps/couch/src/couch_app.erl @@ -0,0 +1,39 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_app). + +-behaviour(application). + +-include("couch_db.hrl"). + +-export([start/2, stop/1]). + +start(_Type, _Args) -> + catch erlang:system_flag(scheduler_bind_type, default_bind), + IniFiles = get_ini_files(), + couch_server_sup:start_link(IniFiles). + +stop(_) -> + ok. + +get_ini_files() -> + Etc = filename:join(code:root_dir(), "etc"), + Default = [filename:join(Etc,"default.ini"), filename:join(Etc,"local.ini")], + case init:get_argument(couch_ini) of + error -> + Default; + {ok, [[]]} -> + Default; + {ok, [Values]} -> + Values + end. diff --git a/apps/couch/src/couch_auth_cache.erl b/apps/couch/src/couch_auth_cache.erl new file mode 100644 index 00000000..8b911543 --- /dev/null +++ b/apps/couch/src/couch_auth_cache.erl @@ -0,0 +1,421 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_auth_cache). +-behaviour(gen_server). + +% public API +-export([get_user_creds/1]). + +% gen_server API +-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]). +-export([code_change/3, terminate/2]). + +-include("couch_db.hrl"). +-include("couch_js_functions.hrl"). + +-define(STATE, auth_state_ets). +-define(BY_USER, auth_by_user_ets). +-define(BY_ATIME, auth_by_atime_ets). + +-record(state, { + max_cache_size = 0, + cache_size = 0, + db_notifier = nil +}). + + +-spec get_user_creds(UserName::string() | binary()) -> + Credentials::list() | nil. + +get_user_creds(UserName) when is_list(UserName) -> + get_user_creds(?l2b(UserName)); + +get_user_creds(UserName) -> + UserCreds = case couch_config:get("admins", ?b2l(UserName)) of + "-hashed-" ++ HashedPwdAndSalt -> + % the name is an admin, now check to see if there is a user doc + % which has a matching name, salt, and password_sha + [HashedPwd, Salt] = string:tokens(HashedPwdAndSalt, ","), + case get_from_cache(UserName) of + nil -> + [{<<"roles">>, [<<"_admin">>]}, + {<<"salt">>, ?l2b(Salt)}, + {<<"password_sha">>, ?l2b(HashedPwd)}]; + UserProps when is_list(UserProps) -> + DocRoles = couch_util:get_value(<<"roles">>, UserProps), + [{<<"roles">>, [<<"_admin">> | DocRoles]}, + {<<"salt">>, ?l2b(Salt)}, + {<<"password_sha">>, ?l2b(HashedPwd)}] + end; + _Else -> + get_from_cache(UserName) + end, + validate_user_creds(UserCreds). + + +get_from_cache(UserName) -> + exec_if_auth_db( + fun(_AuthDb) -> + maybe_refresh_cache(), + case ets:lookup(?BY_USER, UserName) of + [] -> + gen_server:call(?MODULE, {fetch, UserName}, infinity); + [{UserName, {Credentials, _ATime}}] -> + couch_stats_collector:increment({couchdb, auth_cache_hits}), + gen_server:cast(?MODULE, {cache_hit, UserName}), + Credentials + end + end, + nil + ). + + +validate_user_creds(nil) -> + nil; +validate_user_creds(UserCreds) -> + case couch_util:get_value(<<"_conflicts">>, UserCreds) of + undefined -> + ok; + _ConflictList -> + throw({unauthorized, + <<"User document conflicts must be resolved before the document", + " is used for authentication purposes.">> + }) + end, + UserCreds. + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + ?STATE = ets:new(?STATE, [set, protected, named_table]), + ?BY_USER = ets:new(?BY_USER, [set, protected, named_table]), + ?BY_ATIME = ets:new(?BY_ATIME, [ordered_set, private, named_table]), + AuthDbName = couch_config:get("couch_httpd_auth", "authentication_db"), + true = ets:insert(?STATE, {auth_db_name, ?l2b(AuthDbName)}), + true = ets:insert(?STATE, {auth_db, open_auth_db()}), + process_flag(trap_exit, true), + ok = couch_config:register( + fun("couch_httpd_auth", "auth_cache_size", SizeList) -> + Size = list_to_integer(SizeList), + ok = gen_server:call(?MODULE, {new_max_cache_size, Size}, infinity) + end + ), + ok = couch_config:register( + fun("couch_httpd_auth", "authentication_db", DbName) -> + ok = gen_server:call(?MODULE, {new_auth_db, ?l2b(DbName)}, infinity) + end + ), + {ok, Notifier} = couch_db_update_notifier:start_link(fun handle_db_event/1), + State = #state{ + db_notifier = Notifier, + max_cache_size = list_to_integer( + couch_config:get("couch_httpd_auth", "auth_cache_size", "50") + ) + }, + {ok, State}. + + +handle_db_event({Event, DbName}) -> + [{auth_db_name, AuthDbName}] = ets:lookup(?STATE, auth_db_name), + case DbName =:= AuthDbName of + true -> + case Event of + deleted -> gen_server:call(?MODULE, auth_db_deleted, infinity); + created -> gen_server:call(?MODULE, auth_db_created, infinity); + compacted -> gen_server:call(?MODULE, auth_db_compacted, infinity); + _Else -> ok + end; + false -> + ok + end. + + +handle_call({new_auth_db, AuthDbName}, _From, State) -> + NewState = clear_cache(State), + true = ets:insert(?STATE, {auth_db_name, AuthDbName}), + true = ets:insert(?STATE, {auth_db, open_auth_db()}), + {reply, ok, NewState}; + +handle_call(auth_db_deleted, _From, State) -> + NewState = clear_cache(State), + true = ets:insert(?STATE, {auth_db, nil}), + {reply, ok, NewState}; + +handle_call(auth_db_created, _From, State) -> + NewState = clear_cache(State), + true = ets:insert(?STATE, {auth_db, open_auth_db()}), + {reply, ok, NewState}; + +handle_call(auth_db_compacted, _From, State) -> + exec_if_auth_db( + fun(AuthDb) -> + true = ets:insert(?STATE, {auth_db, reopen_auth_db(AuthDb)}) + end + ), + {reply, ok, State}; + +handle_call({new_max_cache_size, NewSize}, _From, State) -> + case NewSize >= State#state.cache_size of + true -> + ok; + false -> + lists:foreach( + fun(_) -> + LruTime = ets:last(?BY_ATIME), + [{LruTime, UserName}] = ets:lookup(?BY_ATIME, LruTime), + true = ets:delete(?BY_ATIME, LruTime), + true = ets:delete(?BY_USER, UserName) + end, + lists:seq(1, State#state.cache_size - NewSize) + ) + end, + NewState = State#state{ + max_cache_size = NewSize, + cache_size = lists:min([NewSize, State#state.cache_size]) + }, + {reply, ok, NewState}; + +handle_call({fetch, UserName}, _From, State) -> + {Credentials, NewState} = case ets:lookup(?BY_USER, UserName) of + [{UserName, {Creds, ATime}}] -> + couch_stats_collector:increment({couchdb, auth_cache_hits}), + cache_hit(UserName, Creds, ATime), + {Creds, State}; + [] -> + couch_stats_collector:increment({couchdb, auth_cache_misses}), + Creds = get_user_props_from_db(UserName), + State1 = add_cache_entry(UserName, Creds, erlang:now(), State), + {Creds, State1} + end, + {reply, Credentials, NewState}; + +handle_call(refresh, _From, State) -> + exec_if_auth_db(fun refresh_entries/1), + {reply, ok, State}. + + +handle_cast({cache_hit, UserName}, State) -> + case ets:lookup(?BY_USER, UserName) of + [{UserName, {Credentials, ATime}}] -> + cache_hit(UserName, Credentials, ATime); + _ -> + ok + end, + {noreply, State}. + + +handle_info(_Msg, State) -> + {noreply, State}. + + +terminate(_Reason, #state{db_notifier = Notifier}) -> + couch_db_update_notifier:stop(Notifier), + exec_if_auth_db(fun(AuthDb) -> catch couch_db:close(AuthDb) end), + true = ets:delete(?BY_USER), + true = ets:delete(?BY_ATIME), + true = ets:delete(?STATE). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +clear_cache(State) -> + exec_if_auth_db(fun(AuthDb) -> catch couch_db:close(AuthDb) end), + true = ets:delete_all_objects(?BY_USER), + true = ets:delete_all_objects(?BY_ATIME), + State#state{cache_size = 0}. + + +add_cache_entry(UserName, Credentials, ATime, State) -> + case State#state.cache_size >= State#state.max_cache_size of + true -> + free_mru_cache_entry(); + false -> + ok + end, + true = ets:insert(?BY_ATIME, {ATime, UserName}), + true = ets:insert(?BY_USER, {UserName, {Credentials, ATime}}), + State#state{cache_size = couch_util:get_value(size, ets:info(?BY_USER))}. + + +free_mru_cache_entry() -> + case ets:last(?BY_ATIME) of + '$end_of_table' -> + ok; % empty cache + LruTime -> + [{LruTime, UserName}] = ets:lookup(?BY_ATIME, LruTime), + true = ets:delete(?BY_ATIME, LruTime), + true = ets:delete(?BY_USER, UserName) + end. + + +cache_hit(UserName, Credentials, ATime) -> + NewATime = erlang:now(), + true = ets:delete(?BY_ATIME, ATime), + true = ets:insert(?BY_ATIME, {NewATime, UserName}), + true = ets:insert(?BY_USER, {UserName, {Credentials, NewATime}}). + + +refresh_entries(AuthDb) -> + case reopen_auth_db(AuthDb) of + nil -> + ok; + AuthDb2 -> + case AuthDb2#db.update_seq > AuthDb#db.update_seq of + true -> + {ok, _, _} = couch_db:enum_docs_since( + AuthDb2, + AuthDb#db.update_seq, + fun(DocInfo, _, _) -> refresh_entry(AuthDb2, DocInfo) end, + AuthDb#db.update_seq, + [] + ), + true = ets:insert(?STATE, {auth_db, AuthDb2}); + false -> + ok + end + end. + + +refresh_entry(Db, #full_doc_info{} = FDI) -> + refresh_entry(Db, couch_doc:to_doc_info(FDI)); +refresh_entry(Db, #doc_info{high_seq = DocSeq} = DocInfo) -> + case is_user_doc(DocInfo) of + {true, UserName} -> + case ets:lookup(?BY_USER, UserName) of + [] -> + ok; + [{UserName, {_OldCreds, ATime}}] -> + {ok, Doc} = couch_db:open_doc(Db, DocInfo, [conflicts, deleted]), + NewCreds = user_creds(Doc), + true = ets:insert(?BY_USER, {UserName, {NewCreds, ATime}}) + end; + false -> + ok + end, + {ok, DocSeq}. + + +user_creds(#doc{deleted = true}) -> + nil; +user_creds(#doc{} = Doc) -> + {Creds} = couch_query_servers:json_doc(Doc), + Creds. + + +is_user_doc(#doc_info{id = <<"org.couchdb.user:", UserName/binary>>}) -> + {true, UserName}; +is_user_doc(_) -> + false. + + +maybe_refresh_cache() -> + case cache_needs_refresh() of + true -> + ok = gen_server:call(?MODULE, refresh, infinity); + false -> + ok + end. + + +cache_needs_refresh() -> + exec_if_auth_db( + fun(AuthDb) -> + case reopen_auth_db(AuthDb) of + nil -> + false; + AuthDb2 -> + AuthDb2#db.update_seq > AuthDb#db.update_seq + end + end, + false + ). + + +reopen_auth_db(AuthDb) -> + case (catch couch_db:reopen(AuthDb)) of + {ok, AuthDb2} -> + AuthDb2; + _ -> + nil + end. + + +exec_if_auth_db(Fun) -> + exec_if_auth_db(Fun, ok). + +exec_if_auth_db(Fun, DefRes) -> + case ets:lookup(?STATE, auth_db) of + [{auth_db, #db{} = AuthDb}] -> + Fun(AuthDb); + _ -> + DefRes + end. + + +open_auth_db() -> + [{auth_db_name, DbName}] = ets:lookup(?STATE, auth_db_name), + {ok, AuthDb} = ensure_users_db_exists(DbName, [sys_db]), + AuthDb. + + +get_user_props_from_db(UserName) -> + exec_if_auth_db( + fun(AuthDb) -> + Db = reopen_auth_db(AuthDb), + DocId = <<"org.couchdb.user:", UserName/binary>>, + try + {ok, Doc} = couch_db:open_doc(Db, DocId, [conflicts]), + {DocProps} = couch_query_servers:json_doc(Doc), + DocProps + catch + _:_Error -> + nil + end + end, + nil + ). + +ensure_users_db_exists(DbName, Options) -> + Options1 = [{user_ctx, #user_ctx{roles=[<<"_admin">>]}} | Options], + case couch_db:open(DbName, Options1) of + {ok, Db} -> + ensure_auth_ddoc_exists(Db, <<"_design/_auth">>), + {ok, Db}; + _Error -> + {ok, Db} = couch_db:create(DbName, Options1), + ok = ensure_auth_ddoc_exists(Db, <<"_design/_auth">>), + {ok, Db} + end. + +ensure_auth_ddoc_exists(Db, DDocId) -> + case couch_db:open_doc(Db, DDocId) of + {not_found, _Reason} -> + {ok, AuthDesign} = auth_design_doc(DDocId), + {ok, _Rev} = couch_db:update_doc(Db, AuthDesign, []); + _ -> + ok + end, + ok. + +auth_design_doc(DocId) -> + DocProps = [ + {<<"_id">>, DocId}, + {<<"language">>,<<"javascript">>}, + {<<"validate_doc_update">>, ?AUTH_DB_DOC_VALIDATE_FUNCTION} + ], + {ok, couch_doc:from_json_obj({DocProps})}. diff --git a/apps/couch/src/couch_btree.erl b/apps/couch/src/couch_btree.erl new file mode 100644 index 00000000..52fcaece --- /dev/null +++ b/apps/couch/src/couch_btree.erl @@ -0,0 +1,703 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_btree). + +-export([open/2, open/3, query_modify/4, add/2, add_remove/3]). +-export([fold/4, full_reduce/1, final_reduce/2, foldl/3, foldl/4]). +-export([fold_reduce/4, lookup/2, get_state/1, set_options/2]). +-export([less/3]). + +-record(btree, + {fd, + root, + extract_kv, + assemble_kv, + less, + reduce = nil + }). + +extract(#btree{extract_kv = undefined}, Value) -> + Value; +extract(#btree{extract_kv=Extract}, Value) -> + Extract(Value). + +assemble(#btree{assemble_kv = undefined}, Key, Value) -> + {Key, Value}; +assemble(#btree{assemble_kv=Assemble}, Key, Value) -> + Assemble(Key, Value). + +less(#btree{less = undefined}, A, B) -> + A < B; +less(#btree{less=Less}, A, B) -> + Less(A, B). + +% pass in 'nil' for State if a new Btree. +open(State, Fd) -> + {ok, #btree{root=State, fd=Fd}}. + +set_options(Bt, []) -> + Bt; +set_options(Bt, [{split, Extract}|Rest]) -> + set_options(Bt#btree{extract_kv=Extract}, Rest); +set_options(Bt, [{join, Assemble}|Rest]) -> + set_options(Bt#btree{assemble_kv=Assemble}, Rest); +set_options(Bt, [{less, Less}|Rest]) -> + set_options(Bt#btree{less=Less}, Rest); +set_options(Bt, [{reduce, Reduce}|Rest]) -> + set_options(Bt#btree{reduce=Reduce}, Rest). + +open(State, Fd, Options) -> + {ok, set_options(#btree{root=State, fd=Fd}, Options)}. + +get_state(#btree{root=Root}) -> + Root. + +final_reduce(#btree{reduce=Reduce}, Val) -> + final_reduce(Reduce, Val); +final_reduce(Reduce, {[], []}) -> + Reduce(reduce, []); +final_reduce(Reduce, {[], [Red]}) -> + Reduce(rereduce, [Red]); +final_reduce(Reduce, {[], Reductions}) -> + Reduce(rereduce, Reductions); +final_reduce(Reduce, {KVs, Reductions}) -> + Red = Reduce(reduce, KVs), + final_reduce(Reduce, {[], [Red | Reductions]}). + +fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) -> + Dir = couch_util:get_value(dir, Options, fwd), + StartKey = couch_util:get_value(start_key, Options), + EndKey = case couch_util:get_value(end_key_gt, Options) of + undefined -> couch_util:get_value(end_key, Options); + LastKey -> LastKey + end, + KeyGroupFun = couch_util:get_value(key_group_fun, Options, fun(_,_) -> true end), + {StartKey2, EndKey2} = + case Dir of + rev -> {EndKey, StartKey}; + fwd -> {StartKey, EndKey} + end, + try + {ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} = + reduce_stream_node(Bt, Dir, Root, StartKey2, EndKey2, undefined, [], [], + KeyGroupFun, Fun, Acc), + if GroupedKey2 == undefined -> + {ok, Acc2}; + true -> + case Fun(GroupedKey2, {GroupedKVsAcc2, GroupedRedsAcc2}, Acc2) of + {ok, Acc3} -> {ok, Acc3}; + {stop, Acc3} -> {ok, Acc3} + end + end + catch + throw:{stop, AccDone} -> {ok, AccDone} + end. + +full_reduce(#btree{root=nil,reduce=Reduce}) -> + {ok, Reduce(reduce, [])}; +full_reduce(#btree{root={_P, Red}, reduce=Reduce}) -> + {ok, Reduce(rereduce, [Red])}. + +% wraps a 2 arity function with the proper 3 arity function +convert_fun_arity(Fun) when is_function(Fun, 2) -> + fun + (visit, KV, _Reds, AccIn) -> Fun(KV, AccIn); + (traverse, _K, _Red, AccIn) -> {ok, AccIn} + end; +convert_fun_arity(Fun) when is_function(Fun, 3) -> + fun + (visit, KV, Reds, AccIn) -> Fun(KV, Reds, AccIn); + (traverse, _K, _Red, AccIn) -> {ok, AccIn} + end; +convert_fun_arity(Fun) when is_function(Fun, 4) -> + Fun. % Already arity 4 + +make_key_in_end_range_function(Bt, fwd, Options) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + case couch_util:get_value(end_key, Options) of + undefined -> + fun(_Key) -> true end; + LastKey -> + fun(Key) -> not less(Bt, LastKey, Key) end + end; + EndKey -> + fun(Key) -> less(Bt, Key, EndKey) end + end; +make_key_in_end_range_function(Bt, rev, Options) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + case couch_util:get_value(end_key, Options) of + undefined -> + fun(_Key) -> true end; + LastKey -> + fun(Key) -> not less(Bt, Key, LastKey) end + end; + EndKey -> + fun(Key) -> less(Bt, EndKey, Key) end + end. + + +foldl(Bt, Fun, Acc) -> + fold(Bt, Fun, Acc, []). + +foldl(Bt, Fun, Acc, Options) -> + fold(Bt, Fun, Acc, Options). + + +fold(#btree{root=nil}, _Fun, Acc, _Options) -> + {ok, {[], []}, Acc}; +fold(#btree{root=Root}=Bt, Fun, Acc, Options) -> + Dir = couch_util:get_value(dir, Options, fwd), + InRange = make_key_in_end_range_function(Bt, Dir, Options), + Result = + case couch_util:get_value(start_key, Options) of + undefined -> + stream_node(Bt, [], Bt#btree.root, InRange, Dir, + convert_fun_arity(Fun), Acc); + StartKey -> + stream_node(Bt, [], Bt#btree.root, StartKey, InRange, Dir, + convert_fun_arity(Fun), Acc) + end, + case Result of + {ok, Acc2}-> + {_P, FullReduction} = Root, + {ok, {[], [FullReduction]}, Acc2}; + {stop, LastReduction, Acc2} -> + {ok, LastReduction, Acc2} + end. + +add(Bt, InsertKeyValues) -> + add_remove(Bt, InsertKeyValues, []). + +add_remove(Bt, InsertKeyValues, RemoveKeys) -> + {ok, [], Bt2} = query_modify(Bt, [], InsertKeyValues, RemoveKeys), + {ok, Bt2}. + +query_modify(Bt, LookupKeys, InsertValues, RemoveKeys) -> + #btree{root=Root} = Bt, + InsertActions = lists:map( + fun(KeyValue) -> + {Key, Value} = extract(Bt, KeyValue), + {insert, Key, Value} + end, InsertValues), + RemoveActions = [{remove, Key, nil} || Key <- RemoveKeys], + FetchActions = [{fetch, Key, nil} || Key <- LookupKeys], + SortFun = + fun({OpA, A, _}, {OpB, B, _}) -> + case A == B of + % A and B are equal, sort by op. + true -> op_order(OpA) < op_order(OpB); + false -> + less(Bt, A, B) + end + end, + Actions = lists:sort(SortFun, lists:append([InsertActions, RemoveActions, FetchActions])), + {ok, KeyPointers, QueryResults, Bt2} = modify_node(Bt, Root, Actions, []), + {ok, NewRoot, Bt3} = complete_root(Bt2, KeyPointers), + {ok, QueryResults, Bt3#btree{root=NewRoot}}. + +% for ordering different operations with the same key. +% fetch < remove < insert +op_order(fetch) -> 1; +op_order(remove) -> 2; +op_order(insert) -> 3. + +lookup(#btree{root=Root, less=Less}=Bt, Keys) -> + case Less of undefined -> + SortedKeys = lists:sort(Keys); + _ -> + SortedKeys = lists:sort(Less, Keys) + end, + {ok, SortedResults} = lookup(Bt, Root, SortedKeys), + % We want to return the results in the same order as the keys were input + % but we may have changed the order when we sorted. So we need to put the + % order back into the results. + couch_util:reorder_results(Keys, SortedResults). + +lookup(_Bt, nil, Keys) -> + {ok, [{Key, not_found} || Key <- Keys]}; +lookup(Bt, {Pointer, _Reds}, Keys) -> + {NodeType, NodeList} = get_node(Bt, Pointer), + case NodeType of + kp_node -> + lookup_kpnode(Bt, list_to_tuple(NodeList), 1, Keys, []); + kv_node -> + lookup_kvnode(Bt, list_to_tuple(NodeList), 1, Keys, []) + end. + +lookup_kpnode(_Bt, _NodeTuple, _LowerBound, [], Output) -> + {ok, lists:reverse(Output)}; +lookup_kpnode(_Bt, NodeTuple, LowerBound, Keys, Output) when tuple_size(NodeTuple) < LowerBound -> + {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; +lookup_kpnode(Bt, NodeTuple, LowerBound, [FirstLookupKey | _] = LookupKeys, Output) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, tuple_size(NodeTuple), FirstLookupKey), + {Key, PointerInfo} = element(N, NodeTuple), + SplitFun = fun(LookupKey) -> not less(Bt, Key, LookupKey) end, + case lists:splitwith(SplitFun, LookupKeys) of + {[], GreaterQueries} -> + lookup_kpnode(Bt, NodeTuple, N + 1, GreaterQueries, Output); + {LessEqQueries, GreaterQueries} -> + {ok, Results} = lookup(Bt, PointerInfo, LessEqQueries), + lookup_kpnode(Bt, NodeTuple, N + 1, GreaterQueries, lists:reverse(Results, Output)) + end. + + +lookup_kvnode(_Bt, _NodeTuple, _LowerBound, [], Output) -> + {ok, lists:reverse(Output)}; +lookup_kvnode(_Bt, NodeTuple, LowerBound, Keys, Output) when tuple_size(NodeTuple) < LowerBound -> + % keys not found + {ok, lists:reverse(Output, [{Key, not_found} || Key <- Keys])}; +lookup_kvnode(Bt, NodeTuple, LowerBound, [LookupKey | RestLookupKeys], Output) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, tuple_size(NodeTuple), LookupKey), + {Key, Value} = element(N, NodeTuple), + case less(Bt, LookupKey, Key) of + true -> + % LookupKey is less than Key + lookup_kvnode(Bt, NodeTuple, N, RestLookupKeys, [{LookupKey, not_found} | Output]); + false -> + case less(Bt, Key, LookupKey) of + true -> + % LookupKey is greater than Key + lookup_kvnode(Bt, NodeTuple, N+1, RestLookupKeys, [{LookupKey, not_found} | Output]); + false -> + % LookupKey is equal to Key + lookup_kvnode(Bt, NodeTuple, N, RestLookupKeys, [{LookupKey, {ok, assemble(Bt, LookupKey, Value)}} | Output]) + end + end. + + +complete_root(Bt, []) -> + {ok, nil, Bt}; +complete_root(Bt, [{_Key, PointerInfo}])-> + {ok, PointerInfo, Bt}; +complete_root(Bt, KPs) -> + {ok, ResultKeyPointers, Bt2} = write_node(Bt, kp_node, KPs), + complete_root(Bt2, ResultKeyPointers). + +%%%%%%%%%%%%% The chunkify function sucks! %%%%%%%%%%%%% +% It is inaccurate as it does not account for compression when blocks are +% written. Plus with the "case byte_size(term_to_binary(InList)) of" code +% it's probably really inefficient. + +chunkify(InList) -> + BaseChunkSize = list_to_integer(couch_config:get("couchdb", + "btree_chunk_size", "1279")), + case byte_size(term_to_binary(InList)) of + Size when Size > BaseChunkSize -> + NumberOfChunksLikely = ((Size div BaseChunkSize) + 1), + ChunkThreshold = Size div NumberOfChunksLikely, + chunkify(InList, ChunkThreshold, [], 0, []); + _Else -> + [InList] + end. + +chunkify([], _ChunkThreshold, [], 0, OutputChunks) -> + lists:reverse(OutputChunks); +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> + lists:reverse([lists:reverse(OutList) | OutputChunks]); +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> + case byte_size(term_to_binary(InElement)) of + Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> + chunkify(RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); + Size -> + chunkify(RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) + end. + +modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> + case RootPointerInfo of + nil -> + NodeType = kv_node, + NodeList = []; + {Pointer, _Reds} -> + {NodeType, NodeList} = get_node(Bt, Pointer) + end, + NodeTuple = list_to_tuple(NodeList), + + {ok, NewNodeList, QueryOutput2, Bt2} = + case NodeType of + kp_node -> modify_kpnode(Bt, NodeTuple, 1, Actions, [], QueryOutput); + kv_node -> modify_kvnode(Bt, NodeTuple, 1, Actions, [], QueryOutput) + end, + case NewNodeList of + [] -> % no nodes remain + {ok, [], QueryOutput2, Bt2}; + NodeList -> % nothing changed + {LastKey, _LastValue} = element(tuple_size(NodeTuple), NodeTuple), + {ok, [{LastKey, RootPointerInfo}], QueryOutput2, Bt2}; + _Else2 -> + {ok, ResultList, Bt3} = write_node(Bt2, NodeType, NewNodeList), + {ok, ResultList, QueryOutput2, Bt3} + end. + +reduce_node(#btree{reduce=nil}, _NodeType, _NodeList) -> + []; +reduce_node(#btree{reduce=R}, kp_node, NodeList) -> + R(rereduce, [Red || {_K, {_P, Red}} <- NodeList]); +reduce_node(#btree{reduce=R}=Bt, kv_node, NodeList) -> + R(reduce, [assemble(Bt, K, V) || {K, V} <- NodeList]). + + +get_node(#btree{fd = Fd}, NodePos) -> + {ok, {NodeType, NodeList}} = couch_file:pread_term(Fd, NodePos), + {NodeType, NodeList}. + +write_node(Bt, NodeType, NodeList) -> + % split up nodes into smaller sizes + NodeListList = chunkify(NodeList), + % now write out each chunk and return the KeyPointer pairs for those nodes + ResultList = [ + begin + {ok, Pointer} = couch_file:append_term(Bt#btree.fd, {NodeType, ANodeList}), + {LastKey, _} = lists:last(ANodeList), + {LastKey, {Pointer, reduce_node(Bt, NodeType, ANodeList)}} + end + || + ANodeList <- NodeListList + ], + {ok, ResultList, Bt}. + +modify_kpnode(Bt, {}, _LowerBound, Actions, [], QueryOutput) -> + modify_node(Bt, nil, Actions, QueryOutput); +modify_kpnode(Bt, NodeTuple, LowerBound, [], ResultNode, QueryOutput) -> + {ok, lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, + tuple_size(NodeTuple), [])), QueryOutput, Bt}; +modify_kpnode(Bt, NodeTuple, LowerBound, + [{_, FirstActionKey, _}|_]=Actions, ResultNode, QueryOutput) -> + Sz = tuple_size(NodeTuple), + N = find_first_gteq(Bt, NodeTuple, LowerBound, Sz, FirstActionKey), + case N =:= Sz of + true -> + % perform remaining actions on last node + {_, PointerInfo} = element(Sz, NodeTuple), + {ok, ChildKPs, QueryOutput2, Bt2} = + modify_node(Bt, PointerInfo, Actions, QueryOutput), + NodeList = lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, + Sz - 1, ChildKPs)), + {ok, NodeList, QueryOutput2, Bt2}; + false -> + {NodeKey, PointerInfo} = element(N, NodeTuple), + SplitFun = fun({_ActionType, ActionKey, _ActionValue}) -> + not less(Bt, NodeKey, ActionKey) + end, + {LessEqQueries, GreaterQueries} = lists:splitwith(SplitFun, Actions), + {ok, ChildKPs, QueryOutput2, Bt2} = + modify_node(Bt, PointerInfo, LessEqQueries, QueryOutput), + ResultNode2 = lists:reverse(ChildKPs, bounded_tuple_to_revlist(NodeTuple, + LowerBound, N - 1, ResultNode)), + modify_kpnode(Bt2, NodeTuple, N+1, GreaterQueries, ResultNode2, QueryOutput2) + end. + +bounded_tuple_to_revlist(_Tuple, Start, End, Tail) when Start > End -> + Tail; +bounded_tuple_to_revlist(Tuple, Start, End, Tail) -> + bounded_tuple_to_revlist(Tuple, Start+1, End, [element(Start, Tuple)|Tail]). + +bounded_tuple_to_list(Tuple, Start, End, Tail) -> + bounded_tuple_to_list2(Tuple, Start, End, [], Tail). + +bounded_tuple_to_list2(_Tuple, Start, End, Acc, Tail) when Start > End -> + lists:reverse(Acc, Tail); +bounded_tuple_to_list2(Tuple, Start, End, Acc, Tail) -> + bounded_tuple_to_list2(Tuple, Start + 1, End, [element(Start, Tuple) | Acc], Tail). + +find_first_gteq(_Bt, _Tuple, Start, End, _Key) when Start == End -> + End; +find_first_gteq(Bt, Tuple, Start, End, Key) -> + Mid = Start + ((End - Start) div 2), + {TupleKey, _} = element(Mid, Tuple), + case less(Bt, TupleKey, Key) of + true -> + find_first_gteq(Bt, Tuple, Mid+1, End, Key); + false -> + find_first_gteq(Bt, Tuple, Start, Mid, Key) + end. + +modify_kvnode(Bt, NodeTuple, LowerBound, [], ResultNode, QueryOutput) -> + {ok, lists:reverse(ResultNode, bounded_tuple_to_list(NodeTuple, LowerBound, tuple_size(NodeTuple), [])), QueryOutput, Bt}; +modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} | RestActions], ResultNode, QueryOutput) when LowerBound > tuple_size(NodeTuple) -> + case ActionType of + insert -> + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + remove -> + % just drop the action + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, ResultNode, QueryOutput); + fetch -> + % the key/value must not exist in the tree + modify_kvnode(Bt, NodeTuple, LowerBound, RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) + end; +modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} | RestActions], AccNode, QueryOutput) -> + N = find_first_gteq(Bt, NodeTuple, LowerBound, tuple_size(NodeTuple), ActionKey), + {Key, Value} = element(N, NodeTuple), + ResultNode = bounded_tuple_to_revlist(NodeTuple, LowerBound, N - 1, AccNode), + case less(Bt, ActionKey, Key) of + true -> + case ActionType of + insert -> + % ActionKey is less than the Key, so insert + modify_kvnode(Bt, NodeTuple, N, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + remove -> + % ActionKey is less than the Key, just drop the action + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, QueryOutput); + fetch -> + % ActionKey is less than the Key, the key/value must not exist in the tree + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, [{not_found, {ActionKey, nil}} | QueryOutput]) + end; + false -> + % ActionKey and Key are maybe equal. + case less(Bt, Key, ActionKey) of + false -> + case ActionType of + insert -> + modify_kvnode(Bt, NodeTuple, N+1, RestActions, [{ActionKey, ActionValue} | ResultNode], QueryOutput); + remove -> + modify_kvnode(Bt, NodeTuple, N+1, RestActions, ResultNode, QueryOutput); + fetch -> + % ActionKey is equal to the Key, insert into the QueryOuput, but re-process the node + % since an identical action key can follow it. + modify_kvnode(Bt, NodeTuple, N, RestActions, ResultNode, [{ok, assemble(Bt, Key, Value)} | QueryOutput]) + end; + true -> + modify_kvnode(Bt, NodeTuple, N + 1, [{ActionType, ActionKey, ActionValue} | RestActions], [{Key, Value} | ResultNode], QueryOutput) + end + end. + + +reduce_stream_node(_Bt, _Dir, nil, _KeyStart, _KeyEnd, GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, _KeyGroupFun, _Fun, Acc) -> + {ok, Acc, GroupedRedsAcc, GroupedKVsAcc, GroupedKey}; +reduce_stream_node(Bt, Dir, {P, _R}, KeyStart, KeyEnd, GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> + case get_node(Bt, P) of + {kp_node, NodeList} -> + reduce_stream_kp_node(Bt, Dir, NodeList, KeyStart, KeyEnd, GroupedKey, + GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc); + {kv_node, KVs} -> + reduce_stream_kv_node(Bt, Dir, KVs, KeyStart, KeyEnd, GroupedKey, + GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc) + end. + +reduce_stream_kv_node(Bt, Dir, KVs, KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + KeyGroupFun, Fun, Acc) -> + + GTEKeyStartKVs = + case KeyStart of + undefined -> + KVs; + _ -> + lists:dropwhile(fun({Key,_}) -> less(Bt, Key, KeyStart) end, KVs) + end, + KVs2 = + case KeyEnd of + undefined -> + GTEKeyStartKVs; + _ -> + lists:takewhile( + fun({Key,_}) -> + not less(Bt, KeyEnd, Key) + end, GTEKeyStartKVs) + end, + reduce_stream_kv_node2(Bt, adjust_dir(Dir, KVs2), GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + KeyGroupFun, Fun, Acc). + + +reduce_stream_kv_node2(_Bt, [], GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + _KeyGroupFun, _Fun, Acc) -> + {ok, Acc, GroupedRedsAcc, GroupedKVsAcc, GroupedKey}; +reduce_stream_kv_node2(Bt, [{Key, Value}| RestKVs], GroupedKey, GroupedKVsAcc, + GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> + case GroupedKey of + undefined -> + reduce_stream_kv_node2(Bt, RestKVs, Key, + [assemble(Bt,Key,Value)], [], KeyGroupFun, Fun, Acc); + _ -> + + case KeyGroupFun(GroupedKey, Key) of + true -> + reduce_stream_kv_node2(Bt, RestKVs, GroupedKey, + [assemble(Bt,Key,Value)|GroupedKVsAcc], GroupedRedsAcc, KeyGroupFun, + Fun, Acc); + false -> + case Fun(GroupedKey, {GroupedKVsAcc, GroupedRedsAcc}, Acc) of + {ok, Acc2} -> + reduce_stream_kv_node2(Bt, RestKVs, Key, [assemble(Bt,Key,Value)], + [], KeyGroupFun, Fun, Acc2); + {stop, Acc2} -> + throw({stop, Acc2}) + end + end + end. + +reduce_stream_kp_node(Bt, Dir, NodeList, KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, + KeyGroupFun, Fun, Acc) -> + Nodes = + case KeyStart of + undefined -> + NodeList; + _ -> + lists:dropwhile( + fun({Key,_}) -> + less(Bt, Key, KeyStart) + end, NodeList) + end, + NodesInRange = + case KeyEnd of + undefined -> + Nodes; + _ -> + {InRange, MaybeInRange} = lists:splitwith( + fun({Key,_}) -> + less(Bt, Key, KeyEnd) + end, Nodes), + InRange ++ case MaybeInRange of [] -> []; [FirstMaybe|_] -> [FirstMaybe] end + end, + reduce_stream_kp_node2(Bt, Dir, adjust_dir(Dir, NodesInRange), KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc). + + +reduce_stream_kp_node2(Bt, Dir, [{_Key, NodeInfo} | RestNodeList], KeyStart, KeyEnd, + undefined, [], [], KeyGroupFun, Fun, Acc) -> + {ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} = + reduce_stream_node(Bt, Dir, NodeInfo, KeyStart, KeyEnd, undefined, + [], [], KeyGroupFun, Fun, Acc), + reduce_stream_kp_node2(Bt, Dir, RestNodeList, KeyStart, KeyEnd, GroupedKey2, + GroupedKVsAcc2, GroupedRedsAcc2, KeyGroupFun, Fun, Acc2); +reduce_stream_kp_node2(Bt, Dir, NodeList, KeyStart, KeyEnd, + GroupedKey, GroupedKVsAcc, GroupedRedsAcc, KeyGroupFun, Fun, Acc) -> + {Grouped0, Ungrouped0} = lists:splitwith(fun({Key,_}) -> + KeyGroupFun(GroupedKey, Key) end, NodeList), + {GroupedNodes, UngroupedNodes} = + case Grouped0 of + [] -> + {Grouped0, Ungrouped0}; + _ -> + [FirstGrouped | RestGrouped] = lists:reverse(Grouped0), + {RestGrouped, [FirstGrouped | Ungrouped0]} + end, + GroupedReds = [R || {_, {_,R}} <- GroupedNodes], + case UngroupedNodes of + [{_Key, NodeInfo}|RestNodes] -> + {ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} = + reduce_stream_node(Bt, Dir, NodeInfo, KeyStart, KeyEnd, GroupedKey, + GroupedKVsAcc, GroupedReds ++ GroupedRedsAcc, KeyGroupFun, Fun, Acc), + reduce_stream_kp_node2(Bt, Dir, RestNodes, KeyStart, KeyEnd, GroupedKey2, + GroupedKVsAcc2, GroupedRedsAcc2, KeyGroupFun, Fun, Acc2); + [] -> + {ok, Acc, GroupedReds ++ GroupedRedsAcc, GroupedKVsAcc, GroupedKey} + end. + +adjust_dir(fwd, List) -> + List; +adjust_dir(rev, List) -> + lists:reverse(List). + +stream_node(Bt, Reds, {Pointer, _Reds}, StartKey, InRange, Dir, Fun, Acc) -> + {NodeType, NodeList} = get_node(Bt, Pointer), + case NodeType of + kp_node -> + stream_kp_node(Bt, Reds, adjust_dir(Dir, NodeList), StartKey, InRange, Dir, Fun, Acc); + kv_node -> + stream_kv_node(Bt, Reds, adjust_dir(Dir, NodeList), StartKey, InRange, Dir, Fun, Acc) + end. + +stream_node(Bt, Reds, {Pointer, _Reds}, InRange, Dir, Fun, Acc) -> + {NodeType, NodeList} = get_node(Bt, Pointer), + case NodeType of + kp_node -> + stream_kp_node(Bt, Reds, adjust_dir(Dir, NodeList), InRange, Dir, Fun, Acc); + kv_node -> + stream_kv_node2(Bt, Reds, [], adjust_dir(Dir, NodeList), InRange, Dir, Fun, Acc) + end. + +stream_kp_node(_Bt, _Reds, [], _InRange, _Dir, _Fun, Acc) -> + {ok, Acc}; +stream_kp_node(Bt, Reds, [{Key, {Pointer, Red}} | Rest], InRange, Dir, Fun, Acc) -> + case Fun(traverse, Key, Red, Acc) of + {ok, Acc2} -> + case stream_node(Bt, Reds, {Pointer, Red}, InRange, Dir, Fun, Acc2) of + {ok, Acc3} -> + stream_kp_node(Bt, [Red | Reds], Rest, InRange, Dir, Fun, Acc3); + {stop, LastReds, Acc3} -> + {stop, LastReds, Acc3} + end; + {skip, Acc2} -> + stream_kp_node(Bt, [Red | Reds], Rest, InRange, Dir, Fun, Acc2) + end. + +drop_nodes(_Bt, Reds, _StartKey, []) -> + {Reds, []}; +drop_nodes(Bt, Reds, StartKey, [{NodeKey, {Pointer, Red}} | RestKPs]) -> + case less(Bt, NodeKey, StartKey) of + true -> drop_nodes(Bt, [Red | Reds], StartKey, RestKPs); + false -> {Reds, [{NodeKey, {Pointer, Red}} | RestKPs]} + end. + +stream_kp_node(Bt, Reds, KPs, StartKey, InRange, Dir, Fun, Acc) -> + {NewReds, NodesToStream} = + case Dir of + fwd -> + % drop all nodes sorting before the key + drop_nodes(Bt, Reds, StartKey, KPs); + rev -> + % keep all nodes sorting before the key, AND the first node to sort after + RevKPs = lists:reverse(KPs), + case lists:splitwith(fun({Key, _Pointer}) -> less(Bt, Key, StartKey) end, RevKPs) of + {_RevsBefore, []} -> + % everything sorts before it + {Reds, KPs}; + {RevBefore, [FirstAfter | Drop]} -> + {[Red || {_K,{_P,Red}} <- Drop] ++ Reds, + [FirstAfter | lists:reverse(RevBefore)]} + end + end, + case NodesToStream of + [] -> + {ok, Acc}; + [{_Key, {Pointer, Red}} | Rest] -> + case stream_node(Bt, NewReds, {Pointer, Red}, StartKey, InRange, Dir, Fun, Acc) of + {ok, Acc2} -> + stream_kp_node(Bt, [Red | NewReds], Rest, InRange, Dir, Fun, Acc2); + {stop, LastReds, Acc2} -> + {stop, LastReds, Acc2} + end + end. + +stream_kv_node(Bt, Reds, KVs, StartKey, InRange, Dir, Fun, Acc) -> + DropFun = + case Dir of + fwd -> + fun({Key, _}) -> less(Bt, Key, StartKey) end; + rev -> + fun({Key, _}) -> less(Bt, StartKey, Key) end + end, + {LTKVs, GTEKVs} = lists:splitwith(DropFun, KVs), + AssembleLTKVs = [assemble(Bt,K,V) || {K,V} <- LTKVs], + stream_kv_node2(Bt, Reds, AssembleLTKVs, GTEKVs, InRange, Dir, Fun, Acc). + +stream_kv_node2(_Bt, _Reds, _PrevKVs, [], _InRange, _Dir, _Fun, Acc) -> + {ok, Acc}; +stream_kv_node2(Bt, Reds, PrevKVs, [{K,V} | RestKVs], InRange, Dir, Fun, Acc) -> + case InRange(K) of + false -> + {stop, {PrevKVs, Reds}, Acc}; + true -> + AssembledKV = assemble(Bt, K, V), + case Fun(visit, AssembledKV, {PrevKVs, Reds}, Acc) of + {ok, Acc2} -> + stream_kv_node2(Bt, Reds, [AssembledKV | PrevKVs], RestKVs, InRange, Dir, Fun, Acc2); + {stop, Acc2} -> + {stop, {PrevKVs, Reds}, Acc2} + end + end. diff --git a/apps/couch/src/couch_changes.erl b/apps/couch/src/couch_changes.erl new file mode 100644 index 00000000..4f2857b6 --- /dev/null +++ b/apps/couch/src/couch_changes.erl @@ -0,0 +1,413 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_changes). +-include("couch_db.hrl"). + +-export([handle_changes/3, get_changes_timeout/2, get_rest_db_updated/0, + configure_filter/4, filter/2]). + +%% @spec handle_changes(#changes_args{}, #httpd{} | {json_req, {[any()]}}, #db{}) -> any() +handle_changes(#changes_args{style=Style}=Args1, Req, Db) -> + #changes_args{feed = Feed} = Args = Args1#changes_args{ + filter = make_filter_fun(Args1#changes_args.filter, Style, Req, Db) + }, + StartSeq = case Args#changes_args.dir of + rev -> + couch_db:get_update_seq(Db); + fwd -> + Args#changes_args.since + end, + if Feed == "continuous" orelse Feed == "longpoll" -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + Self = self(), + {ok, Notify} = couch_db_update_notifier:start_link( + fun({_, DbName}) when DbName == Db#db.name -> + Self ! db_updated; + (_) -> + ok + end + ), + UserAcc2 = start_sending_changes(Callback, UserAcc, Feed), + {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback), + try + keep_sending_changes( + Args, + Callback, + UserAcc2, + Db, + StartSeq, + <<"">>, + Timeout, + TimeoutFun + ) + after + couch_db_update_notifier:stop(Notify), + get_rest_db_updated(ok) % clean out any remaining update messages + end + end; + true -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + UserAcc2 = start_sending_changes(Callback, UserAcc, Feed), + {ok, {_, LastSeq, _Prepend, _, _, UserAcc3, _, _, _, _}} = + send_changes( + Args#changes_args{feed="normal"}, + Callback, + UserAcc2, + Db, + StartSeq, + <<>> + ), + end_sending_changes(Callback, UserAcc3, LastSeq, Feed) + end + end. + +get_callback_acc({Callback, _UserAcc} = Pair) when is_function(Callback, 3) -> + Pair; +get_callback_acc(Callback) when is_function(Callback, 2) -> + {fun(Ev, Data, _) -> Callback(Ev, Data) end, ok}. + +%% @spec make_filter_fun(string(), main_only|all_docs, #httpd{} | {json_req, +%% {[any()]}}, #db{}) -> fun() +make_filter_fun([$_ | _] = FilterName, Style, Req, Db) -> + builtin_filter_fun(FilterName, Style, Req, Db); +make_filter_fun(FilterName, Style, Req, Db) -> + os_filter_fun(FilterName, Style, Req, Db). + +os_filter_fun(FilterName, Style, Req, Db) -> + case [list_to_binary(couch_httpd:unquote(Part)) + || Part <- string:tokens(FilterName, "/")] of + [] -> + fun(_Db2, #doc_info{revs=Revs}) -> + builtin_results(Style, Revs) + end; + [DName, FName] -> + DesignId = <<"_design/", DName/binary>>, + DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []), + % validate that the ddoc has the filter fun + #doc{body={Props}} = DDoc, + couch_util:get_nested_json_value({Props}, [<<"filters">>, FName]), + fun(Db2, DocInfo) -> + DocInfos = + case Style of + main_only -> + [DocInfo]; + all_docs -> + [DocInfo#doc_info{revs=[Rev]}|| Rev <- DocInfo#doc_info.revs] + end, + Docs = [Doc || {ok, Doc} <- [ + couch_db:open_doc(Db2, DocInfo2, [deleted, conflicts]) + || DocInfo2 <- DocInfos]], + {ok, Passes} = couch_query_servers:filter_docs( + Req, Db2, DDoc, FName, Docs + ), + [{[{<<"rev">>, couch_doc:rev_to_str({RevPos,RevId})}]} + || {Pass, #doc{revs={RevPos,[RevId|_]}}} + <- lists:zip(Passes, Docs), Pass == true] + end; + _Else -> + throw({bad_request, + "filter parameter must be of the form `designname/filtername`"}) + end. + +builtin_filter_fun("_doc_ids", Style, {json_req, {Props}}, _Db) -> + filter_docids(couch_util:get_value(<<"doc_ids">>, Props), Style); +builtin_filter_fun("_doc_ids", Style, #httpd{method='POST'}=Req, _Db) -> + {Props} = couch_httpd:json_body_obj(Req), + DocIds = couch_util:get_value(<<"doc_ids">>, Props, nil), + filter_docids(DocIds, Style); +builtin_filter_fun("_doc_ids", Style, #httpd{method='GET'}=Req, _Db) -> + DocIds = ?JSON_DECODE(couch_httpd:qs_value(Req, "doc_ids", "null")), + filter_docids(DocIds, Style); +builtin_filter_fun("_design", Style, _Req, _Db) -> + filter_designdoc(Style); +builtin_filter_fun(_FilterName, _Style, _Req, _Db) -> + throw({bad_request, "unknown builtin filter name"}). + +filter_docids(DocIds, Style) when is_list(DocIds)-> + fun(_Db, #doc_info{id=DocId, revs=Revs}) -> + case lists:member(DocId, DocIds) of + true -> + builtin_results(Style, Revs); + _ -> [] + end + end; +filter_docids(_, _) -> + throw({bad_request, "`doc_ids` filter parameter is not a list."}). + +filter_designdoc(Style) -> + fun(_Db, #doc_info{id=DocId, revs=Revs}) -> + case DocId of + <<"_design", _/binary>> -> + builtin_results(Style, Revs); + _ -> [] + end + end. + +builtin_results(Style, [#rev_info{rev=Rev}|_]=Revs) -> + case Style of + main_only -> + [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]}]; + all_docs -> + [{[{<<"rev">>, couch_doc:rev_to_str(R)}]} + || #rev_info{rev=R} <- Revs] + end. + +configure_filter(Filter, Style, Req, Db) when is_list(Filter) -> + case [?l2b(couch_httpd:unquote(X)) || X <- string:tokens(Filter, "/")] of + [] -> + % fall back to standard filter + Style; + [DName, FName] -> + JsonReq = chttpd_external:json_req_obj(Req, Db), + DesignId = <<"_design/", DName/binary>>, + DDoc = chttpd_db:couch_doc_open(Db, DesignId, nil, []), + % validate that the ddoc has the filter fun + #doc{body={Props}} = DDoc, + couch_util:get_nested_json_value({Props}, [<<"filters">>, FName]), + {custom, Style, {Db, JsonReq, DDoc, FName}}; + [<<"_doc_ids">>] -> + DocIds = ?JSON_DECODE(couch_httpd:qs_value(Req, "doc_ids", "null")), + case is_list(DocIds) of + true -> ok; + false -> throw({bad_request, "`doc_ids` filter parameter is not a list."}) + end, + {builtin, Style, {doc_ids, DocIds}}; + [<<"_design">>] -> + {builtin, Style, design}; + [<<"_", _/binary>>] -> + throw({bad_request, "unknown builtin filter name"}); + _Else -> + throw({bad_request, + "filter parameter must be of the form `designname/filtername`"}) + end; +configure_filter(_, Style, _, _) -> + Style. + +filter(#doc_info{revs=[#rev_info{rev=Rev}|_]}, main_only) -> + [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]}]; +filter(#doc_info{revs=Revs}, all_docs) -> + [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]} || #rev_info{rev=Rev} <- Revs]; +filter(#doc_info{id=Id, revs=RevInfos}, {custom, main_only, Acc}) -> + custom_filter(Id, [(hd(RevInfos))#rev_info.rev], Acc); +filter(#doc_info{id=Id, revs=RevInfos}, {custom, all_docs, Acc}) -> + custom_filter(Id, [R || #rev_info{rev=R} <- RevInfos], Acc); +filter(#doc_info{id=Id, revs=RevInfos}, {builtin, main_only, Acc}) -> + builtin_filter(Id, [(hd(RevInfos))#rev_info.rev], Acc); +filter(#doc_info{id=Id, revs=RevInfos}, {builtin, all_docs, Acc}) -> + builtin_filter(Id, [R || #rev_info{rev=R} <- RevInfos], Acc). + +custom_filter(Id, Revs, {Db, JsonReq, DDoc, Filter}) -> + {ok, Results} = fabric:open_revs(Db, Id, Revs, [deleted, conflicts]), + Docs = [Doc || {ok, Doc} <- Results], + {ok, Passes} = couch_query_servers:filter_docs({json_req,JsonReq}, Db, + DDoc, Filter, Docs), + % ?LOG_INFO("filtering ~p ~p", [Id, Passes]), + [{[{<<"rev">>, couch_doc:rev_to_str({RevPos,RevId})}]} + || {Pass, #doc{revs={RevPos,[RevId|_]}}} + <- lists:zip(Passes, Docs), Pass == true]. + +builtin_filter(Id, Revs, design) -> + case Id of + <<"_design", _/binary>> -> + [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]} || Rev <- Revs]; + _ -> + [] + end; +builtin_filter(Id, Revs, {doc_ids, DocIds}) -> + case lists:member(Id, DocIds) of + true -> + [{[{<<"rev">>, couch_doc:rev_to_str(Rev)}]} || Rev <- Revs]; + false -> + [] + end. + +get_changes_timeout(Args, Callback) -> + #changes_args{ + heartbeat = Heartbeat, + timeout = Timeout, + feed = ResponseType + } = Args, + DefaultTimeout = list_to_integer( + couch_config:get("httpd", "changes_timeout", "60000") + ), + case Heartbeat of + undefined -> + case Timeout of + undefined -> + {DefaultTimeout, fun(UserAcc) -> {stop, UserAcc} end}; + infinity -> + {infinity, fun(UserAcc) -> {stop, UserAcc} end}; + _ -> + {lists:min([DefaultTimeout, Timeout]), + fun(UserAcc) -> {stop, UserAcc} end} + end; + true -> + {DefaultTimeout, + fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end}; + _ -> + {lists:min([DefaultTimeout, Heartbeat]), + fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end} + end. + +start_sending_changes(_Callback, UserAcc, "continuous") -> + UserAcc; +start_sending_changes(Callback, UserAcc, ResponseType) -> + Callback(start, ResponseType, UserAcc). + +send_changes(Args, Callback, UserAcc, Db, StartSeq, Prepend) -> + #changes_args{ + include_docs = IncludeDocs, + conflicts = Conflicts, + limit = Limit, + feed = ResponseType, + dir = Dir, + filter = FilterFun + } = Args, + couch_db:changes_since( + Db, + StartSeq, + fun changes_enumerator/2, + [{dir, Dir}], + {Db, StartSeq, Prepend, FilterFun, Callback, UserAcc, ResponseType, + Limit, IncludeDocs, Conflicts} + ). + +keep_sending_changes(Args, Callback, UserAcc, Db, StartSeq, Prepend, Timeout, + TimeoutFun) -> + #changes_args{ + feed = ResponseType, + limit = Limit, + db_open_options = DbOptions + } = Args, + % ?LOG_INFO("send_changes start ~p",[StartSeq]), + {ok, {_, EndSeq, Prepend2, _, _, UserAcc2, _, NewLimit, _, _}} = send_changes( + Args#changes_args{dir=fwd}, Callback, UserAcc, Db, StartSeq, Prepend + ), + % ?LOG_INFO("send_changes last ~p",[EndSeq]), + couch_db:close(Db), + if Limit > NewLimit, ResponseType == "longpoll" -> + end_sending_changes(Callback, UserAcc2, EndSeq, ResponseType); + true -> + case wait_db_updated(Timeout, TimeoutFun, UserAcc2) of + {updated, UserAcc3} -> + % ?LOG_INFO("wait_db_updated updated ~p",[{Db#db.name, EndSeq}]), + DbOptions1 = [{user_ctx, Db#db.user_ctx} | DbOptions], + case couch_db:open(Db#db.name, DbOptions1) of + {ok, Db2} -> + keep_sending_changes( + Args#changes_args{limit=NewLimit}, + Callback, + UserAcc3, + Db2, + EndSeq, + Prepend2, + Timeout, + TimeoutFun + ); + _Else -> + end_sending_changes(Callback, UserAcc2, EndSeq, ResponseType) + end; + {stop, UserAcc3} -> + % ?LOG_INFO("wait_db_updated stop ~p",[{Db#db.name, EndSeq}]), + end_sending_changes(Callback, UserAcc3, EndSeq, ResponseType) + end + end. + +end_sending_changes(Callback, UserAcc, EndSeq, ResponseType) -> + Callback({stop, EndSeq}, ResponseType, UserAcc). + +changes_enumerator(DocInfo, {Db, _, _, FilterFun, Callback, UserAcc, + "continuous", Limit, IncludeDocs, Conflicts}) -> + + #doc_info{high_seq = Seq} = DocInfo, + Results0 = FilterFun(Db, DocInfo), + Results = [Result || Result <- Results0, Result /= null], + Go = if Limit =< 1 -> stop; true -> ok end, + case Results of + [] -> + {Go, {Db, Seq, nil, FilterFun, Callback, UserAcc, "continuous", Limit, + IncludeDocs, Conflicts} + }; + _ -> + ChangesRow = changes_row(Db, Results, DocInfo, IncludeDocs, Conflicts), + UserAcc2 = Callback({change, ChangesRow, <<>>}, "continuous", UserAcc), + {Go, {Db, Seq, nil, FilterFun, Callback, UserAcc2, "continuous", + Limit - 1, IncludeDocs, Conflicts} + } + end; +changes_enumerator(DocInfo, {Db, _, Prepend, FilterFun, Callback, UserAcc, + ResponseType, Limit, IncludeDocs, Conflicts}) -> + + #doc_info{high_seq = Seq} = DocInfo, + Results0 = FilterFun(Db, DocInfo), + Results = [Result || Result <- Results0, Result /= null], + Go = if (Limit =< 1) andalso Results =/= [] -> stop; true -> ok end, + case Results of + [] -> + {Go, {Db, Seq, Prepend, FilterFun, Callback, UserAcc, ResponseType, + Limit, IncludeDocs, Conflicts} + }; + _ -> + ChangesRow = changes_row(Db, Results, DocInfo, IncludeDocs, Conflicts), + UserAcc2 = Callback({change, ChangesRow, Prepend}, ResponseType, UserAcc), + {Go, {Db, Seq, <<",\n">>, FilterFun, Callback, UserAcc2, ResponseType, + Limit - 1, IncludeDocs, Conflicts} + } + end. + + +changes_row(Db, Results, DocInfo, IncludeDoc, Conflicts) -> + #doc_info{ + id = Id, high_seq = Seq, revs = [#rev_info{deleted = Del} | _] + } = DocInfo, + {[{<<"seq">>, Seq}, {<<"id">>, Id}, {<<"changes">>, Results}] ++ + deleted_item(Del) ++ case IncludeDoc of + true -> + Options = if Conflicts -> [conflicts]; true -> [] end, + couch_httpd_view:doc_member(Db, DocInfo, Options); + false -> + [] + end}. + +deleted_item(true) -> [{<<"deleted">>, true}]; +deleted_item(_) -> []. + +% waits for a db_updated msg, if there are multiple msgs, collects them. +wait_db_updated(Timeout, TimeoutFun, UserAcc) -> + receive + db_updated -> + get_rest_db_updated(UserAcc) + after Timeout -> + {Go, UserAcc2} = TimeoutFun(UserAcc), + case Go of + ok -> + wait_db_updated(Timeout, TimeoutFun, UserAcc2); + stop -> + {stop, UserAcc2} + end + end. + +get_rest_db_updated(UserAcc) -> + receive + db_updated -> + get_rest_db_updated(UserAcc) + after 0 -> + {updated, UserAcc} + end. + +get_rest_db_updated() -> + receive db_updated -> get_rest_db_updated() + after 0 -> updated + end. diff --git a/apps/couch/src/couch_config.erl b/apps/couch/src/couch_config.erl new file mode 100644 index 00000000..933bb5d5 --- /dev/null +++ b/apps/couch/src/couch_config.erl @@ -0,0 +1,234 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% Reads CouchDB's ini file and gets queried for configuration parameters. +% This module is initialized with a list of ini files that it consecutively +% reads Key/Value pairs from and saves them in an ets table. If more an one +% ini file is specified, the last one is used to write changes that are made +% with store/2 back to that ini file. + +-module(couch_config). +-behaviour(gen_server). + +-include("couch_db.hrl"). + + +-export([start_link/1, stop/0]). +-export([all/0, get/1, get/2, get/3, set/3, set/4, delete/2, delete/3]). +-export([register/1, register/2]). +-export([parse_ini_file/1]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-record(config, { + notify_funs=[], + write_filename=undefined +}). + + +start_link(IniFiles) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, IniFiles, []). + +stop() -> + gen_server:cast(?MODULE, stop). + + +all() -> + lists:sort(gen_server:call(?MODULE, all, infinity)). + + +get(Section) when is_binary(Section) -> + ?MODULE:get(?b2l(Section)); +get(Section) -> + Matches = ets:match(?MODULE, {{Section, '$1'}, '$2'}), + [{Key, Value} || [Key, Value] <- Matches]. + +get(Section, Key) -> + ?MODULE:get(Section, Key, undefined). + +get(Section, Key, Default) when is_binary(Section) and is_binary(Key) -> + ?MODULE:get(?b2l(Section), ?b2l(Key), Default); +get(Section, Key, Default) -> + case ets:lookup(?MODULE, {Section, Key}) of + [] -> Default; + [{_, Match}] -> Match + end. + +set(Section, Key, Value) -> + ?MODULE:set(Section, Key, Value, true). + +set(Section, Key, Value, Persist) when is_binary(Section) and is_binary(Key) -> + ?MODULE:set(?b2l(Section), ?b2l(Key), Value, Persist); +set(Section, Key, Value, Persist) -> + gen_server:call(?MODULE, {set, Section, Key, Value, Persist}). + + +delete(Section, Key) when is_binary(Section) and is_binary(Key) -> + delete(?b2l(Section), ?b2l(Key)); +delete(Section, Key) -> + delete(Section, Key, true). + +delete(Section, Key, Persist) when is_binary(Section) and is_binary(Key) -> + delete(?b2l(Section), ?b2l(Key), Persist); +delete(Section, Key, Persist) -> + gen_server:call(?MODULE, {delete, Section, Key, Persist}). + + +register(Fun) -> + ?MODULE:register(Fun, self()). + +register(Fun, Pid) -> + couch_config_event:register(Fun, Pid). + + +init(IniFiles) -> + ets:new(?MODULE, [named_table, set, protected]), + try + lists:map(fun(IniFile) -> + {ok, ParsedIniValues} = parse_ini_file(IniFile), + ets:insert(?MODULE, ParsedIniValues) + end, IniFiles), + WriteFile = case IniFiles of + [_|_] -> lists:last(IniFiles); + _ -> undefined + end, + {ok, #config{write_filename = WriteFile}} + catch _Tag:Error -> + {stop, Error} + end. + + +terminate(_Reason, _State) -> + ok. + + +handle_call(all, _From, Config) -> + Resp = lists:sort((ets:tab2list(?MODULE))), + {reply, Resp, Config}; +handle_call({set, Sec, Key, Val, Persist}, _From, Config) -> + Result = case {Persist, Config#config.write_filename} of + {true, undefined} -> + ok; + {true, FileName} -> + couch_config_writer:save_to_file({{Sec, Key}, Val}, FileName); + _ -> + ok + end, + case Result of + ok -> + true = ets:insert(?MODULE, {{Sec, Key}, Val}), + Event = {config_change, Sec, Key, Val, Persist}, + gen_event:sync_notify(couch_config_event, Event), + {reply, ok, Config}; + _Error -> + {reply, Result, Config} + end; +handle_call({delete, Sec, Key, Persist}, _From, Config) -> + true = ets:delete(?MODULE, {Sec,Key}), + case {Persist, Config#config.write_filename} of + {true, undefined} -> + ok; + {true, FileName} -> + couch_config_writer:save_to_file({{Sec, Key}, ""}, FileName); + _ -> + ok + end, + Event = {config_change, Sec, Key, deleted, Persist}, + gen_event:sync_notify(couch_config_event, Event), + {reply, ok, Config}. + + +handle_cast(stop, State) -> + {stop, normal, State}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(Info, State) -> + ?LOG_ERROR("couch_config:handle_info Info: ~p~n", [Info]), + {noreply, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +parse_ini_file(IniFile) -> + IniFilename = couch_util:abs_pathname(IniFile), + IniBin = + case file:read_file(IniFilename) of + {ok, IniBin0} -> + IniBin0; + {error, eacces} -> + throw({file_permission_error, IniFile}); + {error, enoent} -> + Fmt = "Couldn't find server configuration file ~s.", + Msg = ?l2b(io_lib:format(Fmt, [IniFilename])), + ?LOG_ERROR("~s~n", [Msg]), + throw({startup_error, Msg}) + end, + + Lines = re:split(IniBin, "\r\n|\n|\r|\032", [{return, list}]), + {_, ParsedIniValues} = + lists:foldl(fun(Line, {AccSectionName, AccValues}) -> + case string:strip(Line) of + "[" ++ Rest -> + case re:split(Rest, "\\]", [{return, list}]) of + [NewSectionName, ""] -> + {NewSectionName, AccValues}; + _Else -> % end bracket not at end, ignore this line + {AccSectionName, AccValues} + end; + ";" ++ _Comment -> + {AccSectionName, AccValues}; + Line2 -> + case re:split(Line2, "\s?=\s?", [{return, list}]) of + [Value] -> + MultiLineValuePart = case re:run(Line, "^ \\S", []) of + {match, _} -> + true; + _ -> + false + end, + case {MultiLineValuePart, AccValues} of + {true, [{{_, ValueName}, PrevValue} | AccValuesRest]} -> + % remove comment + case re:split(Value, " ;|\t;", [{return, list}]) of + [[]] -> + % empty line + {AccSectionName, AccValues}; + [LineValue | _Rest] -> + E = {{AccSectionName, ValueName}, + PrevValue ++ " " ++ LineValue}, + {AccSectionName, [E | AccValuesRest]} + end; + _ -> + {AccSectionName, AccValues} + end; + [""|_LineValues] -> % line begins with "=", ignore + {AccSectionName, AccValues}; + [ValueName|LineValues] -> % yeehaw, got a line! + RemainingLine = couch_util:implode(LineValues, "="), + % removes comments + case re:split(RemainingLine, " ;|\t;", [{return, list}]) of + [[]] -> + % empty line means delete this key + ets:delete(?MODULE, {AccSectionName, ValueName}), + {AccSectionName, AccValues}; + [LineValue | _Rest] -> + {AccSectionName, + [{{AccSectionName, ValueName}, LineValue} | AccValues]} + end + end + end + end, {"", []}, Lines), + {ok, ParsedIniValues}. + diff --git a/apps/couch/src/couch_config_event.erl b/apps/couch/src/couch_config_event.erl new file mode 100644 index 00000000..e353c7d8 --- /dev/null +++ b/apps/couch/src/couch_config_event.erl @@ -0,0 +1,46 @@ +-module(couch_config_event). +-behaviour(gen_event). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0, register/2]). + +-include("couch_db.hrl"). + +start_link() -> + gen_event:start_link({local, ?MODULE}). + +register(Fun, Pid) -> + gen_event:add_handler(?MODULE, {?MODULE, Fun}, [Fun, Pid]). + +init([Fun, Pid]) -> + Ref = erlang:monitor(process, Pid), + {ok, {Fun, Ref}}. + +handle_event({config_change,Sec,_,_,_}, {F,_}=St) when is_function(F,1) -> + catch F(Sec), + {ok, St}; +handle_event({config_change,Sec,K,_,_}, {F,_}=St) when is_function(F,2) -> + catch F(Sec,K), + {ok, St}; +handle_event({config_change,Sec,K,V,_}, {F,_}=St) when is_function(F,3) -> + catch F(Sec,K,V), + {ok, St}; +handle_event({config_change,Sec,K,V,Write}, {F,_}=St) when is_function(F,4) -> + catch F(Sec,K,V,Write), + {ok, St}. + +handle_call(_Request, St) -> + {ok, ok, St}. + +handle_info({'DOWN', Ref, _, _, _}, {_, Ref}) -> + remove_handler; +handle_info(_Info, St) -> + {ok, St}. + +terminate(Reason, St) -> + ?LOG_INFO("config_event handler ~p terminating with ~p", [St, Reason]), + ok. + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. diff --git a/apps/couch/src/couch_config_writer.erl b/apps/couch/src/couch_config_writer.erl new file mode 100644 index 00000000..decd269a --- /dev/null +++ b/apps/couch/src/couch_config_writer.erl @@ -0,0 +1,86 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% @doc Saves a Key/Value pair to a ini file. The Key consists of a Section +%% and Option combination. If that combination is found in the ini file +%% the new value replaces the old value. If only the Section is found the +%% Option and value combination is appended to the Section. If the Section +%% does not yet exist in the ini file, it is added and the Option/Value +%% pair is appended. +%% @see couch_config + +-module(couch_config_writer). + +-export([save_to_file/2]). + +%% @spec save_to_file( +%% Config::{{Section::string(), Option::string()}, Value::string()}, +%% File::filename()) -> ok +%% @doc Saves a Section/Key/Value triple to the ini file File::filename() +save_to_file({{Section, Key}, Value}, File) -> + {ok, OldFileContents} = file:read_file(File), + Lines = re:split(OldFileContents, "\r\n|\n|\r|\032", [{return, list}]), + + SectionLine = "[" ++ Section ++ "]", + {ok, Pattern} = re:compile(["^(", Key, "\\s*=)|\\[[a-zA-Z0-9\_-]*\\]"]), + + NewLines = process_file_lines(Lines, [], SectionLine, Pattern, Key, Value), + NewFileContents = reverse_and_add_newline(strip_empty_lines(NewLines), []), + case file:write_file(File, NewFileContents) of + ok -> + ok; + {error, eacces} -> + {file_permission_error, File}; + Error -> + Error + end. + + +process_file_lines([Section|Rest], SeenLines, Section, Pattern, Key, Value) -> + process_section_lines(Rest, [Section|SeenLines], Pattern, Key, Value); + +process_file_lines([Line|Rest], SeenLines, Section, Pattern, Key, Value) -> + process_file_lines(Rest, [Line|SeenLines], Section, Pattern, Key, Value); + +process_file_lines([], SeenLines, Section, _Pattern, Key, Value) -> + % Section wasn't found. Append it with the option here. + [Key ++ " = " ++ Value, Section, "" | strip_empty_lines(SeenLines)]. + + +process_section_lines([Line|Rest], SeenLines, Pattern, Key, Value) -> + case re:run(Line, Pattern, [{capture, all_but_first}]) of + nomatch -> % Found nothing interesting. Move on. + process_section_lines(Rest, [Line|SeenLines], Pattern, Key, Value); + {match, []} -> % Found another section. Append the option here. + lists:reverse(Rest) ++ + [Line, "", Key ++ " = " ++ Value | strip_empty_lines(SeenLines)]; + {match, _} -> % Found the option itself. Replace it. + lists:reverse(Rest) ++ [Key ++ " = " ++ Value | SeenLines] + end; + +process_section_lines([], SeenLines, _Pattern, Key, Value) -> + % Found end of file within the section. Append the option here. + [Key ++ " = " ++ Value | strip_empty_lines(SeenLines)]. + + +reverse_and_add_newline([Line|Rest], Content) -> + reverse_and_add_newline(Rest, [Line, "\n", Content]); + +reverse_and_add_newline([], Content) -> + Content. + + +strip_empty_lines(["" | Rest]) -> + strip_empty_lines(Rest); + +strip_empty_lines(All) -> + All. diff --git a/apps/couch/src/couch_db.erl b/apps/couch/src/couch_db.erl new file mode 100644 index 00000000..c01b0a35 --- /dev/null +++ b/apps/couch/src/couch_db.erl @@ -0,0 +1,1207 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_db). + +-export([open/2,open_int/2,close/1,create/2,start_compact/1,get_db_info/1,get_design_docs/1]). +-export([open_ref_counted/2,is_idle/1,monitor/1,count_changes_since/2]). +-export([update_doc/3,update_doc/4,update_docs/4,update_docs/2,update_docs/3,delete_doc/3]). +-export([get_doc_info/2,open_doc/2,open_doc/3,open_doc_revs/4]). +-export([set_revs_limit/2,get_revs_limit/1]). +-export([get_missing_revs/2,name/1,doc_to_tree/1,get_update_seq/1,get_committed_update_seq/1]). +-export([enum_docs/4,enum_docs_since/5]). +-export([enum_docs_since_reduce_to_count/1,enum_docs_reduce_to_count/1]). +-export([increment_update_seq/1,get_purge_seq/1,purge_docs/2,get_last_purged/1]). +-export([start_link/3,open_doc_int/3,ensure_full_commit/1,ensure_full_commit/2]). +-export([set_security/2,get_security/1]). +-export([changes_since/4,changes_since/5,read_doc/2,new_revid/1]). +-export([check_is_admin/1, check_is_reader/1, get_doc_count/1]). +-export([reopen/1, make_doc/5]). + +-include("couch_db.hrl"). + +start_link(DbName, Filepath, Options) -> + case open_db_file(Filepath, Options) of + {ok, Fd} -> + {ok, UpdaterPid} = gen_server:start_link(couch_db_updater, {DbName, + Filepath, Fd, Options}, []), + unlink(Fd), + gen_server:call(UpdaterPid, get_db); + Else -> + Else + end. + +open_db_file(Filepath, Options) -> + case couch_file:open(Filepath, Options) of + {ok, Fd} -> + {ok, Fd}; + {error, enoent} -> + % couldn't find file. is there a compact version? This can happen if + % crashed during the file switch. + case couch_file:open(Filepath ++ ".compact") of + {ok, Fd} -> + ?LOG_INFO("Found ~s~s compaction file, using as primary storage.", [Filepath, ".compact"]), + ok = file:rename(Filepath ++ ".compact", Filepath), + ok = couch_file:sync(Filepath), + {ok, Fd}; + {error, enoent} -> + {not_found, no_db_file} + end; + Error -> + Error + end. + + +create(DbName, Options) -> + couch_server:create(DbName, Options). + +% this is for opening a database for internal purposes like the replicator +% or the view indexer. it never throws a reader error. +open_int(DbName, Options) -> + couch_server:open(DbName, Options). + +% this should be called anytime an http request opens the database. +% it ensures that the http userCtx is a valid reader +open(DbName, Options) -> + case couch_server:open(DbName, Options) of + {ok, Db} -> + try + check_is_reader(Db), + {ok, Db} + catch + throw:Error -> + close(Db), + throw(Error) + end; + Else -> Else + end. + +reopen(#db{main_pid = Pid, fd = Fd, fd_monitor = OldRef, user_ctx = UserCtx}) -> + {ok, #db{fd = NewFd} = NewDb} = gen_server:call(Pid, get_db, infinity), + case NewFd =:= Fd of + true -> + {ok, NewDb#db{user_ctx = UserCtx}}; + false -> + erlang:demonitor(OldRef), + NewRef = erlang:monitor(process, NewFd), + {ok, NewDb#db{user_ctx = UserCtx, fd_monitor = NewRef}} + end. + +ensure_full_commit(#db{main_pid=Pid, instance_start_time=StartTime}) -> + ok = gen_server:call(Pid, full_commit, infinity), + {ok, StartTime}. + +ensure_full_commit(Db, RequiredSeq) -> + #db{main_pid=Pid, instance_start_time=StartTime} = Db, + ok = gen_server:call(Pid, {full_commit, RequiredSeq}, infinity), + {ok, StartTime}. + +close(#db{fd_monitor=RefCntr}) -> + erlang:demonitor(RefCntr). + +open_ref_counted(MainPid, OpenedPid) -> + gen_server:call(MainPid, {open_ref_count, OpenedPid}). + +is_idle(#db{compactor_pid=nil, waiting_delayed_commit=nil} = Db) -> + case erlang:process_info(Db#db.fd, monitored_by) of + undefined -> + true; + {monitored_by, Pids} -> + (Pids -- [Db#db.main_pid, whereis(couch_stats_collector)]) =:= [] + end; +is_idle(_Db) -> + false. + +monitor(#db{main_pid=MainPid}) -> + erlang:monitor(process, MainPid). + +start_compact(#db{main_pid=Pid}) -> + {ok, _} = gen_server:call(Pid, start_compact), + ok. + +delete_doc(Db, Id, Revisions) -> + DeletedDocs = [#doc{id=Id, revs=[Rev], deleted=true} || Rev <- Revisions], + {ok, [Result]} = update_docs(Db, DeletedDocs, []), + {ok, Result}. + +open_doc(Db, IdOrDocInfo) -> + open_doc(Db, IdOrDocInfo, []). + +open_doc(Db, Id, Options) -> + increment_stat(Db, {couchdb, database_reads}), + case open_doc_int(Db, Id, Options) of + {ok, #doc{deleted=true}=Doc} -> + case lists:member(deleted, Options) of + true -> + apply_open_options({ok, Doc},Options); + false -> + {not_found, deleted} + end; + Else -> + apply_open_options(Else,Options) + end. + +apply_open_options({ok, Doc},Options) -> + apply_open_options2(Doc,Options); +apply_open_options(Else,_Options) -> + Else. + +apply_open_options2(Doc,[]) -> + {ok, Doc}; +apply_open_options2(#doc{atts=Atts,revs=Revs}=Doc, + [{atts_since, PossibleAncestors}|Rest]) -> + RevPos = find_ancestor_rev_pos(Revs, PossibleAncestors), + apply_open_options2(Doc#doc{atts=[A#att{data= + if AttPos>RevPos -> Data; true -> stub end} + || #att{revpos=AttPos,data=Data}=A <- Atts]}, Rest); +apply_open_options2(Doc,[_|Rest]) -> + apply_open_options2(Doc,Rest). + + +find_ancestor_rev_pos({_, []}, _AttsSinceRevs) -> + 0; +find_ancestor_rev_pos(_DocRevs, []) -> + 0; +find_ancestor_rev_pos({RevPos, [RevId|Rest]}, AttsSinceRevs) -> + case lists:member({RevPos, RevId}, AttsSinceRevs) of + true -> + RevPos; + false -> + find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs) + end. + +open_doc_revs(Db, Id, Revs, Options) -> + increment_stat(Db, {couchdb, database_reads}), + [{ok, Results}] = open_doc_revs_int(Db, [{Id, Revs}], Options), + {ok, [apply_open_options(Result, Options) || Result <- Results]}. + +% Each returned result is a list of tuples: +% {Id, MissingRevs, PossibleAncestors} +% if no revs are missing, it's omitted from the results. +get_missing_revs(Db, IdRevsList) -> + Results = get_full_doc_infos(Db, [Id1 || {Id1, _Revs} <- IdRevsList]), + {ok, find_missing(IdRevsList, Results)}. + +find_missing([], []) -> + []; +find_missing([{Id, Revs}|RestIdRevs], [{ok, FullInfo} | RestLookupInfo]) -> + case couch_key_tree:find_missing(FullInfo#full_doc_info.rev_tree, Revs) of + [] -> + find_missing(RestIdRevs, RestLookupInfo); + MissingRevs -> + #doc_info{revs=RevsInfo} = couch_doc:to_doc_info(FullInfo), + LeafRevs = [Rev || #rev_info{rev=Rev} <- RevsInfo], + % Find the revs that are possible parents of this rev + PossibleAncestors = + lists:foldl(fun({LeafPos, LeafRevId}, Acc) -> + % this leaf is a "possible ancenstor" of the missing + % revs if this LeafPos lessthan any of the missing revs + case lists:any(fun({MissingPos, _}) -> + LeafPos < MissingPos end, MissingRevs) of + true -> + [{LeafPos, LeafRevId} | Acc]; + false -> + Acc + end + end, [], LeafRevs), + [{Id, MissingRevs, PossibleAncestors} | + find_missing(RestIdRevs, RestLookupInfo)] + end; +find_missing([{Id, Revs}|RestIdRevs], [not_found | RestLookupInfo]) -> + [{Id, Revs, []} | find_missing(RestIdRevs, RestLookupInfo)]. + +get_doc_info(Db, Id) -> + case get_full_doc_info(Db, Id) of + {ok, DocInfo} -> + {ok, couch_doc:to_doc_info(DocInfo)}; + Else -> + Else + end. + +% returns {ok, DocInfo} or not_found +get_full_doc_info(Db, Id) -> + [Result] = get_full_doc_infos(Db, [Id]), + Result. + +get_full_doc_infos(Db, Ids) -> + couch_btree:lookup(Db#db.id_tree, Ids). + +increment_update_seq(#db{main_pid=Pid}) -> + gen_server:call(Pid, increment_update_seq). + +purge_docs(#db{main_pid=Pid}, IdsRevs) -> + gen_server:call(Pid, {purge_docs, IdsRevs}). + +get_committed_update_seq(#db{committed_update_seq=Seq}) -> + Seq. + +get_update_seq(#db{update_seq=Seq})-> + Seq. + +get_purge_seq(#db{header=#db_header{purge_seq=PurgeSeq}})-> + PurgeSeq. + +get_last_purged(#db{header=#db_header{purged_docs=nil}}) -> + {ok, []}; +get_last_purged(#db{fd=Fd, header=#db_header{purged_docs=PurgedPointer}}) -> + couch_file:pread_term(Fd, PurgedPointer). + +get_doc_count(Db) -> + {ok, {Count, _, _}} = couch_btree:full_reduce(Db#db.id_tree), + {ok, Count}. + +get_db_info(Db) -> + #db{fd=Fd, + header=#db_header{disk_version=DiskVersion}, + compactor_pid=Compactor, + update_seq=SeqNum, + name=Name, + id_tree=FullDocBtree, + instance_start_time=StartTime, + committed_update_seq=CommittedUpdateSeq} = Db, + {ok, Size} = couch_file:bytes(Fd), + {ok, {Count, DelCount, DataSize}} = couch_btree:full_reduce(FullDocBtree), + InfoList = [ + {db_name, Name}, + {doc_count, Count}, + {doc_del_count, DelCount}, + {update_seq, SeqNum}, + {purge_seq, couch_db:get_purge_seq(Db)}, + {compact_running, Compactor/=nil}, + {disk_size, Size}, + {other, {[{data_size, DataSize}]}}, + {instance_start_time, StartTime}, + {disk_format_version, DiskVersion}, + {committed_update_seq, CommittedUpdateSeq} + ], + {ok, InfoList}. + +get_design_docs(#db{name = <<"shards/", _/binary>> = ShardName}) -> + {_, Ref} = spawn_monitor(fun() -> + exit(fabric:design_docs(mem3:dbname(ShardName))) + end), + receive {'DOWN', Ref, _, _, Response} -> + Response + end; +get_design_docs(#db{id_tree=Btree}=Db) -> + {ok, _, Docs} = couch_view:fold( + #view{btree=Btree}, + fun(#full_doc_info{deleted = true}, _Reds, AccDocs) -> + {ok, AccDocs}; + (#full_doc_info{id= <<"_design/",_/binary>>}=FullDocInfo, _Reds, AccDocs) -> + {ok, Doc} = couch_db:open_doc_int(Db, FullDocInfo, []), + {ok, [Doc | AccDocs]}; + (_, _Reds, AccDocs) -> + {stop, AccDocs} + end, + [], [{start_key, <<"_design/">>}, {end_key_gt, <<"_design0">>}]), + {ok, Docs}. + +check_is_admin(#db{user_ctx=#user_ctx{name=Name,roles=Roles}}=Db) -> + {Admins} = get_admins(Db), + AdminRoles = [<<"_admin">> | couch_util:get_value(<<"roles">>, Admins, [])], + AdminNames = couch_util:get_value(<<"names">>, Admins,[]), + case AdminRoles -- Roles of + AdminRoles -> % same list, not an admin role + case AdminNames -- [Name] of + AdminNames -> % same names, not an admin + throw({unauthorized, <<"You are not a db or server admin.">>}); + _ -> + ok + end; + _ -> + ok + end. + +check_is_reader(#db{user_ctx=#user_ctx{name=Name,roles=Roles}=UserCtx}=Db) -> + case (catch check_is_admin(Db)) of + ok -> ok; + _ -> + {Readers} = get_readers(Db), + ReaderRoles = couch_util:get_value(<<"roles">>, Readers,[]), + WithAdminRoles = [<<"_admin">> | ReaderRoles], + ReaderNames = couch_util:get_value(<<"names">>, Readers,[]), + case ReaderRoles ++ ReaderNames of + [] -> ok; % no readers == public access + _Else -> + case WithAdminRoles -- Roles of + WithAdminRoles -> % same list, not an reader role + case ReaderNames -- [Name] of + ReaderNames -> % same names, not a reader + ?LOG_DEBUG("Not a reader: UserCtx ~p vs Names ~p Roles ~p",[UserCtx, ReaderNames, WithAdminRoles]), + throw({unauthorized, <<"You are not authorized to access this db.">>}); + _ -> + ok + end; + _ -> + ok + end + end + end. + +get_admins(#db{security=SecProps}) -> + couch_util:get_value(<<"admins">>, SecProps, {[]}). + +get_readers(#db{security=SecProps}) -> + couch_util:get_value(<<"readers">>, SecProps, {[]}). + +get_security(#db{security=SecProps}) -> + {SecProps}. + +set_security(#db{main_pid=Pid}=Db, {NewSecProps}) when is_list(NewSecProps) -> + check_is_admin(Db), + ok = validate_security_object(NewSecProps), + ok = gen_server:call(Pid, {set_security, NewSecProps}, infinity), + {ok, _} = ensure_full_commit(Db), + ok; +set_security(_, _) -> + throw(bad_request). + +validate_security_object(SecProps) -> + Admins = couch_util:get_value(<<"admins">>, SecProps, {[]}), + Readers = couch_util:get_value(<<"readers">>, SecProps, {[]}), + ok = validate_names_and_roles(Admins), + ok = validate_names_and_roles(Readers), + ok. + +% validate user input +validate_names_and_roles({Props}) when is_list(Props) -> + case couch_util:get_value(<<"names">>,Props,[]) of + Ns when is_list(Ns) -> + [throw("names must be a JSON list of strings") ||N <- Ns, not is_binary(N)], + Ns; + _ -> throw("names must be a JSON list of strings") + end, + case couch_util:get_value(<<"roles">>,Props,[]) of + Rs when is_list(Rs) -> + [throw("roles must be a JSON list of strings") ||R <- Rs, not is_binary(R)], + Rs; + _ -> throw("roles must be a JSON list of strings") + end, + ok. + +get_revs_limit(#db{revs_limit=Limit}) -> + Limit. + +set_revs_limit(#db{main_pid=Pid}=Db, Limit) when Limit > 0 -> + check_is_admin(Db), + gen_server:call(Pid, {set_revs_limit, Limit}, infinity); +set_revs_limit(_Db, _Limit) -> + throw(invalid_revs_limit). + +name(#db{name=Name}) -> + Name. + +update_doc(Db, Doc, Options) -> + update_doc(Db, Doc, Options, interactive_edit). + +update_doc(Db, Doc, Options, UpdateType) -> + case update_docs(Db, [Doc], Options, UpdateType) of + {ok, [{ok, NewRev}]} -> + {ok, NewRev}; + {ok, [{{_Id, _Rev}, Error}]} -> + throw(Error); + {ok, [Error]} -> + throw(Error); + {ok, []} -> + % replication success + {Pos, [RevId | _]} = Doc#doc.revs, + {ok, {Pos, RevId}} + end. + +update_docs(Db, Docs) -> + update_docs(Db, Docs, []). + +% group_alike_docs groups the sorted documents into sublist buckets, by id. +% ([DocA, DocA, DocB, DocC], []) -> [[DocA, DocA], [DocB], [DocC]] +group_alike_docs(Docs) -> + Sorted = lists:sort(fun(#doc{id=A},#doc{id=B})-> A < B end, Docs), + group_alike_docs(Sorted, []). + +group_alike_docs([], Buckets) -> + lists:reverse(Buckets); +group_alike_docs([Doc|Rest], []) -> + group_alike_docs(Rest, [[Doc]]); +group_alike_docs([Doc|Rest], [Bucket|RestBuckets]) -> + [#doc{id=BucketId}|_] = Bucket, + case Doc#doc.id == BucketId of + true -> + % add to existing bucket + group_alike_docs(Rest, [[Doc|Bucket]|RestBuckets]); + false -> + % add to new bucket + group_alike_docs(Rest, [[Doc]|[Bucket|RestBuckets]]) + end. + +validate_doc_update(#db{}=Db, #doc{id= <<"_design/",_/binary>>}, _GetDiskDocFun) -> + catch check_is_admin(Db); +validate_doc_update(#db{validate_doc_funs = undefined} = Db, Doc, Fun) -> + ValidationFuns = load_validation_funs(Db), + validate_doc_update(Db#db{validate_doc_funs = ValidationFuns}, Doc, Fun); +validate_doc_update(#db{validate_doc_funs=[]}, _Doc, _GetDiskDocFun) -> + ok; +validate_doc_update(_Db, #doc{id= <<"_local/",_/binary>>}, _GetDiskDocFun) -> + ok; +validate_doc_update(Db, Doc, GetDiskDocFun) -> + DiskDoc = GetDiskDocFun(), + JsonCtx = couch_util:json_user_ctx(Db), + SecObj = get_security(Db), + try [case Fun(Doc, DiskDoc, JsonCtx, SecObj) of + ok -> ok; + Error -> throw(Error) + end || Fun <- Db#db.validate_doc_funs], + ok + catch + throw:Error -> + Error + end. + +% to be safe, spawn a middleman here +load_validation_funs(#db{main_pid = Pid} = Db) -> + {_, Ref} = spawn_monitor(fun() -> + {ok, DesignDocs} = get_design_docs(Db), + exit({ok, lists:flatmap(fun(DesignDoc) -> + case couch_doc:get_validate_doc_fun(DesignDoc) of + nil -> + []; + Fun -> + [Fun] + end + end, DesignDocs)}) + end), + receive + {'DOWN', Ref, _, _, {ok, Funs}} -> + gen_server:cast(Pid, {load_validation_funs, Funs}), + Funs; + {'DOWN', Ref, _, _, Reason} -> + ?LOG_ERROR("could not load validation funs ~p", [Reason]), + throw(internal_server_error) + end. + +prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc, + OldFullDocInfo, LeafRevsDict, AllowConflict) -> + case Revs of + [PrevRev|_] -> + case dict:find({RevStart, PrevRev}, LeafRevsDict) of + {ok, {Deleted, DiskSp, DiskRevs}} -> + case couch_doc:has_stubs(Doc) of + true -> + DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs), + Doc2 = couch_doc:merge_stubs(Doc, DiskDoc), + {validate_doc_update(Db, Doc2, fun() -> DiskDoc end), Doc2}; + false -> + LoadDiskDoc = fun() -> make_doc(Db,Id,Deleted,DiskSp,DiskRevs) end, + {validate_doc_update(Db, Doc, LoadDiskDoc), Doc} + end; + error when AllowConflict -> + couch_doc:merge_stubs(Doc, #doc{}), % will generate error if + % there are stubs + {validate_doc_update(Db, Doc, fun() -> nil end), Doc}; + error -> + {conflict, Doc} + end; + [] -> + % new doc, and we have existing revs. + % reuse existing deleted doc + if OldFullDocInfo#full_doc_info.deleted orelse AllowConflict -> + {validate_doc_update(Db, Doc, fun() -> nil end), Doc}; + true -> + {conflict, Doc} + end + end. + + + +prep_and_validate_updates(_Db, [], [], _AllowConflict, AccPrepped, + AccFatalErrors) -> + {AccPrepped, AccFatalErrors}; +prep_and_validate_updates(Db, [DocBucket|RestBuckets], [not_found|RestLookups], + AllowConflict, AccPrepped, AccErrors) -> + [#doc{id=Id}|_]=DocBucket, + % no existing revs are known, + {PreppedBucket, AccErrors3} = lists:foldl( + fun(#doc{revs=Revs}=Doc, {AccBucket, AccErrors2}) -> + case couch_doc:has_stubs(Doc) of + true -> + couch_doc:merge_stubs(Doc, #doc{}); % will throw exception + false -> ok + end, + case Revs of + {0, []} -> + case validate_doc_update(Db, Doc, fun() -> nil end) of + ok -> + {[Doc | AccBucket], AccErrors2}; + Error -> + {AccBucket, [{{Id, {0, []}}, Error} | AccErrors2]} + end; + _ -> + % old revs specified but none exist, a conflict + {AccBucket, [{{Id, Revs}, conflict} | AccErrors2]} + end + end, + {[], AccErrors}, DocBucket), + + prep_and_validate_updates(Db, RestBuckets, RestLookups, AllowConflict, + [PreppedBucket | AccPrepped], AccErrors3); +prep_and_validate_updates(Db, [DocBucket|RestBuckets], + [{ok, #full_doc_info{rev_tree=OldRevTree}=OldFullDocInfo}|RestLookups], + AllowConflict, AccPrepped, AccErrors) -> + Leafs = couch_key_tree:get_all_leafs(OldRevTree), + LeafRevsDict = dict:from_list([{{Start, RevId}, {Del, Ptr, Revs}} || + {#leaf{deleted=Del, ptr=Ptr}, {Start, [RevId|_]}=Revs} <- Leafs]), + {PreppedBucket, AccErrors3} = lists:foldl( + fun(Doc, {Docs2Acc, AccErrors2}) -> + case prep_and_validate_update(Db, Doc, OldFullDocInfo, + LeafRevsDict, AllowConflict) of + {ok, Doc2} -> + {[Doc2 | Docs2Acc], AccErrors2}; + {Error, #doc{id=Id,revs=Revs}} -> + % Record the error + {Docs2Acc, [{{Id, Revs}, Error} |AccErrors2]} + end + end, + {[], AccErrors}, DocBucket), + prep_and_validate_updates(Db, RestBuckets, RestLookups, AllowConflict, + [PreppedBucket | AccPrepped], AccErrors3). + + +update_docs(Db, Docs, Options) -> + update_docs(Db, Docs, Options, interactive_edit). + + +prep_and_validate_replicated_updates(_Db, [], [], AccPrepped, AccErrors) -> + Errors2 = [{{Id, {Pos, Rev}}, Error} || + {#doc{id=Id,revs={Pos,[Rev|_]}}, Error} <- AccErrors], + {lists:reverse(AccPrepped), lists:reverse(Errors2)}; +prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldInfo], AccPrepped, AccErrors) -> + case OldInfo of + not_found -> + {ValidatedBucket, AccErrors3} = lists:foldl( + fun(Doc, {AccPrepped2, AccErrors2}) -> + case couch_doc:has_stubs(Doc) of + true -> + couch_doc:merge_stubs(Doc, #doc{}); % will throw exception + false -> ok + end, + case validate_doc_update(Db, Doc, fun() -> nil end) of + ok -> + {[Doc | AccPrepped2], AccErrors2}; + Error -> + {AccPrepped2, [{Doc, Error} | AccErrors2]} + end + end, + {[], AccErrors}, Bucket), + prep_and_validate_replicated_updates(Db, RestBuckets, RestOldInfo, [ValidatedBucket | AccPrepped], AccErrors3); + {ok, #full_doc_info{rev_tree=OldTree}} -> + NewRevTree = lists:foldl( + fun(NewDoc, AccTree) -> + {NewTree, _} = couch_key_tree:merge(AccTree, + couch_db:doc_to_tree(NewDoc), Db#db.revs_limit), + NewTree + end, + OldTree, Bucket), + Leafs = couch_key_tree:get_all_leafs_full(NewRevTree), + LeafRevsFullDict = dict:from_list( [{{Start, RevId}, FullPath} || {Start, [{RevId, _}|_]}=FullPath <- Leafs]), + {ValidatedBucket, AccErrors3} = + lists:foldl( + fun(#doc{id=Id,revs={Pos, [RevId|_]}}=Doc, {AccValidated, AccErrors2}) -> + case dict:find({Pos, RevId}, LeafRevsFullDict) of + {ok, {Start, Path}} -> + % our unflushed doc is a leaf node. Go back on the path + % to find the previous rev that's on disk. + + LoadPrevRevFun = fun() -> + make_first_doc_on_disk(Db,Id,Start-1, tl(Path)) + end, + + case couch_doc:has_stubs(Doc) of + true -> + DiskDoc = LoadPrevRevFun(), + Doc2 = couch_doc:merge_stubs(Doc, DiskDoc), + GetDiskDocFun = fun() -> DiskDoc end; + false -> + Doc2 = Doc, + GetDiskDocFun = LoadPrevRevFun + end, + + case validate_doc_update(Db, Doc2, GetDiskDocFun) of + ok -> + {[Doc2 | AccValidated], AccErrors2}; + Error -> + {AccValidated, [{Doc, Error} | AccErrors2]} + end; + _ -> + % this doc isn't a leaf or already exists in the tree. + % ignore but consider it a success. + {AccValidated, AccErrors2} + end + end, + {[], AccErrors}, Bucket), + prep_and_validate_replicated_updates(Db, RestBuckets, RestOldInfo, + [ValidatedBucket | AccPrepped], AccErrors3) + end. + + + +new_revid(#doc{body=Body,revs={OldStart,OldRevs}, + atts=Atts,deleted=Deleted}) -> + case [{N, T, M} || #att{name=N,type=T,md5=M} <- Atts, M =/= <<>>] of + Atts2 when length(Atts) =/= length(Atts2) -> + % We must have old style non-md5 attachments + ?l2b(integer_to_list(couch_util:rand32())); + Atts2 -> + OldRev = case OldRevs of [] -> 0; [OldRev0|_] -> OldRev0 end, + couch_util:md5(term_to_binary([Deleted, OldStart, OldRev, Body, Atts2])) + end. + +new_revs([], OutBuckets, IdRevsAcc) -> + {lists:reverse(OutBuckets), IdRevsAcc}; +new_revs([Bucket|RestBuckets], OutBuckets, IdRevsAcc) -> + {NewBucket, IdRevsAcc3} = lists:mapfoldl( + fun(#doc{id=Id,revs={Start, RevIds}}=Doc, IdRevsAcc2)-> + NewRevId = new_revid(Doc), + {Doc#doc{revs={Start+1, [NewRevId | RevIds]}}, + [{{Id, {Start, RevIds}}, {ok, {Start+1, NewRevId}}} | IdRevsAcc2]} + end, IdRevsAcc, Bucket), + new_revs(RestBuckets, [NewBucket|OutBuckets], IdRevsAcc3). + +check_dup_atts(#doc{atts=Atts}=Doc) -> + Atts2 = lists:sort(fun(#att{name=N1}, #att{name=N2}) -> N1 < N2 end, Atts), + check_dup_atts2(Atts2), + Doc. + +check_dup_atts2([#att{name=N}, #att{name=N} | _]) -> + throw({bad_request, <<"Duplicate attachments">>}); +check_dup_atts2([_ | Rest]) -> + check_dup_atts2(Rest); +check_dup_atts2(_) -> + ok. + + +update_docs(Db, Docs, Options, replicated_changes) -> + increment_stat(Db, {couchdb, database_writes}), + DocBuckets = group_alike_docs(Docs), + + case (Db#db.validate_doc_funs /= []) orelse + lists:any( + fun(#doc{id= <<?DESIGN_DOC_PREFIX, _/binary>>}) -> true; + (#doc{atts=Atts}) -> + Atts /= [] + end, Docs) of + true -> + Ids = [Id || [#doc{id=Id}|_] <- DocBuckets], + ExistingDocs = get_full_doc_infos(Db, Ids), + + {DocBuckets2, DocErrors} = + prep_and_validate_replicated_updates(Db, DocBuckets, ExistingDocs, [], []), + DocBuckets3 = [Bucket || [_|_]=Bucket <- DocBuckets2]; % remove empty buckets + false -> + DocErrors = [], + DocBuckets3 = DocBuckets + end, + DocBuckets4 = [[doc_flush_atts(check_dup_atts(Doc), Db#db.fd) + || Doc <- Bucket] || Bucket <- DocBuckets3], + {ok, []} = write_and_commit(Db, DocBuckets4, [], [merge_conflicts | Options]), + {ok, DocErrors}; + +update_docs(Db, Docs, Options, interactive_edit) -> + increment_stat(Db, {couchdb, database_writes}), + AllOrNothing = lists:member(all_or_nothing, Options), + % go ahead and generate the new revision ids for the documents. + % separate out the NonRep documents from the rest of the documents + {Docs2, NonRepDocs} = lists:foldl( + fun(#doc{id=Id}=Doc, {DocsAcc, NonRepDocsAcc}) -> + case Id of + <<?LOCAL_DOC_PREFIX, _/binary>> -> + {DocsAcc, [Doc | NonRepDocsAcc]}; + Id-> + {[Doc | DocsAcc], NonRepDocsAcc} + end + end, {[], []}, Docs), + + DocBuckets = group_alike_docs(Docs2), + + case (Db#db.validate_doc_funs /= []) orelse + lists:any( + fun(#doc{id= <<?DESIGN_DOC_PREFIX, _/binary>>}) -> + true; + (#doc{atts=Atts}) -> + Atts /= [] + end, Docs2) of + true -> + % lookup the doc by id and get the most recent + Ids = [Id || [#doc{id=Id}|_] <- DocBuckets], + ExistingDocInfos = get_full_doc_infos(Db, Ids), + + {DocBucketsPrepped, PreCommitFailures} = prep_and_validate_updates(Db, + DocBuckets, ExistingDocInfos, AllOrNothing, [], []), + + % strip out any empty buckets + DocBuckets2 = [Bucket || [_|_] = Bucket <- DocBucketsPrepped]; + false -> + PreCommitFailures = [], + DocBuckets2 = DocBuckets + end, + + if (AllOrNothing) and (PreCommitFailures /= []) -> + {aborted, lists:map( + fun({{Id,{Pos, [RevId|_]}}, Error}) -> + {{Id, {Pos, RevId}}, Error}; + ({{Id,{0, []}}, Error}) -> + {{Id, {0, <<>>}}, Error} + end, PreCommitFailures)}; + true -> + Options2 = if AllOrNothing -> [merge_conflicts]; + true -> [] end ++ Options, + DocBuckets3 = [[ + doc_flush_atts(set_new_att_revpos( + check_dup_atts(Doc)), Db#db.fd) + || Doc <- B] || B <- DocBuckets2], + {DocBuckets4, IdRevs} = new_revs(DocBuckets3, [], []), + + {ok, CommitResults} = write_and_commit(Db, DocBuckets4, NonRepDocs, Options2), + + ResultsDict = dict:from_list(IdRevs ++ CommitResults ++ PreCommitFailures), + {ok, lists:map( + fun(#doc{id=Id,revs={Pos, RevIds}}) -> + {ok, Result} = dict:find({Id, {Pos, RevIds}}, ResultsDict), + Result + end, Docs)} + end. + +% Returns the first available document on disk. Input list is a full rev path +% for the doc. +make_first_doc_on_disk(_Db, _Id, _Pos, []) -> + nil; +make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #doc{}} | RestPath]) -> + make_first_doc_on_disk(Db, Id, Pos-1, RestPath); +make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) -> + make_first_doc_on_disk(Db, Id, Pos - 1, RestPath); +make_first_doc_on_disk(Db, Id, Pos, [{_, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) -> + Revs = [Rev || {Rev, _} <- DocPath], + make_doc(Db, Id, IsDel, Sp, {Pos, Revs}). + +set_commit_option(Options) -> + CommitSettings = { + [true || O <- Options, O==full_commit orelse O==delay_commit], + couch_config:get("couchdb", "delayed_commits", "false") + }, + case CommitSettings of + {[true], _} -> + Options; % user requested explicit commit setting, do not change it + {_, "true"} -> + Options; % delayed commits are enabled, do nothing + {_, "false"} -> + [full_commit|Options]; + {_, Else} -> + ?LOG_ERROR("[couchdb] delayed_commits setting must be true/false, not ~p", + [Else]), + [full_commit|Options] + end. + +collect_results(Pid, MRef, ResultsAcc) -> + receive + {result, Pid, Result} -> + collect_results(Pid, MRef, [Result | ResultsAcc]); + {done, Pid} -> + {ok, ResultsAcc}; + {retry, Pid} -> + retry; + {'DOWN', MRef, _, _, Reason} -> + exit(Reason) + end. + +write_and_commit(#db{main_pid=Pid, user_ctx=Ctx}=Db, DocBuckets, + NonRepDocs, Options0) -> + Options = set_commit_option(Options0), + MergeConflicts = lists:member(merge_conflicts, Options), + FullCommit = lists:member(full_commit, Options), + MRef = erlang:monitor(process, Pid), + try + Pid ! {update_docs, self(), DocBuckets, NonRepDocs, MergeConflicts, FullCommit}, + case collect_results(Pid, MRef, []) of + {ok, Results} -> {ok, Results}; + retry -> + % This can happen if the db file we wrote to was swapped out by + % compaction. Retry by reopening the db and writing to the current file + {ok, Db2} = open(Db#db.name, [{user_ctx, Ctx}]), + DocBuckets2 = [[doc_flush_atts(Doc, Db2#db.fd) || Doc <- Bucket] || Bucket <- DocBuckets], + % We only retry once + close(Db2), + Pid ! {update_docs, self(), DocBuckets2, NonRepDocs, MergeConflicts, FullCommit}, + case collect_results(Pid, MRef, []) of + {ok, Results} -> {ok, Results}; + retry -> throw({update_error, compaction_retry}) + end + end + after + erlang:demonitor(MRef, [flush]) + end. + + +set_new_att_revpos(#doc{revs={RevPos,_Revs},atts=Atts}=Doc) -> + Doc#doc{atts= lists:map(fun(#att{data={_Fd,_Sp}}=Att) -> + % already commited to disk, do not set new rev + Att; + (Att) -> + Att#att{revpos=RevPos+1} + end, Atts)}. + + +doc_flush_atts(Doc, Fd) -> + Doc#doc{atts=[flush_att(Fd, Att) || Att <- Doc#doc.atts]}. + +check_md5(_NewSig, <<>>) -> ok; +check_md5(Sig, Sig) -> ok; +check_md5(_, _) -> throw(md5_mismatch). + +flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd -> + % already written to our file, nothing to write + Att; + +flush_att(Fd, #att{data={OtherFd,StreamPointer}, md5=InMd5, + disk_len=InDiskLen} = Att) -> + {NewStreamData, Len, _IdentityLen, Md5, IdentityMd5} = + couch_stream:copy_to_new_stream(OtherFd, StreamPointer, Fd), + check_md5(IdentityMd5, InMd5), + Att#att{data={Fd, NewStreamData}, md5=Md5, att_len=Len, disk_len=InDiskLen}; + +flush_att(Fd, #att{data=Data}=Att) when is_binary(Data) -> + with_stream(Fd, Att, fun(OutputStream) -> + couch_stream:write(OutputStream, Data) + end); + +flush_att(Fd, #att{data=Fun,att_len=undefined}=Att) when is_function(Fun) -> + with_stream(Fd, Att, fun(OutputStream) -> + % Fun(MaxChunkSize, WriterFun) must call WriterFun + % once for each chunk of the attachment, + Fun(4096, + % WriterFun({Length, Binary}, State) + % WriterFun({0, _Footers}, State) + % Called with Length == 0 on the last time. + % WriterFun returns NewState. + fun({0, Footers}, _) -> + F = mochiweb_headers:from_binary(Footers), + case mochiweb_headers:get_value("Content-MD5", F) of + undefined -> + ok; + Md5 -> + {md5, base64:decode(Md5)} + end; + ({_Length, Chunk}, _) -> + couch_stream:write(OutputStream, Chunk) + end, ok) + end); + +flush_att(Fd, #att{data=Fun,att_len=AttLen}=Att) when is_function(Fun) -> + with_stream(Fd, Att, fun(OutputStream) -> + write_streamed_attachment(OutputStream, Fun, AttLen) + end). + +% From RFC 2616 3.6.1 - Chunked Transfer Coding +% +% In other words, the origin server is willing to accept +% the possibility that the trailer fields might be silently +% discarded along the path to the client. +% +% I take this to mean that if "Trailers: Content-MD5\r\n" +% is present in the request, but there is no Content-MD5 +% trailer, we're free to ignore this inconsistency and +% pretend that no Content-MD5 exists. +with_stream(Fd, #att{md5=InMd5,type=Type,encoding=Enc}=Att, Fun) -> + {ok, OutputStream} = case (Enc =:= identity) andalso + couch_util:compressible_att_type(Type) of + true -> + CompLevel = list_to_integer( + couch_config:get("attachments", "compression_level", "0") + ), + couch_stream:open(Fd, gzip, [{compression_level, CompLevel}]); + _ -> + couch_stream:open(Fd) + end, + ReqMd5 = case Fun(OutputStream) of + {md5, FooterMd5} -> + case InMd5 of + md5_in_footer -> FooterMd5; + _ -> InMd5 + end; + _ -> + InMd5 + end, + {StreamInfo, Len, IdentityLen, Md5, IdentityMd5} = + couch_stream:close(OutputStream), + check_md5(IdentityMd5, ReqMd5), + {AttLen, DiskLen, NewEnc} = case Enc of + identity -> + case {Md5, IdentityMd5} of + {Same, Same} -> + {Len, IdentityLen, identity}; + _ -> + {Len, IdentityLen, gzip} + end; + gzip -> + case {Att#att.att_len, Att#att.disk_len} of + {AL, DL} when AL =:= undefined orelse DL =:= undefined -> + % Compressed attachment uploaded through the standalone API. + {Len, Len, gzip}; + {AL, DL} -> + % This case is used for efficient push-replication, where a + % compressed attachment is located in the body of multipart + % content-type request. + {AL, DL, gzip} + end + end, + Att#att{ + data={Fd,StreamInfo}, + att_len=AttLen, + disk_len=DiskLen, + md5=Md5, + encoding=NewEnc + }. + + +write_streamed_attachment(_Stream, _F, 0) -> + ok; +write_streamed_attachment(Stream, F, LenLeft) when LenLeft > 0 -> + Bin = read_next_chunk(F, LenLeft), + ok = couch_stream:write(Stream, Bin), + write_streamed_attachment(Stream, F, LenLeft - size(Bin)). + +read_next_chunk(F, _) when is_function(F, 0) -> + F(); +read_next_chunk(F, LenLeft) when is_function(F, 1) -> + F(lists:min([LenLeft, 16#2000])). + +enum_docs_since_reduce_to_count(Reds) -> + couch_btree:final_reduce( + fun couch_db_updater:btree_by_seq_reduce/2, Reds). + +enum_docs_reduce_to_count(Reds) -> + {Count, _, _} = couch_btree:final_reduce( + fun couch_db_updater:btree_by_id_reduce/2, Reds), + Count. + +changes_since(Db, StartSeq, Fun, Acc) -> + changes_since(Db, StartSeq, Fun, [], Acc). + +changes_since(Db, StartSeq, Fun, Options, Acc) -> + Wrapper = fun(FullDocInfo, _Offset, Acc2) -> + case FullDocInfo of + #full_doc_info{} -> + DocInfo = couch_doc:to_doc_info(FullDocInfo); + #doc_info{} -> + DocInfo = FullDocInfo + end, + Fun(DocInfo, Acc2) + end, + {ok, _LastReduction, AccOut} = couch_btree:fold(Db#db.seq_tree, Wrapper, + Acc, [{start_key, couch_util:to_integer(StartSeq) + 1} | Options]), + {ok, AccOut}. + +count_changes_since(Db, SinceSeq) -> + {ok, Changes} = + couch_btree:fold_reduce(Db#db.seq_tree, + fun(_SeqStart, PartialReds, 0) -> + {ok, couch_btree:final_reduce(Db#db.seq_tree, PartialReds)} + end, + 0, [{start_key, SinceSeq + 1}]), + Changes. + +enum_docs_since(Db, SinceSeq, InFun, Acc, Options) -> + {ok, LastReduction, AccOut} = couch_btree:fold(Db#db.seq_tree, InFun, Acc, [{start_key, SinceSeq + 1} | Options]), + {ok, enum_docs_since_reduce_to_count(LastReduction), AccOut}. + +enum_docs(Db, InFun, InAcc, Options) -> + {ok, LastReduce, OutAcc} = couch_view:fold( + #view{btree=Db#db.id_tree}, InFun, InAcc, Options), + {ok, enum_docs_reduce_to_count(LastReduce), OutAcc}. + +%%% Internal function %%% +open_doc_revs_int(Db, IdRevs, Options) -> + Ids = [Id || {Id, _Revs} <- IdRevs], + LookupResults = get_full_doc_infos(Db, Ids), + lists:zipwith( + fun({Id, Revs}, Lookup) -> + case Lookup of + {ok, #full_doc_info{rev_tree=RevTree}} -> + {FoundRevs, MissingRevs} = + case Revs of + all -> + {couch_key_tree:get_all_leafs(RevTree), []}; + _ -> + case lists:member(latest, Options) of + true -> + couch_key_tree:get_key_leafs(RevTree, Revs); + false -> + couch_key_tree:get(RevTree, Revs) + end + end, + FoundResults = + lists:map(fun({Value, {Pos, [Rev|_]}=FoundRevPath}) -> + case Value of + ?REV_MISSING -> + % we have the rev in our list but know nothing about it + {{not_found, missing}, {Pos, Rev}}; + #leaf{deleted=IsDeleted, ptr=SummaryPtr} -> + {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)} + end + end, FoundRevs), + Results = FoundResults ++ [{{not_found, missing}, MissingRev} || MissingRev <- MissingRevs], + {ok, Results}; + not_found when Revs == all -> + {ok, []}; + not_found -> + {ok, [{{not_found, missing}, Rev} || Rev <- Revs]} + end + end, + IdRevs, LookupResults). + +open_doc_int(Db, <<?LOCAL_DOC_PREFIX, _/binary>> = Id, _Options) -> + case couch_btree:lookup(Db#db.local_tree, [Id]) of + [{ok, {_, {Rev, BodyData}}}] -> + {ok, #doc{id=Id, revs={0, [list_to_binary(integer_to_list(Rev))]}, body=BodyData}}; + [not_found] -> + {not_found, missing} + end; +open_doc_int(Db, #doc_info{id=Id,revs=[RevInfo|_]}=DocInfo, Options) -> + #rev_info{deleted=IsDeleted,rev={Pos,RevId},body_sp=Bp} = RevInfo, + Doc = make_doc(Db, Id, IsDeleted, Bp, {Pos,[RevId]}), + {ok, Doc#doc{meta=doc_meta_info(DocInfo, [], Options)}}; +open_doc_int(Db, #full_doc_info{id=Id,rev_tree=RevTree}=FullDocInfo, Options) -> + #doc_info{revs=[#rev_info{deleted=IsDeleted,rev=Rev,body_sp=Bp}|_]} = + DocInfo = couch_doc:to_doc_info(FullDocInfo), + {[{_, RevPath}], []} = couch_key_tree:get(RevTree, [Rev]), + Doc = make_doc(Db, Id, IsDeleted, Bp, RevPath), + {ok, Doc#doc{meta=doc_meta_info(DocInfo, RevTree, Options)}}; +open_doc_int(Db, Id, Options) -> + case get_full_doc_info(Db, Id) of + {ok, FullDocInfo} -> + open_doc_int(Db, FullDocInfo, Options); + not_found -> + {not_found, missing} + end. + +doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTree, Options) -> + case lists:member(revs_info, Options) of + false -> []; + true -> + {[{Pos, RevPath}],[]} = + couch_key_tree:get_full_key_paths(RevTree, [Rev]), + + [{revs_info, Pos, lists:map( + fun({Rev1, #leaf{deleted=true}}) -> + {Rev1, deleted}; + ({Rev1, #leaf{deleted=false}}) -> + {Rev1, available}; + ({Rev1, ?REV_MISSING}) -> + {Rev1, missing} + end, RevPath)}] + end ++ + case lists:member(conflicts, Options) of + false -> []; + true -> + case [Rev1 || #rev_info{rev=Rev1,deleted=false} <- RestInfo] of + [] -> []; + ConflictRevs -> [{conflicts, ConflictRevs}] + end + end ++ + case lists:member(deleted_conflicts, Options) of + false -> []; + true -> + case [Rev1 || #rev_info{rev=Rev1,deleted=true} <- RestInfo] of + [] -> []; + DelConflictRevs -> [{deleted_conflicts, DelConflictRevs}] + end + end ++ + case lists:member(local_seq, Options) of + false -> []; + true -> [{local_seq, Seq}] + end. + +read_doc(#db{fd=Fd}, OldStreamPointer) when is_tuple(OldStreamPointer) -> + % 09 UPGRADE CODE + couch_stream:old_read_term(Fd, OldStreamPointer); +read_doc(#db{fd=Fd}, Pos) -> + couch_file:pread_term(Fd, Pos). + + +doc_to_tree(#doc{revs={Start, RevIds}}=Doc) -> + [Tree] = doc_to_tree_simple(Doc, lists:reverse(RevIds)), + {Start - length(RevIds) + 1, Tree}. + + +doc_to_tree_simple(Doc, [RevId]) -> + [{RevId, Doc, []}]; +doc_to_tree_simple(Doc, [RevId | Rest]) -> + [{RevId, ?REV_MISSING, doc_to_tree_simple(Doc, Rest)}]. + + +make_doc(#db{fd=Fd}=Db, Id, Deleted, Bp, RevisionPath) -> + {BodyData, Atts} = + case Bp of + nil -> + {[], []}; + _ -> + {ok, {BodyData0, Atts0}} = read_doc(Db, Bp), + {BodyData0, + lists:map( + fun({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) -> + #att{name=Name, + type=Type, + att_len=AttLen, + disk_len=DiskLen, + md5=Md5, + revpos=RevPos, + data={Fd,Sp}, + encoding= + case Enc of + true -> + % 0110 UPGRADE CODE + gzip; + false -> + % 0110 UPGRADE CODE + identity; + _ -> + Enc + end + }; + ({Name,Type,Sp,AttLen,RevPos,Md5}) -> + #att{name=Name, + type=Type, + att_len=AttLen, + disk_len=AttLen, + md5=Md5, + revpos=RevPos, + data={Fd,Sp}}; + ({Name,{Type,Sp,AttLen}}) -> + #att{name=Name, + type=Type, + att_len=AttLen, + disk_len=AttLen, + md5= <<>>, + revpos=0, + data={Fd,Sp}} + end, Atts0)} + end, + #doc{ + id = Id, + revs = RevisionPath, + body = BodyData, + atts = Atts, + deleted = Deleted + }. + + +increment_stat(#db{is_sys_db = true}, _Stat) -> + ok; +increment_stat(#db{}, Stat) -> + couch_stats_collector:increment(Stat). diff --git a/apps/couch/src/couch_db_update_notifier.erl b/apps/couch/src/couch_db_update_notifier.erl new file mode 100644 index 00000000..150eb31b --- /dev/null +++ b/apps/couch/src/couch_db_update_notifier.erl @@ -0,0 +1,73 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% +% This causes an OS process to spawned and it is notified every time a database +% is updated. +% +% The notifications are in the form of a the database name sent as a line of +% text to the OS processes stdout. +% + +-module(couch_db_update_notifier). + +-behaviour(gen_event). + +-export([start_link/1, notify/1]). +-export([init/1, terminate/2, handle_event/2, handle_call/2, handle_info/2, code_change/3,stop/1]). + +-include("couch_db.hrl"). + +start_link(Exec) -> + couch_event_sup:start_link(couch_db_update, {couch_db_update_notifier, make_ref()}, Exec). + +notify(Event) -> + gen_event:notify(couch_db_update, Event). + +stop(Pid) -> + couch_event_sup:stop(Pid). + +init(Exec) when is_list(Exec) -> % an exe + couch_os_process:start_link(Exec, []); +init(Else) -> + {ok, Else}. + +terminate(_Reason, Pid) when is_pid(Pid) -> + couch_os_process:stop(Pid), + ok; +terminate(_Reason, _State) -> + ok. + +handle_event(Event, Fun) when is_function(Fun, 1) -> + Fun(Event), + {ok, Fun}; +handle_event(Event, {Fun, FunAcc}) -> + FunAcc2 = Fun(Event, FunAcc), + {ok, {Fun, FunAcc2}}; +handle_event({EventAtom, DbName}, Pid) -> + Obj = {[{type, list_to_binary(atom_to_list(EventAtom))}, {db, DbName}]}, + ok = couch_os_process:send(Pid, Obj), + {ok, Pid}. + +handle_call(_Request, State) -> + {reply, ok, State}. + +handle_info({'EXIT', Pid, Reason}, Pid) -> + ?LOG_ERROR("Update notification process ~p died: ~p", [Pid, Reason]), + remove_handler; +handle_info({'EXIT', _, _}, Pid) -> + %% the db_update event manager traps exits and forwards this message to all + %% its handlers. Just ignore as it wasn't our os_process that exited. + {ok, Pid}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/couch/src/couch_db_update_notifier_sup.erl b/apps/couch/src/couch_db_update_notifier_sup.erl new file mode 100644 index 00000000..e7cc16c1 --- /dev/null +++ b/apps/couch/src/couch_db_update_notifier_sup.erl @@ -0,0 +1,61 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% +% This causes an OS process to spawned and it is notified every time a database +% is updated. +% +% The notifications are in the form of a the database name sent as a line of +% text to the OS processes stdout. +% + +-module(couch_db_update_notifier_sup). + +-behaviour(supervisor). + +-export([start_link/0, init/1, config_change/3]). + +start_link() -> + supervisor:start_link({local, couch_db_update_notifier_sup}, + couch_db_update_notifier_sup, []). + +init([]) -> + ok = couch_config:register(fun ?MODULE:config_change/3), + + UpdateNotifierExes = couch_config:get("update_notification"), + + {ok, + {{one_for_one, 10, 3600}, + lists:map(fun({Name, UpdateNotifierExe}) -> + {Name, + {couch_db_update_notifier, start_link, [UpdateNotifierExe]}, + permanent, + 1000, + supervisor, + [couch_db_update_notifier]} + end, UpdateNotifierExes)}}. + +%% @doc when update_notification configuration changes, terminate the process +%% for that notifier and start a new one with the updated config +config_change("update_notification", Id, Exe) -> + ChildSpec = { + Id, + {couch_db_update_notifier, start_link, [Exe]}, + permanent, + 1000, + supervisor, + [couch_db_update_notifier] + }, + supervisor:terminate_child(couch_db_update_notifier_sup, Id), + supervisor:delete_child(couch_db_update_notifier_sup, Id), + supervisor:start_child(couch_db_update_notifier_sup, ChildSpec). + diff --git a/apps/couch/src/couch_db_updater.erl b/apps/couch/src/couch_db_updater.erl new file mode 100644 index 00000000..9bf52ee0 --- /dev/null +++ b/apps/couch/src/couch_db_updater.erl @@ -0,0 +1,962 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_db_updater). +-behaviour(gen_server). + +-export([btree_by_id_split/1,btree_by_id_join/2,btree_by_id_reduce/2]). +-export([btree_by_seq_split/1,btree_by_seq_join/2,btree_by_seq_reduce/2]). +-export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]). + +-include("couch_db.hrl"). + + +init({DbName, Filepath, Fd, Options}) -> + case lists:member(create, Options) of + true -> + % create a new header and writes it to the file + Header = #db_header{}, + ok = couch_file:write_header(Fd, Header), + % delete any old compaction files that might be hanging around + RootDir = couch_config:get("couchdb", "database_dir", "."), + couch_file:delete(RootDir, Filepath ++ ".compact"); + false -> + ok = couch_file:upgrade_old_header(Fd, <<$g, $m, $k, 0>>), % 09 UPGRADE CODE + case couch_file:read_header(Fd) of + {ok, Header} -> + ok; + no_valid_header -> + % create a new header and writes it to the file + Header = #db_header{}, + ok = couch_file:write_header(Fd, Header), + % delete any old compaction files that might be hanging around + file:delete(Filepath ++ ".compact") + end + end, + + Db = init_db(DbName, Filepath, Fd, Header), + couch_stats_collector:track_process_count({couchdb, open_databases}), + % we don't load validation funs here because the fabric query is liable to + % race conditions. Instead see couch_db:validate_doc_update, which loads + % them lazily + {ok, Db#db{main_pid = self(), is_sys_db = lists:member(sys_db, Options)}}. + + +terminate(_Reason, Db) -> + ok = couch_file:close(Db#db.fd), + couch_util:shutdown_sync(Db#db.compactor_pid), + couch_util:shutdown_sync(Db#db.fd), + ok. + +handle_call(start_compact, _From, Db) -> + {noreply, NewDb} = handle_cast(start_compact, Db), + {reply, {ok, NewDb#db.compactor_pid}, NewDb}; + +handle_call(get_db, _From, Db) -> + {reply, {ok, Db}, Db}; +handle_call(full_commit, _From, #db{waiting_delayed_commit=nil}=Db) -> + {reply, ok, Db}; +handle_call(full_commit, _From, Db) -> + {reply, ok, commit_data(Db)}; + +handle_call({full_commit, _}, _From, #db{waiting_delayed_commit=nil}=Db) -> + {reply, ok, Db}; % no data waiting, return ok immediately +handle_call({full_commit, RequiredSeq}, _From, Db) when RequiredSeq =< + Db#db.committed_update_seq -> + {reply, ok, Db}; +handle_call({full_commit, _}, _, Db) -> + {reply, ok, commit_data(Db)}; % commit the data and return ok +handle_call(increment_update_seq, _From, Db) -> + Db2 = commit_data(Db#db{update_seq=Db#db.update_seq+1}), + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + couch_db_update_notifier:notify({updated, Db#db.name}), + {reply, {ok, Db2#db.update_seq}, Db2}; + +handle_call({set_security, NewSec}, _From, Db) -> + {ok, Ptr} = couch_file:append_term(Db#db.fd, NewSec), + Db2 = commit_data(Db#db{security=NewSec, security_ptr=Ptr, + update_seq=Db#db.update_seq+1}), + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + {reply, ok, Db2}; + +handle_call({set_revs_limit, Limit}, _From, Db) -> + Db2 = commit_data(Db#db{revs_limit=Limit, + update_seq=Db#db.update_seq+1}), + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + {reply, ok, Db2}; + +handle_call({purge_docs, _IdRevs}, _From, + #db{compactor_pid=Pid}=Db) when Pid /= nil -> + {reply, {error, purge_during_compaction}, Db}; +handle_call({purge_docs, IdRevs}, _From, Db) -> + #db{ + fd=Fd, + id_tree = DocInfoByIdBTree, + seq_tree = DocInfoBySeqBTree, + update_seq = LastSeq, + header = Header = #db_header{purge_seq=PurgeSeq} + } = Db, + DocLookups = couch_btree:lookup(DocInfoByIdBTree, + [Id || {Id, _Revs} <- IdRevs]), + + NewDocInfos = lists:zipwith( + fun({_Id, Revs}, {ok, #full_doc_info{rev_tree=Tree}=FullDocInfo}) -> + case couch_key_tree:remove_leafs(Tree, Revs) of + {_, []=_RemovedRevs} -> % no change + nil; + {NewTree, RemovedRevs} -> + {FullDocInfo#full_doc_info{rev_tree=NewTree},RemovedRevs} + end; + (_, not_found) -> + nil + end, + IdRevs, DocLookups), + + SeqsToRemove = [Seq + || {#full_doc_info{update_seq=Seq},_} <- NewDocInfos], + + FullDocInfoToUpdate = [FullInfo + || {#full_doc_info{rev_tree=Tree}=FullInfo,_} + <- NewDocInfos, Tree /= []], + + IdRevsPurged = [{Id, Revs} + || {#full_doc_info{id=Id}, Revs} <- NewDocInfos], + + {DocInfoToUpdate, NewSeq} = lists:mapfoldl( + fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) -> + Tree2 = couch_key_tree:map_leafs( + fun(_RevId, {IsDeleted, BodyPointer, _UpdateSeq}) -> + {IsDeleted, BodyPointer, SeqAcc + 1} + end, Tree), + {couch_doc:to_doc_info(FullInfo#full_doc_info{rev_tree=Tree2}), + SeqAcc + 1} + end, LastSeq, FullDocInfoToUpdate), + + IdsToRemove = [Id || {#full_doc_info{id=Id,rev_tree=[]},_} + <- NewDocInfos], + + {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, + DocInfoToUpdate, SeqsToRemove), + {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, + FullDocInfoToUpdate, IdsToRemove), + {ok, Pointer} = couch_file:append_term(Fd, IdRevsPurged), + + Db2 = commit_data( + Db#db{ + id_tree = DocInfoByIdBTree2, + seq_tree = DocInfoBySeqBTree2, + update_seq = NewSeq + 1, + header=Header#db_header{purge_seq=PurgeSeq+1, purged_docs=Pointer}}), + + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + couch_db_update_notifier:notify({updated, Db#db.name}), + {reply, {ok, (Db2#db.header)#db_header.purge_seq, IdRevsPurged}, Db2}. + + +handle_cast({load_validation_funs, ValidationFuns}, Db) -> + Db2 = Db#db{validate_doc_funs = ValidationFuns}, + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + {noreply, Db2}; +handle_cast(start_compact, Db) -> + case Db#db.compactor_pid of + nil -> + ?LOG_INFO("Starting compaction for db \"~s\"", [Db#db.name]), + Pid = spawn_link(fun() -> start_copy_compact(Db) end), + Db2 = Db#db{compactor_pid=Pid}, + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + {noreply, Db2}; + _ -> + % compact currently running, this is a no-op + {noreply, Db} + end; +handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) -> + {ok, NewFd} = couch_file:open(CompactFilepath), + {ok, NewHeader} = couch_file:read_header(NewFd), + #db{update_seq=NewSeq} = NewDb = + init_db(Db#db.name, Filepath, NewFd, NewHeader), + unlink(NewFd), + case Db#db.update_seq == NewSeq of + true -> + % suck up all the local docs into memory and write them to the new db + {ok, _, LocalDocs} = couch_btree:foldl(Db#db.local_tree, + fun(Value, _Offset, Acc) -> {ok, [Value | Acc]} end, []), + {ok, NewLocalBtree} = couch_btree:add(NewDb#db.local_tree, LocalDocs), + + NewDb2 = commit_data(NewDb#db{ + local_tree = NewLocalBtree, + main_pid = self(), + filepath = Filepath, + instance_start_time = Db#db.instance_start_time, + revs_limit = Db#db.revs_limit + }), + + ?LOG_DEBUG("CouchDB swapping files ~s and ~s.", + [Filepath, CompactFilepath]), + RootDir = couch_config:get("couchdb", "database_dir", "."), + couch_file:delete(RootDir, Filepath), + ok = file:rename(CompactFilepath, Filepath), + close_db(Db), + NewDb3 = refresh_validate_doc_funs(NewDb2), + ok = gen_server:call(couch_server, {db_updated, NewDb3}, infinity), + couch_db_update_notifier:notify({compacted, NewDb3#db.name}), + ?LOG_INFO("Compaction for db \"~s\" completed.", [Db#db.name]), + {noreply, NewDb3#db{compactor_pid=nil}}; + false -> + ?LOG_INFO("Compaction for ~s still behind main file " + "(update seq=~p. compact update seq=~p). Retrying.", + [Db#db.name, Db#db.update_seq, NewSeq]), + close_db(NewDb), + Pid = spawn_link(fun() -> start_copy_compact(Db) end), + Db2 = Db#db{compactor_pid=Pid}, + {noreply, Db2} + end. + + +handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, + FullCommit}, Db) -> + GroupedDocs2 = [[{Client, D} || D <- DocGroup] || DocGroup <- GroupedDocs], + if NonRepDocs == [] -> + {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2, + [Client], MergeConflicts, FullCommit); + true -> + GroupedDocs3 = GroupedDocs2, + FullCommit2 = FullCommit, + Clients = [Client] + end, + NonRepDocs2 = [{Client, NRDoc} || NRDoc <- NonRepDocs], + try update_docs_int(Db, GroupedDocs3, NonRepDocs2, MergeConflicts, + FullCommit2) of + {ok, Db2} -> + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + if Db2#db.update_seq /= Db#db.update_seq -> + couch_db_update_notifier:notify({updated, Db2#db.name}); + true -> ok + end, + [catch(ClientPid ! {done, self()}) || ClientPid <- Clients], + {noreply, Db2} + catch + throw: retry -> + [catch(ClientPid ! {retry, self()}) || ClientPid <- Clients], + {noreply, Db} + end; +handle_info(delayed_commit, #db{waiting_delayed_commit=nil}=Db) -> + %no outstanding delayed commits, ignore + {noreply, Db}; +handle_info(delayed_commit, Db) -> + case commit_data(Db) of + Db -> + {noreply, Db}; + Db2 -> + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + {noreply, Db2} + end; +handle_info({'EXIT', _Pid, normal}, Db) -> + {noreply, Db}; +handle_info({'EXIT', _Pid, Reason}, Db) -> + {stop, Reason, Db}; +handle_info({'DOWN', Ref, _, _, Reason}, #db{fd_monitor=Ref, name=Name} = Db) -> + ?LOG_ERROR("DB ~s shutting down - Fd ~p", [Name, Reason]), + {stop, normal, Db}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +merge_updates([], RestB, AccOutGroups) -> + lists:reverse(AccOutGroups, RestB); +merge_updates(RestA, [], AccOutGroups) -> + lists:reverse(AccOutGroups, RestA); +merge_updates([[{_, #doc{id=IdA}}|_]=GroupA | RestA], + [[{_, #doc{id=IdB}}|_]=GroupB | RestB], AccOutGroups) -> + if IdA == IdB -> + merge_updates(RestA, RestB, [GroupA ++ GroupB | AccOutGroups]); + IdA < IdB -> + merge_updates(RestA, [GroupB | RestB], [GroupA | AccOutGroups]); + true -> + merge_updates([GroupA | RestA], RestB, [GroupB | AccOutGroups]) + end. + +collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> + receive + % Only collect updates with the same MergeConflicts flag and without + % local docs. It's easier to just avoid multiple _local doc + % updaters than deal with their possible conflicts, and local docs + % writes are relatively rare. Can be optmized later if really needed. + {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} -> + GroupedDocs2 = [[{Client, Doc} || Doc <- DocGroup] + || DocGroup <- GroupedDocs], + GroupedDocsAcc2 = + merge_updates(GroupedDocsAcc, GroupedDocs2, []), + collect_updates(GroupedDocsAcc2, [Client | ClientsAcc], + MergeConflicts, (FullCommit or FullCommit2)) + after 0 -> + {GroupedDocsAcc, ClientsAcc, FullCommit} + end. + + +rev_tree(DiskTree) -> + couch_key_tree:map(fun(_RevId, {Del, Ptr, Seq}) -> + #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq}; + (_RevId, {Del, Ptr, Seq, Size, Atts}) -> + #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq, size=Size, atts=Atts}; + (_RevId, ?REV_MISSING) -> + ?REV_MISSING + end, DiskTree). + +disk_tree(RevTree) -> + couch_key_tree:map(fun(_RevId, ?REV_MISSING) -> + ?REV_MISSING; + (_RevId, #leaf{deleted=Del, ptr=Ptr, seq=Seq, size=Size, atts=Atts}) -> + {if Del -> 1; true -> 0 end, Ptr, Seq, Size, Atts} + end, RevTree). + +btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) -> + {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}. + +btree_by_seq_join(Seq, {Id, Del, T}) when is_integer(Del) -> + #full_doc_info{id=Id, update_seq=Seq, deleted=Del==1, rev_tree=rev_tree(T)}; +btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) -> + % 1.0 stored #doc_info records in the seq tree. compact to upgrade. + #doc_info{ + id = Id, + high_seq=KeySeq, + revs = + [#rev_info{rev=Rev,seq=Seq,deleted=false,body_sp = Bp} || + {Rev, Seq, Bp} <- RevInfos] ++ + [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} || + {Rev, Seq, Bp} <- DeletedRevInfos]}; +btree_by_seq_join(KeySeq,{Id, Rev, Bp, Conflicts, DelConflicts, Deleted}) -> + % 09 UPGRADE CODE + % this is the 0.9.0 and earlier by_seq record. It's missing the body pointers + % and individual seq nums for conflicts that are currently in the index, + % meaning the filtered _changes api will not work except for on main docs. + % Simply compact a 0.9.0 database to upgrade the index. + #doc_info{ + id=Id, + high_seq=KeySeq, + revs = [#rev_info{rev=Rev,seq=KeySeq,deleted=Deleted,body_sp=Bp}] ++ + [#rev_info{rev=Rev1,seq=KeySeq,deleted=false} || Rev1 <- Conflicts] ++ + [#rev_info{rev=Rev2,seq=KeySeq,deleted=true} || Rev2 <- DelConflicts]}. + +btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq, + data_size=Size, deleted=Deleted, rev_tree=Tree}) -> + {Id, {Seq, if Deleted -> 1; true -> 0 end, Size, disk_tree(Tree)}}. + +%% handle old formats before `data_size` added +btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) -> + btree_by_id_join(Id, {HighSeq, Deleted, 0, DiskTree}); + +btree_by_id_join(Id, {HighSeq, Deleted, Size, DiskTree}) -> + #full_doc_info{id=Id, update_seq=HighSeq, + deleted=Deleted==1, data_size=Size, + rev_tree=rev_tree(DiskTree)}. + +btree_by_id_reduce(reduce, FullDocInfos) -> + lists:foldl( + fun(#full_doc_info{deleted = false, data_size=Size}, + {NotDeleted, Deleted, DocSize}) -> + {NotDeleted + 1, Deleted, DocSize + Size}; + (#full_doc_info{deleted = true, data_size=Size}, + {NotDeleted, Deleted, DocSize}) -> + {NotDeleted, Deleted + 1, DocSize + Size} + end, + {0, 0, 0}, FullDocInfos); + +btree_by_id_reduce(rereduce, Reductions) -> + lists:foldl( + fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, AccDocSizes}) -> + {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccDocSizes}; + ({NotDeleted, Deleted, DocSizes}, {AccNotDeleted, AccDeleted, AccDocSizes}) -> + {AccNotDeleted + NotDeleted, AccDeleted + Deleted, DocSizes + AccDocSizes} + end, + {0, 0, 0}, Reductions). + +btree_by_seq_reduce(reduce, DocInfos) -> + % count the number of documents + length(DocInfos); +btree_by_seq_reduce(rereduce, Reds) -> + lists:sum(Reds). + +simple_upgrade_record(Old, New) when tuple_size(Old) =:= tuple_size(New) -> + Old; +simple_upgrade_record(Old, New) when tuple_size(Old) < tuple_size(New) -> + OldSz = tuple_size(Old), + NewValuesTail = + lists:sublist(tuple_to_list(New), OldSz + 1, tuple_size(New) - OldSz), + list_to_tuple(tuple_to_list(Old) ++ NewValuesTail). + + +init_db(DbName, Filepath, Fd, Header0) -> + Header1 = simple_upgrade_record(Header0, #db_header{}), + Header = + case element(2, Header1) of + 1 -> Header1#db_header{unused = 0, security_ptr = nil}; % 0.9 + 2 -> Header1#db_header{unused = 0, security_ptr = nil}; % post 0.9 and pre 0.10 + 3 -> Header1#db_header{security_ptr = nil}; % post 0.9 and pre 0.10 + 4 -> Header1#db_header{security_ptr = nil}; % 0.10 and pre 0.11 + ?LATEST_DISK_VERSION -> Header1; + _ -> throw({database_disk_version_error, "Incorrect disk header version"}) + end, + + {ok, FsyncOptions} = couch_util:parse_term( + couch_config:get("couchdb", "fsync_options", + "[before_header, after_header, on_file_open]")), + + case lists:member(on_file_open, FsyncOptions) of + true -> ok = couch_file:sync(Filepath); + _ -> ok + end, + + {ok, IdBtree} = couch_btree:open(Header#db_header.id_tree_state, Fd, + [{split, fun ?MODULE:btree_by_id_split/1}, + {join, fun ?MODULE:btree_by_id_join/2}, + {reduce, fun ?MODULE:btree_by_id_reduce/2}]), + {ok, SeqBtree} = couch_btree:open(Header#db_header.seq_tree_state, Fd, + [{split, fun ?MODULE:btree_by_seq_split/1}, + {join, fun ?MODULE:btree_by_seq_join/2}, + {reduce, fun ?MODULE:btree_by_seq_reduce/2}]), + {ok, LocalDocsBtree} = couch_btree:open(Header#db_header.local_tree_state, Fd), + case Header#db_header.security_ptr of + nil -> + Security = [], + SecurityPtr = nil; + SecurityPtr -> + {ok, Security} = couch_file:pread_term(Fd, SecurityPtr) + end, + % convert start time tuple to microsecs and store as a binary string + {MegaSecs, Secs, MicroSecs} = now(), + StartTime = ?l2b(io_lib:format("~p", + [(MegaSecs*1000000*1000000) + (Secs*1000000) + MicroSecs])), + #db{ + fd=Fd, + fd_monitor = erlang:monitor(process,Fd), + header=Header, + id_tree = IdBtree, + seq_tree = SeqBtree, + local_tree = LocalDocsBtree, + committed_update_seq = Header#db_header.update_seq, + update_seq = Header#db_header.update_seq, + name = DbName, + filepath = Filepath, + security = Security, + security_ptr = SecurityPtr, + instance_start_time = StartTime, + revs_limit = Header#db_header.revs_limit, + fsync_options = FsyncOptions + }. + + +close_db(#db{fd_monitor = Ref}) -> + erlang:demonitor(Ref). + + +refresh_validate_doc_funs(Db) -> + {ok, DesignDocs} = couch_db:get_design_docs(Db), + ProcessDocFuns = lists:flatmap( + fun(DesignDoc) -> + case couch_doc:get_validate_doc_fun(DesignDoc) of + nil -> []; + Fun -> [Fun] + end + end, DesignDocs), + case Db#db.name of + <<"shards/", _/binary>> = ShardName -> + fabric:reset_validation_funs(mem3:dbname(ShardName)), + Db#db{validate_doc_funs=undefined}; + _ -> + Db#db{validate_doc_funs=ProcessDocFuns} + end. + +% rev tree functions + +flush_trees(_Db, [], AccFlushedTrees) -> + {ok, lists:reverse(AccFlushedTrees)}; +flush_trees(#db{fd=Fd,header=Header}=Db, + [InfoUnflushed | RestUnflushed], AccFlushed) -> + #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed, + Flushed = couch_key_tree:map( + fun(_Rev, Value) -> + case Value of + #doc{atts=Atts,deleted=IsDeleted}=Doc -> + % this node value is actually an unwritten document summary, + % write to disk. + % make sure the Fd in the written bins is the same Fd we are + % and convert bins, removing the FD. + % All bins should have been written to disk already. + {DiskAtts, SizeInfo} = + case Atts of + [] -> {[],[]}; + [#att{data={BinFd, _Sp}} | _ ] when BinFd == Fd -> + {[{N,T,P,AL,DL,R,M,E} + || #att{name=N,type=T,data={_,P},md5=M,revpos=R, + att_len=AL,disk_len=DL,encoding=E} + <- Atts], + [{P1,AL1} + || #att{data={_,P1},att_len=AL1} + <- Atts]}; + _ -> + % BinFd must not equal our Fd. This can happen when a database + % is being switched out during a compaction + ?LOG_DEBUG("File where the attachments are written has" + " changed. Possibly retrying.", []), + throw(retry) + end, + {ok, NewSummaryPointer} = + case Header#db_header.disk_version < 4 of + true -> + {ok, _} = couch_file:append_term(Fd, {Doc#doc.body, DiskAtts}); + false -> + {ok, _} = couch_file:append_term_md5(Fd, {Doc#doc.body, DiskAtts}) + end, + #leaf{ + deleted = IsDeleted, + ptr = NewSummaryPointer, + seq = UpdateSeq, + size = size(term_to_binary(Doc#doc.body)), + atts = SizeInfo + }; + _ -> + Value + end + end, Unflushed), + flush_trees(Db, RestUnflushed, [InfoUnflushed#full_doc_info{rev_tree=Flushed} | AccFlushed]). + + +send_result(Client, Id, OriginalRevs, NewResult) -> + % used to send a result to the client + catch(Client ! {result, self(), {{Id, OriginalRevs}, NewResult}}). + +merge_rev_trees(_Limit, _Merge, [], [], AccNewInfos, AccRemoveSeqs, AccSeq) -> + {ok, lists:reverse(AccNewInfos), AccRemoveSeqs, AccSeq}; +merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], + [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) -> + #full_doc_info{id=Id,rev_tree=OldTree,deleted=OldDeleted,update_seq=OldSeq} + = OldDocInfo, + NewRevTree = lists:foldl( + fun({Client, #doc{revs={Pos,[_Rev|PrevRevs]}}=NewDoc}, AccTree) -> + if not MergeConflicts -> + case couch_key_tree:merge(AccTree, couch_db:doc_to_tree(NewDoc), + Limit) of + {_NewTree, conflicts} when (not OldDeleted) -> + send_result(Client, Id, {Pos-1,PrevRevs}, conflict), + AccTree; + {NewTree, conflicts} when PrevRevs /= [] -> + % Check to be sure if prev revision was specified, it's + % a leaf node in the tree + Leafs = couch_key_tree:get_all_leafs(AccTree), + IsPrevLeaf = lists:any(fun({_, {LeafPos, [LeafRevId|_]}}) -> + {LeafPos, LeafRevId} == {Pos-1, hd(PrevRevs)} + end, Leafs), + if IsPrevLeaf -> + NewTree; + true -> + send_result(Client, Id, {Pos-1,PrevRevs}, conflict), + AccTree + end; + {NewTree, no_conflicts} when AccTree == NewTree -> + % the tree didn't change at all + % meaning we are saving a rev that's already + % been editted again. + if (Pos == 1) and OldDeleted -> + % this means we are recreating a brand new document + % into a state that already existed before. + % put the rev into a subsequent edit of the deletion + #doc_info{revs=[#rev_info{rev={OldPos,OldRev}}|_]} = + couch_doc:to_doc_info(OldDocInfo), + NewRevId = couch_db:new_revid( + NewDoc#doc{revs={OldPos, [OldRev]}}), + NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}}, + {NewTree2, _} = couch_key_tree:merge(AccTree, + couch_db:doc_to_tree(NewDoc2), Limit), + % we changed the rev id, this tells the caller we did + send_result(Client, Id, {Pos-1,PrevRevs}, + {ok, {OldPos + 1, NewRevId}}), + NewTree2; + true -> + send_result(Client, Id, {Pos-1,PrevRevs}, conflict), + AccTree + end; + {NewTree, _} -> + NewTree + end; + true -> + {NewTree, _} = couch_key_tree:merge(AccTree, + couch_db:doc_to_tree(NewDoc), Limit), + NewTree + end + end, + OldTree, NewDocs), + if NewRevTree == OldTree -> + % nothing changed + merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, + AccNewInfos, AccRemoveSeqs, AccSeq); + true -> + % we have updated the document, give it a new seq # + NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree}, + RemoveSeqs = case OldSeq of + 0 -> AccRemoveSeqs; + _ -> [OldSeq | AccRemoveSeqs] + end, + merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, + [NewInfo|AccNewInfos], RemoveSeqs, AccSeq+1) + end. + + + +new_index_entries([], Acc) -> + Acc; +new_index_entries([Info|Rest], Acc) -> + #doc_info{revs=[#rev_info{deleted=Del}|_]} = couch_doc:to_doc_info(Info), + new_index_entries(Rest, [Info#full_doc_info{deleted=Del}|Acc]). + + +stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) -> + [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} || + #full_doc_info{rev_tree=Tree}=Info <- DocInfos]. + +update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> + #db{ + id_tree = DocInfoByIdBTree, + seq_tree = DocInfoBySeqBTree, + update_seq = LastSeq, + revs_limit = RevsLimit + } = Db, + Ids = [Id || [{_Client, #doc{id=Id}}|_] <- DocsList], + % lookup up the old documents, if they exist. + OldDocLookups = couch_btree:lookup(DocInfoByIdBTree, Ids), + OldDocInfos = lists:zipwith( + fun(_Id, {ok, FullDocInfo}) -> + FullDocInfo; + (Id, not_found) -> + #full_doc_info{id=Id} + end, + Ids, OldDocLookups), + % Merge the new docs into the revision trees. + {ok, NewFullDocInfos, RemoveSeqs, NewSeq} = merge_rev_trees(RevsLimit, + MergeConflicts, DocsList, OldDocInfos, [], [], LastSeq), + + % All documents are now ready to write. + + {ok, Db2} = update_local_docs(Db, NonRepDocs), + + % Write out the document summaries (the bodies are stored in the nodes of + % the trees, the attachments are already written to disk) + {ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []), + IndexInfos = + new_index_entries(compute_data_sizes(FlushedFullDocInfos, []), + []), + % and the indexes + {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, + IndexInfos, []), + {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, + IndexInfos, RemoveSeqs), + + Db3 = Db2#db{ + id_tree = DocInfoByIdBTree2, + seq_tree = DocInfoBySeqBTree2, + update_seq = NewSeq}, + + % Check if we just updated any design documents, and update the validation + % funs if we did. + case lists:any( + fun(<<"_design/", _/binary>>) -> true; (_) -> false end, Ids) of + false -> + Db4 = Db3; + true -> + Db4 = refresh_validate_doc_funs(Db3) + end, + + {ok, commit_data(Db4, not FullCommit)}. + +compute_data_sizes([], Acc) -> + lists:reverse(Acc); + +compute_data_sizes([FullDocInfo | RestDocInfos], Acc) -> + #full_doc_info{rev_tree=Tree} = FullDocInfo, + Size = couch_key_tree:compute_data_size(Tree), + compute_data_sizes(RestDocInfos, + [FullDocInfo#full_doc_info{data_size=Size} + | Acc]). + + + +update_local_docs(Db, []) -> + {ok, Db}; +update_local_docs(#db{local_tree=Btree}=Db, Docs) -> + Ids = [Id || {_Client, #doc{id=Id}} <- Docs], + OldDocLookups = couch_btree:lookup(Btree, Ids), + BtreeEntries = lists:zipwith( + fun({Client, #doc{id=Id,deleted=Delete,revs={0,PrevRevs},body=Body}}, + _OldDocLookup) -> + case PrevRevs of + [RevStr|_] -> + PrevRev = list_to_integer(?b2l(RevStr)); + [] -> + PrevRev = 0 + end, + %% disabled conflict checking for local docs -- APK 16 June 2010 + % OldRev = + % case OldDocLookup of + % {ok, {_, {OldRev0, _}}} -> OldRev0; + % not_found -> 0 + % end, + % case OldRev == PrevRev of + % true -> + case Delete of + false -> + send_result(Client, Id, {0, PrevRevs}, {ok, + {0, ?l2b(integer_to_list(PrevRev + 1))}}), + {update, {Id, {PrevRev + 1, Body}}}; + true -> + send_result(Client, Id, {0, PrevRevs}, + {ok, {0, <<"0">>}}), + {remove, Id} + end%; + % false -> + % send_result(Client, Id, {0, PrevRevs}, conflict), + % ignore + % end + end, Docs, OldDocLookups), + + BtreeIdsRemove = [Id || {remove, Id} <- BtreeEntries], + BtreeIdsUpdate = [{Key, Val} || {update, {Key, Val}} <- BtreeEntries], + + {ok, Btree2} = + couch_btree:add_remove(Btree, BtreeIdsUpdate, BtreeIdsRemove), + + {ok, Db#db{local_tree = Btree2}}. + + +commit_data(Db) -> + commit_data(Db, false). + +db_to_header(Db, Header) -> + Header#db_header{ + update_seq = Db#db.update_seq, + seq_tree_state = couch_btree:get_state(Db#db.seq_tree), + id_tree_state = couch_btree:get_state(Db#db.id_tree), + local_tree_state = couch_btree:get_state(Db#db.local_tree), + security_ptr = Db#db.security_ptr, + revs_limit = Db#db.revs_limit}. + +commit_data(#db{waiting_delayed_commit=nil} = Db, true) -> + Db#db{waiting_delayed_commit=erlang:send_after(1000,self(),delayed_commit)}; +commit_data(Db, true) -> + Db; +commit_data(Db, _) -> + #db{ + fd = Fd, + filepath = Filepath, + header = OldHeader, + fsync_options = FsyncOptions, + waiting_delayed_commit = Timer + } = Db, + if is_reference(Timer) -> erlang:cancel_timer(Timer); true -> ok end, + case db_to_header(Db, OldHeader) of + OldHeader -> + Db#db{waiting_delayed_commit=nil}; + Header -> + case lists:member(before_header, FsyncOptions) of + true -> ok = couch_file:sync(Filepath); + _ -> ok + end, + + ok = couch_file:write_header(Fd, Header), + + case lists:member(after_header, FsyncOptions) of + true -> ok = couch_file:sync(Filepath); + _ -> ok + end, + + Db#db{waiting_delayed_commit=nil, + header=Header, + committed_update_seq=Db#db.update_seq} + end. + + +copy_doc_attachments(#db{fd=SrcFd}=SrcDb, {Pos,_RevId}, SrcSp, DestFd) -> + {ok, {BodyData, BinInfos}} = couch_db:read_doc(SrcDb, SrcSp), + % copy the bin values + NewBinInfos = lists:map( + fun({Name, {Type, BinSp, AttLen}}) when is_tuple(BinSp) orelse BinSp == null -> + % 09 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity}; + ({Name, {Type, BinSp, AttLen}}) -> + % 09 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, Pos, Md5, identity}; + ({Name, Type, BinSp, AttLen, _RevPos, <<>>}) when + is_tuple(BinSp) orelse BinSp == null -> + % 09 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:old_copy_to_new_stream(SrcFd, BinSp, AttLen, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, AttLen, Md5, identity}; + ({Name, Type, BinSp, AttLen, RevPos, Md5}) -> + % 010 UPGRADE CODE + {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + {Name, Type, NewBinSp, AttLen, AttLen, RevPos, Md5, identity}; + ({Name, Type, BinSp, AttLen, DiskLen, RevPos, Md5, Enc1}) -> + {NewBinSp, AttLen, _, Md5, _IdentityMd5} = + couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd), + Enc = case Enc1 of + true -> + % 0110 UPGRADE CODE + gzip; + false -> + % 0110 UPGRADE CODE + identity; + _ -> + Enc1 + end, + {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc} + end, BinInfos), + {BodyData, NewBinInfos}. + +merge_lookups(Infos, []) -> + Infos; +merge_lookups([], _) -> + []; +merge_lookups([#doc_info{}|RestInfos], [{ok, FullDocInfo}|RestLookups]) -> + [FullDocInfo|merge_lookups(RestInfos, RestLookups)]; +merge_lookups([FullDocInfo|RestInfos], Lookups) -> + [FullDocInfo|merge_lookups(RestInfos, Lookups)]. + +copy_docs(Db, #db{fd=DestFd}=NewDb, MixedInfos, Retry) -> + % lookup any necessary full_doc_infos + DocInfoIds = [Id || #doc_info{id=Id} <- MixedInfos], + LookupResults = couch_btree:lookup(Db#db.id_tree, DocInfoIds), + % COUCHDB-968, make sure we prune duplicates during compaction + Infos = lists:usort(fun(#full_doc_info{id=A}, #full_doc_info{id=B}) -> + A =< B + end, merge_lookups(MixedInfos, LookupResults)), + + NewInfos1 = [Info#full_doc_info{rev_tree=couch_key_tree:map( + fun(Rev, #leaf{ptr=Sp, size=Size0}=Leaf, leaf) -> + {Body, AttInfos} = copy_doc_attachments(Db, Rev, Sp, DestFd), + {ok, Pos} = couch_file:append_term_md5(DestFd, {Body, AttInfos}), + if Size0 > 0 -> + Leaf#leaf{ptr=Pos}; + true -> + DocSize = byte_size(term_to_binary(Body)), + AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos], + Leaf#leaf{ptr=Pos, size=DocSize, atts=AttSizes} + end; + (_, _, branch) -> + ?REV_MISSING + end, RevTree)} || #full_doc_info{rev_tree=RevTree}=Info <- Infos], + + NewInfos = stem_full_doc_infos(Db, compute_data_sizes(NewInfos1, [])), + RemoveSeqs = + case Retry of + false -> + []; + true -> + % We are retrying a compaction, meaning the documents we are copying may + % already exist in our file and must be removed from the by_seq index. + Ids = [Id || #full_doc_info{id=Id} <- Infos], + Existing = couch_btree:lookup(NewDb#db.id_tree, Ids), + [Seq || {ok, #full_doc_info{update_seq=Seq}} <- Existing] + end, + + {ok, SeqTree} = couch_btree:add_remove( + NewDb#db.seq_tree, NewInfos, RemoveSeqs), + {ok, IdTree} = couch_btree:add_remove( + NewDb#db.id_tree, NewInfos, []), + NewDb#db{id_tree=IdTree, seq_tree=SeqTree}. + + + +copy_compact(Db, NewDb0, Retry) -> + FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header], + NewDb = NewDb0#db{fsync_options=FsyncOptions}, + TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq), + EnumBySeqFun = + fun(DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) -> + case DocInfo of + #full_doc_info{update_seq=Seq} -> + ok; + #doc_info{high_seq=Seq} -> + ok + end, + couch_task_status:update("Copied ~p of ~p changes (~p%)", + [TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]), + if TotalCopied rem 1000 =:= 0 -> + NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry), + if TotalCopied rem 10000 =:= 0 -> + NewDb3 = commit_data(NewDb2#db{update_seq=Seq}), + {ok, {NewDb3, [], TotalCopied + 1}}; + true -> + {ok, {NewDb2#db{update_seq=Seq}, [], TotalCopied + 1}} + end; + true -> + {ok, {AccNewDb, [DocInfo | AccUncopied], TotalCopied + 1}} + end + end, + + couch_task_status:set_update_frequency(500), + + {ok, _, {NewDb2, Uncopied, TotalChanges}} = + couch_btree:foldl(Db#db.seq_tree, EnumBySeqFun, + {NewDb, [], 0}, + [{start_key, NewDb#db.update_seq + 1}]), + + couch_task_status:update("Flushing"), + + NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry), + + % copy misc header values + if NewDb3#db.security /= Db#db.security -> + {ok, Ptr} = couch_file:append_term(NewDb3#db.fd, Db#db.security), + NewDb4 = NewDb3#db{security=Db#db.security, security_ptr=Ptr}; + true -> + NewDb4 = NewDb3 + end, + + commit_data(NewDb4#db{update_seq=Db#db.update_seq}). + +start_copy_compact(#db{name=Name,filepath=Filepath,header=#db_header{purge_seq=PurgeSeq}}=Db) -> + CompactFile = Filepath ++ ".compact", + ?LOG_DEBUG("Compaction process spawned for db \"~s\"", [Name]), + case couch_file:open(CompactFile) of + {ok, Fd} -> + couch_task_status:add_task(<<"Database Compaction">>, <<Name/binary, " retry">>, <<"Starting">>), + Retry = true, + case couch_file:read_header(Fd) of + {ok, Header} -> + ok; + no_valid_header -> + ok = couch_file:write_header(Fd, Header=#db_header{}) + end; + {error, enoent} -> + couch_task_status:add_task(<<"Database Compaction">>, Name, <<"Starting">>), + {ok, Fd} = couch_file:open(CompactFile, [create]), + Retry = false, + ok = couch_file:write_header(Fd, Header=#db_header{}) + end, + NewDb = init_db(Name, CompactFile, Fd, Header), + NewDb2 = if PurgeSeq > 0 -> + {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db), + {ok, Pointer} = couch_file:append_term(Fd, PurgedIdsRevs), + NewDb#db{header=Header#db_header{purge_seq=PurgeSeq, purged_docs=Pointer}}; + true -> + NewDb + end, + unlink(Fd), + + NewDb3 = copy_compact(Db, NewDb2, Retry), + close_db(NewDb3), + gen_server:cast(Db#db.main_pid, {compact_done, CompactFile}). + diff --git a/apps/couch/src/couch_doc.erl b/apps/couch/src/couch_doc.erl new file mode 100644 index 00000000..63ac0892 --- /dev/null +++ b/apps/couch/src/couch_doc.erl @@ -0,0 +1,618 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_doc). + +-export([to_doc_info/1,to_doc_info_path/1,parse_rev/1,parse_revs/1,rev_to_str/1,revs_to_strs/1]). +-export([att_foldl/3,range_att_foldl/5,att_foldl_decode/3,get_validate_doc_fun/1]). +-export([from_json_obj/1,to_json_obj/2,has_stubs/1, merge_stubs/2]). +-export([validate_docid/1]). +-export([doc_from_multi_part_stream/2]). +-export([doc_to_multi_part_stream/5, len_doc_to_multi_part_stream/4]). +-export([abort_multi_part_stream/1]). + +-include("couch_db.hrl"). + +% helpers used by to_json_obj +to_json_rev(0, []) -> + []; +to_json_rev(Start, [FirstRevId|_]) -> + [{<<"_rev">>, ?l2b([integer_to_list(Start),"-",revid_to_str(FirstRevId)])}]. + +to_json_body(true, {Body}) -> + Body ++ [{<<"_deleted">>, true}]; +to_json_body(false, {Body}) -> + Body. + +to_json_revisions(Options, Start, RevIds) -> + case lists:member(revs, Options) of + false -> []; + true -> + [{<<"_revisions">>, {[{<<"start">>, Start}, + {<<"ids">>, [revid_to_str(R) ||R <- RevIds]}]}}] + end. + +revid_to_str(RevId) when size(RevId) =:= 16 -> + ?l2b(couch_util:to_hex(RevId)); +revid_to_str(RevId) -> + RevId. + +rev_to_str({Pos, RevId}) -> + ?l2b([integer_to_list(Pos),"-",revid_to_str(RevId)]). + + +revs_to_strs([]) -> + []; +revs_to_strs([{Pos, RevId}| Rest]) -> + [rev_to_str({Pos, RevId}) | revs_to_strs(Rest)]. + +to_json_meta(Meta) -> + lists:map( + fun({revs_info, Start, RevsInfo}) -> + {JsonRevsInfo, _Pos} = lists:mapfoldl( + fun({RevId, Status}, PosAcc) -> + JsonObj = {[{<<"rev">>, rev_to_str({PosAcc, RevId})}, + {<<"status">>, ?l2b(atom_to_list(Status))}]}, + {JsonObj, PosAcc - 1} + end, Start, RevsInfo), + {<<"_revs_info">>, JsonRevsInfo}; + ({local_seq, Seq}) -> + {<<"_local_seq">>, Seq}; + ({conflicts, Conflicts}) -> + {<<"_conflicts">>, revs_to_strs(Conflicts)}; + ({deleted_conflicts, DConflicts}) -> + {<<"_deleted_conflicts">>, revs_to_strs(DConflicts)} + end, Meta). + +to_json_attachments(Attachments, Options) -> + to_json_attachments( + Attachments, + lists:member(attachments, Options), + lists:member(follows, Options), + lists:member(att_encoding_info, Options) + ). + +to_json_attachments([], _OutputData, _DataToFollow, _ShowEncInfo) -> + []; +to_json_attachments(Atts, OutputData, DataToFollow, ShowEncInfo) -> + AttProps = lists:map( + fun(#att{disk_len=DiskLen, att_len=AttLen, encoding=Enc}=Att) -> + {Att#att.name, {[ + {<<"content_type">>, Att#att.type}, + {<<"revpos">>, Att#att.revpos}] ++ + case Att#att.md5 of + <<>> -> + []; + Md5 -> + EncodedMd5 = base64:encode(Md5), + [{<<"digest">>, <<"md5-",EncodedMd5/binary>>}] + end ++ + if not OutputData orelse Att#att.data == stub -> + [{<<"length">>, DiskLen}, {<<"stub">>, true}]; + true -> + if DataToFollow -> + [{<<"length">>, DiskLen}, {<<"follows">>, true}]; + true -> + AttData = case Enc of + gzip -> + zlib:gunzip(att_to_bin(Att)); + identity -> + att_to_bin(Att) + end, + [{<<"data">>, base64:encode(AttData)}] + end + end ++ + case {ShowEncInfo, Enc} of + {false, _} -> + []; + {true, identity} -> + []; + {true, _} -> + [ + {<<"encoding">>, couch_util:to_binary(Enc)}, + {<<"encoded_length">>, AttLen} + ] + end + }} + end, Atts), + [{<<"_attachments">>, {AttProps}}]. + +to_json_obj(#doc{id=Id,deleted=Del,body=Body,revs={Start, RevIds}, + meta=Meta}=Doc,Options)-> + {[{<<"_id">>, Id}] + ++ to_json_rev(Start, RevIds) + ++ to_json_body(Del, Body) + ++ to_json_revisions(Options, Start, RevIds) + ++ to_json_meta(Meta) + ++ to_json_attachments(Doc#doc.atts, Options) + }. + +from_json_obj({Props}) -> + transfer_fields(Props, #doc{body=[]}); + +from_json_obj(_Other) -> + throw({bad_request, "Document must be a JSON object"}). + +parse_revid(RevId) when size(RevId) =:= 32 -> + RevInt = erlang:list_to_integer(?b2l(RevId), 16), + <<RevInt:128>>; +parse_revid(RevId) when length(RevId) =:= 32 -> + RevInt = erlang:list_to_integer(RevId, 16), + <<RevInt:128>>; +parse_revid(RevId) when is_binary(RevId) -> + RevId; +parse_revid(RevId) when is_list(RevId) -> + ?l2b(RevId). + + +parse_rev(Rev) when is_binary(Rev) -> + parse_rev(?b2l(Rev)); +parse_rev(Rev) when is_list(Rev) -> + SplitRev = lists:splitwith(fun($-) -> false; (_) -> true end, Rev), + case SplitRev of + {Pos, [$- | RevId]} -> {list_to_integer(Pos), parse_revid(RevId)}; + _Else -> throw({bad_request, <<"Invalid rev format">>}) + end; +parse_rev(_BadRev) -> + throw({bad_request, <<"Invalid rev format">>}). + +parse_revs([]) -> + []; +parse_revs([Rev | Rest]) -> + [parse_rev(Rev) | parse_revs(Rest)]. + + +validate_docid(Id) when is_binary(Id) -> + case couch_util:validate_utf8(Id) of + false -> throw({bad_request, <<"Document id must be valid UTF-8">>}); + true -> ok + end, + case Id of + <<"_design/", _/binary>> -> ok; + <<"_local/", _/binary>> -> ok; + <<"_", _/binary>> -> + throw({bad_request, <<"Only reserved document ids may start with underscore.">>}); + _Else -> ok + end; +validate_docid(Id) -> + ?LOG_DEBUG("Document id is not a string: ~p", [Id]), + throw({bad_request, <<"Document id must be a string">>}). + +transfer_fields([], #doc{body=Fields}=Doc) -> + % convert fields back to json object + Doc#doc{body={lists:reverse(Fields)}}; + +transfer_fields([{<<"_id">>, Id} | Rest], Doc) -> + validate_docid(Id), + transfer_fields(Rest, Doc#doc{id=Id}); + +transfer_fields([{<<"_rev">>, Rev} | Rest], #doc{revs={0, []}}=Doc) -> + {Pos, RevId} = parse_rev(Rev), + transfer_fields(Rest, + Doc#doc{revs={Pos, [RevId]}}); + +transfer_fields([{<<"_rev">>, _Rev} | Rest], Doc) -> + % we already got the rev from the _revisions + transfer_fields(Rest,Doc); + +transfer_fields([{<<"_attachments">>, {JsonBins}} | Rest], Doc) -> + Atts = lists:map(fun({Name, {BinProps}}) -> + Md5 = case couch_util:get_value(<<"digest">>, BinProps) of + <<"md5-",EncodedMd5/binary>> -> + base64:decode(EncodedMd5); + _ -> + <<>> + end, + case couch_util:get_value(<<"stub">>, BinProps) of + true -> + Type = couch_util:get_value(<<"content_type">>, BinProps), + RevPos = couch_util:get_value(<<"revpos">>, BinProps, nil), + DiskLen = couch_util:get_value(<<"length">>, BinProps), + {Enc, EncLen} = att_encoding_info(BinProps), + #att{name=Name, data=stub, type=Type, att_len=EncLen, + disk_len=DiskLen, encoding=Enc, revpos=RevPos, md5=Md5}; + _ -> + Type = couch_util:get_value(<<"content_type">>, BinProps, + ?DEFAULT_ATTACHMENT_CONTENT_TYPE), + RevPos = couch_util:get_value(<<"revpos">>, BinProps, 0), + case couch_util:get_value(<<"follows">>, BinProps) of + true -> + DiskLen = couch_util:get_value(<<"length">>, BinProps), + {Enc, EncLen} = att_encoding_info(BinProps), + #att{name=Name, data=follows, type=Type, encoding=Enc, + att_len=EncLen, disk_len=DiskLen, revpos=RevPos, md5=Md5}; + _ -> + Value = couch_util:get_value(<<"data">>, BinProps), + Bin = base64:decode(Value), + LenBin = size(Bin), + #att{name=Name, data=Bin, type=Type, att_len=LenBin, + disk_len=LenBin, revpos=RevPos} + end + end + end, JsonBins), + transfer_fields(Rest, Doc#doc{atts=Atts}); + +transfer_fields([{<<"_revisions">>, {Props}} | Rest], Doc) -> + RevIds = couch_util:get_value(<<"ids">>, Props), + Start = couch_util:get_value(<<"start">>, Props), + if not is_integer(Start) -> + throw({doc_validation, "_revisions.start isn't an integer."}); + not is_list(RevIds) -> + throw({doc_validation, "_revisions.ids isn't a array."}); + true -> + ok + end, + [throw({doc_validation, "RevId isn't a string"}) || + RevId <- RevIds, not is_binary(RevId)], + RevIds2 = [parse_revid(RevId) || RevId <- RevIds], + transfer_fields(Rest, Doc#doc{revs={Start, RevIds2}}); + +transfer_fields([{<<"_deleted">>, B} | Rest], Doc) when is_boolean(B) -> + transfer_fields(Rest, Doc#doc{deleted=B}); + +% ignored fields +transfer_fields([{<<"_revs_info">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); +transfer_fields([{<<"_local_seq">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); +transfer_fields([{<<"_conflicts">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); +transfer_fields([{<<"_deleted_conflicts">>, _} | Rest], Doc) -> + transfer_fields(Rest, Doc); + +% special fields for replication documents +transfer_fields([{<<"_replication_state">>, _} = Field | Rest], + #doc{body=Fields} = Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}); +transfer_fields([{<<"_replication_state_time">>, _} = Field | Rest], + #doc{body=Fields} = Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}); +transfer_fields([{<<"_replication_id">>, _} = Field | Rest], + #doc{body=Fields} = Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}); + +% unknown special field +transfer_fields([{<<"_",Name/binary>>, _} | _], _) -> + throw({doc_validation, + ?l2b(io_lib:format("Bad special document member: _~s", [Name]))}); + +transfer_fields([Field | Rest], #doc{body=Fields}=Doc) -> + transfer_fields(Rest, Doc#doc{body=[Field|Fields]}). + +att_encoding_info(BinProps) -> + DiskLen = couch_util:get_value(<<"length">>, BinProps), + case couch_util:get_value(<<"encoding">>, BinProps) of + undefined -> + {identity, DiskLen}; + Enc -> + EncodedLen = couch_util:get_value(<<"encoded_length">>, BinProps, DiskLen), + {list_to_existing_atom(?b2l(Enc)), EncodedLen} + end. + +to_doc_info(FullDocInfo) -> + {DocInfo, _Path} = to_doc_info_path(FullDocInfo), + DocInfo. + +max_seq(Tree, UpdateSeq) -> + FoldFun = fun({_Pos, _Key}, Value, _Type, MaxOldSeq) -> + case Value of + {_Deleted, _DiskPos, OldTreeSeq} -> + erlang:max(MaxOldSeq, OldTreeSeq); + #leaf{seq=LeafSeq} -> + erlang:max(MaxOldSeq, LeafSeq); + _ -> + MaxOldSeq + end + end, + couch_key_tree:fold(FoldFun, UpdateSeq, Tree). + +to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) -> + RevInfosAndPath = + [{#rev_info{deleted=Del,body_sp=Bp,seq=Seq,rev={Pos,RevId}}, Path} || + {#leaf{deleted=Del, ptr=Bp, seq=Seq},{Pos, [RevId|_]}=Path} <- + couch_key_tree:get_all_leafs(Tree)], + SortedRevInfosAndPath = lists:sort( + fun({#rev_info{deleted=DeletedA,rev=RevA}, _PathA}, + {#rev_info{deleted=DeletedB,rev=RevB}, _PathB}) -> + % sort descending by {not deleted, rev} + {not DeletedA, RevA} > {not DeletedB, RevB} + end, RevInfosAndPath), + [{_RevInfo, WinPath}|_] = SortedRevInfosAndPath, + RevInfos = [RevInfo || {RevInfo, _Path} <- SortedRevInfosAndPath], + {#doc_info{id=Id, high_seq=max_seq(Tree, FDISeq), revs=RevInfos}, WinPath}. + + + + +att_foldl(#att{data=Bin}, Fun, Acc) when is_binary(Bin) -> + Fun(Bin, Acc); +att_foldl(#att{data={Fd,Sp},att_len=Len}, Fun, Acc) when is_tuple(Sp) orelse Sp == null -> + % 09 UPGRADE CODE + couch_stream:old_foldl(Fd, Sp, Len, Fun, Acc); +att_foldl(#att{data={Fd,Sp},md5=Md5}, Fun, Acc) -> + couch_stream:foldl(Fd, Sp, Md5, Fun, Acc); +att_foldl(#att{data=DataFun,att_len=Len}, Fun, Acc) when is_function(DataFun) -> + fold_streamed_data(DataFun, Len, Fun, Acc). + +range_att_foldl(#att{data={Fd,Sp}}, From, To, Fun, Acc) -> + couch_stream:range_foldl(Fd, Sp, From, To, Fun, Acc). + +att_foldl_decode(#att{data={Fd,Sp},md5=Md5,encoding=Enc}, Fun, Acc) -> + couch_stream:foldl_decode(Fd, Sp, Md5, Enc, Fun, Acc); +att_foldl_decode(#att{data=Fun2,att_len=Len, encoding=identity}, Fun, Acc) -> + fold_streamed_data(Fun2, Len, Fun, Acc). + +att_to_bin(#att{data=Bin}) when is_binary(Bin) -> + Bin; +att_to_bin(#att{data=Iolist}) when is_list(Iolist) -> + iolist_to_binary(Iolist); +att_to_bin(#att{data={_Fd,_Sp}}=Att) -> + iolist_to_binary( + lists:reverse(att_foldl( + Att, + fun(Bin,Acc) -> [Bin|Acc] end, + [] + )) + ); +att_to_bin(#att{data=DataFun, att_len=Len}) when is_function(DataFun)-> + iolist_to_binary( + lists:reverse(fold_streamed_data( + DataFun, + Len, + fun(Data, Acc) -> [Data | Acc] end, + [] + )) + ). + +get_validate_doc_fun({Props}) -> + get_validate_doc_fun(couch_doc:from_json_obj({Props})); +get_validate_doc_fun(#doc{body={Props}}=DDoc) -> + case couch_util:get_value(<<"validate_doc_update">>, Props) of + undefined -> + nil; + _Else -> + fun(EditDoc, DiskDoc, Ctx, SecObj) -> + couch_query_servers:validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) + end + end. + + +has_stubs(#doc{atts=Atts}) -> + has_stubs(Atts); +has_stubs([]) -> + false; +has_stubs([#att{data=stub}|_]) -> + true; +has_stubs([_Att|Rest]) -> + has_stubs(Rest). + +merge_stubs(#doc{id=Id,atts=MemBins}=StubsDoc, #doc{atts=DiskBins}) -> + BinDict = dict:from_list([{Name, Att} || #att{name=Name}=Att <- DiskBins]), + MergedBins = lists:map( + fun(#att{name=Name, data=stub, revpos=StubRevPos}) -> + case dict:find(Name, BinDict) of + {ok, #att{revpos=DiskRevPos}=DiskAtt} + when DiskRevPos == StubRevPos orelse StubRevPos == nil -> + DiskAtt; + _ -> + throw({missing_stub, + <<"Invalid attachment stub in ", Id/binary, " for ", Name/binary>>}) + end; + (Att) -> + Att + end, MemBins), + StubsDoc#doc{atts= MergedBins}. + +fold_streamed_data(_RcvFun, 0, _Fun, Acc) -> + Acc; +fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> + Bin = RcvFun(), + ResultAcc = Fun(Bin, Acc), + fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). + +len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, SendEncodedAtts) -> + AttsSize = lists:foldl(fun(#att{data=Data} = Att, AccAttsSize) -> + case Data of + stub -> + AccAttsSize; + _ -> + AccAttsSize + + 4 + % "\r\n\r\n" + case SendEncodedAtts of + true -> + Att#att.att_len; + _ -> + Att#att.disk_len + end + + 4 + % "\r\n--" + size(Boundary) + end + end, 0, Atts), + if AttsSize == 0 -> + {<<"application/json">>, iolist_size(JsonBytes)}; + true -> + {<<"multipart/related; boundary=\"", Boundary/binary, "\"">>, + 2 + % "--" + size(Boundary) + + 36 + % "\r\ncontent-type: application/json\r\n\r\n" + iolist_size(JsonBytes) + + 4 + % "\r\n--" + size(Boundary) + + + AttsSize + + 2 % "--" + } + end. + +doc_to_multi_part_stream(Boundary, JsonBytes, Atts, WriteFun, + SendEncodedAtts) -> + case lists:any(fun(#att{data=Data})-> Data /= stub end, Atts) of + true -> + WriteFun([<<"--", Boundary/binary, + "\r\ncontent-type: application/json\r\n\r\n">>, + JsonBytes, <<"\r\n--", Boundary/binary>>]), + atts_to_mp(Atts, Boundary, WriteFun, SendEncodedAtts); + false -> + WriteFun(JsonBytes) + end. + +atts_to_mp([], _Boundary, WriteFun, _SendEncAtts) -> + WriteFun(<<"--">>); +atts_to_mp([#att{data=stub} | RestAtts], Boundary, WriteFun, + SendEncodedAtts) -> + atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts); +atts_to_mp([Att | RestAtts], Boundary, WriteFun, + SendEncodedAtts) -> + WriteFun(<<"\r\n\r\n">>), + AttFun = case SendEncodedAtts of + false -> + fun att_foldl_decode/3; + true -> + fun att_foldl/3 + end, + AttFun(Att, fun(Data, _) -> WriteFun(Data) end, ok), + WriteFun(<<"\r\n--", Boundary/binary>>), + atts_to_mp(RestAtts, Boundary, WriteFun, SendEncodedAtts). + + +doc_from_multi_part_stream(ContentType, DataFun) -> + Parent = self(), + Parser = spawn_link(fun() -> + {<<"--">>, _, _} = couch_httpd:parse_multipart_request( + ContentType, DataFun, + fun(Next) -> mp_parse_doc(Next, []) end), + unlink(Parent), + Parent ! {self(), finished} + end), + Parser ! {get_doc_bytes, self()}, + receive + {doc_bytes, DocBytes} -> + Doc = from_json_obj(?JSON_DECODE(DocBytes)), + % we'll send the Parser process ID to the remote nodes so they can + % retrieve their own copies of the attachment data + Atts2 = lists:map( + fun(#att{data=follows}=A) -> + A#att{data={follows, Parser}}; + (A) -> + A + end, Doc#doc.atts), + WaitFun = fun() -> + receive {Parser, finished} -> ok end, + erlang:put(mochiweb_request_recv, true) + end, + {ok, Doc#doc{atts=Atts2}, WaitFun, Parser} + end. + +mp_parse_doc({headers, H}, []) -> + case couch_util:get_value("content-type", H) of + {"application/json", _} -> + fun (Next) -> + mp_parse_doc(Next, []) + end + end; +mp_parse_doc({body, Bytes}, AccBytes) -> + fun (Next) -> + mp_parse_doc(Next, [Bytes | AccBytes]) + end; +mp_parse_doc(body_end, AccBytes) -> + receive {get_doc_bytes, From} -> + From ! {doc_bytes, lists:reverse(AccBytes)} + end, + fun (Next) -> + mp_parse_atts(Next, {[], 0, orddict:new(), []}) + end. + +mp_parse_atts({headers, _}, Acc) -> + fun(Next) -> mp_parse_atts(Next, Acc) end; +mp_parse_atts(body_end, Acc) -> + fun(Next) -> mp_parse_atts(Next, Acc) end; +mp_parse_atts({body, Bytes}, {DataList, Offset, Counters, Waiting}) -> + NewAcc = maybe_send_data({DataList++[Bytes], Offset, Counters, Waiting}), + fun(Next) -> mp_parse_atts(Next, NewAcc) end; +mp_parse_atts(eof, {DataList, Offset, Counters, Waiting}) -> + N = list_to_integer(couch_config:get("cluster", "n", "3")), + M = length(Counters), + case (M == N) andalso DataList == [] of + true -> + ok; + false -> + receive {get_bytes, From} -> + C2 = orddict:update_counter(From, 1, Counters), + NewAcc = maybe_send_data({DataList, Offset, C2, [From|Waiting]}), + mp_parse_atts(eof, NewAcc) + after 3600000 -> + ok + end + end. + +maybe_send_data({ChunkList, Offset, Counters, Waiting}) -> + receive {get_bytes, From} -> + NewCounters = orddict:update_counter(From, 1, Counters), + maybe_send_data({ChunkList, Offset, NewCounters, [From|Waiting]}) + after 0 -> + % reply to as many writers as possible + NewWaiting = lists:filter(fun(Writer) -> + WhichChunk = orddict:fetch(Writer, Counters), + ListIndex = WhichChunk - Offset, + if ListIndex =< length(ChunkList) -> + Writer ! {bytes, lists:nth(ListIndex, ChunkList)}, + false; + true -> + true + end + end, Waiting), + + % check if we can drop a chunk from the head of the list + case Counters of + [] -> + SmallestIndex = 0; + _ -> + SmallestIndex = lists:min(element(2, lists:unzip(Counters))) + end, + Size = length(Counters), + N = list_to_integer(couch_config:get("cluster", "n", "3")), + if Size == N andalso SmallestIndex == (Offset+1) -> + NewChunkList = tl(ChunkList), + NewOffset = Offset+1; + true -> + NewChunkList = ChunkList, + NewOffset = Offset + end, + + % we should wait for a writer if no one has written the last chunk + LargestIndex = lists:max([0|element(2, lists:unzip(Counters))]), + if LargestIndex >= (Offset + length(ChunkList)) -> + % someone has written all possible chunks, keep moving + {NewChunkList, NewOffset, Counters, NewWaiting}; + true -> + receive {get_bytes, X} -> + C2 = orddict:update_counter(X, 1, Counters), + maybe_send_data({NewChunkList, NewOffset, C2, [X|NewWaiting]}) + end + end + end. + +abort_multi_part_stream(Parser) -> + abort_multi_part_stream(Parser, erlang:monitor(process, Parser)). + +abort_multi_part_stream(Parser, MonRef) -> + case is_process_alive(Parser) of + true -> + Parser ! {get_bytes, self()}, + receive + {bytes, _Bytes} -> + abort_multi_part_stream(Parser, MonRef); + {'DOWN', MonRef, _, _, _} -> + ok + end; + false -> + erlang:demonitor(MonRef, [flush]) + end. diff --git a/apps/couch/src/couch_drv.erl b/apps/couch/src/couch_drv.erl new file mode 100644 index 00000000..9f4cfac1 --- /dev/null +++ b/apps/couch/src/couch_drv.erl @@ -0,0 +1,38 @@ +-module(couch_drv). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0]). + +-include("couch_db.hrl"). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + case erl_ddll:load(code:priv_dir(couch), "couch_icu_driver") of + ok -> + {ok, nil}; + {error, already_loaded} -> + ?LOG_INFO("~p reloading couch_erl_driver", [?MODULE]), + ok = erl_ddll:reload(code:priv_dir(couch), "couch_erl_driver"), + {ok, nil}; + {error, Error} -> + {stop, erl_ddll:format_error(Error)} + end. + +handle_call(_Request, _From, State) -> + {reply, ok, State}. + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/couch/src/couch_event_sup.erl b/apps/couch/src/couch_event_sup.erl new file mode 100644 index 00000000..07c48790 --- /dev/null +++ b/apps/couch/src/couch_event_sup.erl @@ -0,0 +1,73 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% The purpose of this module is to allow event handlers to particpate in Erlang +%% supervisor trees. It provide a monitorable process that crashes if the event +%% handler fails. The process, when shutdown, deregisters the event handler. + +-module(couch_event_sup). +-behaviour(gen_server). + +-include("couch_db.hrl"). + +-export([start_link/3,start_link/4, stop/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2,code_change/3]). + +% +% Instead calling the +% ok = gen_event:add_sup_handler(error_logger, my_log, Args) +% +% do this: +% {ok, LinkedPid} = couch_event_sup:start_link(error_logger, my_log, Args) +% +% The benefit is the event is now part of the process tree, and can be +% started, restarted and shutdown consistently like the rest of the server +% components. +% +% And now if the "event" crashes, the supervisor is notified and can restart +% the event handler. +% +% Use this form to named process: +% {ok, LinkedPid} = couch_event_sup:start_link({local, my_log}, error_logger, my_log, Args) +% + +start_link(EventMgr, EventHandler, Args) -> + gen_server:start_link(couch_event_sup, {EventMgr, EventHandler, Args}, []). + +start_link(ServerName, EventMgr, EventHandler, Args) -> + gen_server:start_link(ServerName, couch_event_sup, {EventMgr, EventHandler, Args}, []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +init({EventMgr, EventHandler, Args}) -> + case gen_event:add_sup_handler(EventMgr, EventHandler, Args) of + ok -> + {ok, {EventMgr, EventHandler}}; + {stop, Error} -> + {stop, Error} + end. + +terminate(_Reason, _State) -> + ok. + +handle_call(_Whatever, _From, State) -> + {ok, State}. + +handle_cast(stop, State) -> + {stop, normal, State}. + +handle_info({gen_event_EXIT, _Handler, Reason}, State) -> + {stop, Reason, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/couch/src/couch_external_manager.erl b/apps/couch/src/couch_external_manager.erl new file mode 100644 index 00000000..0c66ef8c --- /dev/null +++ b/apps/couch/src/couch_external_manager.erl @@ -0,0 +1,101 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_external_manager). +-behaviour(gen_server). + +-export([start_link/0, execute/2, config_change/2]). +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). + +-include("couch_db.hrl"). + +start_link() -> + gen_server:start_link({local, couch_external_manager}, + couch_external_manager, [], []). + +execute(UrlName, JsonReq) -> + Pid = gen_server:call(couch_external_manager, {get, UrlName}), + case Pid of + {error, Reason} -> + Reason; + _ -> + couch_external_server:execute(Pid, JsonReq) + end. + +config_change("external", UrlName) -> + gen_server:call(couch_external_manager, {config, UrlName}). + +% gen_server API + +init([]) -> + process_flag(trap_exit, true), + Handlers = ets:new(couch_external_manager_handlers, [set, private]), + couch_config:register(fun ?MODULE:config_change/2), + {ok, Handlers}. + +terminate(_Reason, Handlers) -> + ets:foldl(fun({_UrlName, Pid}, nil) -> + couch_external_server:stop(Pid), + nil + end, nil, Handlers), + ok. + +handle_call({get, UrlName}, _From, Handlers) -> + case ets:lookup(Handlers, UrlName) of + [] -> + case couch_config:get("external", UrlName, nil) of + nil -> + Msg = lists:flatten( + io_lib:format("No server configured for ~p.", [UrlName])), + {reply, {error, {unknown_external_server, ?l2b(Msg)}}, Handlers}; + Command -> + {ok, NewPid} = couch_external_server:start_link(UrlName, Command), + true = ets:insert(Handlers, {UrlName, NewPid}), + {reply, NewPid, Handlers} + end; + [{UrlName, Pid}] -> + {reply, Pid, Handlers} + end; +handle_call({config, UrlName}, _From, Handlers) -> + % A newly added handler and a handler that had it's command + % changed are treated exactly the same. + + % Shutdown the old handler. + case ets:lookup(Handlers, UrlName) of + [{UrlName, Pid}] -> + couch_external_server:stop(Pid); + [] -> + ok + end, + % Wait for next request to boot the handler. + {reply, ok, Handlers}. + +handle_cast(_Whatever, State) -> + {noreply, State}. + +handle_info({'EXIT', Pid, normal}, Handlers) -> + ?LOG_INFO("EXTERNAL: Server ~p terminated normally", [Pid]), + % The process terminated normally without us asking - Remove Pid from the + % handlers table so we don't attempt to reuse it + ets:match_delete(Handlers, {'_', Pid}), + {noreply, Handlers}; + +handle_info({'EXIT', Pid, Reason}, Handlers) -> + ?LOG_INFO("EXTERNAL: Server ~p died. (reason: ~p)", [Pid, Reason]), + % Remove Pid from the handlers table so we don't try closing + % it a second time in terminate/2. + ets:match_delete(Handlers, {'_', Pid}), + {stop, normal, Handlers}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + diff --git a/apps/couch/src/couch_external_server.erl b/apps/couch/src/couch_external_server.erl new file mode 100644 index 00000000..045fcee9 --- /dev/null +++ b/apps/couch/src/couch_external_server.erl @@ -0,0 +1,69 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_external_server). +-behaviour(gen_server). + +-export([start_link/2, stop/1, execute/2]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]). + +-include("couch_db.hrl"). + +% External API + +start_link(Name, Command) -> + gen_server:start_link(couch_external_server, [Name, Command], []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +execute(Pid, JsonReq) -> + gen_server:call(Pid, {execute, JsonReq}, infinity). + +% Gen Server Handlers + +init([Name, Command]) -> + ?LOG_INFO("EXTERNAL: Starting process for: ~s", [Name]), + ?LOG_INFO("COMMAND: ~s", [Command]), + process_flag(trap_exit, true), + Timeout = list_to_integer(couch_config:get("couchdb", "os_process_timeout", + "5000")), + {ok, Pid} = couch_os_process:start_link(Command, [{timeout, Timeout}]), + couch_config:register(fun("couchdb", "os_process_timeout", NewTimeout) -> + couch_os_process:set_timeout(Pid, list_to_integer(NewTimeout)) + end), + {ok, {Name, Command, Pid}}. + +terminate(_Reason, {_Name, _Command, Pid}) -> + couch_os_process:stop(Pid), + ok. + +handle_call({execute, JsonReq}, _From, {Name, Command, Pid}) -> + {reply, couch_os_process:prompt(Pid, JsonReq), {Name, Command, Pid}}. + +handle_info({'EXIT', _Pid, normal}, State) -> + {noreply, State}; +handle_info({'EXIT', Pid, Reason}, {Name, Command, Pid}) -> + ?LOG_INFO("EXTERNAL: Process for ~s exiting. (reason: ~w)", [Name, Reason]), + {stop, Reason, {Name, Command, Pid}}. + +handle_cast(stop, {Name, Command, Pid}) -> + ?LOG_INFO("EXTERNAL: Shutting down ~s", [Name]), + exit(Pid, normal), + {stop, normal, {Name, Command, Pid}}; +handle_cast(_Whatever, State) -> + {noreply, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + diff --git a/apps/couch/src/couch_file.erl b/apps/couch/src/couch_file.erl new file mode 100644 index 00000000..1985f5eb --- /dev/null +++ b/apps/couch/src/couch_file.erl @@ -0,0 +1,625 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_file). +-behaviour(gen_server). + +-include("couch_db.hrl"). + +-define(SIZE_BLOCK, 4096). + +-record(file, { + fd, + tail_append_begin = 0, % 09 UPGRADE CODE + eof = 0 + }). + +-export([open/1, open/2, close/1, bytes/1, sync/1, append_binary/2,old_pread/3]). +-export([append_term/2, pread_term/2, pread_iolist/2, write_header/2]). +-export([pread_binary/2, read_header/1, truncate/2, upgrade_old_header/2]). +-export([append_term_md5/2,append_binary_md5/2]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([delete/2,delete/3,init_delete_dir/1]). + +%%---------------------------------------------------------------------- +%% Args: Valid Options are [create] and [create,overwrite]. +%% Files are opened in read/write mode. +%% Returns: On success, {ok, Fd} +%% or {error, Reason} if the file could not be opened. +%%---------------------------------------------------------------------- + +open(Filepath) -> + open(Filepath, []). + +open(Filepath, Options) -> + case gen_server:start_link(couch_file, + {Filepath, Options, self(), Ref = make_ref()}, []) of + {ok, Fd} -> + {ok, Fd}; + ignore -> + % get the error + receive + {Ref, Pid, Error} -> + case process_info(self(), trap_exit) of + {trap_exit, true} -> receive {'EXIT', Pid, _} -> ok end; + {trap_exit, false} -> ok + end, + case Error of + {error, eacces} -> {file_permission_error, Filepath}; + _ -> Error + end + end; + Error -> + Error + end. + + +%%---------------------------------------------------------------------- +%% Purpose: To append an Erlang term to the end of the file. +%% Args: Erlang term to serialize and append to the file. +%% Returns: {ok, Pos} where Pos is the file offset to the beginning the +%% serialized term. Use pread_term to read the term back. +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +append_term(Fd, Term) -> + append_binary(Fd, term_to_binary(Term, [compressed, {minor_version,1}])). + +append_term_md5(Fd, Term) -> + append_binary_md5(Fd, term_to_binary(Term, [compressed, {minor_version,1}])). + + +%%---------------------------------------------------------------------- +%% Purpose: To append an Erlang binary to the end of the file. +%% Args: Erlang term to serialize and append to the file. +%% Returns: {ok, Pos} where Pos is the file offset to the beginning the +%% serialized term. Use pread_term to read the term back. +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +append_binary(Fd, Bin) -> + Size = iolist_size(Bin), + gen_server:call(Fd, {append_bin, + [<<0:1/integer,Size:31/integer>>, Bin]}, infinity). + +append_binary_md5(Fd, Bin) -> + Size = iolist_size(Bin), + gen_server:call(Fd, {append_bin, + [<<1:1/integer,Size:31/integer>>, couch_util:md5(Bin), Bin]}, infinity). + + +%%---------------------------------------------------------------------- +%% Purpose: Reads a term from a file that was written with append_term +%% Args: Pos, the offset into the file where the term is serialized. +%% Returns: {ok, Term} +%% or {error, Reason}. +%%---------------------------------------------------------------------- + + +pread_term(Fd, Pos) -> + {ok, Bin} = pread_binary(Fd, Pos), + {ok, binary_to_term(Bin)}. + + +%%---------------------------------------------------------------------- +%% Purpose: Reads a binrary from a file that was written with append_binary +%% Args: Pos, the offset into the file where the term is serialized. +%% Returns: {ok, Term} +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +pread_binary(Fd, Pos) -> + {ok, L} = pread_iolist(Fd, Pos), + {ok, iolist_to_binary(L)}. + + +pread_iolist(Fd, Pos) -> + case gen_server:call(Fd, {pread_iolist, Pos}, infinity) of + {ok, IoList, <<>>} -> + {ok, IoList}; + {ok, IoList, Md5} -> + case couch_util:md5(IoList) of + Md5 -> + {ok, IoList}; + _ -> + exit({file_corruption, <<"file corruption">>}) + end; + Error -> + Error + end. + +%%---------------------------------------------------------------------- +%% Purpose: The length of a file, in bytes. +%% Returns: {ok, Bytes} +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +% length in bytes +bytes(Fd) -> + gen_server:call(Fd, bytes, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: Truncate a file to the number of bytes. +%% Returns: ok +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +truncate(Fd, Pos) -> + gen_server:call(Fd, {truncate, Pos}, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: Ensure all bytes written to the file are flushed to disk. +%% Returns: ok +%% or {error, Reason}. +%%---------------------------------------------------------------------- + +sync(Filepath) when is_list(Filepath) -> + {ok, Fd} = file:open(Filepath, [append, raw]), + try ok = file:sync(Fd) after ok = file:close(Fd) end; +sync(Fd) -> + gen_server:call(Fd, sync, infinity). + +%%---------------------------------------------------------------------- +%% Purpose: Close the file. +%% Returns: ok +%%---------------------------------------------------------------------- +close(Fd) -> + gen_server:call(Fd, close, infinity). + +delete(RootDir, Filepath) -> + delete(RootDir, Filepath, true). + + +delete(RootDir, Filepath, Async) -> + DelFile = filename:join([RootDir,".delete", ?b2l(couch_uuids:random())]), + case file:rename(Filepath, DelFile) of + ok -> + if (Async) -> + spawn(file, delete, [DelFile]), + ok; + true -> + file:delete(DelFile) + end; + Error -> + Error + end. + + +init_delete_dir(RootDir) -> + Dir = filename:join(RootDir,".delete"), + % note: ensure_dir requires an actual filename companent, which is the + % reason for "foo". + filelib:ensure_dir(filename:join(Dir,"foo")), + filelib:fold_files(Dir, ".*", true, + fun(Filename, _) -> + ok = file:delete(Filename) + end, ok). + + +% 09 UPGRADE CODE +old_pread(Fd, Pos, Len) -> + {ok, <<RawBin:Len/binary>>, false} = gen_server:call(Fd, {pread, Pos, Len}, infinity), + {ok, RawBin}. + +% 09 UPGRADE CODE +upgrade_old_header(Fd, Sig) -> + gen_server:call(Fd, {upgrade_old_header, Sig}, infinity). + + +read_header(Fd) -> + case gen_server:call(Fd, find_header, infinity) of + {ok, Bin} -> + {ok, binary_to_term(Bin)}; + Else -> + Else + end. + +write_header(Fd, Data) -> + Bin = term_to_binary(Data), + Md5 = couch_util:md5(Bin), + % now we assemble the final header binary and write to disk + FinalBin = <<Md5/binary, Bin/binary>>, + gen_server:call(Fd, {write_header, FinalBin}, infinity). + + + + +init_status_error(ReturnPid, Ref, Error) -> + ReturnPid ! {Ref, self(), Error}, + ignore. + +% server functions + +init({Filepath, Options, ReturnPid, Ref}) -> + process_flag(trap_exit, true), + erlang:send_after(60000, self(), maybe_close), + case lists:member(create, Options) of + true -> + filelib:ensure_dir(Filepath), + case file:open(Filepath, [read, append, raw, binary]) of + {ok, Fd} -> + {ok, Length} = file:position(Fd, eof), + case Length > 0 of + true -> + % this means the file already exists and has data. + % FYI: We don't differentiate between empty files and non-existant + % files here. + case lists:member(overwrite, Options) of + true -> + {ok, 0} = file:position(Fd, 0), + ok = file:truncate(Fd), + ok = file:sync(Fd), + maybe_track_open_os_files(Options), + {ok, #file{fd=Fd}}; + false -> + ok = file:close(Fd), + init_status_error(ReturnPid, Ref, file_exists) + end; + false -> + maybe_track_open_os_files(Options), + {ok, #file{fd=Fd}} + end; + Error -> + init_status_error(ReturnPid, Ref, Error) + end; + false -> + % open in read mode first, so we don't create the file if it doesn't exist. + case file:open(Filepath, [read, raw]) of + {ok, Fd_Read} -> + {ok, Fd} = file:open(Filepath, [read, append, raw, binary]), + ok = file:close(Fd_Read), + maybe_track_open_os_files(Options), + {ok, Length} = file:position(Fd, eof), + {ok, #file{fd=Fd, eof=Length}}; + Error -> + init_status_error(ReturnPid, Ref, Error) + end + end. + +maybe_track_open_os_files(_FileOptions) -> + couch_stats_collector:track_process_count({couchdb, open_os_files}). + +terminate(_Reason, #file{fd = nil}) -> + ok; +terminate(_Reason, #file{fd = Fd}) -> + file:close(Fd). + +handle_call(close, _From, #file{fd=Fd}=File) -> + {stop, normal, file:close(Fd), File#file{fd = nil}}; + +handle_call({pread_iolist, Pos}, _From, File) -> + {RawData, NextPos} = try + % up to 8Kbs of read ahead + read_raw_iolist_int(File, Pos, 2 * ?SIZE_BLOCK - (Pos rem ?SIZE_BLOCK)) + catch + _:_ -> + read_raw_iolist_int(File, Pos, 4) + end, + <<Prefix:1/integer, Len:31/integer, RestRawData/binary>> = + iolist_to_binary(RawData), + case Prefix of + 1 -> + {Md5, IoList} = extract_md5( + maybe_read_more_iolist(RestRawData, 16 + Len, NextPos, File)), + {reply, {ok, IoList, Md5}, File}; + 0 -> + IoList = maybe_read_more_iolist(RestRawData, Len, NextPos, File), + {reply, {ok, IoList, <<>>}, File} + end; +handle_call({pread, Pos, Bytes}, _From, #file{fd=Fd,tail_append_begin=TailAppendBegin}=File) -> + {ok, Bin} = file:pread(Fd, Pos, Bytes), + {reply, {ok, Bin, Pos >= TailAppendBegin}, File}; +handle_call(bytes, _From, #file{eof=Length}=File) -> + {reply, {ok, Length}, File}; +handle_call(sync, _From, #file{fd=Fd}=File) -> + {reply, file:sync(Fd), File}; +handle_call({truncate, Pos}, _From, #file{fd=Fd}=File) -> + {ok, Pos} = file:position(Fd, Pos), + case file:truncate(Fd) of + ok -> + {reply, ok, File#file{eof=Pos}}; + Error -> + {reply, Error, File} + end; +handle_call({append_bin, Bin}, _From, #file{fd=Fd, eof=Pos}=File) -> + Blocks = make_blocks(Pos rem ?SIZE_BLOCK, Bin), + case file:write(Fd, Blocks) of + ok -> + {reply, {ok, Pos}, File#file{eof=Pos+iolist_size(Blocks)}}; + Error -> + {reply, Error, File} + end; +handle_call({write_header, Bin}, _From, #file{fd=Fd, eof=Pos}=File) -> + BinSize = size(Bin), + case Pos rem ?SIZE_BLOCK of + 0 -> + Padding = <<>>; + BlockOffset -> + Padding = <<0:(8*(?SIZE_BLOCK-BlockOffset))>> + end, + FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(5, [Bin])], + case file:write(Fd, FinalBin) of + ok -> + {reply, ok, File#file{eof=Pos+iolist_size(FinalBin)}}; + Error -> + {reply, Error, File} + end; + + +handle_call({upgrade_old_header, Prefix}, _From, #file{fd=Fd}=File) -> + case (catch read_old_header(Fd, Prefix)) of + {ok, Header} -> + TailAppendBegin = File#file.eof, + Bin = term_to_binary(Header), + Md5 = couch_util:md5(Bin), + % now we assemble the final header binary and write to disk + FinalBin = <<Md5/binary, Bin/binary>>, + {reply, ok, _} = handle_call({write_header, FinalBin}, ok, File), + ok = write_old_header(Fd, <<"upgraded">>, TailAppendBegin), + {reply, ok, File#file{tail_append_begin=TailAppendBegin}}; + _Error -> + case (catch read_old_header(Fd, <<"upgraded">>)) of + {ok, TailAppendBegin} -> + {reply, ok, File#file{tail_append_begin = TailAppendBegin}}; + _Error2 -> + {reply, ok, File} + end + end; + + +handle_call(find_header, _From, #file{fd=Fd, eof=Pos}=File) -> + {reply, find_header(Fd, Pos div ?SIZE_BLOCK), File}. + +% 09 UPGRADE CODE +-define(HEADER_SIZE, 2048). % size of each segment of the doubly written header + +% 09 UPGRADE CODE +read_old_header(Fd, Prefix) -> + {ok, Bin} = file:pread(Fd, 0, 2*(?HEADER_SIZE)), + <<Bin1:(?HEADER_SIZE)/binary, Bin2:(?HEADER_SIZE)/binary>> = Bin, + Result = + % read the first header + case extract_header(Prefix, Bin1) of + {ok, Header1} -> + case extract_header(Prefix, Bin2) of + {ok, Header2} -> + case Header1 == Header2 of + true -> + % Everything is completely normal! + {ok, Header1}; + false -> + % To get here we must have two different header versions with signatures intact. + % It's weird but possible (a commit failure right at the 2k boundary). Log it and take the first. + ?LOG_INFO("Header version differences.~nPrimary Header: ~p~nSecondary Header: ~p", [Header1, Header2]), + {ok, Header1} + end; + Error -> + % error reading second header. It's ok, but log it. + ?LOG_INFO("Secondary header corruption (error: ~p). Using primary header.", [Error]), + {ok, Header1} + end; + Error -> + % error reading primary header + case extract_header(Prefix, Bin2) of + {ok, Header2} -> + % log corrupt primary header. It's ok since the secondary is still good. + ?LOG_INFO("Primary header corruption (error: ~p). Using secondary header.", [Error]), + {ok, Header2}; + _ -> + % error reading secondary header too + % return the error, no need to log anything as the caller will be responsible for dealing with the error. + Error + end + end, + case Result of + {ok, {pointer_to_header_data, Ptr}} -> + pread_term(Fd, Ptr); + _ -> + Result + end. + +% 09 UPGRADE CODE +extract_header(Prefix, Bin) -> + SizeOfPrefix = size(Prefix), + SizeOfTermBin = ?HEADER_SIZE - + SizeOfPrefix - + 16, % md5 sig + + <<HeaderPrefix:SizeOfPrefix/binary, TermBin:SizeOfTermBin/binary, Sig:16/binary>> = Bin, + + % check the header prefix + case HeaderPrefix of + Prefix -> + % check the integrity signature + case couch_util:md5(TermBin) == Sig of + true -> + Header = binary_to_term(TermBin), + {ok, Header}; + false -> + header_corrupt + end; + _ -> + unknown_header_type + end. + + +% 09 UPGRADE CODE +write_old_header(Fd, Prefix, Data) -> + TermBin = term_to_binary(Data), + % the size of all the bytes written to the header, including the md5 signature (16 bytes) + FilledSize = byte_size(Prefix) + byte_size(TermBin) + 16, + {TermBin2, FilledSize2} = + case FilledSize > ?HEADER_SIZE of + true -> + % too big! + {ok, Pos} = append_binary(Fd, TermBin), + PtrBin = term_to_binary({pointer_to_header_data, Pos}), + {PtrBin, byte_size(Prefix) + byte_size(PtrBin) + 16}; + false -> + {TermBin, FilledSize} + end, + ok = file:sync(Fd), + % pad out the header with zeros, then take the md5 hash + PadZeros = <<0:(8*(?HEADER_SIZE - FilledSize2))>>, + Sig = couch_util:md5([TermBin2, PadZeros]), + % now we assemble the final header binary and write to disk + WriteBin = <<Prefix/binary, TermBin2/binary, PadZeros/binary, Sig/binary>>, + ?HEADER_SIZE = size(WriteBin), % sanity check + DblWriteBin = [WriteBin, WriteBin], + ok = file:pwrite(Fd, 0, DblWriteBin), + ok = file:sync(Fd). + + +handle_cast(close, Fd) -> + {stop,normal,Fd}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info(maybe_close, Fd) -> + case process_info(self(), monitored_by) of + {monitored_by, [_StatsCollector]} -> + {stop, normal, Fd}; + {monitored_by, []} -> + ?LOG_ERROR("~p ~p is un-monitored, maybe stats collector died", + [?MODULE, self()]), + {stop, normal, Fd}; + _Else -> + erlang:send_after(10000, self(), maybe_close), + {noreply, Fd} + end; +handle_info({'EXIT', _, normal}, Fd) -> + {noreply, Fd}; +handle_info({'EXIT', _, Reason}, Fd) -> + {stop, Reason, Fd}. + + +find_header(_Fd, -1) -> + no_valid_header; +find_header(Fd, Block) -> + case (catch load_header(Fd, Block)) of + {ok, Bin} -> + {ok, Bin}; + _Error -> + find_header(Fd, Block -1) + end. + +load_header(Fd, Block) -> + {ok, <<1, HeaderLen:32/integer, RestBlock/binary>>} = + file:pread(Fd, Block * ?SIZE_BLOCK, ?SIZE_BLOCK), + TotalBytes = calculate_total_read_len(1, HeaderLen), + case TotalBytes > byte_size(RestBlock) of + false -> + <<RawBin:TotalBytes/binary, _/binary>> = RestBlock; + true -> + {ok, Missing} = file:pread( + Fd, (Block * ?SIZE_BLOCK) + 5 + byte_size(RestBlock), + TotalBytes - byte_size(RestBlock)), + RawBin = <<RestBlock/binary, Missing/binary>> + end, + <<Md5Sig:16/binary, HeaderBin/binary>> = + iolist_to_binary(remove_block_prefixes(5, RawBin)), + Md5Sig = couch_util:md5(HeaderBin), + {ok, HeaderBin}. + +maybe_read_more_iolist(Buffer, DataSize, _, _) + when DataSize =< byte_size(Buffer) -> + <<Data:DataSize/binary, _/binary>> = Buffer, + [Data]; +maybe_read_more_iolist(Buffer, DataSize, NextPos, File) -> + {Missing, _} = + read_raw_iolist_int(File, NextPos, DataSize - byte_size(Buffer)), + [Buffer, Missing]. + +-spec read_raw_iolist_int(#file{}, Pos::non_neg_integer(), Len::non_neg_integer()) -> + {Data::iolist(), CurPos::non_neg_integer()}. +read_raw_iolist_int(Fd, {Pos, _Size}, Len) -> % 0110 UPGRADE CODE + read_raw_iolist_int(Fd, Pos, Len); +read_raw_iolist_int(#file{fd=Fd, tail_append_begin=TAB}, Pos, Len) -> + BlockOffset = Pos rem ?SIZE_BLOCK, + TotalBytes = calculate_total_read_len(BlockOffset, Len), + {ok, <<RawBin:TotalBytes/binary>>} = file:pread(Fd, Pos, TotalBytes), + if Pos >= TAB -> + {remove_block_prefixes(BlockOffset, RawBin), Pos + TotalBytes}; + true -> + % 09 UPGRADE CODE + <<ReturnBin:Len/binary, _/binary>> = RawBin, + {[ReturnBin], Pos + Len} + end. + +-spec extract_md5(iolist()) -> {binary(), iolist()}. +extract_md5(FullIoList) -> + {Md5List, IoList} = split_iolist(FullIoList, 16, []), + {iolist_to_binary(Md5List), IoList}. + +calculate_total_read_len(0, FinalLen) -> + calculate_total_read_len(1, FinalLen) + 1; +calculate_total_read_len(BlockOffset, FinalLen) -> + case ?SIZE_BLOCK - BlockOffset of + BlockLeft when BlockLeft >= FinalLen -> + FinalLen; + BlockLeft -> + FinalLen + ((FinalLen - BlockLeft) div (?SIZE_BLOCK -1)) + + if ((FinalLen - BlockLeft) rem (?SIZE_BLOCK -1)) =:= 0 -> 0; + true -> 1 end + end. + +remove_block_prefixes(_BlockOffset, <<>>) -> + []; +remove_block_prefixes(0, <<_BlockPrefix,Rest/binary>>) -> + remove_block_prefixes(1, Rest); +remove_block_prefixes(BlockOffset, Bin) -> + BlockBytesAvailable = ?SIZE_BLOCK - BlockOffset, + case size(Bin) of + Size when Size > BlockBytesAvailable -> + <<DataBlock:BlockBytesAvailable/binary,Rest/binary>> = Bin, + [DataBlock | remove_block_prefixes(0, Rest)]; + _Size -> + [Bin] + end. + +make_blocks(_BlockOffset, []) -> + []; +make_blocks(0, IoList) -> + [<<0>> | make_blocks(1, IoList)]; +make_blocks(BlockOffset, IoList) -> + case split_iolist(IoList, (?SIZE_BLOCK - BlockOffset), []) of + {Begin, End} -> + [Begin | make_blocks(0, End)]; + _SplitRemaining -> + IoList + end. + +%% @doc Returns a tuple where the first element contains the leading SplitAt +%% bytes of the original iolist, and the 2nd element is the tail. If SplitAt +%% is larger than byte_size(IoList), return the difference. +-spec split_iolist(IoList::iolist(), SplitAt::non_neg_integer(), Acc::list()) -> + {iolist(), iolist()} | non_neg_integer(). +split_iolist(List, 0, BeginAcc) -> + {lists:reverse(BeginAcc), List}; +split_iolist([], SplitAt, _BeginAcc) -> + SplitAt; +split_iolist([<<Bin/binary>> | Rest], SplitAt, BeginAcc) when SplitAt > byte_size(Bin) -> + split_iolist(Rest, SplitAt - byte_size(Bin), [Bin | BeginAcc]); +split_iolist([<<Bin/binary>> | Rest], SplitAt, BeginAcc) -> + <<Begin:SplitAt/binary,End/binary>> = Bin, + split_iolist([End | Rest], 0, [Begin | BeginAcc]); +split_iolist([Sublist| Rest], SplitAt, BeginAcc) when is_list(Sublist) -> + case split_iolist(Sublist, SplitAt, BeginAcc) of + {Begin, End} -> + {Begin, [End | Rest]}; + SplitRemaining -> + split_iolist(Rest, SplitAt - (SplitAt - SplitRemaining), [Sublist | BeginAcc]) + end; +split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) -> + split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]). diff --git a/apps/couch/src/couch_httpd.erl b/apps/couch/src/couch_httpd.erl new file mode 100644 index 00000000..602bdf2b --- /dev/null +++ b/apps/couch/src/couch_httpd.erl @@ -0,0 +1,1019 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd). +-include("couch_db.hrl"). + +-export([start_link/0, start_link/1, stop/0, handle_request/5]). + +-export([header_value/2,header_value/3,qs_value/2,qs_value/3,qs/1,qs_json_value/3]). +-export([path/1,absolute_uri/2,body_length/1]). +-export([verify_is_server_admin/1,unquote/1,quote/1,recv/2,recv_chunked/4,error_info/1]). +-export([make_fun_spec_strs/1]). +-export([make_arity_1_fun/1, make_arity_2_fun/1, make_arity_3_fun/1]). +-export([parse_form/1,json_body/1,json_body_obj/1,body/1,doc_etag/1, make_etag/1, etag_respond/3]). +-export([primary_header_value/2,partition/1,serve_file/3,serve_file/4, server_header/0]). +-export([start_chunked_response/3,send_chunk/2,log_request/2]). +-export([start_response_length/4, start_response/3, send/2]). +-export([start_json_response/2, start_json_response/3, end_json_response/1]). +-export([send_response/4,send_method_not_allowed/2,send_error/4, send_redirect/2,send_chunked_error/2]). +-export([send_json/2,send_json/3,send_json/4,last_chunk/1,parse_multipart_request/3]). +-export([accepted_encodings/1,handle_request_int/5,validate_referer/1,validate_ctype/2]). + +start_link() -> + start_link(http). +start_link(http) -> + Port = couch_config:get("httpd", "port", "5984"), + start_link(?MODULE, [{port, Port}]); +start_link(https) -> + Port = couch_config:get("ssl", "port", "6984"), + CertFile = couch_config:get("ssl", "cert_file", nil), + KeyFile = couch_config:get("ssl", "key_file", nil), + Options = case CertFile /= nil andalso KeyFile /= nil of + true -> + [{port, Port}, + {ssl, true}, + {ssl_opts, [ + {certfile, CertFile}, + {keyfile, KeyFile}]}]; + false -> + io:format("SSL enabled but PEM certificates are missing.", []), + throw({error, missing_certs}) + end, + start_link(https, Options). +start_link(Name, Options) -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. + + BindAddress = couch_config:get("httpd", "bind_address", any), + DefaultSpec = "{couch_httpd_db, handle_request}", + DefaultFun = make_arity_1_fun( + couch_config:get("httpd", "default_handler", DefaultSpec) + ), + + UrlHandlersList = lists:map( + fun({UrlKey, SpecStr}) -> + {?l2b(UrlKey), make_arity_1_fun(SpecStr)} + end, couch_config:get("httpd_global_handlers")), + + DbUrlHandlersList = lists:map( + fun({UrlKey, SpecStr}) -> + {?l2b(UrlKey), make_arity_2_fun(SpecStr)} + end, couch_config:get("httpd_db_handlers")), + + DesignUrlHandlersList = lists:map( + fun({UrlKey, SpecStr}) -> + {?l2b(UrlKey), make_arity_3_fun(SpecStr)} + end, couch_config:get("httpd_design_handlers")), + + UrlHandlers = dict:from_list(UrlHandlersList), + DbUrlHandlers = dict:from_list(DbUrlHandlersList), + DesignUrlHandlers = dict:from_list(DesignUrlHandlersList), + {ok, ServerOptions} = couch_util:parse_term( + couch_config:get("httpd", "server_options", "[]")), + {ok, SocketOptions} = couch_util:parse_term( + couch_config:get("httpd", "socket_options", "[]")), + Loop = fun(Req)-> + case SocketOptions of + [] -> + ok; + _ -> + ok = mochiweb_socket:setopts(Req:get(socket), SocketOptions) + end, + apply(?MODULE, handle_request, [ + Req, DefaultFun, UrlHandlers, DbUrlHandlers, DesignUrlHandlers + ]) + end, + + % and off we go + + {ok, Pid} = case mochiweb_http:start(Options ++ ServerOptions ++ [ + {loop, Loop}, + {name, Name}, + {ip, BindAddress} + ]) of + {ok, MochiPid} -> {ok, MochiPid}; + {error, Reason} -> + io:format("Failure to start Mochiweb: ~s~n",[Reason]), + throw({error, Reason}) + end, + + ok = couch_config:register( + fun("httpd", "bind_address") -> + ?MODULE:stop(); + ("httpd", "port") -> + ?MODULE:stop(); + ("httpd", "default_handler") -> + ?MODULE:stop(); + ("httpd", "server_options") -> + ?MODULE:stop(); + ("httpd", "socket_options") -> + ?MODULE:stop(); + ("httpd_global_handlers", _) -> + ?MODULE:stop(); + ("httpd_db_handlers", _) -> + ?MODULE:stop(); + ("vhosts", _) -> + ?MODULE:stop(); + ("ssl", _) -> + ?MODULE:stop() + end, Pid), + + {ok, Pid}. + +% SpecStr is a string like "{my_module, my_fun}" +% or "{my_module, my_fun, <<"my_arg">>}" +make_arity_1_fun(SpecStr) -> + case couch_util:parse_term(SpecStr) of + {ok, {Mod, Fun, SpecArg}} -> + fun(Arg) -> Mod:Fun(Arg, SpecArg) end; + {ok, {Mod, Fun}} -> + fun(Arg) -> Mod:Fun(Arg) end + end. + +make_arity_2_fun(SpecStr) -> + case couch_util:parse_term(SpecStr) of + {ok, {Mod, Fun, SpecArg}} -> + fun(Arg1, Arg2) -> Mod:Fun(Arg1, Arg2, SpecArg) end; + {ok, {Mod, Fun}} -> + fun(Arg1, Arg2) -> Mod:Fun(Arg1, Arg2) end + end. + +make_arity_3_fun(SpecStr) -> + case couch_util:parse_term(SpecStr) of + {ok, {Mod, Fun, SpecArg}} -> + fun(Arg1, Arg2, Arg3) -> Mod:Fun(Arg1, Arg2, Arg3, SpecArg) end; + {ok, {Mod, Fun}} -> + fun(Arg1, Arg2, Arg3) -> Mod:Fun(Arg1, Arg2, Arg3) end + end. + +% SpecStr is "{my_module, my_fun}, {my_module2, my_fun2}" +make_fun_spec_strs(SpecStr) -> + re:split(SpecStr, "(?<=})\\s*,\\s*(?={)", [{return, list}]). + +stop() -> + mochiweb_http:stop(?MODULE). + + +handle_request(MochiReq, DefaultFun, UrlHandlers, DbUrlHandlers, + DesignUrlHandlers) -> + + MochiReq1 = couch_httpd_vhost:match_vhost(MochiReq), + handle_request_int(MochiReq1, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers). + +handle_request_int(MochiReq, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers) -> + Begin = now(), + AuthenticationSrcs = make_fun_spec_strs( + couch_config:get("httpd", "authentication_handlers")), + % for the path, use the raw path with the query string and fragment + % removed, but URL quoting left intact + RawUri = MochiReq:get(raw_path), + {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri), + + Headers = MochiReq:get(headers), + + % get requested path + RequestedPath = case MochiReq:get_header_value("x-couchdb-vhost-path") of + undefined -> RawUri; + P -> P + end, + + HandlerKey = + case mochiweb_util:partition(Path, "/") of + {"", "", ""} -> + <<"/">>; % Special case the root url handler + {FirstPart, _, _} -> + list_to_binary(FirstPart) + end, + ?LOG_DEBUG("~p ~s ~p from ~p~nHeaders: ~p", [ + MochiReq:get(method), + RawUri, + MochiReq:get(version), + MochiReq:get(peer), + mochiweb_headers:to_list(MochiReq:get(headers)) + ]), + + Method1 = + case MochiReq:get(method) of + % already an atom + Meth when is_atom(Meth) -> Meth; + + % Non standard HTTP verbs aren't atoms (COPY, MOVE etc) so convert when + % possible (if any module references the atom, then it's existing). + Meth -> couch_util:to_existing_atom(Meth) + end, + increment_method_stats(Method1), + + % allow broken HTTP clients to fake a full method vocabulary with an X-HTTP-METHOD-OVERRIDE header + MethodOverride = MochiReq:get_primary_header_value("X-HTTP-Method-Override"), + Method2 = case lists:member(MethodOverride, ["GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT", "COPY"]) of + true -> + ?LOG_INFO("MethodOverride: ~s (real method was ~s)", [MethodOverride, Method1]), + case Method1 of + 'POST' -> couch_util:to_existing_atom(MethodOverride); + _ -> + % Ignore X-HTTP-Method-Override when the original verb isn't POST. + % I'd like to send a 406 error to the client, but that'd require a nasty refactor. + % throw({not_acceptable, <<"X-HTTP-Method-Override may only be used with POST requests.">>}) + Method1 + end; + _ -> Method1 + end, + + % alias HEAD to GET as mochiweb takes care of stripping the body + Method = case Method2 of + 'HEAD' -> 'GET'; + Other -> Other + end, + + HttpReq = #httpd{ + mochi_req = MochiReq, + peer = MochiReq:get(peer), + method = Method, + requested_path_parts = [list_to_binary(couch_httpd:unquote(Part)) + || Part <- string:tokens(RequestedPath, "/")], + path_parts = [list_to_binary(couch_httpd:unquote(Part)) + || Part <- string:tokens(Path, "/")], + db_url_handlers = DbUrlHandlers, + design_url_handlers = DesignUrlHandlers, + default_fun = DefaultFun, + url_handlers = UrlHandlers + }, + + HandlerFun = couch_util:dict_find(HandlerKey, UrlHandlers, DefaultFun), + + {ok, Resp} = + try + case authenticate_request(HttpReq, AuthenticationSrcs) of + #httpd{} = Req -> + HandlerFun(Req); + Response -> + Response + end + catch + throw:{http_head_abort, Resp0} -> + {ok, Resp0}; + throw:{invalid_json, S} -> + ?LOG_ERROR("attempted upload of invalid JSON (set log_level to debug to log it)", []), + ?LOG_DEBUG("Invalid JSON: ~p",[S]), + send_error(HttpReq, {bad_request, io_lib:format("invalid UTF-8 JSON: ~p",[S])}); + throw:unacceptable_encoding -> + ?LOG_ERROR("unsupported encoding method for the response", []), + send_error(HttpReq, {not_acceptable, "unsupported encoding"}); + throw:bad_accept_encoding_value -> + ?LOG_ERROR("received invalid Accept-Encoding header", []), + send_error(HttpReq, bad_request); + exit:normal -> + exit(normal); + throw:Error -> + ?LOG_DEBUG("Minor error in HTTP request: ~p",[Error]), + ?LOG_DEBUG("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, Error); + error:badarg -> + ?LOG_ERROR("Badarg error in HTTP request",[]), + ?LOG_INFO("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, badarg); + error:function_clause -> + ?LOG_ERROR("function_clause error in HTTP request",[]), + ?LOG_INFO("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, function_clause); + Tag:Error -> + ?LOG_ERROR("Uncaught error in HTTP request: ~p",[{Tag, Error}]), + ?LOG_INFO("Stacktrace: ~p",[erlang:get_stacktrace()]), + send_error(HttpReq, Error) + end, + RequestTime = round(timer:now_diff(now(), Begin)/1000), + couch_stats_collector:record({couchdb, request_time}, RequestTime), + couch_stats_collector:increment({httpd, requests}), + {ok, Resp}. + +% Try authentication handlers in order until one sets a user_ctx +% the auth funs also have the option of returning a response +% move this to couch_httpd_auth? +authenticate_request(#httpd{user_ctx=#user_ctx{}} = Req, _AuthSrcs) -> + Req; +authenticate_request(#httpd{} = Req, []) -> + case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of + "true" -> + throw({unauthorized, <<"Authentication required.">>}); + "false" -> + Req#httpd{user_ctx=#user_ctx{}} + end; +authenticate_request(#httpd{} = Req, [AuthSrc|Rest]) -> + AuthFun = make_arity_1_fun(AuthSrc), + R = case AuthFun(Req) of + #httpd{user_ctx=#user_ctx{}=UserCtx}=Req2 -> + Req2#httpd{user_ctx=UserCtx#user_ctx{handler=?l2b(AuthSrc)}}; + Else -> Else + end, + authenticate_request(R, Rest); +authenticate_request(Response, _AuthSrcs) -> + Response. + +increment_method_stats(Method) -> + couch_stats_collector:increment({httpd_request_methods, Method}). + +validate_referer(Req) -> + Host = host_for_request(Req), + Referer = header_value(Req, "Referer", fail), + case Referer of + fail -> + throw({bad_request, <<"Referer header required.">>}); + Referer -> + {_,RefererHost,_,_,_} = mochiweb_util:urlsplit(Referer), + if + RefererHost =:= Host -> ok; + true -> throw({bad_request, <<"Referer header must match host.">>}) + end + end. + +validate_ctype(Req, Ctype) -> + case couch_httpd:header_value(Req, "Content-Type") of + undefined -> + throw({bad_ctype, "Content-Type must be "++Ctype}); + ReqCtype -> + % ?LOG_ERROR("Ctype ~p ReqCtype ~p",[Ctype,ReqCtype]), + case re:split(ReqCtype, ";", [{return, list}]) of + [Ctype] -> ok; + [Ctype, _Rest] -> ok; + _Else -> + throw({bad_ctype, "Content-Type must be "++Ctype}) + end + end. + +% Utilities + +partition(Path) -> + mochiweb_util:partition(Path, "/"). + +header_value(#httpd{mochi_req=MochiReq}, Key) -> + MochiReq:get_header_value(Key). + +header_value(#httpd{mochi_req=MochiReq}, Key, Default) -> + case MochiReq:get_header_value(Key) of + undefined -> Default; + Value -> Value + end. + +primary_header_value(#httpd{mochi_req=MochiReq}, Key) -> + MochiReq:get_primary_header_value(Key). + +accepted_encodings(#httpd{mochi_req=MochiReq}) -> + case MochiReq:accepted_encodings(["gzip", "identity"]) of + bad_accept_encoding_value -> + throw(bad_accept_encoding_value); + [] -> + throw(unacceptable_encoding); + EncList -> + EncList + end. + +serve_file(Req, RelativePath, DocumentRoot) -> + serve_file(Req, RelativePath, DocumentRoot, []). + +serve_file(#httpd{mochi_req=MochiReq}=Req, RelativePath, DocumentRoot, ExtraHeaders) -> + {ok, MochiReq:serve_file(RelativePath, DocumentRoot, + server_header() ++ couch_httpd_auth:cookie_auth_header(Req, []) ++ ExtraHeaders)}. + +qs_value(Req, Key) -> + qs_value(Req, Key, undefined). + +qs_value(Req, Key, Default) -> + couch_util:get_value(Key, qs(Req), Default). + +qs_json_value(Req, Key, Default) -> + case qs_value(Req, Key, Default) of + Default -> + Default; + Result -> + ?JSON_DECODE(Result) + end. + +qs(#httpd{mochi_req=MochiReq}) -> + MochiReq:parse_qs(). + +path(#httpd{mochi_req=MochiReq}) -> + MochiReq:get(path). + +host_for_request(#httpd{mochi_req=MochiReq}) -> + XHost = couch_config:get("httpd", "x_forwarded_host", "X-Forwarded-Host"), + case MochiReq:get_header_value(XHost) of + undefined -> + case MochiReq:get_header_value("Host") of + undefined -> + {ok, {Address, Port}} = inet:sockname(MochiReq:get(socket)), + inet_parse:ntoa(Address) ++ ":" ++ integer_to_list(Port); + Value1 -> + Value1 + end; + Value -> Value + end. + +absolute_uri(#httpd{mochi_req=MochiReq}=Req, Path) -> + Host = host_for_request(Req), + XSsl = couch_config:get("httpd", "x_forwarded_ssl", "X-Forwarded-Ssl"), + Scheme = case MochiReq:get_header_value(XSsl) of + "on" -> "https"; + _ -> + XProto = couch_config:get("httpd", "x_forwarded_proto", "X-Forwarded-Proto"), + case MochiReq:get_header_value(XProto) of + %% Restrict to "https" and "http" schemes only + "https" -> "https"; + _ -> case MochiReq:get(scheme) of + https -> "https"; + http -> "http" + end + end + end, + Scheme ++ "://" ++ Host ++ Path. + +unquote(UrlEncodedString) -> + mochiweb_util:unquote(UrlEncodedString). + +quote(UrlDecodedString) -> + mochiweb_util:quote_plus(UrlDecodedString). + +parse_form(#httpd{mochi_req=MochiReq}) -> + mochiweb_multipart:parse_form(MochiReq). + +recv(#httpd{mochi_req=MochiReq}, Len) -> + MochiReq:recv(Len). + +recv_chunked(#httpd{mochi_req=MochiReq}, MaxChunkSize, ChunkFun, InitState) -> + % Fun is called once with each chunk + % Fun({Length, Binary}, State) + % called with Length == 0 on the last time. + MochiReq:stream_body(MaxChunkSize, ChunkFun, InitState). + +body_length(Req) -> + case header_value(Req, "Transfer-Encoding") of + undefined -> + case header_value(Req, "Content-Length") of + undefined -> undefined; + Length -> list_to_integer(Length) + end; + "chunked" -> chunked; + Unknown -> {unknown_transfer_encoding, Unknown} + end. + +body(#httpd{mochi_req=MochiReq, req_body=undefined} = Req) -> + case body_length(Req) of + undefined -> + MaxSize = list_to_integer( + couch_config:get("couchdb", "max_document_size", "4294967296")), + MochiReq:recv_body(MaxSize); + chunked -> + ChunkFun = fun({0, _Footers}, Acc) -> + lists:reverse(Acc); + ({_Len, Chunk}, Acc) -> + [Chunk | Acc] + end, + recv_chunked(Req, 8192, ChunkFun, []); + Len -> + MochiReq:recv_body(Len) + end; +body(#httpd{req_body=ReqBody}) -> + ReqBody. + +json_body(Httpd) -> + ?JSON_DECODE(body(Httpd)). + +json_body_obj(Httpd) -> + case json_body(Httpd) of + {Props} -> {Props}; + _Else -> + throw({bad_request, "Request body must be a JSON object"}) + end. + + + +doc_etag(#doc{revs={Start, [DiskRev|_]}}) -> + "\"" ++ ?b2l(couch_doc:rev_to_str({Start, DiskRev})) ++ "\"". + +make_etag(Term) -> + <<SigInt:128/integer>> = couch_util:md5(term_to_binary(Term)), + list_to_binary("\"" ++ lists:flatten(io_lib:format("~.36B",[SigInt])) ++ "\""). + +etag_match(Req, CurrentEtag) when is_binary(CurrentEtag) -> + etag_match(Req, binary_to_list(CurrentEtag)); + +etag_match(Req, CurrentEtag) -> + EtagsToMatch = string:tokens( + couch_httpd:header_value(Req, "If-None-Match", ""), ", "), + lists:member(CurrentEtag, EtagsToMatch). + +etag_respond(Req, CurrentEtag, RespFun) -> + case etag_match(Req, CurrentEtag) of + true -> + % the client has this in their cache. + couch_httpd:send_response(Req, 304, [{"Etag", CurrentEtag}], <<>>); + false -> + % Run the function. + RespFun() + end. + +verify_is_server_admin(#httpd{user_ctx=UserCtx}) -> + verify_is_server_admin(UserCtx); +verify_is_server_admin(#user_ctx{roles=Roles}) -> + case lists:member(<<"_admin">>, Roles) of + true -> ok; + false -> throw({unauthorized, <<"You are not a server admin.">>}) + end. + +log_request(#httpd{mochi_req=MochiReq,peer=Peer}, Code) -> + ?LOG_INFO("~s - - ~p ~s ~B", [ + Peer, + couch_util:to_existing_atom(MochiReq:get(method)), + MochiReq:get(raw_path), + couch_util:to_integer(Code) + ]). + + +start_response_length(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Length) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_codes, Code}), + Resp = MochiReq:start_response_length({Code, Headers ++ server_header() ++ couch_httpd_auth:cookie_auth_header(Req, Headers), Length}), + case MochiReq:get(method) of + 'HEAD' -> throw({http_head_abort, Resp}); + _ -> ok + end, + {ok, Resp}. + +start_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_cdes, Code}), + CookieHeader = couch_httpd_auth:cookie_auth_header(Req, Headers), + Headers2 = Headers ++ server_header() ++ CookieHeader, + Resp = MochiReq:start_response({Code, Headers2}), + case MochiReq:get(method) of + 'HEAD' -> throw({http_head_abort, Resp}); + _ -> ok + end, + {ok, Resp}. + +send(Resp, Data) -> + Resp:send(Data), + {ok, Resp}. + +no_resp_conn_header([]) -> + true; +no_resp_conn_header([{Hdr, _}|Rest]) -> + case string:to_lower(Hdr) of + "connection" -> false; + _ -> no_resp_conn_header(Rest) + end. + +http_1_0_keep_alive(Req, Headers) -> + KeepOpen = Req:should_close() == false, + IsHttp10 = Req:get(version) == {1, 0}, + NoRespHeader = no_resp_conn_header(Headers), + case KeepOpen andalso IsHttp10 andalso NoRespHeader of + true -> [{"Connection", "Keep-Alive"} | Headers]; + false -> Headers + end. + +start_chunked_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_codes, Code}), + Headers2 = http_1_0_keep_alive(MochiReq, Headers), + Resp = MochiReq:respond({Code, Headers2 ++ server_header() ++ couch_httpd_auth:cookie_auth_header(Req, Headers2), chunked}), + case MochiReq:get(method) of + 'HEAD' -> throw({http_head_abort, Resp}); + _ -> ok + end, + {ok, Resp}. + +send_chunk(Resp, Data) -> + case iolist_size(Data) of + 0 -> ok; % do nothing + _ -> Resp:write_chunk(Data) + end, + {ok, Resp}. + +last_chunk(Resp) -> + Resp:write_chunk([]), + {ok, Resp}. + +send_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers, Body) -> + log_request(Req, Code), + couch_stats_collector:increment({httpd_status_codes, Code}), + Headers2 = http_1_0_keep_alive(MochiReq, Headers), + if Code >= 400 -> + ?LOG_DEBUG("httpd ~p error response:~n ~s", [Code, Body]); + true -> ok + end, + {ok, MochiReq:respond({Code, Headers2 ++ server_header() ++ couch_httpd_auth:cookie_auth_header(Req, Headers2), Body})}. + +send_method_not_allowed(Req, Methods) -> + send_error(Req, 405, [{"Allow", Methods}], <<"method_not_allowed">>, ?l2b("Only " ++ Methods ++ " allowed")). + +send_json(Req, Value) -> + send_json(Req, 200, Value). + +send_json(Req, Code, Value) -> + send_json(Req, Code, [], Value). + +send_json(Req, Code, Headers, Value) -> + initialize_jsonp(Req), + DefaultHeaders = [ + {"Content-Type", negotiate_content_type(Req)}, + {"Cache-Control", "must-revalidate"} + ], + Body = [start_jsonp(), ?JSON_ENCODE(Value), end_jsonp(), $\n], + send_response(Req, Code, DefaultHeaders ++ Headers, Body). + +start_json_response(Req, Code) -> + start_json_response(Req, Code, []). + +start_json_response(Req, Code, Headers) -> + initialize_jsonp(Req), + DefaultHeaders = [ + {"Content-Type", negotiate_content_type(Req)}, + {"Cache-Control", "must-revalidate"} + ], + {ok, Resp} = start_chunked_response(Req, Code, DefaultHeaders ++ Headers), + case start_jsonp() of + [] -> ok; + Start -> send_chunk(Resp, Start) + end, + {ok, Resp}. + +end_json_response(Resp) -> + send_chunk(Resp, end_jsonp() ++ [$\n]), + last_chunk(Resp). + +initialize_jsonp(Req) -> + case get(jsonp) of + undefined -> put(jsonp, qs_value(Req, "callback", no_jsonp)); + _ -> ok + end, + case get(jsonp) of + no_jsonp -> []; + [] -> []; + CallBack -> + try + % make sure jsonp is configured on (default off) + case couch_config:get("httpd", "allow_jsonp", "false") of + "true" -> + validate_callback(CallBack); + _Else -> + put(jsonp, no_jsonp) + end + catch + Error -> + put(jsonp, no_jsonp), + throw(Error) + end + end. + +start_jsonp() -> + case get(jsonp) of + no_jsonp -> []; + [] -> []; + CallBack -> CallBack ++ "(" + end. + +end_jsonp() -> + Resp = case get(jsonp) of + no_jsonp -> []; + [] -> []; + _ -> ");" + end, + put(jsonp, undefined), + Resp. + +validate_callback(CallBack) when is_binary(CallBack) -> + validate_callback(binary_to_list(CallBack)); +validate_callback([]) -> + ok; +validate_callback([Char | Rest]) -> + case Char of + _ when Char >= $a andalso Char =< $z -> ok; + _ when Char >= $A andalso Char =< $Z -> ok; + _ when Char >= $0 andalso Char =< $9 -> ok; + _ when Char == $. -> ok; + _ when Char == $_ -> ok; + _ when Char == $[ -> ok; + _ when Char == $] -> ok; + _ -> + throw({bad_request, invalid_callback}) + end, + validate_callback(Rest). + + +error_info({Error, Reason}) when is_list(Reason) -> + error_info({Error, ?l2b(Reason)}); +error_info(bad_request) -> + {400, <<"bad_request">>, <<>>}; +error_info({bad_request, Reason}) -> + {400, <<"bad_request">>, Reason}; +error_info({query_parse_error, Reason}) -> + {400, <<"query_parse_error">>, Reason}; +% Prior art for md5 mismatch resulting in a 400 is from AWS S3 +error_info(md5_mismatch) -> + {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>}; +error_info(not_found) -> + {404, <<"not_found">>, <<"missing">>}; +error_info({not_found, Reason}) -> + {404, <<"not_found">>, Reason}; +error_info({not_acceptable, Reason}) -> + {406, <<"not_acceptable">>, Reason}; +error_info(conflict) -> + {409, <<"conflict">>, <<"Document update conflict.">>}; +error_info({forbidden, Msg}) -> + {403, <<"forbidden">>, Msg}; +error_info({unauthorized, Msg}) -> + {401, <<"unauthorized">>, Msg}; +error_info(file_exists) -> + {412, <<"file_exists">>, <<"The database could not be " + "created, the file already exists.">>}; +error_info({bad_ctype, Reason}) -> + {415, <<"bad_content_type">>, Reason}; +error_info(requested_range_not_satisfiable) -> + {416, <<"requested_range_not_satisfiable">>, <<"Requested range not satisfiable">>}; +error_info({error, illegal_database_name}) -> + {400, <<"illegal_database_name">>, <<"Only lowercase characters (a-z), " + "digits (0-9), and any of the characters _, $, (, ), +, -, and / " + "are allowed. Must begin with a letter.">>}; +error_info({missing_stub, Reason}) -> + {412, <<"missing_stub">>, Reason}; +error_info({Error, Reason}) -> + {500, couch_util:to_binary(Error), couch_util:to_binary(Reason)}; +error_info(Error) -> + {500, <<"unknown_error">>, couch_util:to_binary(Error)}. + +error_headers(#httpd{mochi_req=MochiReq}=Req, Code, ErrorStr, ReasonStr) -> + if Code == 401 -> + % this is where the basic auth popup is triggered + case MochiReq:get_header_value("X-CouchDB-WWW-Authenticate") of + undefined -> + case couch_config:get("httpd", "WWW-Authenticate", nil) of + nil -> + % If the client is a browser and the basic auth popup isn't turned on + % redirect to the session page. + case ErrorStr of + <<"unauthorized">> -> + case couch_config:get("couch_httpd_auth", "authentication_redirect", nil) of + nil -> {Code, []}; + AuthRedirect -> + case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of + "true" -> + % send the browser popup header no matter what if we are require_valid_user + {Code, [{"WWW-Authenticate", "Basic realm=\"server\""}]}; + _False -> + case MochiReq:accepts_content_type("application/json") of + true -> + {Code, []}; + false -> + case MochiReq:accepts_content_type("text/html") of + true -> + % Redirect to the path the user requested, not + % the one that is used internally. + UrlReturnRaw = case MochiReq:get_header_value("x-couchdb-vhost-path") of + undefined -> + MochiReq:get(path); + VHostPath -> + VHostPath + end, + RedirectLocation = lists:flatten([ + AuthRedirect, + "?return=", couch_util:url_encode(UrlReturnRaw), + "&reason=", couch_util:url_encode(ReasonStr) + ]), + {302, [{"Location", absolute_uri(Req, RedirectLocation)}]}; + false -> + {Code, []} + end + end + end + end; + _Else -> + {Code, []} + end; + Type -> + {Code, [{"WWW-Authenticate", Type}]} + end; + Type -> + {Code, [{"WWW-Authenticate", Type}]} + end; + true -> + {Code, []} + end. + +send_error(_Req, {already_sent, Resp, _Error}) -> + {ok, Resp}; + +send_error(Req, Error) -> + {Code, ErrorStr, ReasonStr} = error_info(Error), + {Code1, Headers} = error_headers(Req, Code, ErrorStr, ReasonStr), + send_error(Req, Code1, Headers, ErrorStr, ReasonStr). + +send_error(Req, Code, ErrorStr, ReasonStr) -> + send_error(Req, Code, [], ErrorStr, ReasonStr). + +send_error(Req, Code, Headers, ErrorStr, ReasonStr) -> + send_json(Req, Code, Headers, + {[{<<"error">>, ErrorStr}, + {<<"reason">>, ReasonStr}]}). + +% give the option for list functions to output html or other raw errors +send_chunked_error(Resp, {_Error, {[{<<"body">>, Reason}]}}) -> + send_chunk(Resp, Reason), + last_chunk(Resp); + +send_chunked_error(Resp, Error) -> + {Code, ErrorStr, ReasonStr} = error_info(Error), + JsonError = {[{<<"code">>, Code}, + {<<"error">>, ErrorStr}, + {<<"reason">>, ReasonStr}]}, + send_chunk(Resp, ?l2b([$\n,?JSON_ENCODE(JsonError),$\n])), + last_chunk(Resp). + +send_redirect(Req, Path) -> + Headers = [{"Location", couch_httpd:absolute_uri(Req, Path)}], + send_response(Req, 301, Headers, <<>>). + +negotiate_content_type(Req) -> + case get(jsonp) of + no_jsonp -> negotiate_content_type1(Req); + [] -> negotiate_content_type1(Req); + _Callback -> "text/javascript" + end. + +negotiate_content_type1(#httpd{mochi_req=MochiReq}) -> + %% Determine the appropriate Content-Type header for a JSON response + %% depending on the Accept header in the request. A request that explicitly + %% lists the correct JSON MIME type will get that type, otherwise the + %% response will have the generic MIME type "text/plain" + AcceptedTypes = case MochiReq:get_header_value("Accept") of + undefined -> []; + AcceptHeader -> string:tokens(AcceptHeader, ", ") + end, + case lists:member("application/json", AcceptedTypes) of + true -> "application/json"; + false -> "text/plain;charset=utf-8" + end. + +server_header() -> + [{"Server", "CouchDB/" ++ couch:version() ++ + " (Erlang OTP/" ++ erlang:system_info(otp_release) ++ ")"}]. + + +-record(mp, {boundary, buffer, data_fun, callback}). + + +parse_multipart_request(ContentType, DataFun, Callback) -> + Boundary0 = iolist_to_binary(get_boundary(ContentType)), + Boundary = <<"\r\n--", Boundary0/binary>>, + Mp = #mp{boundary= Boundary, + buffer= <<>>, + data_fun=DataFun, + callback=Callback}, + {Mp2, _NilCallback} = read_until(Mp, <<"--", Boundary0/binary>>, + fun(Next)-> nil_callback(Next) end), + #mp{buffer=Buffer, data_fun=DataFun2, callback=Callback2} = + parse_part_header(Mp2), + {Buffer, DataFun2, Callback2}. + +nil_callback(_Data)-> + fun(Next) -> nil_callback(Next) end. + +get_boundary({"multipart/" ++ _, Opts}) -> + case couch_util:get_value("boundary", Opts) of + S when is_list(S) -> + S + end; +get_boundary(ContentType) -> + {"multipart/" ++ _ , Opts} = mochiweb_util:parse_header(ContentType), + get_boundary({"multipart/", Opts}). + + + +split_header(<<>>) -> + []; +split_header(Line) -> + {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end, + binary_to_list(Line)), + [{string:to_lower(string:strip(Name)), + mochiweb_util:parse_header(Value)}]. + +read_until(#mp{data_fun=DataFun, buffer=Buffer}=Mp, Pattern, Callback) -> + case find_in_binary(Pattern, Buffer) of + not_found -> + Callback2 = Callback(Buffer), + {Buffer2, DataFun2} = DataFun(), + Buffer3 = iolist_to_binary(Buffer2), + read_until(Mp#mp{data_fun=DataFun2,buffer=Buffer3}, Pattern, Callback2); + {partial, 0} -> + {NewData, DataFun2} = DataFun(), + read_until(Mp#mp{data_fun=DataFun2, + buffer= iolist_to_binary([Buffer,NewData])}, + Pattern, Callback); + {partial, Skip} -> + <<DataChunk:Skip/binary, Rest/binary>> = Buffer, + Callback2 = Callback(DataChunk), + {NewData, DataFun2} = DataFun(), + read_until(Mp#mp{data_fun=DataFun2, + buffer= iolist_to_binary([Rest | NewData])}, + Pattern, Callback2); + {exact, 0} -> + PatternLen = size(Pattern), + <<_:PatternLen/binary, Rest/binary>> = Buffer, + {Mp#mp{buffer= Rest}, Callback}; + {exact, Skip} -> + PatternLen = size(Pattern), + <<DataChunk:Skip/binary, _:PatternLen/binary, Rest/binary>> = Buffer, + Callback2 = Callback(DataChunk), + {Mp#mp{buffer= Rest}, Callback2} + end. + + +parse_part_header(#mp{callback=UserCallBack}=Mp) -> + {Mp2, AccCallback} = read_until(Mp, <<"\r\n\r\n">>, + fun(Next) -> acc_callback(Next, []) end), + HeaderData = AccCallback(get_data), + + Headers = + lists:foldl(fun(Line, Acc) -> + split_header(Line) ++ Acc + end, [], re:split(HeaderData,<<"\r\n">>, [])), + NextCallback = UserCallBack({headers, Headers}), + parse_part_body(Mp2#mp{callback=NextCallback}). + +parse_part_body(#mp{boundary=Prefix, callback=Callback}=Mp) -> + {Mp2, WrappedCallback} = read_until(Mp, Prefix, + fun(Data) -> body_callback_wrapper(Data, Callback) end), + Callback2 = WrappedCallback(get_callback), + Callback3 = Callback2(body_end), + case check_for_last(Mp2#mp{callback=Callback3}) of + {last, #mp{callback=Callback3}=Mp3} -> + Mp3#mp{callback=Callback3(eof)}; + {more, Mp3} -> + parse_part_header(Mp3) + end. + +acc_callback(get_data, Acc)-> + iolist_to_binary(lists:reverse(Acc)); +acc_callback(Data, Acc)-> + fun(Next) -> acc_callback(Next, [Data | Acc]) end. + +body_callback_wrapper(get_callback, Callback) -> + Callback; +body_callback_wrapper(Data, Callback) -> + Callback2 = Callback({body, Data}), + fun(Next) -> body_callback_wrapper(Next, Callback2) end. + + +check_for_last(#mp{buffer=Buffer, data_fun=DataFun}=Mp) -> + case Buffer of + <<"--",_/binary>> -> {last, Mp}; + <<_, _, _/binary>> -> {more, Mp}; + _ -> % not long enough + {Data, DataFun2} = DataFun(), + check_for_last(Mp#mp{buffer= <<Buffer/binary, Data/binary>>, + data_fun = DataFun2}) + end. + +find_in_binary(B, Data) when size(B) > 0 -> + case size(Data) - size(B) of + Last when Last < 0 -> + partial_find(B, Data, 0, size(Data)); + Last -> + find_in_binary(B, size(B), Data, 0, Last) + end. + +find_in_binary(B, BS, D, N, Last) when N =< Last-> + case D of + <<_:N/binary, B:BS/binary, _/binary>> -> + {exact, N}; + _ -> + find_in_binary(B, BS, D, 1 + N, Last) + end; +find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last -> + partial_find(B, D, N, BS - 1). + +partial_find(_B, _D, _N, 0) -> + not_found; +partial_find(B, D, N, K) -> + <<B1:K/binary, _/binary>> = B, + case D of + <<_Skip:N/binary, B1/binary>> -> + {partial, N}; + _ -> + partial_find(B, D, 1 + N, K - 1) + end. + + diff --git a/apps/couch/src/couch_httpd_auth.erl b/apps/couch/src/couch_httpd_auth.erl new file mode 100644 index 00000000..9f6ed18a --- /dev/null +++ b/apps/couch/src/couch_httpd_auth.erl @@ -0,0 +1,360 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_auth). +-include("couch_db.hrl"). + +-export([default_authentication_handler/1,special_test_authentication_handler/1]). +-export([cookie_authentication_handler/1]). +-export([null_authentication_handler/1]). +-export([proxy_authentification_handler/1]). +-export([cookie_auth_header/2]). +-export([handle_session_req/1]). + +-import(couch_httpd, [header_value/2, send_json/2,send_json/4, send_method_not_allowed/2]). + +special_test_authentication_handler(Req) -> + case header_value(Req, "WWW-Authenticate") of + "X-Couch-Test-Auth " ++ NamePass -> + % NamePass is a colon separated string: "joe schmoe:a password". + [Name, Pass] = re:split(NamePass, ":", [{return, list}]), + case {Name, Pass} of + {"Jan Lehnardt", "apple"} -> ok; + {"Christopher Lenz", "dog food"} -> ok; + {"Noah Slater", "biggiesmalls endian"} -> ok; + {"Chris Anderson", "mp3"} -> ok; + {"Damien Katz", "pecan pie"} -> ok; + {_, _} -> + throw({unauthorized, <<"Name or password is incorrect.">>}) + end, + Req#httpd{user_ctx=#user_ctx{name=?l2b(Name)}}; + _ -> + % No X-Couch-Test-Auth credentials sent, give admin access so the + % previous authentication can be restored after the test + Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}} + end. + +basic_name_pw(Req) -> + AuthorizationHeader = header_value(Req, "Authorization"), + case AuthorizationHeader of + "Basic " ++ Base64Value -> + case string:tokens(?b2l(base64:decode(Base64Value)),":") of + ["_", "_"] -> + % special name and pass to be logged out + nil; + [User, Pass] -> + {User, Pass}; + [User | Pass] -> + {User, string:join(Pass, ":")}; + _ -> + nil + end; + _ -> + nil + end. + +default_authentication_handler(Req) -> + case basic_name_pw(Req) of + {User, Pass} -> + case couch_auth_cache:get_user_creds(User) of + nil -> + throw({unauthorized, <<"Name or password is incorrect.">>}); + UserProps -> + UserSalt = couch_util:get_value(<<"salt">>, UserProps, <<>>), + PasswordHash = hash_password(?l2b(Pass), UserSalt), + ExpectedHash = couch_util:get_value(<<"password_sha">>, UserProps, nil), + case couch_util:verify(ExpectedHash, PasswordHash) of + true -> + Req#httpd{user_ctx=#user_ctx{ + name=?l2b(User), + roles=couch_util:get_value(<<"roles">>, UserProps, []) + }}; + _Else -> + throw({unauthorized, <<"Name or password is incorrect.">>}) + end + end; + nil -> + case couch_server:has_admins() of + true -> + Req; + false -> + case couch_config:get("couch_httpd_auth", "require_valid_user", "false") of + "true" -> Req; + % If no admins, and no user required, then everyone is admin! + % Yay, admin party! + _ -> Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}} + end + end + end. + +null_authentication_handler(Req) -> + Req#httpd{user_ctx=#user_ctx{roles=[<<"_admin">>]}}. + +%% @doc proxy auth handler. +% +% This handler allows creation of a userCtx object from a user authenticated remotly. +% The client just pass specific headers to CouchDB and the handler create the userCtx. +% Headers name can be defined in local.ini. By thefault they are : +% +% * X-Auth-CouchDB-UserName : contain the username, (x_auth_username in +% couch_httpd_auth section) +% * X-Auth-CouchDB-Roles : contain the user roles, list of roles separated by a +% comma (x_auth_roles in couch_httpd_auth section) +% * X-Auth-CouchDB-Token : token to authenticate the authorization (x_auth_token +% in couch_httpd_auth section). This token is an hmac-sha1 created from secret key +% and username. The secret key should be the same in the client and couchdb node. s +% ecret key is the secret key in couch_httpd_auth section of ini. This token is optional +% if value of proxy_use_secret key in couch_httpd_auth section of ini isn't true. +% +proxy_authentification_handler(Req) -> + case proxy_auth_user(Req) of + nil -> Req; + Req2 -> Req2 + end. + +proxy_auth_user(Req) -> + XHeaderUserName = couch_config:get("couch_httpd_auth", "x_auth_username", + "X-Auth-CouchDB-UserName"), + XHeaderRoles = couch_config:get("couch_httpd_auth", "x_auth_roles", + "X-Auth-CouchDB-Roles"), + XHeaderToken = couch_config:get("couch_httpd_auth", "x_auth_token", + "X-Auth-CouchDB-Token"), + case header_value(Req, XHeaderUserName) of + undefined -> nil; + UserName -> + Roles = case header_value(Req, XHeaderRoles) of + undefined -> []; + Else -> + [?l2b(R) || R <- string:tokens(Else, ",")] + end, + case couch_config:get("couch_httpd_auth", "proxy_use_secret", "false") of + "true" -> + case couch_config:get("couch_httpd_auth", "secret", nil) of + nil -> + Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), roles=Roles}}; + Secret -> + ExpectedToken = couch_util:to_hex(crypto:sha_mac(Secret, UserName)), + case header_value(Req, XHeaderToken) of + Token when Token == ExpectedToken -> + Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), + roles=Roles}}; + _ -> nil + end + end; + _ -> + Req#httpd{user_ctx=#user_ctx{name=?l2b(UserName), roles=Roles}} + end + end. + + +cookie_authentication_handler(#httpd{mochi_req=MochiReq}=Req) -> + case MochiReq:get_cookie_value("AuthSession") of + undefined -> Req; + [] -> Req; + Cookie -> + [User, TimeStr | HashParts] = try + AuthSession = couch_util:decodeBase64Url(Cookie), + [_A, _B | _Cs] = string:tokens(?b2l(AuthSession), ":") + catch + _:_Error -> + Reason = <<"Malformed AuthSession cookie. Please clear your cookies.">>, + throw({bad_request, Reason}) + end, + % Verify expiry and hash + CurrentTime = make_cookie_time(), + case couch_config:get("couch_httpd_auth", "secret", nil) of + nil -> + ?LOG_DEBUG("cookie auth secret is not set",[]), + Req; + SecretStr -> + Secret = ?l2b(SecretStr), + case couch_auth_cache:get_user_creds(User) of + nil -> Req; + UserProps -> + UserSalt = couch_util:get_value(<<"salt">>, UserProps, <<"">>), + FullSecret = <<Secret/binary, UserSalt/binary>>, + ExpectedHash = crypto:sha_mac(FullSecret, User ++ ":" ++ TimeStr), + Hash = ?l2b(string:join(HashParts, ":")), + Timeout = to_int(couch_config:get("couch_httpd_auth", "timeout", 600)), + ?LOG_DEBUG("timeout ~p", [Timeout]), + case (catch erlang:list_to_integer(TimeStr, 16)) of + TimeStamp when CurrentTime < TimeStamp + Timeout -> + case couch_util:verify(ExpectedHash, Hash) of + true -> + TimeLeft = TimeStamp + Timeout - CurrentTime, + ?LOG_DEBUG("Successful cookie auth as: ~p", [User]), + Req#httpd{user_ctx=#user_ctx{ + name=?l2b(User), + roles=couch_util:get_value(<<"roles">>, UserProps, []) + }, auth={FullSecret, TimeLeft < Timeout*0.9}}; + _Else -> + Req + end; + _Else -> + Req + end + end + end + end. + +cookie_auth_header(#httpd{user_ctx=#user_ctx{name=null}}, _Headers) -> []; +cookie_auth_header(#httpd{user_ctx=#user_ctx{name=User}, auth={Secret, true}}=Req, Headers) -> + % Note: we only set the AuthSession cookie if: + % * a valid AuthSession cookie has been received + % * we are outside a 10% timeout window + % * and if an AuthSession cookie hasn't already been set e.g. by a login + % or logout handler. + % The login and logout handlers need to set the AuthSession cookie + % themselves. + CookieHeader = couch_util:get_value("Set-Cookie", Headers, ""), + Cookies = mochiweb_cookies:parse_cookie(CookieHeader), + AuthSession = couch_util:get_value("AuthSession", Cookies), + if AuthSession == undefined -> + TimeStamp = make_cookie_time(), + [cookie_auth_cookie(Req, ?b2l(User), Secret, TimeStamp)]; + true -> + [] + end; +cookie_auth_header(_Req, _Headers) -> []. + +cookie_auth_cookie(Req, User, Secret, TimeStamp) -> + SessionData = User ++ ":" ++ erlang:integer_to_list(TimeStamp, 16), + Hash = crypto:sha_mac(Secret, SessionData), + mochiweb_cookies:cookie("AuthSession", + couch_util:encodeBase64Url(SessionData ++ ":" ++ ?b2l(Hash)), + [{path, "/"}] ++ cookie_scheme(Req)). + +hash_password(Password, Salt) -> + ?l2b(couch_util:to_hex(crypto:sha(<<Password/binary, Salt/binary>>))). + +ensure_cookie_auth_secret() -> + case couch_config:get("couch_httpd_auth", "secret", nil) of + nil -> + NewSecret = ?b2l(couch_uuids:random()), + couch_config:set("couch_httpd_auth", "secret", NewSecret), + NewSecret; + Secret -> Secret + end. + +% session handlers +% Login handler with user db +handle_session_req(#httpd{method='POST', mochi_req=MochiReq}=Req) -> + ReqBody = MochiReq:recv_body(), + Form = case MochiReq:get_primary_header_value("content-type") of + % content type should be json + "application/x-www-form-urlencoded" ++ _ -> + mochiweb_util:parse_qs(ReqBody); + "application/json" ++ _ -> + {Pairs} = ?JSON_DECODE(ReqBody), + lists:map(fun({Key, Value}) -> + {?b2l(Key), ?b2l(Value)} + end, Pairs); + _ -> + [] + end, + UserName = ?l2b(couch_util:get_value("name", Form, "")), + Password = ?l2b(couch_util:get_value("password", Form, "")), + ?LOG_DEBUG("Attempt Login: ~s",[UserName]), + User = case couch_auth_cache:get_user_creds(UserName) of + nil -> []; + Result -> Result + end, + UserSalt = couch_util:get_value(<<"salt">>, User, <<>>), + PasswordHash = hash_password(Password, UserSalt), + ExpectedHash = couch_util:get_value(<<"password_sha">>, User, nil), + case couch_util:verify(ExpectedHash, PasswordHash) of + true -> + % setup the session cookie + Secret = ?l2b(ensure_cookie_auth_secret()), + CurrentTime = make_cookie_time(), + Cookie = cookie_auth_cookie(Req, ?b2l(UserName), <<Secret/binary, UserSalt/binary>>, CurrentTime), + % TODO document the "next" feature in Futon + {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of + nil -> + {200, [Cookie]}; + Redirect -> + {302, [Cookie, {"Location", couch_httpd:absolute_uri(Req, Redirect)}]} + end, + send_json(Req#httpd{req_body=ReqBody}, Code, Headers, + {[ + {ok, true}, + {name, couch_util:get_value(<<"name">>, User, null)}, + {roles, couch_util:get_value(<<"roles">>, User, [])} + ]}); + _Else -> + % clear the session + Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}] ++ cookie_scheme(Req)), + send_json(Req, 401, [Cookie], {[{error, <<"unauthorized">>},{reason, <<"Name or password is incorrect.">>}]}) + end; +% get user info +% GET /_session +handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req) -> + Name = UserCtx#user_ctx.name, + ForceLogin = couch_httpd:qs_value(Req, "basic", "false"), + case {Name, ForceLogin} of + {null, "true"} -> + throw({unauthorized, <<"Please login.">>}); + {Name, _} -> + send_json(Req, {[ + % remove this ok + {ok, true}, + {<<"userCtx">>, {[ + {name, Name}, + {roles, UserCtx#user_ctx.roles} + ]}}, + {info, {[ + {authentication_db, ?l2b(couch_config:get("couch_httpd_auth", "authentication_db"))}, + {authentication_handlers, [auth_name(H) || H <- couch_httpd:make_fun_spec_strs( + couch_config:get("httpd", "authentication_handlers"))]} + ] ++ maybe_value(authenticated, UserCtx#user_ctx.handler, fun(Handler) -> + auth_name(?b2l(Handler)) + end)}} + ]}) + end; +% logout by deleting the session +handle_session_req(#httpd{method='DELETE'}=Req) -> + Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}] ++ cookie_scheme(Req)), + {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of + nil -> + {200, [Cookie]}; + Redirect -> + {302, [Cookie, {"Location", couch_httpd:absolute_uri(Req, Redirect)}]} + end, + send_json(Req, Code, Headers, {[{ok, true}]}); +handle_session_req(Req) -> + send_method_not_allowed(Req, "GET,HEAD,POST,DELETE"). + +maybe_value(_Key, undefined, _Fun) -> []; +maybe_value(Key, Else, Fun) -> + [{Key, Fun(Else)}]. + +auth_name(String) when is_list(String) -> + [_,_,_,_,_,Name|_] = re:split(String, "[\\W_]", [{return, list}]), + ?l2b(Name). + +to_int(Value) when is_binary(Value) -> + to_int(?b2l(Value)); +to_int(Value) when is_list(Value) -> + list_to_integer(Value); +to_int(Value) when is_integer(Value) -> + Value. + +make_cookie_time() -> + {NowMS, NowS, _} = erlang:now(), + NowMS * 1000000 + NowS. + +cookie_scheme(#httpd{mochi_req=MochiReq}) -> + [{http_only, true}] ++ + case MochiReq:get(scheme) of + http -> []; + https -> [{secure, true}] + end. diff --git a/apps/couch/src/couch_httpd_db.erl b/apps/couch/src/couch_httpd_db.erl new file mode 100644 index 00000000..0bf97e26 --- /dev/null +++ b/apps/couch/src/couch_httpd_db.erl @@ -0,0 +1,1311 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_db). +-include("couch_db.hrl"). + +-export([handle_request/1, handle_compact_req/2, handle_design_req/2, + db_req/2, couch_doc_open/4,handle_changes_req/2, + update_doc_result_to_json/1, update_doc_result_to_json/2, + handle_design_info_req/3, handle_view_cleanup_req/2]). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, + send_response/4,start_json_response/2,start_json_response/3, + send_chunk/2,last_chunk/1,end_json_response/1, + start_chunked_response/3, absolute_uri/2, send/2, + start_response_length/4]). + +-record(doc_query_args, { + options = [], + rev = nil, + open_revs = [], + update_type = interactive_edit, + atts_since = nil +}). + +% Database request handlers +handle_request(#httpd{path_parts=[DbName|RestParts],method=Method, + db_url_handlers=DbUrlHandlers}=Req)-> + case {Method, RestParts} of + {'PUT', []} -> + create_db_req(Req, DbName); + {'DELETE', []} -> + % if we get ?rev=... the user is using a faulty script where the + % document id is empty by accident. Let them recover safely. + case couch_httpd:qs_value(Req, "rev", false) of + false -> delete_db_req(Req, DbName); + _Rev -> throw({bad_request, + "You tried to DELETE a database with a ?=rev parameter. " + ++ "Did you mean to DELETE a document instead?"}) + end; + {_, []} -> + do_db_req(Req, fun db_req/2); + {_, [SecondPart|_]} -> + Handler = couch_util:dict_find(SecondPart, DbUrlHandlers, fun db_req/2), + do_db_req(Req, Handler) + end. + +handle_changes_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + handle_changes_req1(Req, Db); +handle_changes_req(#httpd{method='GET'}=Req, Db) -> + handle_changes_req1(Req, Db); +handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> + send_method_not_allowed(Req, "GET,HEAD,POST"). + +handle_changes_req1(Req, Db) -> + MakeCallback = fun(Resp) -> + fun({change, Change, _}, "continuous") -> + send_chunk(Resp, [?JSON_ENCODE(Change) | "\n"]); + ({change, Change, Prepend}, _) -> + send_chunk(Resp, [Prepend, ?JSON_ENCODE(Change)]); + (start, "continuous") -> + ok; + (start, _) -> + send_chunk(Resp, "{\"results\":[\n"); + ({stop, EndSeq}, "continuous") -> + send_chunk( + Resp, + [?JSON_ENCODE({[{<<"last_seq">>, EndSeq}]}) | "\n"] + ), + end_json_response(Resp); + ({stop, EndSeq}, _) -> + send_chunk( + Resp, + io_lib:format("\n],\n\"last_seq\":~w}\n", [EndSeq]) + ), + end_json_response(Resp); + (timeout, _) -> + send_chunk(Resp, "\n") + end + end, + ChangesArgs = parse_changes_query(Req), + ChangesFun = couch_changes:handle_changes(ChangesArgs, Req, Db), + WrapperFun = case ChangesArgs#changes_args.feed of + "normal" -> + {ok, Info} = couch_db:get_db_info(Db), + CurrentEtag = couch_httpd:make_etag(Info), + fun(FeedChangesFun) -> + couch_httpd:etag_respond( + Req, + CurrentEtag, + fun() -> + {ok, Resp} = couch_httpd:start_json_response( + Req, 200, [{"Etag", CurrentEtag}] + ), + FeedChangesFun(MakeCallback(Resp)) + end + ) + end; + _ -> + % "longpoll" or "continuous" + {ok, Resp} = couch_httpd:start_json_response(Req, 200), + fun(FeedChangesFun) -> + FeedChangesFun(MakeCallback(Resp)) + end + end, + couch_stats_collector:increment( + {httpd, clients_requesting_changes} + ), + try + WrapperFun(ChangesFun) + after + couch_stats_collector:decrement( + {httpd, clients_requesting_changes} + ) + end. + +handle_compact_req(#httpd{method='POST',path_parts=[DbName,_,Id|_]}=Req, Db) -> + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), + {ok, _} = couch_view_compactor:start_compact(DbName, Id), + send_json(Req, 202, {[{ok, true}]}); + +handle_compact_req(#httpd{method='POST'}=Req, Db) -> + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_db:start_compact(Db), + send_json(Req, 202, {[{ok, true}]}); + +handle_compact_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + +handle_view_cleanup_req(#httpd{method='POST'}=Req, Db) -> + % delete unreferenced index files + ok = couch_db:check_is_admin(Db), + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_view:cleanup_index_files(Db), + send_json(Req, 202, {[{ok, true}]}); + +handle_view_cleanup_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + + +handle_design_req(#httpd{ + path_parts=[_DbName, _Design, DesignName, <<"_",_/binary>> = Action | _Rest], + design_url_handlers = DesignUrlHandlers + }=Req, Db) -> + % load ddoc + DesignId = <<"_design/", DesignName/binary>>, + DDoc = couch_httpd_db:couch_doc_open(Db, DesignId, nil, []), + Handler = couch_util:dict_find(Action, DesignUrlHandlers, fun(_, _, _) -> + throw({not_found, <<"missing handler: ", Action/binary>>}) + end), + Handler(Req, Db, DDoc); + +handle_design_req(Req, Db) -> + db_req(Req, Db). + +handle_design_info_req(#httpd{ + method='GET', + path_parts=[_DbName, _Design, DesignName, _] + }=Req, Db, _DDoc) -> + DesignId = <<"_design/", DesignName/binary>>, + {ok, GroupInfoList} = couch_view:get_group_info(Db, DesignId), + send_json(Req, 200, {[ + {name, DesignName}, + {view_index, {GroupInfoList}} + ]}); + +handle_design_info_req(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET"). + +create_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> + ok = couch_httpd:verify_is_server_admin(Req), + case couch_server:create(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + couch_db:close(Db), + DbUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), + send_json(Req, 201, [{"Location", DbUrl}], {[{ok, true}]}); + Error -> + throw(Error) + end. + +delete_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) -> + ok = couch_httpd:verify_is_server_admin(Req), + case couch_server:delete(DbName, [{user_ctx, UserCtx}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + Error -> + throw(Error) + end. + +do_db_req(#httpd{user_ctx=UserCtx,path_parts=[DbName|_]}=Req, Fun) -> + case couch_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + try + Fun(Req, Db) + after + catch couch_db:close(Db) + end; + Error -> + throw(Error) + end. + +db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> + {ok, DbInfo} = couch_db:get_db_info(Db), + send_json(Req, {DbInfo}); + +db_req(#httpd{method='POST',path_parts=[DbName]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + Doc = couch_doc:from_json_obj(couch_httpd:json_body(Req)), + Doc2 = case Doc#doc.id of + <<"">> -> + Doc#doc{id=couch_uuids:new(), revs={0, []}}; + _ -> + Doc + end, + DocId = Doc2#doc.id, + case couch_httpd:qs_value(Req, "batch") of + "ok" -> + % async_batching + spawn(fun() -> + case catch(couch_db:update_doc(Db, Doc2, [])) of + {ok, _} -> ok; + Error -> + ?LOG_INFO("Batch doc error (~s): ~p",[DocId, Error]) + end + end), + + send_json(Req, 202, [], {[ + {ok, true}, + {id, DocId} + ]}); + _Normal -> + % normal + {ok, NewRev} = couch_db:update_doc(Db, Doc2, []), + DocUrl = absolute_uri( + Req, binary_to_list(<<"/",DbName/binary,"/", DocId/binary>>)), + send_json(Req, 201, [{"Location", DocUrl}], {[ + {ok, true}, + {id, DocId}, + {rev, couch_doc:rev_to_str(NewRev)} + ]}) + end; + + +db_req(#httpd{path_parts=[_DbName]}=Req, _Db) -> + send_method_not_allowed(Req, "DELETE,GET,HEAD,POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_ensure_full_commit">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + UpdateSeq = couch_db:get_update_seq(Db), + CommittedSeq = couch_db:get_committed_update_seq(Db), + {ok, StartTime} = + case couch_httpd:qs_value(Req, "seq") of + undefined -> + couch_db:ensure_full_commit(Db); + RequiredStr -> + RequiredSeq = list_to_integer(RequiredStr), + if RequiredSeq > UpdateSeq -> + throw({bad_request, + "can't do a full commit ahead of current update_seq"}); + RequiredSeq > CommittedSeq -> + couch_db:ensure_full_commit(Db); + true -> + {ok, Db#db.instance_start_time} + end + end, + send_json(Req, 201, {[ + {ok, true}, + {instance_start_time, StartTime} + ]}); + +db_req(#httpd{path_parts=[_,<<"_ensure_full_commit">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> + couch_stats_collector:increment({httpd, bulk_requests}), + couch_httpd:validate_ctype(Req, "application/json"), + {JsonProps} = couch_httpd:json_body_obj(Req), + DocsArray = couch_util:get_value(<<"docs">>, JsonProps), + case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of + "true" -> + Options = [full_commit]; + "false" -> + Options = [delay_commit]; + _ -> + Options = [] + end, + case couch_util:get_value(<<"new_edits">>, JsonProps, true) of + true -> + Docs = lists:map( + fun({ObjProps} = JsonObj) -> + Doc = couch_doc:from_json_obj(JsonObj), + validate_attachment_names(Doc), + Id = case Doc#doc.id of + <<>> -> couch_uuids:new(); + Id0 -> Id0 + end, + case couch_util:get_value(<<"_rev">>, ObjProps) of + undefined -> + Revs = {0, []}; + Rev -> + {Pos, RevId} = couch_doc:parse_rev(Rev), + Revs = {Pos, [RevId]} + end, + Doc#doc{id=Id,revs=Revs} + end, + DocsArray), + Options2 = + case couch_util:get_value(<<"all_or_nothing">>, JsonProps) of + true -> [all_or_nothing|Options]; + _ -> Options + end, + case couch_db:update_docs(Db, Docs, Options2) of + {ok, Results} -> + % output the results + DocResults = lists:zipwith(fun update_doc_result_to_json/2, + Docs, Results), + send_json(Req, 201, DocResults); + {aborted, Errors} -> + ErrorsJson = + lists:map(fun update_doc_result_to_json/1, Errors), + send_json(Req, 417, ErrorsJson) + end; + false -> + Docs = lists:map(fun(JsonObj) -> + Doc = couch_doc:from_json_obj(JsonObj), + validate_attachment_names(Doc), + Doc + end, DocsArray), + {ok, Errors} = couch_db:update_docs(Db, Docs, Options, replicated_changes), + ErrorsJson = + lists:map(fun update_doc_result_to_json/1, Errors), + send_json(Req, 201, ErrorsJson) + end; +db_req(#httpd{path_parts=[_,<<"_bulk_docs">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + {IdsRevs} = couch_httpd:json_body_obj(Req), + IdsRevs2 = [{Id, couch_doc:parse_revs(Revs)} || {Id, Revs} <- IdsRevs], + + case couch_db:purge_docs(Db, IdsRevs2) of + {ok, PurgeSeq, PurgedIdsRevs} -> + PurgedIdsRevs2 = [{Id, couch_doc:revs_to_strs(Revs)} || {Id, Revs} <- PurgedIdsRevs], + send_json(Req, 200, {[{<<"purge_seq">>, PurgeSeq}, {<<"purged">>, {PurgedIdsRevs2}}]}); + Error -> + throw(Error) + end; + +db_req(#httpd{path_parts=[_,<<"_purge">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='GET',path_parts=[_,<<"_all_docs">>]}=Req, Db) -> + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + all_docs_view(Req, Db, Keys); + +db_req(#httpd{method='POST',path_parts=[_,<<"_all_docs">>]}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + {Fields} = couch_httpd:json_body_obj(Req), + case couch_util:get_value(<<"keys">>, Fields, nil) of + nil -> + ?LOG_DEBUG("POST to _all_docs with no keys member.", []), + all_docs_view(Req, Db, nil); + Keys when is_list(Keys) -> + all_docs_view(Req, Db, Keys); + _ -> + throw({bad_request, "`keys` member must be a array."}) + end; + +db_req(#httpd{path_parts=[_,<<"_all_docs">>]}=Req, _Db) -> + send_method_not_allowed(Req, "GET,HEAD,POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_missing_revs">>]}=Req, Db) -> + {JsonDocIdRevs} = couch_httpd:json_body_obj(Req), + JsonDocIdRevs2 = [{Id, [couch_doc:parse_rev(RevStr) || RevStr <- RevStrs]} || {Id, RevStrs} <- JsonDocIdRevs], + {ok, Results} = couch_db:get_missing_revs(Db, JsonDocIdRevs2), + Results2 = [{Id, couch_doc:revs_to_strs(Revs)} || {Id, Revs, _} <- Results], + send_json(Req, {[ + {missing_revs, {Results2}} + ]}); + +db_req(#httpd{path_parts=[_,<<"_missing_revs">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> + {JsonDocIdRevs} = couch_httpd:json_body_obj(Req), + JsonDocIdRevs2 = + [{Id, couch_doc:parse_revs(RevStrs)} || {Id, RevStrs} <- JsonDocIdRevs], + {ok, Results} = couch_db:get_missing_revs(Db, JsonDocIdRevs2), + Results2 = + lists:map(fun({Id, MissingRevs, PossibleAncestors}) -> + {Id, + {[{missing, couch_doc:revs_to_strs(MissingRevs)}] ++ + if PossibleAncestors == [] -> + []; + true -> + [{possible_ancestors, + couch_doc:revs_to_strs(PossibleAncestors)}] + end}} + end, Results), + send_json(Req, {Results2}); + +db_req(#httpd{path_parts=[_,<<"_revs_diff">>]}=Req, _Db) -> + send_method_not_allowed(Req, "POST"); + +db_req(#httpd{method='PUT',path_parts=[_,<<"_security">>]}=Req, Db) -> + SecObj = couch_httpd:json_body(Req), + ok = couch_db:set_security(Db, SecObj), + send_json(Req, {[{<<"ok">>, true}]}); + +db_req(#httpd{method='GET',path_parts=[_,<<"_security">>]}=Req, Db) -> + send_json(Req, couch_db:get_security(Db)); + +db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> + send_method_not_allowed(Req, "PUT,GET"); + +db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>]}=Req, + Db) -> + Limit = couch_httpd:json_body(Req), + ok = couch_db:set_revs_limit(Db, Limit), + send_json(Req, {[{<<"ok">>, true}]}); + +db_req(#httpd{method='GET',path_parts=[_,<<"_revs_limit">>]}=Req, Db) -> + send_json(Req, couch_db:get_revs_limit(Db)); + +db_req(#httpd{path_parts=[_,<<"_revs_limit">>]}=Req, _Db) -> + send_method_not_allowed(Req, "PUT,GET"); + +% Special case to enable using an unencoded slash in the URL of design docs, +% as slashes in document IDs must otherwise be URL encoded. +db_req(#httpd{method='GET',mochi_req=MochiReq, path_parts=[DbName,<<"_design/",_/binary>>|_]}=Req, _Db) -> + PathFront = "/" ++ couch_httpd:quote(binary_to_list(DbName)) ++ "/", + [_|PathTail] = re:split(MochiReq:get(raw_path), "_design%2F", + [{return, list}]), + couch_httpd:send_redirect(Req, PathFront ++ "_design/" ++ + mochiweb_util:join(PathTail, "_design%2F")); + +db_req(#httpd{path_parts=[_DbName,<<"_design">>,Name]}=Req, Db) -> + db_doc_req(Req, Db, <<"_design/",Name/binary>>); + +db_req(#httpd{path_parts=[_DbName,<<"_design">>,Name|FileNameParts]}=Req, Db) -> + db_attachment_req(Req, Db, <<"_design/",Name/binary>>, FileNameParts); + + +% Special case to allow for accessing local documents without %2F +% encoding the docid. Throws out requests that don't have the second +% path part or that specify an attachment name. +db_req(#httpd{path_parts=[_DbName, <<"_local">>]}, _Db) -> + throw({bad_request, <<"Invalid _local document id.">>}); + +db_req(#httpd{path_parts=[_DbName, <<"_local/">>]}, _Db) -> + throw({bad_request, <<"Invalid _local document id.">>}); + +db_req(#httpd{path_parts=[_DbName, <<"_local">>, Name]}=Req, Db) -> + db_doc_req(Req, Db, <<"_local/", Name/binary>>); + +db_req(#httpd{path_parts=[_DbName, <<"_local">> | _Rest]}, _Db) -> + throw({bad_request, <<"_local documents do not accept attachments.">>}); + +db_req(#httpd{path_parts=[_, DocId]}=Req, Db) -> + db_doc_req(Req, Db, DocId); + +db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> + db_attachment_req(Req, Db, DocId, FileNameParts). + +all_docs_view(Req, Db, Keys) -> + RawCollator = fun(A, B) -> A < B end, + #view_query_args{ + start_key = StartKey, + start_docid = StartDocId, + end_key = EndKey, + end_docid = EndDocId, + limit = Limit, + skip = SkipCount, + direction = Dir, + inclusive_end = Inclusive + } = QueryArgs + = couch_httpd_view:parse_view_params(Req, Keys, map, RawCollator), + {ok, Info} = couch_db:get_db_info(Db), + CurrentEtag = couch_httpd:make_etag(Info), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + + TotalRowCount = couch_util:get_value(doc_count, Info), + StartId = if is_binary(StartKey) -> StartKey; + true -> StartDocId + end, + EndId = if is_binary(EndKey) -> EndKey; + true -> EndDocId + end, + FoldAccInit = {Limit, SkipCount, undefined, []}, + UpdateSeq = couch_db:get_update_seq(Db), + JsonParams = case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + [{update_seq, UpdateSeq}]; + _Else -> + [] + end, + case Keys of + nil -> + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, UpdateSeq, + TotalRowCount, #view_fold_helper_funs{ + reduce_count = fun couch_db:enum_docs_reduce_to_count/1, + send_row = fun all_docs_send_json_view_row/6 + }), + AdapterFun = fun(#full_doc_info{id=Id}=FullDocInfo, Offset, Acc) -> + case couch_doc:to_doc_info(FullDocInfo) of + #doc_info{revs=[#rev_info{deleted=false}|_]} = DocInfo -> + FoldlFun({{Id, Id}, DocInfo}, Offset, Acc); + #doc_info{revs=[#rev_info{deleted=true}|_]} -> + {ok, Acc} + end + end, + {ok, LastOffset, FoldResult} = couch_db:enum_docs(Db, + AdapterFun, FoldAccInit, [{start_key, StartId}, {dir, Dir}, + {if Inclusive -> end_key; true -> end_key_gt end, EndId}]), + couch_httpd_view:finish_view_fold(Req, TotalRowCount, LastOffset, FoldResult, JsonParams); + _ -> + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, UpdateSeq, + TotalRowCount, #view_fold_helper_funs{ + reduce_count = fun(Offset) -> Offset end, + send_row = fun all_docs_send_json_view_row/6 + }), + KeyFoldFun = case Dir of + fwd -> + fun lists:foldl/3; + rev -> + fun lists:foldr/3 + end, + FoldResult = KeyFoldFun( + fun(Key, FoldAcc) -> + DocInfo = (catch couch_db:get_doc_info(Db, Key)), + Doc = case DocInfo of + {ok, #doc_info{id = Id} = Di} -> + {{Id, Id}, Di}; + not_found -> + {{Key, error}, not_found}; + _ -> + ?LOG_ERROR("Invalid DocInfo: ~p", [DocInfo]), + throw({error, invalid_doc_info}) + end, + {_, FoldAcc2} = FoldlFun(Doc, 0, FoldAcc), + FoldAcc2 + end, FoldAccInit, Keys), + couch_httpd_view:finish_view_fold(Req, TotalRowCount, 0, FoldResult, JsonParams) + end + end). + +all_docs_send_json_view_row(Resp, Db, KV, IncludeDocs, Conflicts, RowFront) -> + JsonRow = all_docs_view_row_obj(Db, KV, IncludeDocs, Conflicts), + send_chunk(Resp, RowFront ++ ?JSON_ENCODE(JsonRow)), + {ok, ",\r\n"}. + +all_docs_view_row_obj(_Db, {{DocId, error}, Value}, _IncludeDocs, _Conflicts) -> + {[{key, DocId}, {error, Value}]}; +all_docs_view_row_obj(Db, {_KeyDocId, DocInfo}, true, Conflicts) -> + case DocInfo of + #doc_info{revs = [#rev_info{deleted = true} | _]} -> + {all_docs_row(DocInfo) ++ [{doc, null}]}; + _ -> + {all_docs_row(DocInfo) ++ couch_httpd_view:doc_member( + Db, DocInfo, if Conflicts -> [conflicts]; true -> [] end)} + end; +all_docs_view_row_obj(_Db, {_KeyDocId, DocInfo}, _IncludeDocs, _Conflicts) -> + {all_docs_row(DocInfo)}. + +all_docs_row(#doc_info{id = Id, revs = [RevInfo | _]}) -> + #rev_info{rev = Rev, deleted = Del} = RevInfo, + [ {id, Id}, {key, Id}, + {value, {[{rev, couch_doc:rev_to_str(Rev)}] ++ case Del of + true -> [{deleted, true}]; + false -> [] + end}} ]. + + +db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> + % check for the existence of the doc to handle the 404 case. + couch_doc_open(Db, DocId, nil, []), + case couch_httpd:qs_value(Req, "rev") of + undefined -> + update_doc(Req, Db, DocId, + couch_doc_from_req(Req, DocId, {[{<<"_deleted">>,true}]})); + Rev -> + update_doc(Req, Db, DocId, + couch_doc_from_req(Req, DocId, + {[{<<"_rev">>, ?l2b(Rev)},{<<"_deleted">>,true}]})) + end; + +db_doc_req(#httpd{method = 'GET', mochi_req = MochiReq} = Req, Db, DocId) -> + #doc_query_args{ + rev = Rev, + open_revs = Revs, + options = Options1, + atts_since = AttsSince + } = parse_doc_query(Req), + Options = case AttsSince of + nil -> + Options1; + RevList when is_list(RevList) -> + [{atts_since, RevList}, attachments | Options1] + end, + case Revs of + [] -> + Doc = couch_doc_open(Db, DocId, Rev, Options), + send_doc(Req, Doc, Options); + _ -> + {ok, Results} = couch_db:open_doc_revs(Db, DocId, Revs, Options), + case MochiReq:accepts_content_type("multipart/mixed") of + false -> + {ok, Resp} = start_json_response(Req, 200), + send_chunk(Resp, "["), + % We loop through the docs. The first time through the separator + % is whitespace, then a comma on subsequent iterations. + lists:foldl( + fun(Result, AccSeparator) -> + case Result of + {ok, Doc} -> + JsonDoc = couch_doc:to_json_obj(Doc, Options), + Json = ?JSON_ENCODE({[{ok, JsonDoc}]}), + send_chunk(Resp, AccSeparator ++ Json); + {{not_found, missing}, RevId} -> + RevStr = couch_doc:rev_to_str(RevId), + Json = ?JSON_ENCODE({[{"missing", RevStr}]}), + send_chunk(Resp, AccSeparator ++ Json) + end, + "," % AccSeparator now has a comma + end, + "", Results), + send_chunk(Resp, "]"), + end_json_response(Resp); + true -> + send_docs_multipart(Req, Results, Options) + end + end; + + +db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> + couch_httpd:validate_referer(Req), + couch_doc:validate_docid(DocId), + couch_httpd:validate_ctype(Req, "multipart/form-data"), + Form = couch_httpd:parse_form(Req), + case couch_util:get_value("_doc", Form) of + undefined -> + Rev = couch_doc:parse_rev(couch_util:get_value("_rev", Form)), + {ok, [{ok, Doc}]} = couch_db:open_doc_revs(Db, DocId, [Rev], []); + Json -> + Doc = couch_doc_from_req(Req, DocId, ?JSON_DECODE(Json)) + end, + UpdatedAtts = [ + #att{name=validate_attachment_name(Name), + type=list_to_binary(ContentType), + data=Content} || + {Name, {ContentType, _}, Content} <- + proplists:get_all_values("_attachments", Form) + ], + #doc{atts=OldAtts} = Doc, + OldAtts2 = lists:flatmap( + fun(#att{name=OldName}=Att) -> + case [1 || A <- UpdatedAtts, A#att.name == OldName] of + [] -> [Att]; % the attachment wasn't in the UpdatedAtts, return it + _ -> [] % the attachment was in the UpdatedAtts, drop it + end + end, OldAtts), + NewDoc = Doc#doc{ + atts = UpdatedAtts ++ OldAtts2 + }, + {ok, NewRev} = couch_db:update_doc(Db, NewDoc, []), + + send_json(Req, 201, [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewRev)) ++ "\""}], {[ + {ok, true}, + {id, DocId}, + {rev, couch_doc:rev_to_str(NewRev)} + ]}); + +db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> + #doc_query_args{ + update_type = UpdateType + } = parse_doc_query(Req), + couch_doc:validate_docid(DocId), + + Loc = absolute_uri(Req, "/" ++ ?b2l(Db#db.name) ++ "/" ++ ?b2l(DocId)), + RespHeaders = [{"Location", Loc}], + case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of + ("multipart/related;" ++ _) = ContentType -> + {ok, Doc0, WaitFun, Parser} = couch_doc:doc_from_multi_part_stream( + ContentType, fun() -> receive_request_data(Req) end), + Doc = couch_doc_from_req(Req, DocId, Doc0), + try + Result = update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType), + WaitFun(), + Result + catch throw:Err -> + % Document rejected by a validate_doc_update function. + couch_doc:abort_multi_part_stream(Parser), + throw(Err) + end; + _Else -> + case couch_httpd:qs_value(Req, "batch") of + "ok" -> + % batch + Doc = couch_doc_from_req(Req, DocId, couch_httpd:json_body(Req)), + + spawn(fun() -> + case catch(couch_db:update_doc(Db, Doc, [])) of + {ok, _} -> ok; + Error -> + ?LOG_INFO("Batch doc error (~s): ~p",[DocId, Error]) + end + end), + send_json(Req, 202, [], {[ + {ok, true}, + {id, DocId} + ]}); + _Normal -> + % normal + Body = couch_httpd:json_body(Req), + Doc = couch_doc_from_req(Req, DocId, Body), + update_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType) + end + end; + +db_doc_req(#httpd{method='COPY'}=Req, Db, SourceDocId) -> + SourceRev = + case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of + missing_rev -> nil; + Rev -> Rev + end, + {TargetDocId, TargetRevs} = parse_copy_destination_header(Req), + % open old doc + Doc = couch_doc_open(Db, SourceDocId, SourceRev, []), + % save new doc + {ok, NewTargetRev} = couch_db:update_doc(Db, + Doc#doc{id=TargetDocId, revs=TargetRevs}, []), + % respond + send_json(Req, 201, + [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewTargetRev)) ++ "\""}], + update_doc_result_to_json(TargetDocId, {ok, NewTargetRev})); + +db_doc_req(Req, _Db, _DocId) -> + send_method_not_allowed(Req, "DELETE,GET,HEAD,POST,PUT,COPY"). + + +send_doc(Req, Doc, Options) -> + case Doc#doc.meta of + [] -> + DiskEtag = couch_httpd:doc_etag(Doc), + % output etag only when we have no meta + couch_httpd:etag_respond(Req, DiskEtag, fun() -> + send_doc_efficiently(Req, Doc, [{"Etag", DiskEtag}], Options) + end); + _ -> + send_doc_efficiently(Req, Doc, [], Options) + end. + + +send_doc_efficiently(Req, #doc{atts=[]}=Doc, Headers, Options) -> + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)); +send_doc_efficiently(#httpd{mochi_req = MochiReq} = Req, + #doc{atts = Atts} = Doc, Headers, Options) -> + case lists:member(attachments, Options) of + true -> + case MochiReq:accepts_content_type("multipart/related") of + false -> + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)); + true -> + Boundary = couch_uuids:random(), + JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, + [attachments, follows, att_encoding_info | Options])), + {ContentType, Len} = couch_doc:len_doc_to_multi_part_stream( + Boundary,JsonBytes, Atts, true), + CType = {<<"Content-Type">>, ContentType}, + {ok, Resp} = start_response_length(Req, 200, [CType|Headers], Len), + couch_doc:doc_to_multi_part_stream(Boundary,JsonBytes,Atts, + fun(Data) -> couch_httpd:send(Resp, Data) end, true) + end; + false -> + send_json(Req, 200, Headers, couch_doc:to_json_obj(Doc, Options)) + end. + +send_docs_multipart(Req, Results, Options1) -> + OuterBoundary = couch_uuids:random(), + InnerBoundary = couch_uuids:random(), + Options = [attachments, follows, att_encoding_info | Options1], + CType = {"Content-Type", + "multipart/mixed; boundary=\"" ++ ?b2l(OuterBoundary) ++ "\""}, + {ok, Resp} = start_chunked_response(Req, 200, [CType]), + couch_httpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), + lists:foreach( + fun({ok, #doc{atts=Atts}=Doc}) -> + JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, Options)), + {ContentType, _Len} = couch_doc:len_doc_to_multi_part_stream( + InnerBoundary, JsonBytes, Atts, true), + couch_httpd:send_chunk(Resp, <<"\r\nContent-Type: ", + ContentType/binary, "\r\n\r\n">>), + couch_doc:doc_to_multi_part_stream(InnerBoundary, JsonBytes, Atts, + fun(Data) -> couch_httpd:send_chunk(Resp, Data) + end, true), + couch_httpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>); + ({{not_found, missing}, RevId}) -> + RevStr = couch_doc:rev_to_str(RevId), + Json = ?JSON_ENCODE({[{"missing", RevStr}]}), + couch_httpd:send_chunk(Resp, + [<<"\r\nContent-Type: application/json; error=\"true\"\r\n\r\n">>, + Json, + <<"\r\n--", OuterBoundary/binary>>]) + end, Results), + couch_httpd:send_chunk(Resp, <<"--">>), + couch_httpd:last_chunk(Resp). + +send_ranges_multipart(Req, ContentType, Len, Att, Ranges) -> + Boundary = couch_uuids:random(), + CType = {"Content-Type", + "multipart/byteranges; boundary=\"" ++ ?b2l(Boundary) ++ "\""}, + {ok, Resp} = start_chunked_response(Req, 206, [CType]), + couch_httpd:send_chunk(Resp, <<"--", Boundary/binary>>), + lists:foreach(fun({From, To}) -> + ContentRange = make_content_range(From, To, Len), + couch_httpd:send_chunk(Resp, + <<"\r\nContent-Type: ", ContentType/binary, "\r\n", + "Content-Range: ", ContentRange/binary, "\r\n", + "\r\n">>), + couch_doc:range_att_foldl(Att, From, To + 1, + fun(Seg, _) -> send_chunk(Resp, Seg) end, {ok, Resp}), + couch_httpd:send_chunk(Resp, <<"\r\n--", Boundary/binary>>) + end, Ranges), + couch_httpd:send_chunk(Resp, <<"--">>), + couch_httpd:last_chunk(Resp), + {ok, Resp}. + +receive_request_data(Req) -> + receive_request_data(Req, couch_httpd:body_length(Req)). + +receive_request_data(Req, LenLeft) when LenLeft > 0 -> + Len = erlang:min(4096, LenLeft), + Data = couch_httpd:recv(Req, Len), + {Data, fun() -> receive_request_data(Req, LenLeft - iolist_size(Data)) end}; +receive_request_data(_Req, _) -> + throw(<<"expected more data">>). + +make_content_range(From, To, Len) -> + ?l2b(io_lib:format("bytes ~B-~B/~B", [From, To, Len])). + +update_doc_result_to_json({{Id, Rev}, Error}) -> + {_Code, Err, Msg} = couch_httpd:error_info(Error), + {[{id, Id}, {rev, couch_doc:rev_to_str(Rev)}, + {error, Err}, {reason, Msg}]}. + +update_doc_result_to_json(#doc{id=DocId}, Result) -> + update_doc_result_to_json(DocId, Result); +update_doc_result_to_json(DocId, {ok, NewRev}) -> + {[{id, DocId}, {rev, couch_doc:rev_to_str(NewRev)}]}; +update_doc_result_to_json(DocId, Error) -> + {_Code, ErrorStr, Reason} = couch_httpd:error_info(Error), + {[{id, DocId}, {error, ErrorStr}, {reason, Reason}]}. + + +update_doc(Req, Db, DocId, Doc) -> + update_doc(Req, Db, DocId, Doc, []). + +update_doc(Req, Db, DocId, Doc, Headers) -> + update_doc(Req, Db, DocId, Doc, Headers, interactive_edit). + +update_doc(Req, Db, DocId, #doc{deleted=Deleted}=Doc, Headers, UpdateType) -> + case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of + "true" -> + Options = [full_commit]; + "false" -> + Options = [delay_commit]; + _ -> + Options = [] + end, + {ok, NewRev} = couch_db:update_doc(Db, Doc, Options, UpdateType), + NewRevStr = couch_doc:rev_to_str(NewRev), + ResponseHeaders = [{"Etag", <<"\"", NewRevStr/binary, "\"">>}] ++ Headers, + send_json(Req, if Deleted -> 200; true -> 201 end, + ResponseHeaders, {[ + {ok, true}, + {id, DocId}, + {rev, NewRevStr}]}). + +couch_doc_from_req(Req, DocId, #doc{revs=Revs}=Doc) -> + validate_attachment_names(Doc), + ExplicitDocRev = + case Revs of + {Start,[RevId|_]} -> {Start, RevId}; + _ -> undefined + end, + case extract_header_rev(Req, ExplicitDocRev) of + missing_rev -> + Revs2 = {0, []}; + ExplicitDocRev -> + Revs2 = Revs; + {Pos, Rev} -> + Revs2 = {Pos, [Rev]} + end, + Doc#doc{id=DocId, revs=Revs2}; +couch_doc_from_req(Req, DocId, Json) -> + couch_doc_from_req(Req, DocId, couch_doc:from_json_obj(Json)). + + +% Useful for debugging +% couch_doc_open(Db, DocId) -> +% couch_doc_open(Db, DocId, nil, []). + +couch_doc_open(Db, DocId, Rev, Options) -> + case Rev of + nil -> % open most recent rev + case couch_db:open_doc(Db, DocId, Options) of + {ok, Doc} -> + Doc; + Error -> + throw(Error) + end; + _ -> % open a specific rev (deletions come back as stubs) + case couch_db:open_doc_revs(Db, DocId, [Rev], Options) of + {ok, [{ok, Doc}]} -> + Doc; + {ok, [{{not_found, missing}, Rev}]} -> + throw(not_found); + {ok, [Else]} -> + throw(Else) + end + end. + +% Attachment request handlers + +db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNameParts) -> + FileName = list_to_binary(mochiweb_util:join(lists:map(fun binary_to_list/1, FileNameParts),"/")), + #doc_query_args{ + rev=Rev, + options=Options + } = parse_doc_query(Req), + #doc{ + atts=Atts + } = Doc = couch_doc_open(Db, DocId, Rev, Options), + case [A || A <- Atts, A#att.name == FileName] of + [] -> + throw({not_found, "Document is missing attachment"}); + [#att{type=Type, encoding=Enc, disk_len=DiskLen, att_len=AttLen}=Att] -> + Etag = couch_httpd:doc_etag(Doc), + ReqAcceptsAttEnc = lists:member( + atom_to_list(Enc), + couch_httpd:accepted_encodings(Req) + ), + Len = case {Enc, ReqAcceptsAttEnc} of + {identity, _} -> + % stored and served in identity form + DiskLen; + {_, false} when DiskLen =/= AttLen -> + % Stored encoded, but client doesn't accept the encoding we used, + % so we need to decode on the fly. DiskLen is the identity length + % of the attachment. + DiskLen; + {_, true} -> + % Stored and served encoded. AttLen is the encoded length. + AttLen; + _ -> + % We received an encoded attachment and stored it as such, so we + % don't know the identity length. The client doesn't accept the + % encoding, and since we cannot serve a correct Content-Length + % header we'll fall back to a chunked response. + undefined + end, + Headers = [ + {"ETag", Etag}, + {"Cache-Control", "must-revalidate"}, + {"Content-Type", binary_to_list(Type)} + ] ++ case ReqAcceptsAttEnc of + true when Enc =/= identity -> + % RFC 2616 says that the 'identify' encoding should not be used in + % the Content-Encoding header + [{"Content-Encoding", atom_to_list(Enc)}]; + _ -> + [] + end ++ case Enc of + identity -> + [{"Accept-Ranges", "bytes"}]; + _ -> + [{"Accept-Ranges", "none"}] + end, + AttFun = case ReqAcceptsAttEnc of + false -> + fun couch_doc:att_foldl_decode/3; + true -> + fun couch_doc:att_foldl/3 + end, + couch_httpd:etag_respond( + Req, + Etag, + fun() -> + case Len of + undefined -> + {ok, Resp} = start_chunked_response(Req, 200, Headers), + AttFun(Att, fun(Seg, _) -> send_chunk(Resp, Seg) end, {ok, Resp}), + last_chunk(Resp); + _ -> + Ranges = parse_ranges(MochiReq:get(range), Len), + case {Enc, Ranges} of + {identity, [{From, To}]} -> + Headers1 = [{<<"Content-Range">>, make_content_range(From, To, Len)}] + ++ Headers, + {ok, Resp} = start_response_length(Req, 206, Headers1, To - From + 1), + couch_doc:range_att_foldl(Att, From, To + 1, + fun(Seg, _) -> send(Resp, Seg) end, {ok, Resp}); + {identity, Ranges} when is_list(Ranges) -> + send_ranges_multipart(Req, Type, Len, Att, Ranges); + _ -> + Headers1 = Headers ++ + if Enc =:= identity orelse ReqAcceptsAttEnc =:= true -> + [{"Content-MD5", base64:encode(Att#att.md5)}]; + true -> + [] + end, + {ok, Resp} = start_response_length(Req, 200, Headers1, Len), + AttFun(Att, fun(Seg, _) -> send(Resp, Seg) end, {ok, Resp}) + end + end + end + ) + end; + + +db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileNameParts) + when (Method == 'PUT') or (Method == 'DELETE') -> + FileName = validate_attachment_name( + mochiweb_util:join( + lists:map(fun binary_to_list/1, + FileNameParts),"/")), + + NewAtt = case Method of + 'DELETE' -> + []; + _ -> + [#att{ + name = FileName, + type = case couch_httpd:header_value(Req,"Content-Type") of + undefined -> + % We could throw an error here or guess by the FileName. + % Currently, just giving it a default. + <<"application/octet-stream">>; + CType -> + list_to_binary(CType) + end, + data = case couch_httpd:body_length(Req) of + undefined -> + <<"">>; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + fun(MaxChunkSize, ChunkFun, InitState) -> + couch_httpd:recv_chunked(Req, MaxChunkSize, + ChunkFun, InitState) + end; + 0 -> + <<"">>; + Length when is_integer(Length) -> + Expect = case couch_httpd:header_value(Req, "expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + MochiReq:start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end, + + + fun(Size) -> couch_httpd:recv(Req, Size) end + end, + att_len = case couch_httpd:header_value(Req,"Content-Length") of + undefined -> + undefined; + Length -> + list_to_integer(Length) + end, + md5 = get_md5_header(Req), + encoding = case string:to_lower(string:strip( + couch_httpd:header_value(Req,"Content-Encoding","identity") + )) of + "identity" -> + identity; + "gzip" -> + gzip; + _ -> + throw({ + bad_ctype, + "Only gzip and identity content-encodings are supported" + }) + end + }] + end, + + Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of + missing_rev -> % make the new doc + couch_doc:validate_docid(DocId), + #doc{id=DocId}; + Rev -> + case couch_db:open_doc_revs(Db, DocId, [Rev], []) of + {ok, [{ok, Doc0}]} -> Doc0; + {ok, [Error]} -> throw(Error) + end + end, + + #doc{atts=Atts, revs = {Pos, Revs}} = Doc, + DocEdited = Doc#doc{ + % prune revision list as a workaround for key tree bug (COUCHDB-902) + revs = {Pos, case Revs of [] -> []; [Hd|_] -> [Hd] end}, + atts = NewAtt ++ [A || A <- Atts, A#att.name /= FileName] + }, + {ok, UpdatedRev} = couch_db:update_doc(Db, DocEdited, []), + #db{name=DbName} = Db, + + {Status, Headers} = case Method of + 'DELETE' -> + {200, []}; + _ -> + {201, [{"Etag", "\"" ++ ?b2l(couch_doc:rev_to_str(UpdatedRev)) ++ "\""}, + {"Location", absolute_uri(Req, "/" ++ + binary_to_list(DbName) ++ "/" ++ + binary_to_list(DocId) ++ "/" ++ + binary_to_list(FileName) + )}]} + end, + send_json(Req,Status, Headers, {[ + {ok, true}, + {id, DocId}, + {rev, couch_doc:rev_to_str(UpdatedRev)} + ]}); + +db_attachment_req(Req, _Db, _DocId, _FileNameParts) -> + send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT"). + +parse_ranges(undefined, _Len) -> + undefined; +parse_ranges(fail, _Len) -> + undefined; +parse_ranges(Ranges, Len) -> + parse_ranges(Ranges, Len, []). + +parse_ranges([], _Len, Acc) -> + lists:reverse(Acc); +parse_ranges([{From, To}|_], _Len, _Acc) when is_integer(From) andalso is_integer(To) andalso To < From -> + throw(requested_range_not_satisfiable); +parse_ranges([{From, To}|Rest], Len, Acc) when is_integer(To) andalso To >= Len -> + parse_ranges([{From, Len-1}] ++ Rest, Len, Acc); +parse_ranges([{none, To}|Rest], Len, Acc) -> + parse_ranges([{Len - To, Len - 1}] ++ Rest, Len, Acc); +parse_ranges([{From, none}|Rest], Len, Acc) -> + parse_ranges([{From, Len - 1}] ++ Rest, Len, Acc); +parse_ranges([{From,To}|Rest], Len, Acc) -> + parse_ranges(Rest, Len, [{From, To}] ++ Acc). + +get_md5_header(Req) -> + ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"), + Length = couch_httpd:body_length(Req), + Trailer = couch_httpd:header_value(Req, "Trailer"), + case {ContentMD5, Length, Trailer} of + _ when is_list(ContentMD5) orelse is_binary(ContentMD5) -> + base64:decode(ContentMD5); + {_, chunked, undefined} -> + <<>>; + {_, chunked, _} -> + case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of + {match, _} -> + md5_in_footer; + _ -> + <<>> + end; + _ -> + <<>> + end. + +parse_doc_query(Req) -> + lists:foldl(fun({Key,Value}, Args) -> + case {Key, Value} of + {"attachments", "true"} -> + Options = [attachments | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"meta", "true"} -> + Options = [revs_info, conflicts, deleted_conflicts | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"revs", "true"} -> + Options = [revs | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"local_seq", "true"} -> + Options = [local_seq | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"revs_info", "true"} -> + Options = [revs_info | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"conflicts", "true"} -> + Options = [conflicts | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"deleted_conflicts", "true"} -> + Options = [deleted_conflicts | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + {"rev", Rev} -> + Args#doc_query_args{rev=couch_doc:parse_rev(Rev)}; + {"open_revs", "all"} -> + Args#doc_query_args{open_revs=all}; + {"open_revs", RevsJsonStr} -> + JsonArray = ?JSON_DECODE(RevsJsonStr), + Args#doc_query_args{open_revs=couch_doc:parse_revs(JsonArray)}; + {"atts_since", RevsJsonStr} -> + JsonArray = ?JSON_DECODE(RevsJsonStr), + Args#doc_query_args{atts_since = couch_doc:parse_revs(JsonArray)}; + {"new_edits", "false"} -> + Args#doc_query_args{update_type=replicated_changes}; + {"new_edits", "true"} -> + Args#doc_query_args{update_type=interactive_edit}; + {"att_encoding_info", "true"} -> + Options = [att_encoding_info | Args#doc_query_args.options], + Args#doc_query_args{options=Options}; + _Else -> % unknown key value pair, ignore. + Args + end + end, #doc_query_args{}, couch_httpd:qs(Req)). + +parse_changes_query(Req) -> + lists:foldl(fun({Key, Value}, Args) -> + case {Key, Value} of + {"feed", _} -> + Args#changes_args{feed=Value}; + {"descending", "true"} -> + Args#changes_args{dir=rev}; + {"since", _} -> + Args#changes_args{since=list_to_integer(Value)}; + {"limit", _} -> + Args#changes_args{limit=list_to_integer(Value)}; + {"style", _} -> + Args#changes_args{style=list_to_existing_atom(Value)}; + {"heartbeat", "true"} -> + Args#changes_args{heartbeat=true}; + {"heartbeat", _} -> + Args#changes_args{heartbeat=list_to_integer(Value)}; + {"timeout", _} -> + Args#changes_args{timeout=list_to_integer(Value)}; + {"include_docs", "true"} -> + Args#changes_args{include_docs=true}; + {"conflicts", "true"} -> + Args#changes_args{conflicts=true}; + {"filter", _} -> + Args#changes_args{filter=Value}; + _Else -> % unknown key value pair, ignore. + Args + end + end, #changes_args{}, couch_httpd:qs(Req)). + +extract_header_rev(Req, ExplicitRev) when is_binary(ExplicitRev) or is_list(ExplicitRev)-> + extract_header_rev(Req, couch_doc:parse_rev(ExplicitRev)); +extract_header_rev(Req, ExplicitRev) -> + Etag = case couch_httpd:header_value(Req, "If-Match") of + undefined -> undefined; + Value -> couch_doc:parse_rev(string:strip(Value, both, $")) + end, + case {ExplicitRev, Etag} of + {undefined, undefined} -> missing_rev; + {_, undefined} -> ExplicitRev; + {undefined, _} -> Etag; + _ when ExplicitRev == Etag -> Etag; + _ -> + throw({bad_request, "Document rev and etag have different values"}) + end. + + +parse_copy_destination_header(Req) -> + case couch_httpd:header_value(Req, "Destination") of + undefined -> + throw({bad_request, "Destination header in mandatory for COPY."}); + Destination -> + case re:run(Destination, "\\?", [{capture, none}]) of + nomatch -> + {list_to_binary(Destination), {0, []}}; + match -> + [DocId, RevQs] = re:split(Destination, "\\?", [{return, list}]), + [_RevQueryKey, Rev] = re:split(RevQs, "=", [{return, list}]), + {Pos, RevId} = couch_doc:parse_rev(Rev), + {list_to_binary(DocId), {Pos, [RevId]}} + end + end. + +validate_attachment_names(Doc) -> + lists:foreach(fun(#att{name=Name}) -> + validate_attachment_name(Name) + end, Doc#doc.atts). + +validate_attachment_name(Name) when is_list(Name) -> + validate_attachment_name(list_to_binary(Name)); +validate_attachment_name(<<"_",_/binary>>) -> + throw({bad_request, <<"Attachment name can't start with '_'">>}); +validate_attachment_name(Name) -> + case couch_util:validate_utf8(Name) of + true -> Name; + false -> throw({bad_request, <<"Attachment name is not UTF-8 encoded">>}) + end. + diff --git a/apps/couch/src/couch_httpd_external.erl b/apps/couch/src/couch_httpd_external.erl new file mode 100644 index 00000000..2e91fb50 --- /dev/null +++ b/apps/couch/src/couch_httpd_external.erl @@ -0,0 +1,169 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_external). + +-export([handle_external_req/2, handle_external_req/3]). +-export([send_external_response/2, json_req_obj/2, json_req_obj/3]). +-export([default_or_content_type/2, parse_external_response/1]). + +-import(couch_httpd,[send_error/4]). + +-include("couch_db.hrl"). + +% handle_external_req/2 +% for the old type of config usage: +% _external = {couch_httpd_external, handle_external_req} +% with urls like +% /db/_external/action/design/name +handle_external_req(#httpd{ + path_parts=[_DbName, _External, UrlName | _Path] + }=HttpReq, Db) -> + process_external_req(HttpReq, Db, UrlName); +handle_external_req(#httpd{path_parts=[_, _]}=Req, _Db) -> + send_error(Req, 404, <<"external_server_error">>, <<"No server name specified.">>); +handle_external_req(Req, _) -> + send_error(Req, 404, <<"external_server_error">>, <<"Broken assumption">>). + +% handle_external_req/3 +% for this type of config usage: +% _action = {couch_httpd_external, handle_external_req, <<"action">>} +% with urls like +% /db/_action/design/name +handle_external_req(HttpReq, Db, Name) -> + process_external_req(HttpReq, Db, Name). + +process_external_req(HttpReq, Db, Name) -> + + Response = couch_external_manager:execute(binary_to_list(Name), + json_req_obj(HttpReq, Db)), + + case Response of + {unknown_external_server, Msg} -> + send_error(HttpReq, 404, <<"external_server_error">>, Msg); + _ -> + send_external_response(HttpReq, Response) + end. +json_req_obj(Req, Db) -> json_req_obj(Req, Db, null). +json_req_obj(#httpd{mochi_req=Req, + method=Method, + requested_path_parts=RequestedPath, + path_parts=Path, + req_body=ReqBody + }, Db, DocId) -> + Body = case ReqBody of + undefined -> Req:recv_body(); + Else -> Else + end, + ParsedForm = case Req:get_primary_header_value("content-type") of + "application/x-www-form-urlencoded" ++ _ -> + case Body of + undefined -> []; + _ -> mochiweb_util:parse_qs(Body) + end; + _ -> + [] + end, + Headers = Req:get(headers), + Hlist = mochiweb_headers:to_list(Headers), + {ok, Info} = couch_db:get_db_info(Db), + +% add headers... + {[{<<"info">>, {Info}}, + {<<"id">>, DocId}, + {<<"uuid">>, couch_uuids:new()}, + {<<"method">>, Method}, + {<<"requested_path">>, RequestedPath}, + {<<"path">>, Path}, + {<<"query">>, json_query_keys(to_json_terms(Req:parse_qs()))}, + {<<"headers">>, to_json_terms(Hlist)}, + {<<"body">>, Body}, + {<<"peer">>, ?l2b(Req:get(peer))}, + {<<"form">>, to_json_terms(ParsedForm)}, + {<<"cookie">>, to_json_terms(Req:parse_cookie())}, + {<<"userCtx">>, couch_util:json_user_ctx(Db)}, + {<<"secObj">>, couch_db:get_security(Db)}]}. + +to_json_terms(Data) -> + to_json_terms(Data, []). + +to_json_terms([], Acc) -> + {lists:reverse(Acc)}; +to_json_terms([{Key, Value} | Rest], Acc) when is_atom(Key) -> + to_json_terms(Rest, [{list_to_binary(atom_to_list(Key)), list_to_binary(Value)} | Acc]); +to_json_terms([{Key, Value} | Rest], Acc) -> + to_json_terms(Rest, [{list_to_binary(Key), list_to_binary(Value)} | Acc]). + +json_query_keys({Json}) -> + json_query_keys(Json, []). +json_query_keys([], Acc) -> + {lists:reverse(Acc)}; +json_query_keys([{<<"startkey">>, Value} | Rest], Acc) -> + json_query_keys(Rest, [{<<"startkey">>, couch_util:json_decode(Value)}|Acc]); +json_query_keys([{<<"endkey">>, Value} | Rest], Acc) -> + json_query_keys(Rest, [{<<"endkey">>, couch_util:json_decode(Value)}|Acc]); +json_query_keys([{<<"key">>, Value} | Rest], Acc) -> + json_query_keys(Rest, [{<<"key">>, couch_util:json_decode(Value)}|Acc]); +json_query_keys([Term | Rest], Acc) -> + json_query_keys(Rest, [Term|Acc]). + +send_external_response(#httpd{mochi_req=MochiReq}=Req, Response) -> + #extern_resp_args{ + code = Code, + data = Data, + ctype = CType, + headers = Headers + } = parse_external_response(Response), + couch_httpd:log_request(Req, Code), + Resp = MochiReq:respond({Code, + default_or_content_type(CType, Headers ++ couch_httpd:server_header()), Data}), + {ok, Resp}. + +parse_external_response({Response}) -> + lists:foldl(fun({Key,Value}, Args) -> + case {Key, Value} of + {"", _} -> + Args; + {<<"code">>, Value} -> + Args#extern_resp_args{code=Value}; + {<<"stop">>, true} -> + Args#extern_resp_args{stop=true}; + {<<"json">>, Value} -> + Args#extern_resp_args{ + data=?JSON_ENCODE(Value), + ctype="application/json"}; + {<<"body">>, Value} -> + Args#extern_resp_args{data=Value, ctype="text/html; charset=utf-8"}; + {<<"base64">>, Value} -> + Args#extern_resp_args{ + data=base64:decode(Value), + ctype="application/binary" + }; + {<<"headers">>, {Headers}} -> + NewHeaders = lists:map(fun({Header, HVal}) -> + {binary_to_list(Header), binary_to_list(HVal)} + end, Headers), + Args#extern_resp_args{headers=NewHeaders}; + _ -> % unknown key + Msg = lists:flatten(io_lib:format("Invalid data from external server: ~p", [{Key, Value}])), + throw({external_response_error, Msg}) + end + end, #extern_resp_args{}, Response). + +default_or_content_type(DefaultContentType, Headers) -> + IsContentType = fun({X, _}) -> string:to_lower(X) == "content-type" end, + case lists:any(IsContentType, Headers) of + false -> + [{"Content-Type", DefaultContentType} | Headers]; + true -> + Headers + end. diff --git a/apps/couch/src/couch_httpd_misc_handlers.erl b/apps/couch/src/couch_httpd_misc_handlers.erl new file mode 100644 index 00000000..15f0cad3 --- /dev/null +++ b/apps/couch/src/couch_httpd_misc_handlers.erl @@ -0,0 +1,284 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_misc_handlers). + +-export([handle_welcome_req/2,handle_favicon_req/2,handle_utils_dir_req/2, + handle_all_dbs_req/1,handle_replicate_req/1,handle_restart_req/1, + handle_uuids_req/1,handle_config_req/1,handle_log_req/1, + handle_task_status_req/1]). + +-export([increment_update_seq_req/2]). + + +-include("couch_db.hrl"). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, + start_json_response/2,send_chunk/2,last_chunk/1,end_json_response/1, + start_chunked_response/3, send_error/4]). + +% httpd global handlers + +handle_welcome_req(#httpd{method='GET'}=Req, WelcomeMessage) -> + send_json(Req, {[ + {couchdb, WelcomeMessage}, + {version, list_to_binary(couch:version())} + ]}); +handle_welcome_req(Req, _) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_favicon_req(#httpd{method='GET'}=Req, DocumentRoot) -> + {{Year,Month,Day},Time} = erlang:localtime(), + OneYearFromNow = {{Year+1,Month,Day},Time}, + CachingHeaders = [ + %favicon should expire a year from now + {"Cache-Control", "public, max-age=31536000"}, + {"Expires", httpd_util:rfc1123_date(OneYearFromNow)} + ], + couch_httpd:serve_file(Req, "favicon.ico", DocumentRoot, CachingHeaders); + +handle_favicon_req(Req, _) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_utils_dir_req(#httpd{method='GET'}=Req, DocumentRoot) -> + "/" ++ UrlPath = couch_httpd:path(Req), + case couch_httpd:partition(UrlPath) of + {_ActionKey, "/", RelativePath} -> + % GET /_utils/path or GET /_utils/ + couch_httpd:serve_file(Req, RelativePath, DocumentRoot); + {_ActionKey, "", _RelativePath} -> + % GET /_utils + RedirectPath = couch_httpd:path(Req) ++ "/", + couch_httpd:send_redirect(Req, RedirectPath) + end; +handle_utils_dir_req(Req, _) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_all_dbs_req(#httpd{method='GET'}=Req) -> + {ok, DbNames} = couch_server:all_databases(), + send_json(Req, DbNames); +handle_all_dbs_req(Req) -> + send_method_not_allowed(Req, "GET,HEAD"). + + +handle_task_status_req(#httpd{method='GET'}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + % convert the list of prop lists to a list of json objects + send_json(Req, [{Props} || Props <- couch_task_status:all()]); +handle_task_status_req(Req) -> + send_method_not_allowed(Req, "GET,HEAD"). + +handle_replicate_req(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + PostBody = couch_httpd:json_body_obj(Req), + try couch_rep:replicate(PostBody, Req#httpd.user_ctx) of + {ok, {continuous, RepId}} -> + send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); + {ok, {cancelled, RepId}} -> + send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); + {ok, {JsonResults}} -> + send_json(Req, {[{ok, true} | JsonResults]}); + {error, {Type, Details}} -> + send_json(Req, 500, {[{error, Type}, {reason, Details}]}); + {error, not_found} -> + send_json(Req, 404, {[{error, not_found}]}); + {error, Reason} -> + try + send_json(Req, 500, {[{error, Reason}]}) + catch + exit:{json_encode, _} -> + send_json(Req, 500, {[{error, couch_util:to_binary(Reason)}]}) + end + catch + throw:{db_not_found, Msg} -> + send_json(Req, 404, {[{error, db_not_found}, {reason, Msg}]}); + throw:{unauthorized, Msg} -> + send_json(Req, 404, {[{error, unauthorized}, {reason, Msg}]}) + end; +handle_replicate_req(Req) -> + send_method_not_allowed(Req, "POST"). + + +handle_restart_req(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_httpd:verify_is_server_admin(Req), + couch_server_sup:restart_core_server(), + send_json(Req, 200, {[{ok, true}]}); +handle_restart_req(Req) -> + send_method_not_allowed(Req, "POST"). + + +handle_uuids_req(#httpd{method='GET'}=Req) -> + Count = list_to_integer(couch_httpd:qs_value(Req, "count", "1")), + UUIDs = [couch_uuids:new() || _ <- lists:seq(1, Count)], + Etag = couch_httpd:make_etag(UUIDs), + couch_httpd:etag_respond(Req, Etag, fun() -> + CacheBustingHeaders = [ + {"Date", httpd_util:rfc1123_date()}, + {"Cache-Control", "no-cache"}, + % Past date, ON PURPOSE! + {"Expires", "Fri, 01 Jan 1990 00:00:00 GMT"}, + {"Pragma", "no-cache"}, + {"ETag", Etag} + ], + send_json(Req, 200, CacheBustingHeaders, {[{<<"uuids">>, UUIDs}]}) + end); +handle_uuids_req(Req) -> + send_method_not_allowed(Req, "GET"). + + +% Config request handler + + +% GET /_config/ +% GET /_config +handle_config_req(#httpd{method='GET', path_parts=[_]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + Grouped = lists:foldl(fun({{Section, Key}, Value}, Acc) -> + case dict:is_key(Section, Acc) of + true -> + dict:append(Section, {list_to_binary(Key), list_to_binary(Value)}, Acc); + false -> + dict:store(Section, [{list_to_binary(Key), list_to_binary(Value)}], Acc) + end + end, dict:new(), couch_config:all()), + KVs = dict:fold(fun(Section, Values, Acc) -> + [{list_to_binary(Section), {Values}} | Acc] + end, [], Grouped), + send_json(Req, 200, {KVs}); +% GET /_config/Section +handle_config_req(#httpd{method='GET', path_parts=[_,Section]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + KVs = [{list_to_binary(Key), list_to_binary(Value)} + || {Key, Value} <- couch_config:get(Section)], + send_json(Req, 200, {KVs}); +% GET /_config/Section/Key +handle_config_req(#httpd{method='GET', path_parts=[_, Section, Key]}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + case couch_config:get(Section, Key, null) of + null -> + throw({not_found, unknown_config_value}); + Value -> + send_json(Req, 200, list_to_binary(Value)) + end; +% PUT or DELETE /_config/Section/Key +handle_config_req(#httpd{method=Method, path_parts=[_, Section, Key]}=Req) + when (Method == 'PUT') or (Method == 'DELETE') -> + ok = couch_httpd:verify_is_server_admin(Req), + Persist = couch_httpd:header_value(Req, "X-Couch-Persist") /= "false", + case couch_config:get(<<"httpd">>, <<"config_whitelist">>, null) of + null -> + % No whitelist; allow all changes. + handle_approved_config_req(Req, Persist); + WhitelistValue -> + % Provide a failsafe to protect against inadvertently locking + % onesself out of the config by supplying a syntactically-incorrect + % Erlang term. To intentionally lock down the whitelist, supply a + % well-formed list which does not include the whitelist config + % variable itself. + FallbackWhitelist = [{<<"httpd">>, <<"config_whitelist">>}], + + Whitelist = case couch_util:parse_term(WhitelistValue) of + {ok, Value} when is_list(Value) -> + Value; + {ok, _NonListValue} -> + FallbackWhitelist; + {error, _} -> + [{WhitelistSection, WhitelistKey}] = FallbackWhitelist, + ?LOG_ERROR("Only whitelisting ~s/~s due to error parsing: ~p", + [WhitelistSection, WhitelistKey, WhitelistValue]), + FallbackWhitelist + end, + + IsRequestedKeyVal = fun(Element) -> + case Element of + {A, B} -> + % For readability, tuples may be used instead of binaries + % in the whitelist. + case {couch_util:to_binary(A), couch_util:to_binary(B)} of + {Section, Key} -> + true; + {Section, <<"*">>} -> + true; + _Else -> + false + end; + _Else -> + false + end + end, + + case lists:any(IsRequestedKeyVal, Whitelist) of + true -> + % Allow modifying this whitelisted variable. + handle_approved_config_req(Req, Persist); + _NotWhitelisted -> + % Disallow modifying this non-whitelisted variable. + send_error(Req, 400, <<"modification_not_allowed">>, + ?l2b("This config variable is read-only")) + end + end; +handle_config_req(Req) -> + send_method_not_allowed(Req, "GET,PUT,DELETE"). + +% PUT /_config/Section/Key +% "value" +handle_approved_config_req(#httpd{method='PUT', path_parts=[_, Section, Key]}=Req, Persist) -> + Value = couch_httpd:json_body(Req), + OldValue = couch_config:get(Section, Key, ""), + case couch_config:set(Section, Key, ?b2l(Value), Persist) of + ok -> + send_json(Req, 200, list_to_binary(OldValue)); + Error -> + throw(Error) + end; +% DELETE /_config/Section/Key +handle_approved_config_req(#httpd{method='DELETE',path_parts=[_,Section,Key]}=Req, Persist) -> + case couch_config:get(Section, Key, null) of + null -> + throw({not_found, unknown_config_value}); + OldValue -> + couch_config:delete(Section, Key, Persist), + send_json(Req, 200, list_to_binary(OldValue)) + end. + + +% httpd db handlers + +increment_update_seq_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + {ok, NewSeq} = couch_db:increment_update_seq(Db), + send_json(Req, {[{ok, true}, + {update_seq, NewSeq} + ]}); +increment_update_seq_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + +% httpd log handlers + +handle_log_req(#httpd{method='GET'}=Req) -> + ok = couch_httpd:verify_is_server_admin(Req), + Bytes = list_to_integer(couch_httpd:qs_value(Req, "bytes", "1000")), + Offset = list_to_integer(couch_httpd:qs_value(Req, "offset", "0")), + Chunk = couch_log:read(Bytes, Offset), + {ok, Resp} = start_chunked_response(Req, 200, [ + % send a plaintext response + {"Content-Type", "text/plain; charset=utf-8"}, + {"Content-Length", integer_to_list(length(Chunk))} + ]), + send_chunk(Resp, Chunk), + last_chunk(Resp); +handle_log_req(Req) -> + send_method_not_allowed(Req, "GET"). + + diff --git a/apps/couch/src/couch_httpd_oauth.erl b/apps/couch/src/couch_httpd_oauth.erl new file mode 100644 index 00000000..05ee10e2 --- /dev/null +++ b/apps/couch/src/couch_httpd_oauth.erl @@ -0,0 +1,176 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_oauth). +-include("couch_db.hrl"). + +-export([oauth_authentication_handler/1, handle_oauth_req/1, consumer_lookup/2]). + +% OAuth auth handler using per-node user db +oauth_authentication_handler(#httpd{mochi_req=MochiReq}=Req) -> + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + case couch_config:get("oauth_token_secrets", AccessToken) of + undefined -> + couch_httpd:send_error(Req, 400, <<"invalid_token">>, + <<"Invalid OAuth token.">>); + TokenSecret -> + ?LOG_DEBUG("OAuth URL is: ~p", [URL]), + case oauth:verify(Signature, atom_to_list(MochiReq:get(method)), URL, Params, Consumer, TokenSecret) of + true -> + set_user_ctx(Req, AccessToken); + false -> + Req + end + end + end, true). + +% Look up the consumer key and get the roles to give the consumer +set_user_ctx(Req, AccessToken) -> + % TODO move to db storage + Name = case couch_config:get("oauth_token_users", AccessToken) of + undefined -> throw({bad_request, unknown_oauth_token}); + Value -> ?l2b(Value) + end, + case couch_auth_cache:get_user_creds(Name) of + nil -> Req; + User -> + Roles = couch_util:get_value(<<"roles">>, User, []), + Req#httpd{user_ctx=#user_ctx{name=Name, roles=Roles}} + end. + +% OAuth request_token +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"request_token">>], method=Method}=Req) -> + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), + case oauth:verify(Signature, atom_to_list(Method), URL, Params, Consumer, TokenSecret) of + true -> + ok(Req, <<"oauth_token=requestkey&oauth_token_secret=requestsecret">>); + false -> + invalid_signature(Req) + end + end, false); +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"authorize">>]}=Req) -> + {ok, serve_oauth_authorize(Req)}; +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"access_token">>], method='GET'}=Req) -> + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + case oauth:token(Params) of + "requestkey" -> + case oauth:verify(Signature, "GET", URL, Params, Consumer, "requestsecret") of + true -> + ok(Req, <<"oauth_token=accesskey&oauth_token_secret=accesssecret">>); + false -> + invalid_signature(Req) + end; + _ -> + couch_httpd:send_error(Req, 400, <<"invalid_token">>, <<"Invalid OAuth token.">>) + end + end, false); +handle_oauth_req(#httpd{path_parts=[_OAuth, <<"access_token">>]}=Req) -> + couch_httpd:send_method_not_allowed(Req, "GET"). + +invalid_signature(Req) -> + couch_httpd:send_error(Req, 400, <<"invalid_signature">>, <<"Invalid signature value.">>). + +% This needs to be protected i.e. force user to login using HTTP Basic Auth or form-based login. +serve_oauth_authorize(#httpd{method=Method}=Req) -> + case Method of + 'GET' -> + % Confirm with the User that they want to authenticate the Consumer + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), + case oauth:verify(Signature, "GET", URL, Params, Consumer, TokenSecret) of + true -> + ok(Req, <<"oauth_token=requestkey&oauth_token_secret=requestsecret">>); + false -> + invalid_signature(Req) + end + end, false); + 'POST' -> + % If the User has confirmed, we direct the User back to the Consumer with a verification code + serve_oauth(Req, fun(URL, Params, Consumer, Signature) -> + AccessToken = couch_util:get_value("oauth_token", Params), + TokenSecret = couch_config:get("oauth_token_secrets", AccessToken), + case oauth:verify(Signature, "POST", URL, Params, Consumer, TokenSecret) of + true -> + %redirect(oauth_callback, oauth_token, oauth_verifier), + ok(Req, <<"oauth_token=requestkey&oauth_token_secret=requestsecret">>); + false -> + invalid_signature(Req) + end + end, false); + _ -> + couch_httpd:send_method_not_allowed(Req, "GET,POST") + end. + +serve_oauth(#httpd{mochi_req=MochiReq}=Req, Fun, FailSilently) -> + % 1. In the HTTP Authorization header as defined in OAuth HTTP Authorization Scheme. + % 2. As the HTTP POST request body with a content-type of application/x-www-form-urlencoded. + % 3. Added to the URLs in the query part (as defined by [RFC3986] section 3). + AuthHeader = case MochiReq:get_header_value("authorization") of + undefined -> + ""; + Else -> + [Head | Tail] = re:split(Else, "\\s", [{parts, 2}, {return, list}]), + case [string:to_lower(Head) | Tail] of + ["oauth", Rest] -> Rest; + _ -> "" + end + end, + HeaderParams = oauth_uri:params_from_header_string(AuthHeader), + %Realm = couch_util:get_value("realm", HeaderParams), + Params = proplists:delete("realm", HeaderParams) ++ MochiReq:parse_qs(), + ?LOG_DEBUG("OAuth Params: ~p", [Params]), + case couch_util:get_value("oauth_version", Params, "1.0") of + "1.0" -> + case couch_util:get_value("oauth_consumer_key", Params, undefined) of + undefined -> + case FailSilently of + true -> Req; + false -> couch_httpd:send_error(Req, 400, <<"invalid_consumer">>, <<"Invalid consumer.">>) + end; + ConsumerKey -> + SigMethod = couch_util:get_value("oauth_signature_method", Params), + case consumer_lookup(ConsumerKey, SigMethod) of + none -> + couch_httpd:send_error(Req, 400, <<"invalid_consumer">>, <<"Invalid consumer (key or signature method).">>); + Consumer -> + Signature = couch_util:get_value("oauth_signature", Params), + URL = couch_httpd:absolute_uri(Req, MochiReq:get(raw_path)), + Fun(URL, proplists:delete("oauth_signature", Params), + Consumer, Signature) + end + end; + _ -> + couch_httpd:send_error(Req, 400, <<"invalid_oauth_version">>, <<"Invalid OAuth version.">>) + end. + +consumer_lookup(Key, MethodStr) -> + SignatureMethod = case MethodStr of + "PLAINTEXT" -> plaintext; + "HMAC-SHA1" -> hmac_sha1; + %"RSA-SHA1" -> rsa_sha1; + _Else -> undefined + end, + case SignatureMethod of + undefined -> none; + _SupportedMethod -> + case couch_config:get("oauth_consumer_secrets", Key, undefined) of + undefined -> none; + Secret -> {Key, Secret, SignatureMethod} + end + end. + +ok(#httpd{mochi_req=MochiReq}, Body) -> + {ok, MochiReq:respond({200, [], Body})}. diff --git a/apps/couch/src/couch_httpd_proxy.erl b/apps/couch/src/couch_httpd_proxy.erl new file mode 100644 index 00000000..c196f72d --- /dev/null +++ b/apps/couch/src/couch_httpd_proxy.erl @@ -0,0 +1,431 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_httpd_proxy). + +-export([handle_proxy_req/2]). + +-include("couch_db.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-define(TIMEOUT, infinity). +-define(PKT_SIZE, 4096). + + +handle_proxy_req(Req, ProxyDest) -> + + %% Bug in Mochiweb? + %% Reported here: http://github.com/mochi/mochiweb/issues/issue/16 + erase(mochiweb_request_body_length), + + Method = get_method(Req), + Url = get_url(Req, ProxyDest), + Version = get_version(Req), + Headers = get_headers(Req), + Body = get_body(Req), + Options = [ + {http_vsn, Version}, + {headers_as_is, true}, + {response_format, binary}, + {stream_to, {self(), once}} + ], + case ibrowse:send_req(Url, Headers, Method, Body, Options, ?TIMEOUT) of + {ibrowse_req_id, ReqId} -> + stream_response(Req, ProxyDest, ReqId); + {error, Reason} -> + throw({error, Reason}) + end. + + +get_method(#httpd{mochi_req=MochiReq}) -> + case MochiReq:get(method) of + Method when is_atom(Method) -> + list_to_atom(string:to_lower(atom_to_list(Method))); + Method when is_list(Method) -> + list_to_atom(string:to_lower(Method)); + Method when is_binary(Method) -> + list_to_atom(string:to_lower(?b2l(Method))) + end. + + +get_url(Req, ProxyDest) when is_binary(ProxyDest) -> + get_url(Req, ?b2l(ProxyDest)); +get_url(#httpd{mochi_req=MochiReq}=Req, ProxyDest) -> + BaseUrl = case mochiweb_util:partition(ProxyDest, "/") of + {[], "/", _} -> couch_httpd:absolute_uri(Req, ProxyDest); + _ -> ProxyDest + end, + ProxyPrefix = "/" ++ ?b2l(hd(Req#httpd.path_parts)), + RequestedPath = MochiReq:get(raw_path), + case mochiweb_util:partition(RequestedPath, ProxyPrefix) of + {[], ProxyPrefix, []} -> + BaseUrl; + {[], ProxyPrefix, [$/ | DestPath]} -> + remove_trailing_slash(BaseUrl) ++ "/" ++ DestPath; + {[], ProxyPrefix, DestPath} -> + remove_trailing_slash(BaseUrl) ++ "/" ++ DestPath; + _Else -> + throw({invalid_url_path, {ProxyPrefix, RequestedPath}}) + end. + +get_version(#httpd{mochi_req=MochiReq}) -> + MochiReq:get(version). + + +get_headers(#httpd{mochi_req=MochiReq}) -> + to_ibrowse_headers(mochiweb_headers:to_list(MochiReq:get(headers)), []). + +to_ibrowse_headers([], Acc) -> + lists:reverse(Acc); +to_ibrowse_headers([{K, V} | Rest], Acc) when is_atom(K) -> + to_ibrowse_headers([{atom_to_list(K), V} | Rest], Acc); +to_ibrowse_headers([{K, V} | Rest], Acc) when is_list(K) -> + case string:to_lower(K) of + "content-length" -> + to_ibrowse_headers(Rest, [{content_length, V} | Acc]); + % This appears to make ibrowse too smart. + %"transfer-encoding" -> + % to_ibrowse_headers(Rest, [{transfer_encoding, V} | Acc]); + _ -> + to_ibrowse_headers(Rest, [{K, V} | Acc]) + end. + +get_body(#httpd{method='GET'}) -> + fun() -> eof end; +get_body(#httpd{method='HEAD'}) -> + fun() -> eof end; +get_body(#httpd{method='DELETE'}) -> + fun() -> eof end; +get_body(#httpd{mochi_req=MochiReq}) -> + case MochiReq:get(body_length) of + undefined -> + <<>>; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + {fun stream_chunked_body/1, {init, MochiReq, 0}}; + 0 -> + <<>>; + Length when is_integer(Length) andalso Length > 0 -> + {fun stream_length_body/1, {init, MochiReq, Length}}; + Length -> + exit({invalid_body_length, Length}) + end. + + +remove_trailing_slash(Url) -> + rem_slash(lists:reverse(Url)). + +rem_slash([]) -> + []; +rem_slash([$\s | RevUrl]) -> + rem_slash(RevUrl); +rem_slash([$\t | RevUrl]) -> + rem_slash(RevUrl); +rem_slash([$\r | RevUrl]) -> + rem_slash(RevUrl); +rem_slash([$\n | RevUrl]) -> + rem_slash(RevUrl); +rem_slash([$/ | RevUrl]) -> + rem_slash(RevUrl); +rem_slash(RevUrl) -> + lists:reverse(RevUrl). + + +stream_chunked_body({init, MReq, 0}) -> + % First chunk, do expect-continue dance. + init_body_stream(MReq), + stream_chunked_body({stream, MReq, 0, [], ?PKT_SIZE}); +stream_chunked_body({stream, MReq, 0, Buf, BRem}) -> + % Finished a chunk, get next length. If next length + % is 0, its time to try and read trailers. + {CRem, Data} = read_chunk_length(MReq), + case CRem of + 0 -> + BodyData = lists:reverse(Buf, Data), + {ok, BodyData, {trailers, MReq, [], ?PKT_SIZE}}; + _ -> + stream_chunked_body( + {stream, MReq, CRem, [Data | Buf], BRem-size(Data)} + ) + end; +stream_chunked_body({stream, MReq, CRem, Buf, BRem}) when BRem =< 0 -> + % Time to empty our buffers to the upstream socket. + BodyData = lists:reverse(Buf), + {ok, BodyData, {stream, MReq, CRem, [], ?PKT_SIZE}}; +stream_chunked_body({stream, MReq, CRem, Buf, BRem}) -> + % Buffer some more data from the client. + Length = lists:min([CRem, BRem]), + Socket = MReq:get(socket), + NewState = case mochiweb_socket:recv(Socket, Length, ?TIMEOUT) of + {ok, Data} when size(Data) == CRem -> + case mochiweb_socket:recv(Socket, 2, ?TIMEOUT) of + {ok, <<"\r\n">>} -> + {stream, MReq, 0, [<<"\r\n">>, Data | Buf], BRem-Length-2}; + _ -> + exit(normal) + end; + {ok, Data} -> + {stream, MReq, CRem-Length, [Data | Buf], BRem-Length}; + _ -> + exit(normal) + end, + stream_chunked_body(NewState); +stream_chunked_body({trailers, MReq, Buf, BRem}) when BRem =< 0 -> + % Empty our buffers and send data upstream. + BodyData = lists:reverse(Buf), + {ok, BodyData, {trailers, MReq, [], ?PKT_SIZE}}; +stream_chunked_body({trailers, MReq, Buf, BRem}) -> + % Read another trailer into the buffer or stop on an + % empty line. + Socket = MReq:get(socket), + mochiweb_socket:setopts(Socket, [{packet, line}]), + case mochiweb_socket:recv(Socket, 0, ?TIMEOUT) of + {ok, <<"\r\n">>} -> + mochiweb_socket:setopts(Socket, [{packet, raw}]), + BodyData = lists:reverse(Buf, <<"\r\n">>), + {ok, BodyData, eof}; + {ok, Footer} -> + mochiweb_socket:setopts(Socket, [{packet, raw}]), + NewState = {trailers, MReq, [Footer | Buf], BRem-size(Footer)}, + stream_chunked_body(NewState); + _ -> + exit(normal) + end; +stream_chunked_body(eof) -> + % Tell ibrowse we're done sending data. + eof. + + +stream_length_body({init, MochiReq, Length}) -> + % Do the expect-continue dance + init_body_stream(MochiReq), + stream_length_body({stream, MochiReq, Length}); +stream_length_body({stream, _MochiReq, 0}) -> + % Finished streaming. + eof; +stream_length_body({stream, MochiReq, Length}) -> + BufLen = lists:min([Length, ?PKT_SIZE]), + case MochiReq:recv(BufLen) of + <<>> -> eof; + Bin -> {ok, Bin, {stream, MochiReq, Length-BufLen}} + end. + + +init_body_stream(MochiReq) -> + Expect = case MochiReq:get_header_value("expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + MochiReq:start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end. + + +read_chunk_length(MochiReq) -> + Socket = MochiReq:get(socket), + mochiweb_socket:setopts(Socket, [{packet, line}]), + case mochiweb_socket:recv(Socket, 0, ?TIMEOUT) of + {ok, Header} -> + mochiweb_socket:setopts(Socket, [{packet, raw}]), + Splitter = fun(C) -> + C =/= $\r andalso C =/= $\n andalso C =/= $\s + end, + {Hex, _Rest} = lists:splitwith(Splitter, ?b2l(Header)), + {mochihex:to_int(Hex), Header}; + _ -> + exit(normal) + end. + + +stream_response(Req, ProxyDest, ReqId) -> + receive + {ibrowse_async_headers, ReqId, "100", _} -> + % ibrowse doesn't handle 100 Continue responses which + % means we have to discard them so the proxy client + % doesn't get confused. + ibrowse:stream_next(ReqId), + stream_response(Req, ProxyDest, ReqId); + {ibrowse_async_headers, ReqId, Status, Headers} -> + {Source, Dest} = get_urls(Req, ProxyDest), + FixedHeaders = fix_headers(Source, Dest, Headers, []), + case body_length(FixedHeaders) of + chunked -> + {ok, Resp} = couch_httpd:start_chunked_response( + Req, list_to_integer(Status), FixedHeaders + ), + ibrowse:stream_next(ReqId), + stream_chunked_response(Req, ReqId, Resp), + {ok, Resp}; + Length when is_integer(Length) -> + {ok, Resp} = couch_httpd:start_response_length( + Req, list_to_integer(Status), FixedHeaders, Length + ), + ibrowse:stream_next(ReqId), + stream_length_response(Req, ReqId, Resp), + {ok, Resp}; + _ -> + {ok, Resp} = couch_httpd:start_response( + Req, list_to_integer(Status), FixedHeaders + ), + ibrowse:stream_next(ReqId), + stream_length_response(Req, ReqId, Resp), + % XXX: MochiWeb apparently doesn't look at the + % response to see if it must force close the + % connection. So we help it out here. + erlang:put(mochiweb_request_force_close, true), + {ok, Resp} + end + end. + + +stream_chunked_response(Req, ReqId, Resp) -> + receive + {ibrowse_async_response, ReqId, {error, Reason}} -> + throw({error, Reason}); + {ibrowse_async_response, ReqId, Chunk} -> + couch_httpd:send_chunk(Resp, Chunk), + ibrowse:stream_next(ReqId), + stream_chunked_response(Req, ReqId, Resp); + {ibrowse_async_response_end, ReqId} -> + couch_httpd:last_chunk(Resp) + end. + + +stream_length_response(Req, ReqId, Resp) -> + receive + {ibrowse_async_response, ReqId, {error, Reason}} -> + throw({error, Reason}); + {ibrowse_async_response, ReqId, Chunk} -> + couch_httpd:send(Resp, Chunk), + ibrowse:stream_next(ReqId), + stream_length_response(Req, ReqId, Resp); + {ibrowse_async_response_end, ReqId} -> + ok + end. + + +get_urls(Req, ProxyDest) -> + SourceUrl = couch_httpd:absolute_uri(Req, "/" ++ hd(Req#httpd.path_parts)), + Source = parse_url(?b2l(iolist_to_binary(SourceUrl))), + case (catch parse_url(ProxyDest)) of + Dest when is_record(Dest, url) -> + {Source, Dest}; + _ -> + DestUrl = couch_httpd:absolute_uri(Req, ProxyDest), + {Source, parse_url(DestUrl)} + end. + + +fix_headers(_, _, [], Acc) -> + lists:reverse(Acc); +fix_headers(Source, Dest, [{K, V} | Rest], Acc) -> + Fixed = case string:to_lower(K) of + "location" -> rewrite_location(Source, Dest, V); + "content-location" -> rewrite_location(Source, Dest, V); + "uri" -> rewrite_location(Source, Dest, V); + "destination" -> rewrite_location(Source, Dest, V); + "set-cookie" -> rewrite_cookie(Source, Dest, V); + _ -> V + end, + fix_headers(Source, Dest, Rest, [{K, Fixed} | Acc]). + + +rewrite_location(Source, #url{host=Host, port=Port, protocol=Proto}, Url) -> + case (catch parse_url(Url)) of + #url{host=Host, port=Port, protocol=Proto} = Location -> + DestLoc = #url{ + protocol=Source#url.protocol, + host=Source#url.host, + port=Source#url.port, + path=join_url_path(Source#url.path, Location#url.path) + }, + url_to_url(DestLoc); + #url{} -> + Url; + _ -> + url_to_url(Source#url{path=join_url_path(Source#url.path, Url)}) + end. + + +rewrite_cookie(_Source, _Dest, Cookie) -> + Cookie. + + +parse_url(Url) when is_binary(Url) -> + ibrowse_lib:parse_url(?b2l(Url)); +parse_url(Url) when is_list(Url) -> + ibrowse_lib:parse_url(?b2l(iolist_to_binary(Url))). + + +join_url_path(Src, Dst) -> + Src2 = case lists:reverse(Src) of + "/" ++ RestSrc -> lists:reverse(RestSrc); + _ -> Src + end, + Dst2 = case Dst of + "/" ++ RestDst -> RestDst; + _ -> Dst + end, + Src2 ++ "/" ++ Dst2. + + +url_to_url(#url{host=Host, port=Port, path=Path, protocol=Proto} = Url) -> + LPort = case {Proto, Port} of + {http, 80} -> ""; + {https, 443} -> ""; + _ -> ":" ++ integer_to_list(Port) + end, + LPath = case Path of + "/" ++ _RestPath -> Path; + _ -> "/" ++ Path + end, + HostPart = case Url#url.host_type of + ipv6_address -> + "[" ++ Host ++ "]"; + _ -> + Host + end, + atom_to_list(Proto) ++ "://" ++ HostPart ++ LPort ++ LPath. + + +body_length(Headers) -> + case is_chunked(Headers) of + true -> chunked; + _ -> content_length(Headers) + end. + + +is_chunked([]) -> + false; +is_chunked([{K, V} | Rest]) -> + case string:to_lower(K) of + "transfer-encoding" -> + string:to_lower(V) == "chunked"; + _ -> + is_chunked(Rest) + end. + +content_length([]) -> + undefined; +content_length([{K, V} | Rest]) -> + case string:to_lower(K) of + "content-length" -> + list_to_integer(V); + _ -> + content_length(Rest) + end. + diff --git a/apps/couch/src/couch_httpd_rewrite.erl b/apps/couch/src/couch_httpd_rewrite.erl new file mode 100644 index 00000000..8480c1e9 --- /dev/null +++ b/apps/couch/src/couch_httpd_rewrite.erl @@ -0,0 +1,463 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +% +% bind_path is based on bind method from Webmachine + + +%% @doc Module for URL rewriting by pattern matching. + +-module(couch_httpd_rewrite). +-export([handle_rewrite_req/3]). +-include("couch_db.hrl"). + +-define(SEPARATOR, $\/). +-define(MATCH_ALL, {bind, <<"*">>}). + + +%% doc The http rewrite handler. All rewriting is done from +%% /dbname/_design/ddocname/_rewrite by default. +%% +%% each rules should be in rewrites member of the design doc. +%% Ex of a complete rule : +%% +%% { +%% .... +%% "rewrites": [ +%% { +%% "from": "", +%% "to": "index.html", +%% "method": "GET", +%% "query": {} +%% } +%% ] +%% } +%% +%% from: is the path rule used to bind current uri to the rule. It +%% use pattern matching for that. +%% +%% to: rule to rewrite an url. It can contain variables depending on binding +%% variables discovered during pattern matching and query args (url args and from +%% the query member.) +%% +%% method: method to bind the request method to the rule. by default "*" +%% query: query args you want to define they can contain dynamic variable +%% by binding the key to the bindings +%% +%% +%% to and from are path with patterns. pattern can be string starting with ":" or +%% "*". ex: +%% /somepath/:var/* +%% +%% This path is converted in erlang list by splitting "/". Each var are +%% converted in atom. "*" is converted to '*' atom. The pattern matching is done +%% by splitting "/" in request url in a list of token. A string pattern will +%% match equal token. The star atom ('*' in single quotes) will match any number +%% of tokens, but may only be present as the last pathtern in a pathspec. If all +%% tokens are matched and all pathterms are used, then the pathspec matches. It works +%% like webmachine. Each identified token will be reused in to rule and in query +%% +%% The pattern matching is done by first matching the request method to a rule. by +%% default all methods match a rule. (method is equal to "*" by default). Then +%% It will try to match the path to one rule. If no rule match, then a 404 error +%% is displayed. +%% +%% Once a rule is found we rewrite the request url using the "to" and +%% "query" members. The identified token are matched to the rule and +%% will replace var. if '*' is found in the rule it will contain the remaining +%% part if it exists. +%% +%% Examples: +%% +%% Dispatch rule URL TO Tokens +%% +%% {"from": "/a/b", /a/b?k=v /some/b?k=v var =:= b +%% "to": "/some/"} k = v +%% +%% {"from": "/a/b", /a/b /some/b?var=b var =:= b +%% "to": "/some/:var"} +%% +%% {"from": "/a", /a /some +%% "to": "/some/*"} +%% +%% {"from": "/a/*", /a/b/c /some/b/c +%% "to": "/some/*"} +%% +%% {"from": "/a", /a /some +%% "to": "/some/*"} +%% +%% {"from": "/a/:foo/*", /a/b/c /some/b/c?foo=b foo =:= b +%% "to": "/some/:foo/*"} +%% +%% {"from": "/a/:foo", /a/b /some/?k=b&foo=b foo =:= b +%% "to": "/some", +%% "query": { +%% "k": ":foo" +%% }} +%% +%% {"from": "/a", /a?foo=b /some/b foo =:= b +%% "to": "/some/:foo", +%% }} + + + +handle_rewrite_req(#httpd{ + path_parts=[DbName, <<"_design">>, DesignName, _Rewrite|PathParts], + method=Method, + mochi_req=MochiReq}=Req, _Db, DDoc) -> + + % we are in a design handler + DesignId = <<"_design/", DesignName/binary>>, + Prefix = <<"/", DbName/binary, "/", DesignId/binary>>, + QueryList = lists:map(fun decode_query_value/1, couch_httpd:qs(Req)), + + #doc{body={Props}} = DDoc, + + % get rules from ddoc + case couch_util:get_value(<<"rewrites">>, Props) of + undefined -> + couch_httpd:send_error(Req, 404, <<"rewrite_error">>, + <<"Invalid path.">>); + Bin when is_binary(Bin) -> + couch_httpd:send_error(Req, 400, <<"rewrite_error">>, + <<"Rewrite rules are a String. They must be a JSON Array.">>); + Rules -> + % create dispatch list from rules + DispatchList = [make_rule(Rule) || {Rule} <- Rules], + Method1 = couch_util:to_binary(Method), + + %% get raw path by matching url to a rule. + RawPath = case try_bind_path(DispatchList, Method1, + PathParts, QueryList) of + no_dispatch_path -> + throw(not_found); + {NewPathParts, Bindings} -> + Parts = [quote_plus(X) || X <- NewPathParts], + + % build new path, reencode query args, eventually convert + % them to json + Bindings1 = maybe_encode_bindings(Bindings), + Path = binary_to_list( + iolist_to_binary([ + string:join(Parts, [?SEPARATOR]), + [["?", mochiweb_util:urlencode(Bindings1)] + || Bindings1 =/= [] ] + ])), + + % if path is relative detect it and rewrite path + case mochiweb_util:safe_relative_path(Path) of + undefined -> + ?b2l(Prefix) ++ "/" ++ Path; + P1 -> + ?b2l(Prefix) ++ "/" ++ P1 + end + + end, + + % normalize final path (fix levels "." and "..") + RawPath1 = ?b2l(iolist_to_binary(normalize_path(RawPath))), + + ?LOG_DEBUG("rewrite to ~p ~n", [RawPath1]), + + % build a new mochiweb request + MochiReq1 = mochiweb_request:new(MochiReq:get(socket), + MochiReq:get(method), + RawPath1, + MochiReq:get(version), + MochiReq:get(headers)), + + % cleanup, It force mochiweb to reparse raw uri. + MochiReq1:cleanup(), + + #httpd{ + db_url_handlers = DbUrlHandlers, + design_url_handlers = DesignUrlHandlers, + default_fun = DefaultFun, + url_handlers = UrlHandlers + } = Req, + couch_httpd:handle_request_int(MochiReq1, DefaultFun, + UrlHandlers, DbUrlHandlers, DesignUrlHandlers) + end. + +quote_plus({bind, X}) -> + mochiweb_util:quote_plus(X); +quote_plus(X) -> + mochiweb_util:quote_plus(X). + +%% @doc Try to find a rule matching current url. If none is found +%% 404 error not_found is raised +try_bind_path([], _Method, _PathParts, _QueryList) -> + no_dispatch_path; +try_bind_path([Dispatch|Rest], Method, PathParts, QueryList) -> + [{PathParts1, Method1}, RedirectPath, QueryArgs, Formats] = Dispatch, + case bind_method(Method1, Method) of + true -> + case bind_path(PathParts1, PathParts, []) of + {ok, Remaining, Bindings} -> + Bindings1 = Bindings ++ QueryList, + % we parse query args from the rule and fill + % it eventually with bindings vars + QueryArgs1 = make_query_list(QueryArgs, Bindings1, + Formats, []), + % remove params in QueryLists1 that are already in + % QueryArgs1 + Bindings2 = lists:foldl(fun({K, V}, Acc) -> + K1 = to_binding(K), + KV = case couch_util:get_value(K1, QueryArgs1) of + undefined -> [{K1, V}]; + _V1 -> [] + end, + Acc ++ KV + end, [], Bindings1), + + FinalBindings = Bindings2 ++ QueryArgs1, + NewPathParts = make_new_path(RedirectPath, FinalBindings, + Remaining, []), + {NewPathParts, FinalBindings}; + fail -> + try_bind_path(Rest, Method, PathParts, QueryList) + end; + false -> + try_bind_path(Rest, Method, PathParts, QueryList) + end. + +%% rewriting dynamically the quey list given as query member in +%% rewrites. Each value is replaced by one binding or an argument +%% passed in url. +make_query_list([], _Bindings, _Formats, Acc) -> + Acc; +make_query_list([{Key, {Value}}|Rest], Bindings, Formats, Acc) -> + Value1 = {Value}, + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) when is_binary(Value) -> + Value1 = replace_var(Value, Bindings, Formats), + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) when is_list(Value) -> + Value1 = replace_var(Value, Bindings, Formats), + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value1}|Acc]); +make_query_list([{Key, Value}|Rest], Bindings, Formats, Acc) -> + make_query_list(Rest, Bindings, Formats, [{to_binding(Key), Value}|Acc]). + +replace_var(<<"*">>=Value, Bindings, Formats) -> + get_var(Value, Bindings, Value, Formats); +replace_var(<<":", Var/binary>> = Value, Bindings, Formats) -> + get_var(Var, Bindings, Value, Formats); +replace_var(Value, _Bindings, _Formats) when is_binary(Value) -> + Value; +replace_var(Value, Bindings, Formats) when is_list(Value) -> + lists:reverse(lists:foldl(fun + (<<":", Var/binary>>=Value1, Acc) -> + [get_var(Var, Bindings, Value1, Formats)|Acc]; + (Value1, Acc) -> + [Value1|Acc] + end, [], Value)); +replace_var(Value, _Bindings, _Formats) -> + Value. + +maybe_json(Key, Value) -> + case lists:member(Key, [<<"key">>, <<"startkey">>, <<"start_key">>, + <<"endkey">>, <<"end_key">>, <<"keys">>]) of + true -> + ?JSON_ENCODE(Value); + false -> + Value + end. + +get_var(VarName, Props, Default, Formats) -> + VarName1 = to_binding(VarName), + Val = couch_util:get_value(VarName1, Props, Default), + maybe_format(VarName, Val, Formats). + +maybe_format(VarName, Value, Formats) -> + case couch_util:get_value(VarName, Formats) of + undefined -> + Value; + Format -> + format(Format, Value) + end. + +format(<<"int">>, Value) when is_integer(Value) -> + Value; +format(<<"int">>, Value) when is_binary(Value) -> + format(<<"int">>, ?b2l(Value)); +format(<<"int">>, Value) when is_list(Value) -> + case (catch list_to_integer(Value)) of + IntVal when is_integer(IntVal) -> + IntVal; + _ -> + Value + end; +format(<<"bool">>, Value) when is_binary(Value) -> + format(<<"bool">>, ?b2l(Value)); +format(<<"bool">>, Value) when is_list(Value) -> + case string:to_lower(Value) of + "true" -> true; + "false" -> false; + _ -> Value + end; +format(_Format, Value) -> + Value. + +%% doc: build new patch from bindings. bindings are query args +%% (+ dynamic query rewritten if needed) and bindings found in +%% bind_path step. +make_new_path([], _Bindings, _Remaining, Acc) -> + lists:reverse(Acc); +make_new_path([?MATCH_ALL], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_new_path([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_new_path([{bind, P}|Rest], Bindings, Remaining, Acc) -> + P2 = case couch_util:get_value({bind, P}, Bindings) of + undefined -> << "undefined">>; + P1 -> + iolist_to_binary(P1) + end, + make_new_path(Rest, Bindings, Remaining, [P2|Acc]); +make_new_path([P|Rest], Bindings, Remaining, Acc) -> + make_new_path(Rest, Bindings, Remaining, [P|Acc]). + + +%% @doc If method of the query fith the rule method. If the +%% method rule is '*', which is the default, all +%% request method will bind. It allows us to make rules +%% depending on HTTP method. +bind_method(?MATCH_ALL, _Method ) -> + true; +bind_method({bind, Method}, Method) -> + true; +bind_method(_, _) -> + false. + + +%% @doc bind path. Using the rule from we try to bind variables given +%% to the current url by pattern matching +bind_path([], [], Bindings) -> + {ok, [], Bindings}; +bind_path([?MATCH_ALL], [Match|_RestMatch]=Rest, Bindings) -> + {ok, Rest, [{?MATCH_ALL, Match}|Bindings]}; +bind_path(_, [], _) -> + fail; +bind_path([{bind, Token}|RestToken],[Match|RestMatch],Bindings) -> + bind_path(RestToken, RestMatch, [{{bind, Token}, Match}|Bindings]); +bind_path([Token|RestToken], [Token|RestMatch], Bindings) -> + bind_path(RestToken, RestMatch, Bindings); +bind_path(_, _, _) -> + fail. + + +%% normalize path. +normalize_path(Path) -> + "/" ++ string:join(normalize_path1(string:tokens(Path, + "/"), []), [?SEPARATOR]). + + +normalize_path1([], Acc) -> + lists:reverse(Acc); +normalize_path1([".."|Rest], Acc) -> + Acc1 = case Acc of + [] -> [".."|Acc]; + [T|_] when T =:= ".." -> [".."|Acc]; + [_|R] -> R + end, + normalize_path1(Rest, Acc1); +normalize_path1(["."|Rest], Acc) -> + normalize_path1(Rest, Acc); +normalize_path1([Path|Rest], Acc) -> + normalize_path1(Rest, [Path|Acc]). + + +%% @doc transform json rule in erlang for pattern matching +make_rule(Rule) -> + Method = case couch_util:get_value(<<"method">>, Rule) of + undefined -> ?MATCH_ALL; + M -> to_binding(M) + end, + QueryArgs = case couch_util:get_value(<<"query">>, Rule) of + undefined -> []; + {Args} -> Args + end, + FromParts = case couch_util:get_value(<<"from">>, Rule) of + undefined -> [?MATCH_ALL]; + From -> + parse_path(From) + end, + ToParts = case couch_util:get_value(<<"to">>, Rule) of + undefined -> + throw({error, invalid_rewrite_target}); + To -> + parse_path(To) + end, + Formats = case couch_util:get_value(<<"formats">>, Rule) of + undefined -> []; + {Fmts} -> Fmts + end, + [{FromParts, Method}, ToParts, QueryArgs, Formats]. + +parse_path(Path) -> + {ok, SlashRE} = re:compile(<<"\\/">>), + path_to_list(re:split(Path, SlashRE), [], 0). + +%% @doc convert a path rule (from or to) to an erlang list +%% * and path variable starting by ":" are converted +%% in erlang atom. +path_to_list([], Acc, _DotDotCount) -> + lists:reverse(Acc); +path_to_list([<<>>|R], Acc, DotDotCount) -> + path_to_list(R, Acc, DotDotCount); +path_to_list([<<"*">>|R], Acc, DotDotCount) -> + path_to_list(R, [?MATCH_ALL|Acc], DotDotCount); +path_to_list([<<"..">>|R], Acc, DotDotCount) when DotDotCount == 2 -> + case couch_config:get("httpd", "secure_rewrites", "true") of + "false" -> + path_to_list(R, [<<"..">>|Acc], DotDotCount+1); + _Else -> + ?LOG_INFO("insecure_rewrite_rule ~p blocked", [lists:reverse(Acc) ++ [<<"..">>] ++ R]), + throw({insecure_rewrite_rule, "too many ../.. segments"}) + end; +path_to_list([<<"..">>|R], Acc, DotDotCount) -> + path_to_list(R, [<<"..">>|Acc], DotDotCount+1); +path_to_list([P|R], Acc, DotDotCount) -> + P1 = case P of + <<":", Var/binary>> -> + to_binding(Var); + _ -> P + end, + path_to_list(R, [P1|Acc], DotDotCount). + +maybe_encode_bindings([]) -> + []; +maybe_encode_bindings(Props) -> + lists:foldl(fun + ({{bind, <<"*">>}, _V}, Acc) -> + Acc; + ({{bind, K}, V}, Acc) -> + V1 = iolist_to_binary(maybe_json(K, V)), + [{K, V1}|Acc] + end, [], Props). + +decode_query_value({K,V}) -> + case lists:member(K, ["key", "startkey", "start_key", + "endkey", "end_key", "keys"]) of + true -> + {to_binding(K), ?JSON_DECODE(V)}; + false -> + {to_binding(K), ?l2b(V)} + end. + +to_binding({bind, V}) -> + {bind, V}; +to_binding(V) when is_list(V) -> + to_binding(?l2b(V)); +to_binding(V) -> + {bind, V}. diff --git a/apps/couch/src/couch_httpd_show.erl b/apps/couch/src/couch_httpd_show.erl new file mode 100644 index 00000000..58f046e4 --- /dev/null +++ b/apps/couch/src/couch_httpd_show.erl @@ -0,0 +1,404 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_show). + +-export([handle_doc_show_req/3, handle_doc_update_req/3, handle_view_list_req/3, + handle_view_list/6, get_fun_key/3]). + +-include("couch_db.hrl"). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, + start_json_response/2,send_chunk/2,last_chunk/1,send_chunked_error/2, + start_chunked_response/3, send_error/4]). + + +% /db/_design/foo/_show/bar/docid +% show converts a json doc to a response of any content-type. +% it looks up the doc an then passes it to the query server. +% then it sends the response from the query server to the http client. + +maybe_open_doc(Db, DocId) -> + case catch couch_httpd_db:couch_doc_open(Db, DocId, nil, [conflicts]) of + {not_found, missing} -> nil; + {not_found,deleted} -> nil; + Doc -> Doc + end. +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName, DocId] + }=Req, Db, DDoc) -> + + % open the doc + Doc = maybe_open_doc(Db, DocId), + + % we don't handle revs here b/c they are an internal api + % returns 404 if there is no doc with DocId + handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId); + +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName, DocId|Rest] + }=Req, Db, DDoc) -> + + DocParts = [DocId|Rest], + DocId1 = ?l2b(string:join([?b2l(P)|| P <- DocParts], "/")), + + % open the doc + Doc = maybe_open_doc(Db, DocId1), + + % we don't handle revs here b/c they are an internal api + % pass 404 docs to the show function + handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId1); + +handle_doc_show_req(#httpd{ + path_parts=[_, _, _, _, ShowName] + }=Req, Db, DDoc) -> + % with no docid the doc is nil + handle_doc_show(Req, Db, DDoc, ShowName, nil); + +handle_doc_show_req(Req, _Db, _DDoc) -> + send_error(Req, 404, <<"show_error">>, <<"Invalid path.">>). + +handle_doc_show(Req, Db, DDoc, ShowName, Doc) -> + handle_doc_show(Req, Db, DDoc, ShowName, Doc, null). + +handle_doc_show(Req, Db, DDoc, ShowName, Doc, DocId) -> + % get responder for ddoc/showname + CurrentEtag = show_etag(Req, Doc, DDoc, []), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + JsonReq = couch_httpd_external:json_req_obj(Req, Db, DocId), + JsonDoc = couch_query_servers:json_doc(Doc), + [<<"resp">>, ExternalResp] = + couch_query_servers:ddoc_prompt(DDoc, [<<"shows">>, ShowName], [JsonDoc, JsonReq]), + JsonResp = apply_etag(ExternalResp, CurrentEtag), + couch_httpd_external:send_external_response(Req, JsonResp) + end). + + + +show_etag(#httpd{user_ctx=UserCtx}=Req, Doc, DDoc, More) -> + Accept = couch_httpd:header_value(Req, "Accept"), + DocPart = case Doc of + nil -> nil; + Doc -> couch_httpd:doc_etag(Doc) + end, + couch_httpd:make_etag({couch_httpd:doc_etag(DDoc), DocPart, Accept, UserCtx#user_ctx.roles, More}). + +get_fun_key(DDoc, Type, Name) -> + #doc{body={Props}} = DDoc, + Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + Src = couch_util:get_nested_json_value({Props}, [Type, Name]), + {Lang, Src}. + +% /db/_design/foo/update/bar/docid +% updates a doc based on a request +% handle_doc_update_req(#httpd{method = 'GET'}=Req, _Db, _DDoc) -> +% % anything but GET +% send_method_not_allowed(Req, "POST,PUT,DELETE,ETC"); + +handle_doc_update_req(#httpd{ + path_parts=[_, _, _, _, UpdateName, DocId|Rest] + }=Req, Db, DDoc) -> + DocParts = [DocId|Rest], + DocId1 = ?l2b(string:join([?b2l(P)|| P <- DocParts], "/")), + Doc = try couch_httpd_db:couch_doc_open(Db, DocId1, nil, [conflicts]) + catch + _ -> nil + end, + send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId1); + +handle_doc_update_req(#httpd{ + path_parts=[_, _, _, _, UpdateName] + }=Req, Db, DDoc) -> + send_doc_update_response(Req, Db, DDoc, UpdateName, nil, null); + +handle_doc_update_req(Req, _Db, _DDoc) -> + send_error(Req, 404, <<"update_error">>, <<"Invalid path.">>). + +send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> + JsonReq = couch_httpd_external:json_req_obj(Req, Db, DocId), + JsonDoc = couch_query_servers:json_doc(Doc), + JsonResp1 = case couch_query_servers:ddoc_prompt(DDoc, + [<<"updates">>, UpdateName], [JsonDoc, JsonReq]) of + [<<"up">>, {NewJsonDoc}, {JsonResp}] -> + Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit", + "false") of + "true" -> + [full_commit]; + _ -> + [] + end, + NewDoc = couch_doc:from_json_obj({NewJsonDoc}), + {ok, NewRev} = couch_db:update_doc(Db, NewDoc, Options), + NewRevStr = couch_doc:rev_to_str(NewRev), + {[{<<"code">>, 201}, {<<"headers">>, + {[{<<"X-Couch-Update-NewRev">>, NewRevStr}]}} | JsonResp]}; + [<<"up">>, _Other, {JsonResp}] -> + {[{<<"code">>, 200} | JsonResp]} + end, + + % todo set location field + couch_httpd_external:send_external_response(Req, JsonResp1). + + +% view-list request with view and list from same design doc. +handle_view_list_req(#httpd{method='GET', + path_parts=[_, _, DesignName, _, ListName, ViewName]}=Req, Db, DDoc) -> + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + handle_view_list(Req, Db, DDoc, ListName, {DesignName, ViewName}, Keys); + +% view-list request with view and list from different design docs. +handle_view_list_req(#httpd{method='GET', + path_parts=[_, _, _, _, ListName, ViewDesignName, ViewName]}=Req, Db, DDoc) -> + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + handle_view_list(Req, Db, DDoc, ListName, {ViewDesignName, ViewName}, Keys); + +handle_view_list_req(#httpd{method='GET'}=Req, _Db, _DDoc) -> + send_error(Req, 404, <<"list_error">>, <<"Invalid path.">>); + +handle_view_list_req(#httpd{method='POST', + path_parts=[_, _, DesignName, _, ListName, ViewName]}=Req, Db, DDoc) -> + % {Props2} = couch_httpd:json_body(Req), + ReqBody = couch_httpd:body(Req), + {Props2} = ?JSON_DECODE(ReqBody), + Keys = couch_util:get_value(<<"keys">>, Props2, nil), + handle_view_list(Req#httpd{req_body=ReqBody}, Db, DDoc, ListName, {DesignName, ViewName}, Keys); + +handle_view_list_req(#httpd{method='POST', + path_parts=[_, _, _, _, ListName, ViewDesignName, ViewName]}=Req, Db, DDoc) -> + % {Props2} = couch_httpd:json_body(Req), + ReqBody = couch_httpd:body(Req), + {Props2} = ?JSON_DECODE(ReqBody), + Keys = couch_util:get_value(<<"keys">>, Props2, nil), + handle_view_list(Req#httpd{req_body=ReqBody}, Db, DDoc, ListName, {ViewDesignName, ViewName}, Keys); + +handle_view_list_req(#httpd{method='POST'}=Req, _Db, _DDoc) -> + send_error(Req, 404, <<"list_error">>, <<"Invalid path.">>); + +handle_view_list_req(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET,POST,HEAD"). + +handle_view_list(Req, Db, DDoc, LName, {ViewDesignName, ViewName}, Keys) -> + ViewDesignId = <<"_design/", ViewDesignName/binary>>, + {ViewType, View, Group, QueryArgs} = couch_httpd_view:load_view(Req, Db, {ViewDesignId, ViewName}, Keys), + Etag = list_etag(Req, Db, Group, View, QueryArgs, {couch_httpd:doc_etag(DDoc), Keys}), + couch_httpd:etag_respond(Req, Etag, fun() -> + output_list(ViewType, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) + end). + +list_etag(#httpd{user_ctx=UserCtx}=Req, Db, Group, View, QueryArgs, More) -> + Accept = couch_httpd:header_value(Req, "Accept"), + couch_httpd_view:view_etag(Db, Group, View, QueryArgs, {More, Accept, UserCtx#user_ctx.roles}). + +output_list(map, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group); +output_list(reduce, Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group). + +% next step: +% use with_ddoc_proc/2 to make this simpler +output_map_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + #view_query_args{ + limit = Limit, + skip = SkipCount + } = QueryArgs, + + FoldAccInit = {Limit, SkipCount, undefined, []}, + {ok, RowCount} = couch_view:get_row_count(View), + + + couch_query_servers:with_ddoc_proc(DDoc, fun(QServer) -> + + ListFoldHelpers = #view_fold_helper_funs{ + reduce_count = fun couch_view:reduce_to_count/1, + start_response = StartListRespFun = make_map_start_resp_fun(QServer, Db, LName), + send_row = make_map_send_row_fun(QServer) + }, + CurrentSeq = Group#group.current_seq, + + {ok, _, FoldResult} = case Keys of + nil -> + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs, Etag, Db, CurrentSeq, RowCount, ListFoldHelpers), + couch_view:fold(View, FoldlFun, FoldAccInit, + couch_httpd_view:make_key_options(QueryArgs)); + Keys -> + lists:foldl( + fun(Key, {ok, _, FoldAcc}) -> + QueryArgs2 = QueryArgs#view_query_args{ + start_key = Key, + end_key = Key + }, + FoldlFun = couch_httpd_view:make_view_fold_fun(Req, QueryArgs2, Etag, Db, CurrentSeq, RowCount, ListFoldHelpers), + couch_view:fold(View, FoldlFun, FoldAcc, + couch_httpd_view:make_key_options(QueryArgs2)) + end, {ok, nil, FoldAccInit}, Keys) + end, + finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, CurrentSeq, RowCount) + end). + + +output_reduce_list(Req, Db, DDoc, LName, View, QueryArgs, Etag, Keys, Group) -> + #view_query_args{ + limit = Limit, + skip = SkipCount, + group_level = GroupLevel + } = QueryArgs, + + CurrentSeq = Group#group.current_seq, + + couch_query_servers:with_ddoc_proc(DDoc, fun(QServer) -> + StartListRespFun = make_reduce_start_resp_fun(QServer, Db, LName), + SendListRowFun = make_reduce_send_row_fun(QServer, Db), + {ok, GroupRowsFun, RespFun} = couch_httpd_view:make_reduce_fold_funs(Req, + GroupLevel, QueryArgs, Etag, CurrentSeq, + #reduce_fold_helper_funs{ + start_response = StartListRespFun, + send_row = SendListRowFun + }), + FoldAccInit = {Limit, SkipCount, undefined, []}, + {ok, FoldResult} = case Keys of + nil -> + couch_view:fold_reduce(View, RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + couch_httpd_view:make_key_options(QueryArgs)]); + Keys -> + lists:foldl( + fun(Key, {ok, FoldAcc}) -> + couch_view:fold_reduce(View, RespFun, FoldAcc, + [{key_group_fun, GroupRowsFun} | + couch_httpd_view:make_key_options( + QueryArgs#view_query_args{start_key=Key, end_key=Key})] + ) + end, {ok, FoldAccInit}, Keys) + end, + finish_list(Req, QServer, Etag, FoldResult, StartListRespFun, CurrentSeq, null) + end). + + +make_map_start_resp_fun(QueryServer, Db, LName) -> + fun(Req, Etag, TotalRows, Offset, _Acc, UpdateSeq) -> + Head = {[{<<"total_rows">>, TotalRows}, {<<"offset">>, Offset}, {<<"update_seq">>, UpdateSeq}]}, + start_list_resp(QueryServer, LName, Req, Db, Head, Etag) + end. + +make_reduce_start_resp_fun(QueryServer, Db, LName) -> + fun(Req2, Etag, _Acc, UpdateSeq) -> + start_list_resp(QueryServer, LName, Req2, Db, {[{<<"update_seq">>, UpdateSeq}]}, Etag) + end. + +start_list_resp(QServer, LName, Req, Db, Head, Etag) -> + JsonReq = couch_httpd_external:json_req_obj(Req, Db), + [<<"start">>,Chunks,JsonResp] = couch_query_servers:ddoc_proc_prompt(QServer, + [<<"lists">>, LName], [Head, JsonReq]), + JsonResp2 = apply_etag(JsonResp, Etag), + #extern_resp_args{ + code = Code, + ctype = CType, + headers = ExtHeaders + } = couch_httpd_external:parse_external_response(JsonResp2), + JsonHeaders = couch_httpd_external:default_or_content_type(CType, ExtHeaders), + {ok, Resp} = start_chunked_response(Req, Code, JsonHeaders), + {ok, Resp, ?b2l(?l2b(Chunks))}. + +make_map_send_row_fun(QueryServer) -> + fun(Resp, Db, Row, IncludeDocs, Conflicts, RowFront) -> + send_list_row( + Resp, QueryServer, Db, Row, RowFront, IncludeDocs, Conflicts) + end. + +make_reduce_send_row_fun(QueryServer, Db) -> + fun(Resp, Row, RowFront) -> + send_list_row(Resp, QueryServer, Db, Row, RowFront, false, false) + end. + +send_list_row(Resp, QueryServer, Db, Row, RowFront, IncludeDoc, Conflicts) -> + try + [Go,Chunks] = prompt_list_row( + QueryServer, Db, Row, IncludeDoc, Conflicts), + Chunk = RowFront ++ ?b2l(?l2b(Chunks)), + send_non_empty_chunk(Resp, Chunk), + case Go of + <<"chunks">> -> + {ok, ""}; + <<"end">> -> + {stop, stop} + end + catch + throw:Error -> + send_chunked_error(Resp, Error), + throw({already_sent, Resp, Error}) + end. + + +prompt_list_row({Proc, _DDocId}, Db, {{_Key, _DocId}, _} = Kv, + IncludeDoc, Conflicts) -> + JsonRow = couch_httpd_view:view_row_obj(Db, Kv, IncludeDoc, Conflicts), + couch_query_servers:proc_prompt(Proc, [<<"list_row">>, JsonRow]); + +prompt_list_row({Proc, _DDocId}, _, {Key, Value}, _IncludeDoc, _Conflicts) -> + JsonRow = {[{key, Key}, {value, Value}]}, + couch_query_servers:proc_prompt(Proc, [<<"list_row">>, JsonRow]). + +send_non_empty_chunk(Resp, Chunk) -> + case Chunk of + [] -> ok; + _ -> send_chunk(Resp, Chunk) + end. + +finish_list(Req, {Proc, _DDocId}, Etag, FoldResult, StartFun, CurrentSeq, TotalRows) -> + FoldResult2 = case FoldResult of + {Limit, SkipCount, Response, RowAcc} -> + {Limit, SkipCount, Response, RowAcc, nil}; + Else -> + Else + end, + case FoldResult2 of + {_, _, undefined, _, _} -> + {ok, Resp, BeginBody} = + render_head_for_empty_list(StartFun, Req, Etag, CurrentSeq, TotalRows), + [<<"end">>, Chunks] = couch_query_servers:proc_prompt(Proc, [<<"list_end">>]), + Chunk = BeginBody ++ ?b2l(?l2b(Chunks)), + send_non_empty_chunk(Resp, Chunk); + {_, _, Resp, stop, _} -> + ok; + {_, _, Resp, _, _} -> + [<<"end">>, Chunks] = couch_query_servers:proc_prompt(Proc, [<<"list_end">>]), + send_non_empty_chunk(Resp, ?b2l(?l2b(Chunks))) + end, + last_chunk(Resp). + + +render_head_for_empty_list(StartListRespFun, Req, Etag, CurrentSeq, null) -> + StartListRespFun(Req, Etag, [], CurrentSeq); % for reduce +render_head_for_empty_list(StartListRespFun, Req, Etag, CurrentSeq, TotalRows) -> + StartListRespFun(Req, Etag, TotalRows, null, [], CurrentSeq). + +apply_etag({ExternalResponse}, CurrentEtag) -> + % Here we embark on the delicate task of replacing or creating the + % headers on the JsonResponse object. We need to control the Etag and + % Vary headers. If the external function controls the Etag, we'd have to + % run it to check for a match, which sort of defeats the purpose. + case couch_util:get_value(<<"headers">>, ExternalResponse, nil) of + nil -> + % no JSON headers + % add our Etag and Vary headers to the response + {[{<<"headers">>, {[{<<"Etag">>, CurrentEtag}, {<<"Vary">>, <<"Accept">>}]}} | ExternalResponse]}; + JsonHeaders -> + {[case Field of + {<<"headers">>, JsonHeaders} -> % add our headers + JsonHeadersEtagged = couch_util:json_apply_field({<<"Etag">>, CurrentEtag}, JsonHeaders), + JsonHeadersVaried = couch_util:json_apply_field({<<"Vary">>, <<"Accept">>}, JsonHeadersEtagged), + {<<"headers">>, JsonHeadersVaried}; + _ -> % skip non-header fields + Field + end || Field <- ExternalResponse]} + end. + diff --git a/apps/couch/src/couch_httpd_stats_handlers.erl b/apps/couch/src/couch_httpd_stats_handlers.erl new file mode 100644 index 00000000..41aeaed0 --- /dev/null +++ b/apps/couch/src/couch_httpd_stats_handlers.erl @@ -0,0 +1,56 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_stats_handlers). +-include("couch_db.hrl"). + +-export([handle_stats_req/1]). +-import(couch_httpd, [ + send_json/2, send_json/3, send_json/4, send_method_not_allowed/2, + start_json_response/2, send_chunk/2, end_json_response/1, + start_chunked_response/3, send_error/4 +]). + +handle_stats_req(#httpd{method='GET', path_parts=[_]}=Req) -> + flush(Req), + send_json(Req, couch_stats_aggregator:all(range(Req))); + +handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod]}) -> + throw({bad_request, <<"Stat names must have exactly to parts.">>}); + +handle_stats_req(#httpd{method='GET', path_parts=[_, Mod, Key]}=Req) -> + flush(Req), + Stats = couch_stats_aggregator:get_json({list_to_atom(binary_to_list(Mod)), + list_to_atom(binary_to_list(Key))}, range(Req)), + send_json(Req, {[{Mod, {[{Key, Stats}]}}]}); + +handle_stats_req(#httpd{method='GET', path_parts=[_, _Mod, _Key | _Extra]}) -> + throw({bad_request, <<"Stat names must have exactly two parts.">>}); + +handle_stats_req(Req) -> + send_method_not_allowed(Req, "GET"). + +range(Req) -> + case couch_util:get_value("range", couch_httpd:qs(Req)) of + undefined -> + 0; + Value -> + list_to_integer(Value) + end. + +flush(Req) -> + case couch_util:get_value("flush", couch_httpd:qs(Req)) of + "true" -> + couch_stats_aggregator:collect_sample(); + _Else -> + ok + end. diff --git a/apps/couch/src/couch_httpd_vhost.erl b/apps/couch/src/couch_httpd_vhost.erl new file mode 100644 index 00000000..03dd02ae --- /dev/null +++ b/apps/couch/src/couch_httpd_vhost.erl @@ -0,0 +1,407 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + + +-module(couch_httpd_vhost). +-behaviour(gen_server). + +-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]). +-export([code_change/3, terminate/2]). +-export([match_vhost/1, urlsplit_netloc/2]). +-export([redirect_to_vhost/2]). + +-include("couch_db.hrl"). + +-define(SEPARATOR, $\/). +-define(MATCH_ALL, {bind, '*'}). + +-record(vhosts, { + vhost_globals, + vhosts = [], + vhost_fun +}). + + +%% doc the vhost manager. +%% This gen_server keep state of vhosts added to the ini and try to +%% match the Host header (or forwarded) against rules built against +%% vhost list. +%% +%% Declaration of vhosts take place in the configuration file : +%% +%% [vhosts] +%% example.com = /example +%% *.example.com = /example +%% +%% The first line will rewrite the rquest to display the content of the +%% example database. This rule works only if the Host header is +%% 'example.com' and won't work for CNAMEs. Second rule on the other hand +%% match all CNAMES to example db. So www.example.com or db.example.com +%% will work. +%% +%% The wildcard ('*') should always be the last in the cnames: +%% +%% "*.db.example.com = /" will match all cname on top of db +%% examples to the root of the machine. +%% +%% +%% Rewriting Hosts to path +%% ----------------------- +%% +%% Like in the _rewrite handler you could match some variable and use +%them to create the target path. Some examples: +%% +%% [vhosts] +%% *.example.com = /* +%% :dbname.example.com = /:dbname +%% :ddocname.:dbname.example.com = /:dbname/_design/:ddocname/_rewrite +%% +%% First rule pass wildcard as dbname, second do the same but use a +%% variable name and the third one allows you to use any app with +%% @ddocname in any db with @dbname . +%% +%% You could also change the default function to handle request by +%% changing the setting `redirect_vhost_handler` in `httpd` section of +%% the Ini: +%% +%% [httpd] +%% redirect_vhost_handler = {Module, Fun} +%% +%% The function take 2 args : the mochiweb request object and the target +%%% path. + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +%% @doc Try to find a rule matching current Host heade. some rule is +%% found it rewrite the Mochiweb Request else it return current Request. +match_vhost(MochiReq) -> + {ok, MochiReq1} = gen_server:call(couch_httpd_vhost, {match_vhost, + MochiReq}), + + MochiReq1. + + +%% -------------------- +%% gen_server functions +%% -------------------- + +init(_) -> + process_flag(trap_exit, true), + + % init state + VHosts = make_vhosts(), + VHostGlobals = re:split( + couch_config:get("httpd", "vhost_global_handlers", ""), + ", ?", + [{return, list}] + ), + + % Set vhost fun + DefaultVHostFun = "{couch_httpd_vhost, redirect_to_vhost}", + VHostFun = couch_httpd:make_arity_2_fun( + couch_config:get("httpd", "redirect_vhost_handler", DefaultVHostFun) + ), + + + Self = self(), + % register for changes in vhosts section + ok = couch_config:register( + fun("vhosts") -> + ok = gen_server:call(Self, vhosts_changed, infinity) + end + ), + + % register for changes in vhost_global_handlers key + ok = couch_config:register( + fun("httpd", "vhost_global_handlers") -> + ok = gen_server:call(Self, vhosts_global_changed, infinity) + end + ), + + ok = couch_config:register( + fun("httpd", "redirect_vhost_handler") -> + ok = gen_server:call(Self, fun_changed, infinity) + end + ), + + {ok, #vhosts{ + vhost_globals = VHostGlobals, + vhosts = VHosts, + vhost_fun = VHostFun} + }. + + +handle_call({match_vhost, MochiReq}, _From, State) -> + #vhosts{ + vhost_globals = VHostGlobals, + vhosts = VHosts, + vhost_fun = Fun + } = State, + + {"/" ++ VPath, Query, Fragment} = mochiweb_util:urlsplit_path(MochiReq:get(raw_path)), + VPathParts = string:tokens(VPath, "/"), + + XHost = couch_config:get("httpd", "x_forwarded_host", "X-Forwarded-Host"), + VHost = case MochiReq:get_header_value(XHost) of + undefined -> + case MochiReq:get_header_value("Host") of + undefined -> []; + Value1 -> Value1 + end; + Value -> Value + end, + {VHostParts, VhostPort} = split_host_port(VHost), + FinalMochiReq = case try_bind_vhost(VHosts, lists:reverse(VHostParts), + VhostPort, VPathParts) of + no_vhost_matched -> MochiReq; + {VhostTarget, NewPath} -> + case vhost_global(VHostGlobals, MochiReq) of + true -> + MochiReq; + _Else -> + NewPath1 = mochiweb_util:urlunsplit_path({NewPath, Query, + Fragment}), + MochiReq1 = mochiweb_request:new(MochiReq:get(socket), + MochiReq:get(method), + NewPath1, + MochiReq:get(version), + MochiReq:get(headers)), + Fun(MochiReq1, VhostTarget) + end + end, + {reply, {ok, FinalMochiReq}, State}; + +% update vhosts +handle_call(vhosts_changed, _From, State) -> + {reply, ok, State#vhosts{vhosts= make_vhosts()}}; + + +% update vhosts_globals +handle_call(vhosts_global_changed, _From, State) -> + VHostGlobals = re:split( + couch_config:get("httpd", "vhost_global_handlers", ""), + ", ?", + [{return, list}] + ), + {reply, ok, State#vhosts{vhost_globals=VHostGlobals}}; +% change fun +handle_call(fun_changed, _From, State) -> + DefaultVHostFun = "{couch_httpd_vhosts, redirect_to_vhost}", + VHostFun = couch_httpd:make_arity_2_fun( + couch_config:get("httpd", "redirect_vhost_handler", DefaultVHostFun) + ), + {reply, ok, State#vhosts{vhost_fun=VHostFun}}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Msg, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +append_path("/"=_Target, "/"=_Path) -> + "/"; +append_path(Target, Path) -> + Target ++ Path. + +% default redirect vhost handler + +redirect_to_vhost(MochiReq, VhostTarget) -> + Path = MochiReq:get(raw_path), + Target = append_path(VhostTarget, Path), + + ?LOG_DEBUG("Vhost Target: '~p'~n", [Target]), + + Headers = mochiweb_headers:enter("x-couchdb-vhost-path", Path, + MochiReq:get(headers)), + + % build a new mochiweb request + MochiReq1 = mochiweb_request:new(MochiReq:get(socket), + MochiReq:get(method), + Target, + MochiReq:get(version), + Headers), + % cleanup, It force mochiweb to reparse raw uri. + MochiReq1:cleanup(), + + MochiReq1. + +%% if so, then it will not be rewritten, but will run as a normal couchdb request. +%* normally you'd use this for _uuids _utils and a few of the others you want to +%% keep available on vhosts. You can also use it to make databases 'global'. +vhost_global( VhostGlobals, MochiReq) -> + RawUri = MochiReq:get(raw_path), + {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri), + + Front = case couch_httpd:partition(Path) of + {"", "", ""} -> + "/"; % Special case the root url handler + {FirstPart, _, _} -> + FirstPart + end, + [true] == [true||V <- VhostGlobals, V == Front]. + +%% bind host +%% first it try to bind the port then the hostname. +try_bind_vhost([], _HostParts, _Port, _PathParts) -> + no_vhost_matched; +try_bind_vhost([VhostSpec|Rest], HostParts, Port, PathParts) -> + {{VHostParts, VPort, VPath}, Path} = VhostSpec, + case bind_port(VPort, Port) of + ok -> + case bind_vhost(lists:reverse(VHostParts), HostParts, []) of + {ok, Bindings, Remainings} -> + case bind_path(VPath, PathParts) of + {ok, PathParts1} -> + Path1 = make_target(Path, Bindings, Remainings, []), + {make_path(Path1), make_path(PathParts1)}; + fail -> + try_bind_vhost(Rest, HostParts, Port, + PathParts) + end; + fail -> try_bind_vhost(Rest, HostParts, Port, PathParts) + end; + fail -> try_bind_vhost(Rest, HostParts, Port, PathParts) + end. + +%% doc: build new patch from bindings. bindings are query args +%% (+ dynamic query rewritten if needed) and bindings found in +%% bind_path step. +%% TODO: merge code wit rewrite. But we need to make sure we are +%% in string here. +make_target([], _Bindings, _Remaining, Acc) -> + lists:reverse(Acc); +make_target([?MATCH_ALL], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_target([?MATCH_ALL|_Rest], _Bindings, Remaining, Acc) -> + Acc1 = lists:reverse(Acc) ++ Remaining, + Acc1; +make_target([{bind, P}|Rest], Bindings, Remaining, Acc) -> + P2 = case couch_util:get_value({bind, P}, Bindings) of + undefined -> "undefined"; + P1 -> P1 + end, + make_target(Rest, Bindings, Remaining, [P2|Acc]); +make_target([P|Rest], Bindings, Remaining, Acc) -> + make_target(Rest, Bindings, Remaining, [P|Acc]). + +%% bind port +bind_port(Port, Port) -> ok; +bind_port('*', _) -> ok; +bind_port(_,_) -> fail. + +%% bind bhost +bind_vhost([],[], Bindings) -> {ok, Bindings, []}; +bind_vhost([?MATCH_ALL], [], _Bindings) -> fail; +bind_vhost([?MATCH_ALL], Rest, Bindings) -> {ok, Bindings, Rest}; +bind_vhost([], _HostParts, _Bindings) -> fail; +bind_vhost([{bind, Token}|Rest], [Match|RestHost], Bindings) -> + bind_vhost(Rest, RestHost, [{{bind, Token}, Match}|Bindings]); +bind_vhost([Cname|Rest], [Cname|RestHost], Bindings) -> + bind_vhost(Rest, RestHost, Bindings); +bind_vhost(_, _, _) -> fail. + +%% bind path +bind_path([], PathParts) -> + {ok, PathParts}; +bind_path(_VPathParts, []) -> + fail; +bind_path([Path|VRest],[Path|Rest]) -> + bind_path(VRest, Rest); +bind_path(_, _) -> + fail. + +% utilities + + +%% create vhost list from ini +make_vhosts() -> + Vhosts = lists:foldl(fun({Vhost, Path}, Acc) -> + [{parse_vhost(Vhost), split_path(Path)}|Acc] + end, [], couch_config:get("vhosts")), + lists:reverse(lists:usort(Vhosts)). + +parse_vhost(Vhost) -> + case urlsplit_netloc(Vhost, []) of + {[], Path} -> + {make_spec("*", []), '*', Path}; + {HostPort, []} -> + {H, P} = split_host_port(HostPort), + H1 = make_spec(H, []), + {H1, P, []}; + {HostPort, Path} -> + {H, P} = split_host_port(HostPort), + H1 = make_spec(H, []), + {H1, P, string:tokens(Path, "/")} + end. + + +split_host_port(HostAsString) -> + case string:rchr(HostAsString, $:) of + 0 -> + {split_host(HostAsString), '*'}; + N -> + HostPart = string:substr(HostAsString, 1, N-1), + case (catch erlang:list_to_integer(string:substr(HostAsString, + N+1, length(HostAsString)))) of + {'EXIT', _} -> + {split_host(HostAsString), '*'}; + Port -> + {split_host(HostPart), Port} + end + end. + +split_host(HostAsString) -> + string:tokens(HostAsString, "\."). + +split_path(Path) -> + make_spec(string:tokens(Path, "/"), []). + + +make_spec([], Acc) -> + lists:reverse(Acc); +make_spec([""|R], Acc) -> + make_spec(R, Acc); +make_spec(["*"|R], Acc) -> + make_spec(R, [?MATCH_ALL|Acc]); +make_spec([P|R], Acc) -> + P1 = parse_var(P), + make_spec(R, [P1|Acc]). + + +parse_var(P) -> + case P of + ":" ++ Var -> + {bind, Var}; + _ -> P + end. + + +% mochiweb doesn't export it. +urlsplit_netloc("", Acc) -> + {lists:reverse(Acc), ""}; +urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# -> + {lists:reverse(Acc), Rest}; +urlsplit_netloc([C | Rest], Acc) -> + urlsplit_netloc(Rest, [C | Acc]). + +make_path(Parts) -> + "/" ++ string:join(Parts,[?SEPARATOR]). diff --git a/apps/couch/src/couch_httpd_view.erl b/apps/couch/src/couch_httpd_view.erl new file mode 100644 index 00000000..082a5039 --- /dev/null +++ b/apps/couch/src/couch_httpd_view.erl @@ -0,0 +1,777 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_httpd_view). +-include("couch_db.hrl"). + +-export([handle_view_req/3,handle_temp_view_req/2]). + +-export([parse_view_params/4]). +-export([make_view_fold_fun/7, finish_view_fold/4, finish_view_fold/5, view_row_obj/4]). +-export([view_etag/5, make_reduce_fold_funs/6]). +-export([design_doc_view/5, parse_bool_param/1, doc_member/3]). +-export([make_key_options/1, load_view/4]). + +-import(couch_httpd, + [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2,send_chunk/2, + start_json_response/2, start_json_response/3, end_json_response/1, + send_chunked_error/2]). + +-import(couch_db,[get_update_seq/1]). + +design_doc_view(Req, Db, DName, ViewName, Keys) -> + DesignId = <<"_design/", DName/binary>>, + Stale = get_stale_type(Req), + Reduce = get_reduce_type(Req), + Result = case couch_view:get_map_view(Db, DesignId, ViewName, Stale) of + {ok, View, Group} -> + QueryArgs = parse_view_params(Req, Keys, map, view_collator(View)), + output_map_view(Req, View, Group, Db, QueryArgs, Keys); + {not_found, Reason} -> + case couch_view:get_reduce_view(Db, DesignId, ViewName, Stale) of + {ok, ReduceView, Group} -> + Collator = view_collator(ReduceView), + case Reduce of + false -> + QueryArgs = parse_view_params(Req, Keys, red_map, Collator), + MapView = couch_view:extract_map_view(ReduceView), + output_map_view(Req, MapView, Group, Db, QueryArgs, Keys); + _ -> + QueryArgs = parse_view_params(Req, Keys, reduce, Collator), + output_reduce_view(Req, Db, ReduceView, Group, QueryArgs, Keys) + end; + _ -> + throw({not_found, Reason}) + end + end, + couch_stats_collector:increment({httpd, view_reads}), + Result. + +handle_view_req(#httpd{method='GET', + path_parts=[_, _, DName, _, ViewName]}=Req, Db, _DDoc) -> + Keys = couch_httpd:qs_json_value(Req, "keys", nil), + design_doc_view(Req, Db, DName, ViewName, Keys); + +handle_view_req(#httpd{method='POST', + path_parts=[_, _, DName, _, ViewName]}=Req, Db, _DDoc) -> + couch_httpd:validate_ctype(Req, "application/json"), + {Fields} = couch_httpd:json_body_obj(Req), + case couch_util:get_value(<<"keys">>, Fields, nil) of + nil -> + Fmt = "POST to view ~p/~p in database ~p with no keys member.", + ?LOG_DEBUG(Fmt, [DName, ViewName, Db]), + design_doc_view(Req, Db, DName, ViewName, nil); + Keys when is_list(Keys) -> + design_doc_view(Req, Db, DName, ViewName, Keys); + _ -> + throw({bad_request, "`keys` member must be a array."}) + end; + +handle_view_req(Req, _Db, _DDoc) -> + send_method_not_allowed(Req, "GET,POST,HEAD"). + +handle_temp_view_req(#httpd{method='POST'}=Req, Db) -> + couch_httpd:validate_ctype(Req, "application/json"), + ok = couch_db:check_is_admin(Db), + couch_stats_collector:increment({httpd, temporary_view_reads}), + {Props} = couch_httpd:json_body_obj(Req), + Language = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + {DesignOptions} = couch_util:get_value(<<"options">>, Props, {[]}), + MapSrc = couch_util:get_value(<<"map">>, Props), + Keys = couch_util:get_value(<<"keys">>, Props, nil), + Reduce = get_reduce_type(Req), + case couch_util:get_value(<<"reduce">>, Props, null) of + null -> + {ok, View, Group} = couch_view:get_temp_map_view(Db, Language, + DesignOptions, MapSrc), + QueryArgs = parse_view_params(Req, Keys, map, view_collator(View)), + output_map_view(Req, View, Group, Db, QueryArgs, Keys); + _ when Reduce =:= false -> + {ok, View, Group} = couch_view:get_temp_map_view(Db, Language, + DesignOptions, MapSrc), + QueryArgs = parse_view_params(Req, Keys, red_map, view_collator(View)), + output_map_view(Req, View, Group, Db, QueryArgs, Keys); + RedSrc -> + {ok, View, Group} = couch_view:get_temp_reduce_view(Db, Language, + DesignOptions, MapSrc, RedSrc), + QueryArgs = parse_view_params(Req, Keys, reduce, view_collator(View)), + output_reduce_view(Req, Db, View, Group, QueryArgs, Keys) + end; + +handle_temp_view_req(Req, _Db) -> + send_method_not_allowed(Req, "POST"). + +output_map_view(Req, View, Group, Db, QueryArgs, nil) -> + #view_query_args{ + limit = Limit, + skip = SkipCount + } = QueryArgs, + CurrentEtag = view_etag(Db, Group, View, QueryArgs), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, RowCount} = couch_view:get_row_count(View), + FoldlFun = make_view_fold_fun(Req, QueryArgs, CurrentEtag, Db, Group#group.current_seq, RowCount, #view_fold_helper_funs{reduce_count=fun couch_view:reduce_to_count/1}), + FoldAccInit = {Limit, SkipCount, undefined, []}, + {ok, LastReduce, FoldResult} = couch_view:fold(View, + FoldlFun, FoldAccInit, make_key_options(QueryArgs)), + finish_view_fold(Req, RowCount, + couch_view:reduce_to_count(LastReduce), FoldResult) + end); + +output_map_view(Req, View, Group, Db, QueryArgs, Keys) -> + #view_query_args{ + limit = Limit, + skip = SkipCount + } = QueryArgs, + CurrentEtag = view_etag(Db, Group, View, QueryArgs, Keys), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, RowCount} = couch_view:get_row_count(View), + FoldAccInit = {Limit, SkipCount, undefined, []}, + {LastReduce, FoldResult} = lists:foldl(fun(Key, {_, FoldAcc}) -> + FoldlFun = make_view_fold_fun(Req, QueryArgs#view_query_args{}, + CurrentEtag, Db, Group#group.current_seq, RowCount, + #view_fold_helper_funs{ + reduce_count = fun couch_view:reduce_to_count/1 + }), + {ok, LastReduce, FoldResult} = couch_view:fold(View, FoldlFun, + FoldAcc, make_key_options( + QueryArgs#view_query_args{start_key=Key, end_key=Key})), + {LastReduce, FoldResult} + end, {{[],[]}, FoldAccInit}, Keys), + finish_view_fold(Req, RowCount, couch_view:reduce_to_count(LastReduce), + FoldResult, [{update_seq,Group#group.current_seq}]) + end). + +output_reduce_view(Req, Db, View, Group, QueryArgs, nil) -> + #view_query_args{ + limit = Limit, + skip = Skip, + group_level = GroupLevel + } = QueryArgs, + CurrentEtag = view_etag(Db, Group, View, QueryArgs), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, + QueryArgs, CurrentEtag, Group#group.current_seq, + #reduce_fold_helper_funs{}), + FoldAccInit = {Limit, Skip, undefined, []}, + {ok, {_, _, Resp, _}} = couch_view:fold_reduce(View, + RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + make_key_options(QueryArgs)]), + finish_reduce_fold(Req, Resp) + end); + +output_reduce_view(Req, Db, View, Group, QueryArgs, Keys) -> + #view_query_args{ + limit = Limit, + skip = Skip, + group_level = GroupLevel + } = QueryArgs, + CurrentEtag = view_etag(Db, Group, View, QueryArgs, Keys), + couch_httpd:etag_respond(Req, CurrentEtag, fun() -> + {ok, GroupRowsFun, RespFun} = make_reduce_fold_funs(Req, GroupLevel, + QueryArgs, CurrentEtag, Group#group.current_seq, + #reduce_fold_helper_funs{}), + {Resp, _RedAcc3} = lists:foldl( + fun(Key, {Resp, RedAcc}) -> + % run the reduce once for each key in keys, with limit etc + % reapplied for each key + FoldAccInit = {Limit, Skip, Resp, RedAcc}, + {_, {_, _, Resp2, RedAcc2}} = couch_view:fold_reduce(View, + RespFun, FoldAccInit, [{key_group_fun, GroupRowsFun} | + make_key_options(QueryArgs#view_query_args{ + start_key=Key, end_key=Key})]), + % Switch to comma + {Resp2, RedAcc2} + end, + {undefined, []}, Keys), % Start with no comma + finish_reduce_fold(Req, Resp, [{update_seq,Group#group.current_seq}]) + end). + +reverse_key_default(?MIN_STR) -> ?MAX_STR; +reverse_key_default(?MAX_STR) -> ?MIN_STR; +reverse_key_default(Key) -> Key. + +get_stale_type(Req) -> + list_to_existing_atom(couch_httpd:qs_value(Req, "stale", "nil")). + +get_reduce_type(Req) -> + list_to_existing_atom(couch_httpd:qs_value(Req, "reduce", "true")). + +load_view(Req, Db, {ViewDesignId, ViewName}, Keys) -> + Stale = get_stale_type(Req), + Reduce = get_reduce_type(Req), + case couch_view:get_map_view(Db, ViewDesignId, ViewName, Stale) of + {ok, View, Group} -> + QueryArgs = parse_view_params(Req, Keys, map, view_collator(View)), + {map, View, Group, QueryArgs}; + {not_found, _Reason} -> + case couch_view:get_reduce_view(Db, ViewDesignId, ViewName, Stale) of + {ok, ReduceView, Group} -> + Collator = view_collator(ReduceView), + case Reduce of + false -> + QueryArgs = parse_view_params(Req, Keys, map_red, Collator), + MapView = couch_view:extract_map_view(ReduceView), + {map, MapView, Group, QueryArgs}; + _ -> + QueryArgs = parse_view_params(Req, Keys, reduce, Collator), + {reduce, ReduceView, Group, QueryArgs} + end; + {not_found, Reason} -> + throw({not_found, Reason}) + end + end. + +view_collator({reduce, _N, _Lang, View}) -> + view_collator(View); + +view_collator({temp_reduce, View}) -> + view_collator(View); + +view_collator(#view{btree=Btree}) -> + % Return an "is-less-than" predicate by calling into the btree's + % collator. For raw collation, couch_btree compares arbitrary + % Erlang terms, but for normal (ICU) collation, it expects + % {Json, Id} tuples. + fun + ({_JsonA, _IdA}=A, {_JsonB, _IdB}=B) -> + couch_btree:less(Btree, A, B); + (JsonA, JsonB) -> + couch_btree:less(Btree, {JsonA, null}, {JsonB, null}) + end. + +% query_parse_error could be removed +% we wouldn't need to pass the view type, it'd just parse params. +% I'm not sure what to do about the error handling, but +% it might simplify things to have a parse_view_params function +% that doesn't throw(). +parse_view_params(Req, Keys, ViewType, LessThan) -> + QueryList = couch_httpd:qs(Req), + QueryParams = + lists:foldl(fun({K, V}, Acc) -> + parse_view_param(K, V) ++ Acc + end, [], QueryList), + IsMultiGet = (Keys =/= nil), + Args = #view_query_args{ + view_type=ViewType, + multi_get=IsMultiGet + }, + QueryArgs = lists:foldl(fun({K, V}, Args2) -> + validate_view_query(K, V, Args2) + end, Args, lists:reverse(QueryParams)), % Reverse to match QS order. + warn_on_empty_key_range(QueryArgs, LessThan), + GroupLevel = QueryArgs#view_query_args.group_level, + case {ViewType, GroupLevel, IsMultiGet} of + {reduce, exact, true} -> + QueryArgs; + {reduce, _, false} -> + QueryArgs; + {reduce, _, _} -> + % we can simplify code if we just drop this error message. + Msg = <<"Multi-key fetchs for reduce " + "view must include `group=true`">>, + throw({query_parse_error, Msg}); + _ -> + QueryArgs + end, + QueryArgs. + +parse_view_param("", _) -> + []; +parse_view_param("key", Value) -> + JsonKey = ?JSON_DECODE(Value), + [{start_key, JsonKey}, {end_key, JsonKey}]; +% TODO: maybe deprecate startkey_docid +parse_view_param("startkey_docid", Value) -> + [{start_docid, ?l2b(Value)}]; +parse_view_param("start_key_doc_id", Value) -> + [{start_docid, ?l2b(Value)}]; +% TODO: maybe deprecate endkey_docid +parse_view_param("endkey_docid", Value) -> + [{end_docid, ?l2b(Value)}]; +parse_view_param("end_key_doc_id", Value) -> + [{end_docid, ?l2b(Value)}]; +% TODO: maybe deprecate startkey +parse_view_param("startkey", Value) -> + [{start_key, ?JSON_DECODE(Value)}]; +parse_view_param("start_key", Value) -> + [{start_key, ?JSON_DECODE(Value)}]; +% TODO: maybe deprecate endkey +parse_view_param("endkey", Value) -> + [{end_key, ?JSON_DECODE(Value)}]; +parse_view_param("end_key", Value) -> + [{end_key, ?JSON_DECODE(Value)}]; +parse_view_param("limit", Value) -> + [{limit, parse_positive_int_param(Value)}]; +parse_view_param("count", _Value) -> + throw({query_parse_error, <<"Query parameter 'count' is now 'limit'.">>}); +parse_view_param("stale", "ok") -> + [{stale, ok}]; +parse_view_param("stale", "update_after") -> + [{stale, update_after}]; +parse_view_param("stale", _Value) -> + throw({query_parse_error, + <<"stale only available as stale=ok or as stale=update_after">>}); +parse_view_param("update", _Value) -> + throw({query_parse_error, <<"update=false is now stale=ok">>}); +parse_view_param("descending", Value) -> + [{descending, parse_bool_param(Value)}]; +parse_view_param("skip", Value) -> + [{skip, parse_int_param(Value)}]; +parse_view_param("group", Value) -> + case parse_bool_param(Value) of + true -> [{group_level, exact}]; + false -> [{group_level, 0}] + end; +parse_view_param("group_level", Value) -> + [{group_level, parse_positive_int_param(Value)}]; +parse_view_param("inclusive_end", Value) -> + [{inclusive_end, parse_bool_param(Value)}]; +parse_view_param("reduce", Value) -> + [{reduce, parse_bool_param(Value)}]; +parse_view_param("include_docs", Value) -> + [{include_docs, parse_bool_param(Value)}]; +parse_view_param("conflicts", Value) -> + [{conflicts, parse_bool_param(Value)}]; +parse_view_param("list", Value) -> + [{list, ?l2b(Value)}]; +parse_view_param("callback", _) -> + []; % Verified in the JSON response functions +parse_view_param(Key, Value) -> + [{extra, {Key, Value}}]. + +warn_on_empty_key_range(#view_query_args{start_key=undefined}, _Lt) -> + ok; +warn_on_empty_key_range(#view_query_args{end_key=undefined}, _Lt) -> + ok; +warn_on_empty_key_range(#view_query_args{start_key=A, end_key=A}, _Lt) -> + ok; +warn_on_empty_key_range(#view_query_args{ + start_key=StartKey, end_key=EndKey, direction=Dir}, LessThan) -> + case {Dir, LessThan(StartKey, EndKey)} of + {fwd, false} -> + throw({query_parse_error, + <<"No rows can match your key range, reverse your ", + "start_key and end_key or set descending=true">>}); + {rev, true} -> + throw({query_parse_error, + <<"No rows can match your key range, reverse your ", + "start_key and end_key or set descending=false">>}); + _ -> ok + end. + +validate_view_query(start_key, Value, Args) -> + case Args#view_query_args.multi_get of + true -> + Msg = <<"Query parameter `start_key` is " + "not compatible with multi-get">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{start_key=Value} + end; +validate_view_query(start_docid, Value, Args) -> + Args#view_query_args{start_docid=Value}; +validate_view_query(end_key, Value, Args) -> + case Args#view_query_args.multi_get of + true-> + Msg = <<"Query parameter `end_key` is " + "not compatible with multi-get">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{end_key=Value} + end; +validate_view_query(end_docid, Value, Args) -> + Args#view_query_args{end_docid=Value}; +validate_view_query(limit, Value, Args) -> + Args#view_query_args{limit=Value}; +validate_view_query(list, Value, Args) -> + Args#view_query_args{list=Value}; +validate_view_query(stale, ok, Args) -> + Args#view_query_args{stale=ok}; +validate_view_query(stale, update_after, Args) -> + Args#view_query_args{stale=update_after}; +validate_view_query(stale, _, Args) -> + Args; +validate_view_query(descending, true, Args) -> + case Args#view_query_args.direction of + rev -> Args; % Already reversed + fwd -> + Args#view_query_args{ + direction = rev, + start_docid = + reverse_key_default(Args#view_query_args.start_docid), + end_docid = + reverse_key_default(Args#view_query_args.end_docid) + } + end; +validate_view_query(descending, false, Args) -> + Args; % Ignore default condition +validate_view_query(skip, Value, Args) -> + Args#view_query_args{skip=Value}; +validate_view_query(group_level, Value, Args) -> + case Args#view_query_args.view_type of + reduce -> + Args#view_query_args{group_level=Value}; + _ -> + Msg = <<"Invalid URL parameter 'group' or " + " 'group_level' for non-reduce view.">>, + throw({query_parse_error, Msg}) + end; +validate_view_query(inclusive_end, Value, Args) -> + Args#view_query_args{inclusive_end=Value}; +validate_view_query(reduce, false, Args) -> + Args; +validate_view_query(reduce, _, Args) -> + case Args#view_query_args.view_type of + map -> + Msg = <<"Invalid URL parameter `reduce` for map view.">>, + throw({query_parse_error, Msg}); + _ -> + Args + end; +validate_view_query(include_docs, true, Args) -> + case Args#view_query_args.view_type of + reduce -> + Msg = <<"Query parameter `include_docs` " + "is invalid for reduce views.">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{include_docs=true} + end; +% Use the view_query_args record's default value +validate_view_query(include_docs, _Value, Args) -> + Args; +validate_view_query(conflicts, true, Args) -> + case Args#view_query_args.view_type of + reduce -> + Msg = <<"Query parameter `conflicts` " + "is invalid for reduce views.">>, + throw({query_parse_error, Msg}); + _ -> + Args#view_query_args{conflicts = true} + end; +validate_view_query(extra, _Value, Args) -> + Args. + +make_view_fold_fun(Req, QueryArgs, Etag, Db, UpdateSeq, TotalViewCount, HelperFuns) -> + #view_fold_helper_funs{ + start_response = StartRespFun, + send_row = SendRowFun, + reduce_count = ReduceCountFun + } = apply_default_helper_funs(HelperFuns), + + #view_query_args{ + include_docs = IncludeDocs, + conflicts = Conflicts + } = QueryArgs, + + fun({{Key, DocId}, Value}, OffsetReds, + {AccLimit, AccSkip, Resp, RowFunAcc}) -> + case {AccLimit, AccSkip, Resp} of + {0, _, _} -> + % we've done "limit" rows, stop foldling + {stop, {0, 0, Resp, RowFunAcc}}; + {_, AccSkip, _} when AccSkip > 0 -> + % just keep skipping + {ok, {AccLimit, AccSkip - 1, Resp, RowFunAcc}}; + {_, _, undefined} -> + % rendering the first row, first we start the response + Offset = ReduceCountFun(OffsetReds), + {ok, Resp2, RowFunAcc0} = StartRespFun(Req, Etag, + TotalViewCount, Offset, RowFunAcc, UpdateSeq), + {Go, RowFunAcc2} = SendRowFun(Resp2, Db, {{Key, DocId}, Value}, + IncludeDocs, Conflicts, RowFunAcc0), + {Go, {AccLimit - 1, 0, Resp2, RowFunAcc2}}; + {AccLimit, _, Resp} when (AccLimit > 0) -> + % rendering all other rows + {Go, RowFunAcc2} = SendRowFun(Resp, Db, {{Key, DocId}, Value}, + IncludeDocs, Conflicts, RowFunAcc), + {Go, {AccLimit - 1, 0, Resp, RowFunAcc2}} + end + end. + +make_reduce_fold_funs(Req, GroupLevel, _QueryArgs, Etag, UpdateSeq, HelperFuns) -> + #reduce_fold_helper_funs{ + start_response = StartRespFun, + send_row = SendRowFun + } = apply_default_helper_funs(HelperFuns), + + GroupRowsFun = + fun({_Key1,_}, {_Key2,_}) when GroupLevel == 0 -> + true; + ({Key1,_}, {Key2,_}) + when is_integer(GroupLevel) and is_list(Key1) and is_list(Key2) -> + lists:sublist(Key1, GroupLevel) == lists:sublist(Key2, GroupLevel); + ({Key1,_}, {Key2,_}) -> + Key1 == Key2 + end, + + RespFun = fun + (_Key, _Red, {AccLimit, AccSkip, Resp, RowAcc}) when AccSkip > 0 -> + % keep skipping + {ok, {AccLimit, AccSkip - 1, Resp, RowAcc}}; + (_Key, _Red, {0, _AccSkip, Resp, RowAcc}) -> + % we've exhausted limit rows, stop + {stop, {0, _AccSkip, Resp, RowAcc}}; + + (_Key, Red, {AccLimit, 0, undefined, RowAcc0}) when GroupLevel == 0 -> + % we haven't started responding yet and group=false + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, {null, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; + (_Key, Red, {AccLimit, 0, Resp, RowAcc}) when GroupLevel == 0 -> + % group=false but we've already started the response + {Go, RowAcc2} = SendRowFun(Resp, {null, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp, RowAcc2}}; + + (Key, Red, {AccLimit, 0, undefined, RowAcc0}) + when is_integer(GroupLevel), is_list(Key) -> + % group_level and we haven't responded yet + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, + {lists:sublist(Key, GroupLevel), Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; + (Key, Red, {AccLimit, 0, Resp, RowAcc}) + when is_integer(GroupLevel), is_list(Key) -> + % group_level and we've already started the response + {Go, RowAcc2} = SendRowFun(Resp, + {lists:sublist(Key, GroupLevel), Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp, RowAcc2}}; + + (Key, Red, {AccLimit, 0, undefined, RowAcc0}) -> + % group=true and we haven't responded yet + {ok, Resp2, RowAcc} = StartRespFun(Req, Etag, RowAcc0, UpdateSeq), + {Go, RowAcc2} = SendRowFun(Resp2, {Key, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp2, RowAcc2}}; + (Key, Red, {AccLimit, 0, Resp, RowAcc}) -> + % group=true and we've already started the response + {Go, RowAcc2} = SendRowFun(Resp, {Key, Red}, RowAcc), + {Go, {AccLimit - 1, 0, Resp, RowAcc2}} + end, + {ok, GroupRowsFun, RespFun}. + +apply_default_helper_funs( + #view_fold_helper_funs{ + start_response = StartResp, + send_row = SendRow + }=Helpers) -> + StartResp2 = case StartResp of + undefined -> fun json_view_start_resp/6; + _ -> StartResp + end, + + SendRow2 = case SendRow of + undefined -> fun send_json_view_row/6; + _ -> SendRow + end, + + Helpers#view_fold_helper_funs{ + start_response = StartResp2, + send_row = SendRow2 + }; + + +apply_default_helper_funs( + #reduce_fold_helper_funs{ + start_response = StartResp, + send_row = SendRow + }=Helpers) -> + StartResp2 = case StartResp of + undefined -> fun json_reduce_start_resp/4; + _ -> StartResp + end, + + SendRow2 = case SendRow of + undefined -> fun send_json_reduce_row/3; + _ -> SendRow + end, + + Helpers#reduce_fold_helper_funs{ + start_response = StartResp2, + send_row = SendRow2 + }. + +make_key_options(#view_query_args{direction = Dir}=QueryArgs) -> + [{dir,Dir} | make_start_key_option(QueryArgs) ++ + make_end_key_option(QueryArgs)]. + +make_start_key_option( + #view_query_args{ + start_key = StartKey, + start_docid = StartDocId}) -> + if StartKey == undefined -> + []; + true -> + [{start_key, {StartKey, StartDocId}}] + end. + +make_end_key_option(#view_query_args{end_key = undefined}) -> + []; +make_end_key_option( + #view_query_args{end_key = EndKey, + end_docid = EndDocId, + inclusive_end = true}) -> + [{end_key, {EndKey, EndDocId}}]; +make_end_key_option( + #view_query_args{ + end_key = EndKey, + end_docid = EndDocId, + inclusive_end = false}) -> + [{end_key_gt, {EndKey,reverse_key_default(EndDocId)}}]. + +json_view_start_resp(Req, Etag, TotalViewCount, Offset, _Acc, UpdateSeq) -> + {ok, Resp} = start_json_response(Req, 200, [{"Etag", Etag}]), + BeginBody = case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + io_lib:format( + "{\"total_rows\":~w,\"update_seq\":~w," + "\"offset\":~w,\"rows\":[\r\n", + [TotalViewCount, UpdateSeq, Offset]); + _Else -> + io_lib:format( + "{\"total_rows\":~w,\"offset\":~w,\"rows\":[\r\n", + [TotalViewCount, Offset]) + end, + {ok, Resp, BeginBody}. + +send_json_view_row(Resp, Db, Kv, IncludeDocs, Conflicts, RowFront) -> + JsonObj = view_row_obj(Db, Kv, IncludeDocs, Conflicts), + send_chunk(Resp, RowFront ++ ?JSON_ENCODE(JsonObj)), + {ok, ",\r\n"}. + +json_reduce_start_resp(Req, Etag, _Acc0, UpdateSeq) -> + {ok, Resp} = start_json_response(Req, 200, [{"Etag", Etag}]), + case couch_httpd:qs_value(Req, "update_seq") of + "true" -> + {ok, Resp, io_lib:format("{\"update_seq\":~w,\"rows\":[\r\n",[UpdateSeq])}; + _Else -> + {ok, Resp, "{\"rows\":[\r\n"} + end. + +send_json_reduce_row(Resp, {Key, Value}, RowFront) -> + send_chunk(Resp, RowFront ++ ?JSON_ENCODE({[{key, Key}, {value, Value}]})), + {ok, ",\r\n"}. + +view_etag(Db, Group, View, QueryArgs) -> + view_etag(Db, Group, View, QueryArgs, nil). + +view_etag(Db, Group, {reduce, _, _, View}, QueryArgs, Extra) -> + view_etag(Db, Group, View, QueryArgs, Extra); +view_etag(Db, Group, {temp_reduce, View}, QueryArgs, Extra) -> + view_etag(Db, Group, View, QueryArgs, Extra); +view_etag(_Db, #group{sig=Sig, current_seq=CurrentSeq}, _View, #view_query_args{include_docs=true}, Extra) -> + couch_httpd:make_etag({Sig, CurrentSeq, Extra}); +view_etag(_Db, #group{sig=Sig}, #view{update_seq=UpdateSeq, purge_seq=PurgeSeq}, _QueryArgs, Extra) -> + couch_httpd:make_etag({Sig, UpdateSeq, PurgeSeq, Extra}). + +% the view row has an error +view_row_obj(_Db, {{Key, error}, Value}, _IncludeDocs, _Conflicts) -> + {[{key, Key}, {error, Value}]}; +% include docs in the view output +view_row_obj(Db, {{Key, DocId}, {Props}}, true, Conflicts) -> + Rev = case couch_util:get_value(<<"_rev">>, Props) of + undefined -> + nil; + Rev0 -> + couch_doc:parse_rev(Rev0) + end, + IncludeId = couch_util:get_value(<<"_id">>, Props, DocId), + view_row_with_doc(Db, {{Key, DocId}, {Props}}, {IncludeId, Rev}, Conflicts); +view_row_obj(Db, {{Key, DocId}, Value}, true, Conflicts) -> + view_row_with_doc(Db, {{Key, DocId}, Value}, {DocId, nil}, Conflicts); +% the normal case for rendering a view row +view_row_obj(_Db, {{Key, DocId}, Value}, _IncludeDocs, _Conflicts) -> + {[{id, DocId}, {key, Key}, {value, Value}]}. + +view_row_with_doc(Db, {{Key, DocId}, Value}, IdRev, Conflicts) -> + {[{id, DocId}, {key, Key}, {value, Value}] ++ + doc_member(Db, IdRev, if Conflicts -> [conflicts]; true -> [] end)}. + +doc_member(Db, #doc_info{id = Id, revs = [#rev_info{rev = Rev} | _]} = Info, + Options) -> + ?LOG_DEBUG("Include Doc: ~p ~p", [Id, Rev]), + case couch_db:open_doc(Db, Info, [deleted | Options]) of + {ok, Doc} -> + [{doc, couch_doc:to_json_obj(Doc, [])}]; + _ -> + [{doc, null}] + end; +doc_member(Db, {DocId, Rev}, Options) -> + ?LOG_DEBUG("Include Doc: ~p ~p", [DocId, Rev]), + case (catch couch_httpd_db:couch_doc_open(Db, DocId, Rev, Options)) of + #doc{} = Doc -> + JsonDoc = couch_doc:to_json_obj(Doc, []), + [{doc, JsonDoc}]; + _Else -> + [{doc, null}] + end. + +finish_view_fold(Req, TotalRows, Offset, FoldResult) -> + finish_view_fold(Req, TotalRows, Offset, FoldResult, []). + +finish_view_fold(Req, TotalRows, Offset, FoldResult, Fields) -> + case FoldResult of + {_, _, undefined, _} -> + % nothing found in the view or keys, nothing has been returned + % send empty view + send_json(Req, 200, {[ + {total_rows, TotalRows}, + {offset, Offset}, + {rows, []} + ] ++ Fields}); + {_, _, Resp, _} -> + % end the view + send_chunk(Resp, "\r\n]}"), + end_json_response(Resp) + end. + +finish_reduce_fold(Req, Resp) -> + finish_reduce_fold(Req, Resp, []). + +finish_reduce_fold(Req, Resp, Fields) -> + case Resp of + undefined -> + send_json(Req, 200, {[ + {rows, []} + ] ++ Fields}); + Resp -> + send_chunk(Resp, "\r\n]}"), + end_json_response(Resp) + end. + +parse_bool_param(Val) -> + case string:to_lower(Val) of + "true" -> true; + "false" -> false; + _ -> + Msg = io_lib:format("Invalid boolean parameter: ~p", [Val]), + throw({query_parse_error, ?l2b(Msg)}) + end. + +parse_int_param(Val) -> + case (catch list_to_integer(Val)) of + IntVal when is_integer(IntVal) -> + IntVal; + _ -> + Msg = io_lib:format("Invalid value for integer parameter: ~p", [Val]), + throw({query_parse_error, ?l2b(Msg)}) + end. + +parse_positive_int_param(Val) -> + case parse_int_param(Val) of + IntVal when IntVal >= 0 -> + IntVal; + _ -> + Fmt = "Invalid value for positive integer parameter: ~p", + Msg = io_lib:format(Fmt, [Val]), + throw({query_parse_error, ?l2b(Msg)}) + end. + diff --git a/apps/couch/src/couch_key_tree.erl b/apps/couch/src/couch_key_tree.erl new file mode 100644 index 00000000..5e24e0f7 --- /dev/null +++ b/apps/couch/src/couch_key_tree.erl @@ -0,0 +1,458 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% @doc Data structure used to represent document edit histories. + +%% A key tree is used to represent the edit history of a document. Each node of +%% the tree represents a particular version. Relations between nodes represent +%% the order that these edits were applied. For instance, a set of three edits +%% would produce a tree of versions A->B->C indicating that edit C was based on +%% version B which was in turn based on A. In a world without replication (and +%% no ability to disable MVCC checks), all histories would be forced to be +%% linear lists of edits due to constraints imposed by MVCC (ie, new edits must +%% be based on the current version). However, we have replication, so we must +%% deal with not so easy cases, which lead to trees. +%% +%% Consider a document in state A. This doc is replicated to a second node. We +%% then edit the document on each node leaving it in two different states, B +%% and C. We now have two key trees, A->B and A->C. When we go to replicate a +%% second time, the key tree must combine these two trees which gives us +%% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we +%% say that we have two leaves (B and C) that are not deleted. The presense of +%% the multiple leaves indicate conflict. To remove a conflict, one of the +%% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an +%% edit that is specially marked with the a deleted=true flag. +%% +%% What makes this a bit more complicated is that there is a limit to the +%% number of revisions kept, specified in couch_db.hrl (default is 1000). When +%% this limit is exceeded only the last 1000 are kept. This comes in to play +%% when branches are merged. The comparison has to begin at the same place in +%% the branches. A revision id is of the form N-XXXXXXX where N is the current +%% revision. So each path will have a start number, calculated in +%% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc +%% was edit 1003 times this start number would be 4, indicating that 3 +%% revisions were truncated. +%% +%% This comes into play in @see merge_at/3 which recursively walks down one +%% tree or the other until they begin at the same revision. + +-module(couch_key_tree). + +-export([merge/3, find_missing/2, get_key_leafs/2, + get_full_key_paths/2, get/2, compute_data_size/1]). +-export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2, + get_all_leafs_full/1,stem/2,map_leafs/2, fold/3]). + +-include("couch_db.hrl"). + +% Tree::term() is really a tree(), but we don't want to require R13B04 yet +-type branch() :: {Key::term(), Value::term(), Tree::term()}. +-type path() :: {Start::pos_integer(), branch()}. +-type tree() :: [branch()]. % sorted by key + +% partial trees arranged by how much they are cut off. + +-spec merge([path()], path(), pos_integer()) -> {[path()], + conflicts | no_conflicts}. +merge(Paths, Path, Depth) -> + {Merged, Conflicts} = merge(Paths, Path), + {stem(Merged, Depth), Conflicts}. + +%% @doc Merge a path with an existing list of paths, returning a new list of +%% paths. A return of conflicts indicates a new conflict was discovered in this +%% merge. Conflicts may already exist in the original list of paths. +-spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}. +merge(Paths, Path) -> + {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false), + if HasConflicts -> + Conflicts = conflicts; + (length(Merged) =/= length(Paths)) and (length(Merged) =/= 1) -> + Conflicts = conflicts; + true -> + Conflicts = no_conflicts + end, + {lists:sort(Merged), Conflicts}. + +-spec merge_one(Original::[path()], Inserted::path(), [path()], boolean()) -> + {ok, Merged::[path()], NewConflicts::boolean()}. +merge_one([], Insert, OutAcc, ConflictsAcc) -> + {ok, [Insert | OutAcc], ConflictsAcc}; +merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, Acc, HasConflicts) -> + case merge_at([Tree], StartInsert - Start, [TreeInsert]) of + {ok, [Merged], Conflicts} -> + MergedStart = lists:min([Start, StartInsert]), + {ok, Rest ++ [{MergedStart, Merged} | Acc], Conflicts or HasConflicts}; + no -> + AccOut = [{Start, Tree} | Acc], + merge_one(Rest, {StartInsert, TreeInsert}, AccOut, HasConflicts) + end. + +-spec merge_at(tree(), Place::integer(), tree()) -> + {ok, Merged::tree(), HasConflicts::boolean()} | no. +merge_at(_Ours, _Place, []) -> + no; +merge_at([], _Place, _Insert) -> + no; +merge_at([{Key, Value, SubTree}|Sibs], Place, InsertTree) when Place > 0 -> + % inserted starts later than committed, need to drill into committed subtree + case merge_at(SubTree, Place - 1, InsertTree) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, Merged} | Sibs], Conflicts}; + no -> + % first branch didn't merge, move to next branch + case merge_at(Sibs, Place, InsertTree) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, SubTree} | Merged], Conflicts}; + no -> + no + end + end; +merge_at(OurTree, Place, [{Key, Value, SubTree}]) when Place < 0 -> + % inserted starts earlier than committed, need to drill into insert subtree + case merge_at(OurTree, Place + 1, SubTree) of + {ok, Merged, Conflicts} -> + {ok, [{Key, Value, Merged}], Conflicts}; + no -> + no + end; +merge_at([{Key, V1, SubTree}|Sibs], 0, [{Key, V2, InsertSubTree}]) -> + {Merged, Conflicts} = merge_simple(SubTree, InsertSubTree), + {ok, [{Key, value_pref(V1, V2), Merged} | Sibs], Conflicts}; +merge_at([{OurKey, _, _} | _], 0, [{Key, _, _}]) when OurKey > Key -> + % siblings keys are ordered, no point in continuing + no; +merge_at([Tree | Sibs], 0, InsertTree) -> + case merge_at(Sibs, 0, InsertTree) of + {ok, Merged, Conflicts} -> + {ok, [Tree | Merged], Conflicts}; + no -> + no + end. + +% key tree functions + +-spec merge_simple(tree(), tree()) -> {Merged::tree(), NewConflicts::boolean()}. +merge_simple([], B) -> + {B, false}; +merge_simple(A, []) -> + {A, false}; +merge_simple([{Key, V1, SubA} | NextA], [{Key, V2, SubB} | NextB]) -> + {MergedSubTree, Conflict1} = merge_simple(SubA, SubB), + {MergedNextTree, Conflict2} = merge_simple(NextA, NextB), + Value = value_pref(V1, V2), + {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; +merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B -> + {Merged, Conflict} = merge_simple(Next, Insert), + % if Merged has more branches than the input we added a new conflict + {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}; +merge_simple(Ours, [Tree | Next]) -> + {Merged, Conflict} = merge_simple(Ours, Next), + {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}. + +find_missing(_Tree, []) -> + []; +find_missing([], SeachKeys) -> + SeachKeys; +find_missing([{Start, {Key, Value, SubTree}} | RestTree], SeachKeys) -> + PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Start], + ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Start], + Missing = find_missing_simple(Start, [{Key, Value, SubTree}], PossibleKeys), + find_missing(RestTree, ImpossibleKeys ++ Missing). + +find_missing_simple(_Pos, _Tree, []) -> + []; +find_missing_simple(_Pos, [], SeachKeys) -> + SeachKeys; +find_missing_simple(Pos, [{Key, _, SubTree} | RestTree], SeachKeys) -> + PossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos >= Pos], + ImpossibleKeys = [{KeyPos, KeyValue} || {KeyPos, KeyValue} <- SeachKeys, KeyPos < Pos], + + SrcKeys2 = PossibleKeys -- [{Pos, Key}], + SrcKeys3 = find_missing_simple(Pos + 1, SubTree, SrcKeys2), + ImpossibleKeys ++ find_missing_simple(Pos, RestTree, SrcKeys3). + + +filter_leafs([], _Keys, FilteredAcc, RemovedKeysAcc) -> + {FilteredAcc, RemovedKeysAcc}; +filter_leafs([{Pos, [{LeafKey, _}|_]} = Path |Rest], Keys, FilteredAcc, RemovedKeysAcc) -> + FilteredKeys = lists:delete({Pos, LeafKey}, Keys), + if FilteredKeys == Keys -> + % this leaf is not a key we are looking to remove + filter_leafs(Rest, Keys, [Path | FilteredAcc], RemovedKeysAcc); + true -> + % this did match a key, remove both the node and the input key + filter_leafs(Rest, FilteredKeys, FilteredAcc, [{Pos, LeafKey} | RemovedKeysAcc]) + end. + +% Removes any branches from the tree whose leaf node(s) are in the Keys +remove_leafs(Trees, Keys) -> + % flatten each branch in a tree into a tree path + Paths = get_all_leafs_full(Trees), + + % filter out any that are in the keys list. + {FilteredPaths, RemovedKeys} = filter_leafs(Paths, Keys, [], []), + + SortedPaths = lists:sort( + [{Pos + 1 - length(Path), Path} || {Pos, Path} <- FilteredPaths] + ), + + % convert paths back to trees + NewTree = lists:foldl( + fun({StartPos, Path},TreeAcc) -> + [SingleTree] = lists:foldl( + fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), + {NewTrees, _} = merge(TreeAcc, {StartPos, SingleTree}), + NewTrees + end, [], SortedPaths), + {NewTree, RemovedKeys}. + + +% get the leafs in the tree matching the keys. The matching key nodes can be +% leafs or an inner nodes. If an inner node, then the leafs for that node +% are returned. +get_key_leafs(Tree, Keys) -> + get_key_leafs(Tree, Keys, []). + +get_key_leafs(_, [], Acc) -> + {Acc, []}; +get_key_leafs([], Keys, Acc) -> + {Acc, Keys}; +get_key_leafs([{Pos, Tree}|Rest], Keys, Acc) -> + {Gotten, RemainingKeys} = get_key_leafs_simple(Pos, [Tree], Keys, []), + get_key_leafs(Rest, RemainingKeys, Gotten ++ Acc). + +get_key_leafs_simple(_Pos, _Tree, [], _KeyPathAcc) -> + {[], []}; +get_key_leafs_simple(_Pos, [], KeysToGet, _KeyPathAcc) -> + {[], KeysToGet}; +get_key_leafs_simple(Pos, [{Key, _Value, SubTree}=Tree | RestTree], KeysToGet, KeyPathAcc) -> + case lists:delete({Pos, Key}, KeysToGet) of + KeysToGet -> % same list, key not found + {LeafsFound, KeysToGet2} = get_key_leafs_simple(Pos + 1, SubTree, KeysToGet, [Key | KeyPathAcc]), + {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc), + {LeafsFound ++ RestLeafsFound, KeysRemaining}; + KeysToGet2 -> + LeafsFound = get_all_leafs_simple(Pos, [Tree], KeyPathAcc), + LeafKeysFound = [LeafKeyFound || {LeafKeyFound, _} <- LeafsFound], + KeysToGet2 = KeysToGet2 -- LeafKeysFound, + {RestLeafsFound, KeysRemaining} = get_key_leafs_simple(Pos, RestTree, KeysToGet2, KeyPathAcc), + {LeafsFound ++ RestLeafsFound, KeysRemaining} + end. + +get(Tree, KeysToGet) -> + {KeyPaths, KeysNotFound} = get_full_key_paths(Tree, KeysToGet), + FixedResults = [ {Value, {Pos, [Key0 || {Key0, _} <- Path]}} || {Pos, [{_Key, Value}|_]=Path} <- KeyPaths], + {FixedResults, KeysNotFound}. + +get_full_key_paths(Tree, Keys) -> + get_full_key_paths(Tree, Keys, []). + +get_full_key_paths(_, [], Acc) -> + {Acc, []}; +get_full_key_paths([], Keys, Acc) -> + {Acc, Keys}; +get_full_key_paths([{Pos, Tree}|Rest], Keys, Acc) -> + {Gotten, RemainingKeys} = get_full_key_paths(Pos, [Tree], Keys, []), + get_full_key_paths(Rest, RemainingKeys, Gotten ++ Acc). + + +get_full_key_paths(_Pos, _Tree, [], _KeyPathAcc) -> + {[], []}; +get_full_key_paths(_Pos, [], KeysToGet, _KeyPathAcc) -> + {[], KeysToGet}; +get_full_key_paths(Pos, [{KeyId, Value, SubTree} | RestTree], KeysToGet, KeyPathAcc) -> + KeysToGet2 = KeysToGet -- [{Pos, KeyId}], + CurrentNodeResult = + case length(KeysToGet2) =:= length(KeysToGet) of + true -> % not in the key list. + []; + false -> % this node is the key list. return it + [{Pos, [{KeyId, Value} | KeyPathAcc]}] + end, + {KeysGotten, KeysRemaining} = get_full_key_paths(Pos + 1, SubTree, KeysToGet2, [{KeyId, Value} | KeyPathAcc]), + {KeysGotten2, KeysRemaining2} = get_full_key_paths(Pos, RestTree, KeysRemaining, KeyPathAcc), + {CurrentNodeResult ++ KeysGotten ++ KeysGotten2, KeysRemaining2}. + +get_all_leafs_full(Tree) -> + get_all_leafs_full(Tree, []). + +get_all_leafs_full([], Acc) -> + Acc; +get_all_leafs_full([{Pos, Tree} | Rest], Acc) -> + get_all_leafs_full(Rest, get_all_leafs_full_simple(Pos, [Tree], []) ++ Acc). + +get_all_leafs_full_simple(_Pos, [], _KeyPathAcc) -> + []; +get_all_leafs_full_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) -> + [{Pos, [{KeyId, Value} | KeyPathAcc]} | get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc)]; +get_all_leafs_full_simple(Pos, [{KeyId, Value, SubTree} | RestTree], KeyPathAcc) -> + get_all_leafs_full_simple(Pos + 1, SubTree, [{KeyId, Value} | KeyPathAcc]) ++ get_all_leafs_full_simple(Pos, RestTree, KeyPathAcc). + +get_all_leafs(Trees) -> + get_all_leafs(Trees, []). + +get_all_leafs([], Acc) -> + Acc; +get_all_leafs([{Pos, Tree}|Rest], Acc) -> + get_all_leafs(Rest, get_all_leafs_simple(Pos, [Tree], []) ++ Acc). + +get_all_leafs_simple(_Pos, [], _KeyPathAcc) -> + []; +get_all_leafs_simple(Pos, [{KeyId, Value, []} | RestTree], KeyPathAcc) -> + [{Value, {Pos, [KeyId | KeyPathAcc]}} | get_all_leafs_simple(Pos, RestTree, KeyPathAcc)]; +get_all_leafs_simple(Pos, [{KeyId, _Value, SubTree} | RestTree], KeyPathAcc) -> + get_all_leafs_simple(Pos + 1, SubTree, [KeyId | KeyPathAcc]) ++ get_all_leafs_simple(Pos, RestTree, KeyPathAcc). + + +count_leafs([]) -> + 0; +count_leafs([{_Pos,Tree}|Rest]) -> + count_leafs_simple([Tree]) + count_leafs(Rest). + +count_leafs_simple([]) -> + 0; +count_leafs_simple([{_Key, _Value, []} | RestTree]) -> + 1 + count_leafs_simple(RestTree); +count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) -> + count_leafs_simple(SubTree) + count_leafs_simple(RestTree). + +compute_data_size(Tree) -> + {TotBodySizes,TotAttSizes} = + tree_fold(fun({_Pos, _Key, _Value},branch,Acc) -> + {ok,Acc}; + ({_Pos, _Key, Value},leaf,Acc) -> + {ok, sum_up_sizes(Value, Acc)} + end,{0,[]},Tree), + SumTotAttSizes = lists:foldl(fun({_K,V},Acc) -> + V + Acc + end,0,TotAttSizes), + TotBodySizes + SumTotAttSizes. + +sum_up_sizes(#leaf{deleted=true}, Acc) -> + Acc; +sum_up_sizes(#leaf{deleted=false, size=DocBodySize, atts=AttSizes},Acc) -> + {TotBodySizes,TotalAttSizes} = Acc, + {TotBodySizes + DocBodySize, add_att_sizes(TotalAttSizes, AttSizes)}. + +add_att_sizes(TotalAttSizes,AttSizes) -> + lists:umerge(TotalAttSizes, lists:sort(AttSizes)). + +tree_fold(_Fun, Acc, []) -> + Acc; + +tree_fold(Fun, Acc, [{Pos, Branch} | Rest]) -> + Acc1 = tree_fold_simple(Fun, Pos, [Branch], Acc), + tree_fold(Fun, Acc1, Rest). + +tree_fold_simple(_Fun, _Pos, [], Acc) -> + Acc; + +tree_fold_simple(Fun, Pos, [{Key, Value, []} | RestTree], Acc) -> + case Fun({Pos, Key, Value}, leaf, Acc) of + {ok, Acc1} -> + tree_fold_simple(Fun, Pos, RestTree, Acc1); + {stop, Acc1} -> + Acc1 + end; + +tree_fold_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree], Acc) -> + Acc1 = tree_fold_simple(Fun, Pos + 1, SubTree, Acc), + case Fun({Pos, Key, Value}, branch, Acc1) of + {ok, Acc2} -> + tree_fold_simple(Fun, Pos, RestTree, Acc2); + {stop, Acc2} -> + Acc2 + end. + +fold(_Fun, Acc, []) -> + Acc; +fold(Fun, Acc0, [{Pos, Tree}|Rest]) -> + Acc1 = fold_simple(Fun, Acc0, Pos, [Tree]), + fold(Fun, Acc1, Rest). + +fold_simple(_Fun, Acc, _Pos, []) -> + Acc; +fold_simple(Fun, Acc0, Pos, [{Key, Value, SubTree} | RestTree]) -> + Type = if SubTree == [] -> leaf; true -> branch end, + Acc1 = Fun({Pos, Key}, Value, Type, Acc0), + Acc2 = fold_simple(Fun, Acc1, Pos+1, SubTree), + fold_simple(Fun, Acc2, Pos, RestTree). + + +map(_Fun, []) -> + []; +map(Fun, [{Pos, Tree}|Rest]) -> + case erlang:fun_info(Fun, arity) of + {arity, 2} -> + [NewTree] = map_simple(fun(A,B,_C) -> Fun(A,B) end, Pos, [Tree]), + [{Pos, NewTree} | map(Fun, Rest)]; + {arity, 3} -> + [NewTree] = map_simple(Fun, Pos, [Tree]), + [{Pos, NewTree} | map(Fun, Rest)] + end. + +map_simple(_Fun, _Pos, []) -> + []; +map_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> + Value2 = Fun({Pos, Key}, Value, + if SubTree == [] -> leaf; true -> branch end), + [{Key, Value2, map_simple(Fun, Pos + 1, SubTree)} | map_simple(Fun, Pos, RestTree)]. + + +map_leafs(_Fun, []) -> + []; +map_leafs(Fun, [{Pos, Tree}|Rest]) -> + [NewTree] = map_leafs_simple(Fun, Pos, [Tree]), + [{Pos, NewTree} | map_leafs(Fun, Rest)]. + +map_leafs_simple(_Fun, _Pos, []) -> + []; +map_leafs_simple(Fun, Pos, [{Key, Value, []} | RestTree]) -> + Value2 = Fun({Pos, Key}, Value), + [{Key, Value2, []} | map_leafs_simple(Fun, Pos, RestTree)]; +map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) -> + [{Key, Value, map_leafs_simple(Fun, Pos + 1, SubTree)} | map_leafs_simple(Fun, Pos, RestTree)]. + + +stem(Trees, Limit) -> + % flatten each branch in a tree into a tree path, sort by starting rev # + Paths = lists:sort(lists:map(fun({Pos, Path}) -> + StemmedPath = lists:sublist(Path, Limit), + {Pos + 1 - length(StemmedPath), StemmedPath} + end, get_all_leafs_full(Trees))), + + % convert paths back to trees + lists:foldl( + fun({StartPos, Path},TreeAcc) -> + [SingleTree] = lists:foldl( + fun({K,V},NewTreeAcc) -> [{K,V,NewTreeAcc}] end, [], Path), + {NewTrees, _} = merge(TreeAcc, {StartPos, SingleTree}), + NewTrees + end, [], Paths). + + +value_pref(Tuple, _) when is_tuple(Tuple), + (tuple_size(Tuple) == 3 orelse tuple_size(Tuple) == 4) -> + Tuple; +value_pref(_, Tuple) when is_tuple(Tuple), + (tuple_size(Tuple) == 3 orelse tuple_size(Tuple) == 4) -> + Tuple; +value_pref(?REV_MISSING, Other) -> + Other; +value_pref(Other, ?REV_MISSING) -> + Other; +value_pref(Last, _) -> + Last. + + +% Tests moved to test/etap/06?-*.t + diff --git a/apps/couch/src/couch_log.erl b/apps/couch/src/couch_log.erl new file mode 100644 index 00000000..362d092d --- /dev/null +++ b/apps/couch/src/couch_log.erl @@ -0,0 +1,191 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_log). +-behaviour(gen_event). + +-export([start_link/0,stop/0]). +-export([debug/2, info/2, error/2]). +-export([debug_on/0,info_on/0,get_level/0,get_level_integer/0, set_level/1]). +-export([init/1, handle_event/2, terminate/2, code_change/3, handle_info/2, handle_call/2]). +-export([read/2]). + +-define(LEVEL_ERROR, 3). +-define(LEVEL_INFO, 2). +-define(LEVEL_DEBUG, 1). +-define(LEVEL_TMI, 0). + +debug(Format, Args) -> + {ConsoleMsg, FileMsg} = get_log_messages(self(), debug, Format, Args), + gen_event:sync_notify(error_logger, {couch_debug, ConsoleMsg, FileMsg}). + +info(Format, Args) -> + {ConsoleMsg, FileMsg} = get_log_messages(self(), info, Format, Args), + gen_event:sync_notify(error_logger, {couch_info, ConsoleMsg, FileMsg}). + +error(Format, Args) -> + {ConsoleMsg, FileMsg} = get_log_messages(self(), error, Format, Args), + gen_event:sync_notify(error_logger, {couch_error, ConsoleMsg, FileMsg}). + + +level_integer(error) -> ?LEVEL_ERROR; +level_integer(info) -> ?LEVEL_INFO; +level_integer(debug) -> ?LEVEL_DEBUG; +level_integer(tmi) -> ?LEVEL_TMI; +level_integer(_Else) -> ?LEVEL_ERROR. % anything else default to ERROR level + +level_atom(?LEVEL_ERROR) -> error; +level_atom(?LEVEL_INFO) -> info; +level_atom(?LEVEL_DEBUG) -> debug; +level_atom(?LEVEL_TMI) -> tmi. + + +start_link() -> + couch_event_sup:start_link({local, couch_log}, error_logger, couch_log, []). + +stop() -> + couch_event_sup:stop(couch_log). + +init([]) -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. + ok = couch_config:register( + fun("log", "file") -> + ?MODULE:stop(); + ("log", "level") -> + ?MODULE:stop(); + ("log", "include_sasl") -> + ?MODULE:stop() + end), + + Filename = couch_config:get("log", "file", "couchdb.log"), + Level = level_integer(list_to_atom(couch_config:get("log", "level", "info"))), + Sasl = list_to_atom(couch_config:get("log", "include_sasl", "true")), + + case ets:info(?MODULE) of + undefined -> ets:new(?MODULE, [named_table]); + _ -> ok + end, + ets:insert(?MODULE, {level, Level}), + + case file:open(Filename, [append]) of + {ok, Fd} -> + {ok, {Fd, Level, Sasl}}; + {error, eacces} -> + {stop, {file_permission_error, Filename}}; + Error -> + {stop, Error} + end. + +debug_on() -> + get_level_integer() =< ?LEVEL_DEBUG. + +info_on() -> + get_level_integer() =< ?LEVEL_INFO. + +set_level(LevelAtom) -> + set_level_integer(level_integer(LevelAtom)). + +get_level() -> + level_atom(get_level_integer()). + +get_level_integer() -> + try + ets:lookup_element(?MODULE, level, 2) + catch error:badarg -> + ?LEVEL_ERROR + end. + +set_level_integer(Int) -> + gen_event:call(error_logger, couch_log, {set_level_integer, Int}). + +handle_event({couch_error, ConMsg, FileMsg}, {Fd, _LogLevel, _Sasl}=State) -> + log(Fd, ConMsg, FileMsg), + {ok, State}; +handle_event({couch_info, ConMsg, FileMsg}, {Fd, LogLevel, _Sasl}=State) +when LogLevel =< ?LEVEL_INFO -> + log(Fd, ConMsg, FileMsg), + {ok, State}; +handle_event({couch_debug, ConMsg, FileMsg}, {Fd, LogLevel, _Sasl}=State) +when LogLevel =< ?LEVEL_DEBUG -> + log(Fd, ConMsg, FileMsg), + {ok, State}; +handle_event({error_report, _, {Pid, _, _}}=Event, {Fd, _LogLevel, Sasl}=State) +when Sasl =/= false -> + {ConMsg, FileMsg} = get_log_messages(Pid, error, "~p", [Event]), + log(Fd, ConMsg, FileMsg), + {ok, State}; +handle_event({error, _, {Pid, Format, Args}}, {Fd, _LogLevel, Sasl}=State) +when Sasl =/= false -> + {ConMsg, FileMsg} = get_log_messages(Pid, error, Format, Args), + log(Fd, ConMsg, FileMsg), + {ok, State}; +handle_event({_, _, {Pid, _, _}}=Event, {Fd, LogLevel, _Sasl}=State) +when LogLevel =< ?LEVEL_TMI -> + % log every remaining event if tmi! + {ConMsg, FileMsg} = get_log_messages(Pid, tmi, "~p", [Event]), + log(Fd, ConMsg, FileMsg), + {ok, State}; +handle_event(_Event, State) -> + {ok, State}. + +handle_call({set_level_integer, NewLevel}, {Fd, _LogLevel, Sasl}) -> + ets:insert(?MODULE, {level, NewLevel}), + {ok, ok, {Fd, NewLevel, Sasl}}. + +handle_info(_Info, State) -> + {ok, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Arg, {Fd, _LoggingLevel, _Sasl}) -> + file:close(Fd). + +log(Fd, ConsoleMsg, FileMsg) -> + ok = io:put_chars(ConsoleMsg), + ok = io:put_chars(Fd, FileMsg). + +get_log_messages(Pid, Level, Format, Args) -> + Nonce = case erlang:get(nonce) of + undefined -> "--------"; + Else -> Else + end, + ConsoleMsg = unicode:characters_to_binary(io_lib:format( + "[~s] [~p] [~s] " ++ Format ++ "~n", [Level, Pid, Nonce | Args])), + FileMsg = ["[", httpd_util:rfc1123_date(), "] ", ConsoleMsg], + {ConsoleMsg, iolist_to_binary(FileMsg)}. + +read(Bytes, Offset) -> + LogFileName = couch_config:get("log", "file"), + LogFileSize = filelib:file_size(LogFileName), + MaxChunkSize = list_to_integer( + couch_config:get("httpd", "log_max_chunk_size", "1000000")), + case Bytes > MaxChunkSize of + true -> + throw({bad_request, "'bytes' cannot exceed " ++ + integer_to_list(MaxChunkSize)}); + false -> + ok + end, + + {ok, Fd} = file:open(LogFileName, [read]), + Start = lists:max([LogFileSize - Bytes, 0]) + Offset, + + % TODO: truncate chopped first line + % TODO: make streaming + + {ok, Chunk} = file:pread(Fd, Start, LogFileSize), + ok = file:close(Fd), + Chunk. diff --git a/apps/couch/src/couch_native_process.erl b/apps/couch/src/couch_native_process.erl new file mode 100644 index 00000000..b512f712 --- /dev/null +++ b/apps/couch/src/couch_native_process.erl @@ -0,0 +1,402 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% +% You may obtain a copy of the License at +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +% either express or implied. +% +% See the License for the specific language governing permissions +% and limitations under the License. +% +% This file drew much inspiration from erlview, which was written by and +% copyright Michael McDaniel [http://autosys.us], and is also under APL 2.0 +% +% +% This module provides the smallest possible native view-server. +% With this module in-place, you can add the following to your couch INI files: +% [native_query_servers] +% erlang={couch_native_process, start_link, []} +% +% Which will then allow following example map function to be used: +% +% fun({Doc}) -> +% % Below, we emit a single record - the _id as key, null as value +% DocId = couch_util:get_value(Doc, <<"_id">>, null), +% Emit(DocId, null) +% end. +% +% which should be roughly the same as the javascript: +% emit(doc._id, null); +% +% This module exposes enough functions such that a native erlang server can +% act as a fully-fleged view server, but no 'helper' functions specifically +% for simplifying your erlang view code. It is expected other third-party +% extensions will evolve which offer useful layers on top of this view server +% to help simplify your view code. +-module(couch_native_process). +-behaviour(gen_server). + +-export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, + handle_info/2]). +-export([set_timeout/2, prompt/2]). + +-define(STATE, native_proc_state). +-record(evstate, {ddocs, funs=[], query_config=[], list_pid=nil, timeout=5000}). + +-include("couch_db.hrl"). + +start_link() -> + gen_server:start_link(?MODULE, [], []). + +% this is a bit messy, see also couch_query_servers handle_info +% stop(_Pid) -> +% ok. + +set_timeout(Pid, TimeOut) -> + gen_server:call(Pid, {set_timeout, TimeOut}). + +prompt(Pid, Data) when is_list(Data) -> + gen_server:call(Pid, {prompt, Data}). + +% gen_server callbacks +init([]) -> + {ok, #evstate{ddocs=dict:new()}}. + +handle_call({set_timeout, TimeOut}, _From, State) -> + {reply, ok, State#evstate{timeout=TimeOut}}; + +handle_call({prompt, Data}, _From, State) -> + ?LOG_DEBUG("Prompt native qs: ~s",[?JSON_ENCODE(Data)]), + {NewState, Resp} = try run(State, to_binary(Data)) of + {S, R} -> {S, R} + catch + throw:{error, Why} -> + {State, [<<"error">>, Why, Why]} + end, + + case Resp of + {error, Reason} -> + Msg = io_lib:format("couch native server error: ~p", [Reason]), + {reply, [<<"error">>, <<"native_query_server">>, list_to_binary(Msg)], NewState}; + [<<"error">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {reply, [<<"error">> | Rest], NewState}; + [<<"fatal">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {stop, fatal, [<<"error">> | Rest], NewState}; + Resp -> + {reply, Resp, NewState} + end. + +handle_cast(foo, State) -> {noreply, State}. +handle_info({'EXIT',_,normal}, State) -> {noreply, State}; +handle_info({'EXIT',_,Reason}, State) -> + {stop, Reason, State}. +terminate(_Reason, _State) -> ok. +code_change(_OldVersion, State, _Extra) -> {ok, State}. + +run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> + Pid ! {self(), list_row, Row}, + receive + {Pid, chunks, Data} -> + {State, [<<"chunks">>, Data]}; + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, [<<"end">>, Data]} + after State#evstate.timeout -> + throw({timeout, list_row}) + end; +run(#evstate{list_pid=Pid}=State, [<<"list_end">>]) when is_pid(Pid) -> + Pid ! {self(), list_end}, + Resp = + receive + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + [<<"end">>, Data] + after State#evstate.timeout -> + throw({timeout, list_end}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, Resp}; +run(#evstate{list_pid=Pid}=State, _Command) when is_pid(Pid) -> + {State, [<<"error">>, list_error, list_error]}; +run(#evstate{ddocs=DDocs}, [<<"reset">>]) -> + {#evstate{ddocs=DDocs}, true}; +run(#evstate{ddocs=DDocs}, [<<"reset">>, QueryConfig]) -> + {#evstate{ddocs=DDocs, query_config=QueryConfig}, true}; +run(#evstate{funs=Funs}=State, [<<"add_fun">> , BinFunc]) -> + FunInfo = makefun(State, BinFunc), + {State#evstate{funs=Funs ++ [FunInfo]}, true}; +run(State, [<<"map_doc">> , Doc]) -> + Resp = lists:map(fun({Sig, Fun}) -> + erlang:put(Sig, []), + Fun(Doc), + lists:reverse(erlang:get(Sig)) + end, State#evstate.funs), + {State, Resp}; +run(State, [<<"reduce">>, Funs, KVs]) -> + {Keys, Vals} = + lists:foldl(fun([K, V], {KAcc, VAcc}) -> + {[K | KAcc], [V | VAcc]} + end, {[], []}, KVs), + Keys2 = lists:reverse(Keys), + Vals2 = lists:reverse(Vals), + {State, catch reduce(State, Funs, Keys2, Vals2, false)}; +run(State, [<<"rereduce">>, Funs, Vals]) -> + {State, catch reduce(State, Funs, null, Vals, true)}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, <<"new">>, DDocId, DDoc]) -> + DDocs2 = store_ddoc(DDocs, DDocId, DDoc), + {State#evstate{ddocs=DDocs2}, true}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, DDocId | Rest]) -> + DDoc = load_ddoc(DDocs, DDocId), + ddoc(State, DDoc, Rest); +run(_, Unknown) -> + ?LOG_ERROR("Native Process: Unknown command: ~p~n", [Unknown]), + throw({error, unknown_command}). + +ddoc(State, {DDoc}, [FunPath, Args]) -> + % load fun from the FunPath + BFun = lists:foldl(fun + (Key, {Props}) when is_list(Props) -> + couch_util:get_value(Key, Props, nil); + (_Key, Fun) when is_binary(Fun) -> + Fun; + (_Key, nil) -> + throw({error, not_found}); + (_Key, _Fun) -> + throw({error, malformed_ddoc}) + end, {DDoc}, FunPath), + ddoc(State, makefun(State, BFun, {DDoc}), FunPath, Args). + +ddoc(State, {_, Fun}, [<<"validate_doc_update">>], Args) -> + {State, (catch apply(Fun, Args))}; +ddoc(State, {_, Fun}, [<<"filters">>|_], [Docs, Req]) -> + Resp = lists:map(fun(Doc) -> (catch Fun(Doc, Req)) =:= true end, Docs), + {State, [true, Resp]}; +ddoc(State, {_, Fun}, [<<"shows">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + FunResp when is_list(FunResp) -> + FunResp; + {FunResp} -> + [<<"resp">>, {FunResp}]; + FunResp -> + FunResp + end, + {State, Resp}; +ddoc(State, {_, Fun}, [<<"updates">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + [JsonDoc, JsonResp] -> + [<<"up">>, JsonDoc, JsonResp] + end, + {State, Resp}; +ddoc(State, {Sig, Fun}, [<<"lists">>|_], Args) -> + Self = self(), + SpawnFun = fun() -> + LastChunk = (catch apply(Fun, Args)), + case start_list_resp(Self, Sig) of + started -> + receive + {Self, list_row, _Row} -> ignore; + {Self, list_end} -> ignore + after State#evstate.timeout -> + throw({timeout, list_cleanup_pid}) + end; + _ -> + ok + end, + LastChunks = + case erlang:get(Sig) of + undefined -> [LastChunk]; + OtherChunks -> [LastChunk | OtherChunks] + end, + Self ! {self(), list_end, lists:reverse(LastChunks)} + end, + erlang:put(do_trap, process_flag(trap_exit, true)), + Pid = spawn_link(SpawnFun), + Resp = + receive + {Pid, start, Chunks, JsonResp} -> + [<<"start">>, Chunks, JsonResp] + after State#evstate.timeout -> + throw({timeout, list_start}) + end, + {State#evstate{list_pid=Pid}, Resp}. + +store_ddoc(DDocs, DDocId, DDoc) -> + dict:store(DDocId, DDoc, DDocs). +load_ddoc(DDocs, DDocId) -> + try dict:fetch(DDocId, DDocs) of + {DDoc} -> {DDoc} + catch + _:_Else -> throw({error, ?l2b(io_lib:format("Native Query Server missing DDoc with Id: ~s",[DDocId]))}) + end. + +bindings(State, Sig) -> + bindings(State, Sig, nil). +bindings(State, Sig, DDoc) -> + Self = self(), + + Log = fun(Msg) -> + ?LOG_INFO(Msg, []) + end, + + Emit = fun(Id, Value) -> + Curr = erlang:get(Sig), + erlang:put(Sig, [[Id, Value] | Curr]) + end, + + Start = fun(Headers) -> + erlang:put(list_headers, Headers) + end, + + Send = fun(Chunk) -> + Curr = + case erlang:get(Sig) of + undefined -> []; + Else -> Else + end, + erlang:put(Sig, [Chunk | Curr]) + end, + + GetRow = fun() -> + case start_list_resp(Self, Sig) of + started -> + ok; + _ -> + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), chunks, lists:reverse(Chunks)} + end, + erlang:put(Sig, []), + receive + {Self, list_row, Row} -> Row; + {Self, list_end} -> nil + after State#evstate.timeout -> + throw({timeout, list_pid_getrow}) + end + end, + + FoldRows = fun(Fun, Acc) -> foldrows(GetRow, Fun, Acc) end, + + Bindings = [ + {'Log', Log}, + {'Emit', Emit}, + {'Start', Start}, + {'Send', Send}, + {'GetRow', GetRow}, + {'FoldRows', FoldRows} + ], + case DDoc of + {_Props} -> + Bindings ++ [{'DDoc', DDoc}]; + _Else -> Bindings + end. + +% thanks to erlview, via: +% http://erlang.org/pipermail/erlang-questions/2003-November/010544.html +makefun(State, Source) -> + Sig = couch_util:md5(Source), + BindFuns = bindings(State, Sig), + {Sig, makefun(State, Source, BindFuns)}. +makefun(State, Source, {DDoc}) -> + Sig = couch_util:md5(lists:flatten([Source, term_to_binary(DDoc)])), + BindFuns = bindings(State, Sig, {DDoc}), + {Sig, makefun(State, Source, BindFuns)}; +makefun(_State, Source, BindFuns) when is_list(BindFuns) -> + FunStr = binary_to_list(Source), + {ok, Tokens, _} = erl_scan:string(FunStr), + Form = case (catch erl_parse:parse_exprs(Tokens)) of + {ok, [ParsedForm]} -> + ParsedForm; + {error, {LineNum, _Mod, [Mesg, Params]}}=Error -> + io:format(standard_error, "Syntax error on line: ~p~n", [LineNum]), + io:format(standard_error, "~s~p~n", [Mesg, Params]), + throw(Error) + end, + Bindings = lists:foldl(fun({Name, Fun}, Acc) -> + erl_eval:add_binding(Name, Fun, Acc) + end, erl_eval:new_bindings(), BindFuns), + {value, Fun, _} = erl_eval:expr(Form, Bindings), + Fun. + +reduce(State, BinFuns, Keys, Vals, ReReduce) -> + Funs = case is_list(BinFuns) of + true -> + lists:map(fun(BF) -> makefun(State, BF) end, BinFuns); + _ -> + [makefun(State, BinFuns)] + end, + Reds = lists:map(fun({_Sig, Fun}) -> + Fun(Keys, Vals, ReReduce) + end, Funs), + [true, Reds]. + +foldrows(GetRow, ProcRow, Acc) -> + case GetRow() of + nil -> + {ok, Acc}; + Row -> + case (catch ProcRow(Row, Acc)) of + {ok, Acc2} -> + foldrows(GetRow, ProcRow, Acc2); + {stop, Acc2} -> + {ok, Acc2} + end + end. + +start_list_resp(Self, Sig) -> + case erlang:get(list_started) of + undefined -> + Headers = + case erlang:get(list_headers) of + undefined -> {[{<<"headers">>, {[]}}]}; + CurrHdrs -> CurrHdrs + end, + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), start, lists:reverse(Chunks), Headers}, + erlang:put(list_started, true), + erlang:put(Sig, []), + started; + _ -> + ok + end. + +to_binary({Data}) -> + Pred = fun({Key, Value}) -> + {to_binary(Key), to_binary(Value)} + end, + {lists:map(Pred, Data)}; +to_binary(Data) when is_list(Data) -> + [to_binary(D) || D <- Data]; +to_binary(null) -> + null; +to_binary(true) -> + true; +to_binary(false) -> + false; +to_binary(Data) when is_atom(Data) -> + list_to_binary(atom_to_list(Data)); +to_binary(Data) -> + Data. diff --git a/apps/couch/src/couch_os_daemons.erl b/apps/couch/src/couch_os_daemons.erl new file mode 100644 index 00000000..d03f550c --- /dev/null +++ b/apps/couch/src/couch_os_daemons.erl @@ -0,0 +1,364 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_os_daemons). +-behaviour(gen_server). + +-export([start_link/0, info/0, info/1, config_change/2]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-include("couch_db.hrl"). + +-record(daemon, { + port, + name, + cmd, + kill, + status=running, + cfg_patterns=[], + errors=[], + buf=[] +}). + +-define(PORT_OPTIONS, [stream, {line, 1024}, binary, exit_status, hide]). +-define(TIMEOUT, 5000). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +info() -> + info([]). + +info(Options) -> + gen_server:call(?MODULE, {daemon_info, Options}). + +config_change(Section, Key) -> + gen_server:cast(?MODULE, {config_change, Section, Key}). + +init(_) -> + process_flag(trap_exit, true), + ok = couch_config:register(fun couch_os_daemons:config_change/2), + Table = ets:new(?MODULE, [protected, set, {keypos, #daemon.port}]), + reload_daemons(Table), + {ok, Table}. + +terminate(_Reason, Table) -> + [stop_port(D) || D <- ets:tab2list(Table)], + ok. + +handle_call({daemon_info, Options}, _From, Table) when is_list(Options) -> + case lists:member(table, Options) of + true -> + {reply, {ok, ets:tab2list(Table)}, Table}; + _ -> + {reply, {ok, Table}, Table} + end; +handle_call(Msg, From, Table) -> + ?LOG_ERROR("Unknown call message to ~p from ~p: ~p", [?MODULE, From, Msg]), + {stop, error, Table}. + +handle_cast({config_change, Sect, Key}, Table) -> + restart_daemons(Table, Sect, Key), + case Sect of + "os_daemons" -> reload_daemons(Table); + _ -> ok + end, + {noreply, Table}; +handle_cast(stop, Table) -> + {stop, normal, Table}; +handle_cast(Msg, Table) -> + ?LOG_ERROR("Unknown cast message to ~p: ~p", [?MODULE, Msg]), + {stop, error, Table}. + +handle_info({'EXIT', Port, Reason}, Table) -> + case ets:lookup(Table, Port) of + [] -> + ?LOG_INFO("Port ~p exited after stopping: ~p~n", [Port, Reason]); + [#daemon{status=stopping}] -> + true = ets:delete(Table, Port); + [#daemon{name=Name, status=restarting}=D] -> + ?LOG_INFO("Daemon ~P restarting after config change.", [Name]), + true = ets:delete(Table, Port), + {ok, Port2} = start_port(D#daemon.cmd), + true = ets:insert(Table, D#daemon{ + port=Port2, status=running, kill=undefined, buf=[] + }); + [#daemon{name=Name, status=halted}] -> + ?LOG_ERROR("Halted daemon process: ~p", [Name]); + [D] -> + ?LOG_ERROR("Invalid port state at exit: ~p", [D]) + end, + {noreply, Table}; +handle_info({Port, closed}, Table) -> + handle_info({Port, {exit_status, closed}}, Table); +handle_info({Port, {exit_status, Status}}, Table) -> + case ets:lookup(Table, Port) of + [] -> + ?LOG_ERROR("Unknown port ~p exiting ~p", [Port, Status]), + {stop, {error, unknown_port_died, Status}, Table}; + [#daemon{name=Name, status=restarting}=D] -> + ?LOG_INFO("Daemon ~P restarting after config change.", [Name]), + true = ets:delete(Table, Port), + {ok, Port2} = start_port(D#daemon.cmd), + true = ets:insert(Table, D#daemon{ + port=Port2, status=running, kill=undefined, buf=[] + }), + {noreply, Table}; + [#daemon{status=stopping}=D] -> + % The configuration changed and this daemon is no + % longer needed. + ?LOG_DEBUG("Port ~p shut down.", [D#daemon.name]), + true = ets:delete(Table, Port), + {noreply, Table}; + [D] -> + % Port died for unknown reason. Check to see if it's + % died too many times or if we should boot it back up. + case should_halt([now() | D#daemon.errors]) of + {true, _} -> + % Halting the process. We won't try and reboot + % until the configuration changes. + Fmt = "Daemon ~p halted with exit_status ~p", + ?LOG_ERROR(Fmt, [D#daemon.name, Status]), + D2 = D#daemon{status=halted, errors=nil, buf=nil}, + true = ets:insert(Table, D2), + {noreply, Table}; + {false, Errors} -> + % We're guessing it was a random error, this daemon + % has behaved so we'll give it another chance. + Fmt = "Daemon ~p is being rebooted after exit_status ~p", + ?LOG_INFO(Fmt, [D#daemon.name, Status]), + true = ets:delete(Table, Port), + {ok, Port2} = start_port(D#daemon.cmd), + true = ets:insert(Table, D#daemon{ + port=Port2, status=running, kill=undefined, + errors=Errors, buf=[] + }), + {noreply, Table} + end; + _Else -> + throw(error) + end; +handle_info({Port, {data, {noeol, Data}}}, Table) -> + [#daemon{buf=Buf}=D] = ets:lookup(Table, Port), + true = ets:insert(Table, D#daemon{buf=[Data | Buf]}), + {noreply, Table}; +handle_info({Port, {data, {eol, Data}}}, Table) -> + [#daemon{buf=Buf}=D] = ets:lookup(Table, Port), + Line = lists:reverse(Buf, Data), + % The first line echoed back is the kill command + % for when we go to get rid of the port. Lines after + % that are considered part of the stdio API. + case D#daemon.kill of + undefined -> + true = ets:insert(Table, D#daemon{kill=?b2l(Line), buf=[]}); + _Else -> + D2 = case (catch ?JSON_DECODE(Line)) of + {invalid_json, Rejected} -> + ?LOG_ERROR("Ignoring OS daemon request: ~p", [Rejected]), + D; + JSON -> + {ok, D3} = handle_port_message(D, JSON), + D3 + end, + true = ets:insert(Table, D2#daemon{buf=[]}) + end, + {noreply, Table}; +handle_info({Port, Error}, Table) -> + ?LOG_ERROR("Unexpectd message from port ~p: ~p", [Port, Error]), + stop_port(Port), + [D] = ets:lookup(Table, Port), + true = ets:insert(Table, D#daemon{status=restarting, buf=nil}), + {noreply, Table}; +handle_info(Msg, Table) -> + ?LOG_ERROR("Unexpected info message to ~p: ~p", [?MODULE, Msg]), + {stop, error, Table}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +% Internal API + +% +% Port management helpers +% + +start_port(Command) -> + PrivDir = couch_util:priv_dir(), + Spawnkiller = filename:join(PrivDir, "couchspawnkillable"), + Port = open_port({spawn, Spawnkiller ++ " " ++ Command}, ?PORT_OPTIONS), + {ok, Port}. + + +stop_port(#daemon{port=Port, kill=undefined}=D) -> + ?LOG_ERROR("Stopping daemon without a kill command: ~p", [D#daemon.name]), + catch port_close(Port); +stop_port(#daemon{port=Port}=D) -> + ?LOG_DEBUG("Stopping daemon: ~p", [D#daemon.name]), + os:cmd(D#daemon.kill), + catch port_close(Port). + + +handle_port_message(#daemon{port=Port}=Daemon, [<<"get">>, Section]) -> + KVs = couch_config:get(Section), + Data = lists:map(fun({K, V}) -> {?l2b(K), ?l2b(V)} end, KVs), + Json = iolist_to_binary(?JSON_ENCODE({Data})), + port_command(Port, <<Json/binary, "\n">>), + {ok, Daemon}; +handle_port_message(#daemon{port=Port}=Daemon, [<<"get">>, Section, Key]) -> + Value = case couch_config:get(Section, Key, null) of + null -> null; + String -> ?l2b(String) + end, + Json = iolist_to_binary(?JSON_ENCODE(Value)), + port_command(Port, <<Json/binary, "\n">>), + {ok, Daemon}; +handle_port_message(Daemon, [<<"register">>, Sec]) when is_binary(Sec) -> + Patterns = lists:usort(Daemon#daemon.cfg_patterns ++ [{?b2l(Sec)}]), + {ok, Daemon#daemon{cfg_patterns=Patterns}}; +handle_port_message(Daemon, [<<"register">>, Sec, Key]) + when is_binary(Sec) andalso is_binary(Key) -> + Pattern = {?b2l(Sec), ?b2l(Key)}, + Patterns = lists:usort(Daemon#daemon.cfg_patterns ++ [Pattern]), + {ok, Daemon#daemon{cfg_patterns=Patterns}}; +handle_port_message(#daemon{name=Name}=Daemon, [<<"log">>, Msg]) -> + handle_log_message(Name, Msg, <<"info">>), + {ok, Daemon}; +handle_port_message(#daemon{name=Name}=Daemon, [<<"log">>, Msg, {Opts}]) -> + Level = couch_util:get_value(<<"level">>, Opts, <<"info">>), + handle_log_message(Name, Msg, Level), + {ok, Daemon}; +handle_port_message(#daemon{name=Name}=Daemon, Else) -> + ?LOG_ERROR("Daemon ~p made invalid request: ~p", [Name, Else]), + {ok, Daemon}. + + +handle_log_message(Name, Msg, _Level) when not is_binary(Msg) -> + ?LOG_ERROR("Invalid log message from daemon ~p: ~p", [Name, Msg]); +handle_log_message(Name, Msg, <<"debug">>) -> + ?LOG_DEBUG("Daemon ~p :: ~s", [Name, ?b2l(Msg)]); +handle_log_message(Name, Msg, <<"info">>) -> + ?LOG_INFO("Daemon ~p :: ~s", [Name, ?b2l(Msg)]); +handle_log_message(Name, Msg, <<"error">>) -> + ?LOG_ERROR("Daemon: ~p :: ~s", [Name, ?b2l(Msg)]); +handle_log_message(Name, Msg, Level) -> + ?LOG_ERROR("Invalid log level from daemon: ~p", [Level]), + ?LOG_INFO("Daemon: ~p :: ~s", [Name, ?b2l(Msg)]). + +% +% Daemon management helpers +% + +reload_daemons(Table) -> + % List of daemons we want to have running. + Configured = lists:sort(couch_config:get("os_daemons")), + + % Remove records for daemons that were halted. + MSpecHalted = #daemon{name='$1', cmd='$2', status=halted, _='_'}, + Halted = lists:sort([{N, C} || [N, C] <- ets:match(Table, MSpecHalted)]), + ok = stop_os_daemons(Table, find_to_stop(Configured, Halted, [])), + + % Stop daemons that are running + % Start newly configured daemons + MSpecRunning = #daemon{name='$1', cmd='$2', status=running, _='_'}, + Running = lists:sort([{N, C} || [N, C] <- ets:match(Table, MSpecRunning)]), + ok = stop_os_daemons(Table, find_to_stop(Configured, Running, [])), + ok = boot_os_daemons(Table, find_to_boot(Configured, Running, [])), + ok. + + +restart_daemons(Table, Sect, Key) -> + restart_daemons(Table, Sect, Key, ets:first(Table)). + +restart_daemons(_, _, _, '$end_of_table') -> + ok; +restart_daemons(Table, Sect, Key, Port) -> + [D] = ets:lookup(Table, Port), + HasSect = lists:member({Sect}, D#daemon.cfg_patterns), + HasKey = lists:member({Sect, Key}, D#daemon.cfg_patterns), + case HasSect or HasKey of + true -> + stop_port(D), + D2 = D#daemon{status=restarting, buf=nil}, + true = ets:insert(Table, D2); + _ -> + ok + end, + restart_daemons(Table, Sect, Key, ets:next(Table, Port)). + + +stop_os_daemons(_Table, []) -> + ok; +stop_os_daemons(Table, [{Name, Cmd} | Rest]) -> + [[Port]] = ets:match(Table, #daemon{port='$1', name=Name, cmd=Cmd, _='_'}), + [D] = ets:lookup(Table, Port), + case D#daemon.status of + halted -> + ets:delete(Table, Port); + _ -> + stop_port(D), + D2 = D#daemon{status=stopping, errors=nil, buf=nil}, + true = ets:insert(Table, D2) + end, + stop_os_daemons(Table, Rest). + +boot_os_daemons(_Table, []) -> + ok; +boot_os_daemons(Table, [{Name, Cmd} | Rest]) -> + {ok, Port} = start_port(Cmd), + true = ets:insert(Table, #daemon{port=Port, name=Name, cmd=Cmd}), + boot_os_daemons(Table, Rest). + +% Elements unique to the configured set need to be booted. +find_to_boot([], _Rest, Acc) -> + % Nothing else configured. + Acc; +find_to_boot([D | R1], [D | R2], Acc) -> + % Elements are equal, daemon already running. + find_to_boot(R1, R2, Acc); +find_to_boot([D1 | R1], [D2 | _]=A2, Acc) when D1 < D2 -> + find_to_boot(R1, A2, [D1 | Acc]); +find_to_boot(A1, [_ | R2], Acc) -> + find_to_boot(A1, R2, Acc); +find_to_boot(Rest, [], Acc) -> + % No more candidates for already running. Boot all. + Rest ++ Acc. + +% Elements unique to the running set need to be killed. +find_to_stop([], Rest, Acc) -> + % The rest haven't been found, so they must all + % be ready to die. + Rest ++ Acc; +find_to_stop([D | R1], [D | R2], Acc) -> + % Elements are equal, daemon already running. + find_to_stop(R1, R2, Acc); +find_to_stop([D1 | R1], [D2 | _]=A2, Acc) when D1 < D2 -> + find_to_stop(R1, A2, Acc); +find_to_stop(A1, [D2 | R2], Acc) -> + find_to_stop(A1, R2, [D2 | Acc]); +find_to_stop(_, [], Acc) -> + % No more running daemons to worry about. + Acc. + +should_halt(Errors) -> + RetryTimeCfg = couch_config:get("os_daemon_settings", "retry_time", "5"), + RetryTime = list_to_integer(RetryTimeCfg), + + Now = now(), + RecentErrors = lists:filter(fun(Time) -> + timer:now_diff(Now, Time) =< RetryTime * 1000000 + end, Errors), + + RetryCfg = couch_config:get("os_daemon_settings", "max_retries", "3"), + Retries = list_to_integer(RetryCfg), + + {length(RecentErrors) >= Retries, RecentErrors}. diff --git a/apps/couch/src/couch_os_process.erl b/apps/couch/src/couch_os_process.erl new file mode 100644 index 00000000..2a6d92a7 --- /dev/null +++ b/apps/couch/src/couch_os_process.erl @@ -0,0 +1,180 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_os_process). +-behaviour(gen_server). + +-export([start_link/1, start_link/2, start_link/3, stop/1]). +-export([set_timeout/2, prompt/2]). +-export([send/2, writeline/2, readline/1, writejson/2, readjson/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]). + +-include("couch_db.hrl"). + +-define(PORT_OPTIONS, [stream, {line, 1024}, binary, exit_status, hide]). + +-record(os_proc, + {command, + port, + writer, + reader, + timeout=5000 + }). + +start_link(Command) -> + start_link(Command, []). +start_link(Command, Options) -> + start_link(Command, Options, ?PORT_OPTIONS). +start_link(Command, Options, PortOptions) -> + gen_server:start_link(couch_os_process, [Command, Options, PortOptions], []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +% Read/Write API +set_timeout(Pid, TimeOut) when is_integer(TimeOut) -> + ok = gen_server:call(Pid, {set_timeout, TimeOut}). + +% Used by couch_db_update_notifier.erl +send(Pid, Data) -> + gen_server:cast(Pid, {send, Data}). + +prompt(Pid, Data) -> + case gen_server:call(Pid, {prompt, Data}, infinity) of + {ok, Result} -> + Result; + Error -> + ?LOG_ERROR("OS Process Error ~p :: ~p",[Pid,Error]), + throw(Error) + end. + +% Utility functions for reading and writing +% in custom functions +writeline(#os_proc{port=Port}, Data) -> + port_command(Port, Data ++ "\n"). + +readline(#os_proc{} = OsProc) -> + readline(OsProc, []). +readline(#os_proc{port=Port, timeout=Timeout} = OsProc, Acc) -> + receive + {Port, {data, {noeol, Data}}} -> + readline(OsProc, [Data|Acc]); + {Port, {data, {eol, Data}}} -> + lists:reverse(Acc, Data); + {Port, Err} -> + catch port_close(Port), + throw({os_process_error, Err}) + after Timeout -> + catch port_close(Port), + throw({os_process_error, "OS process timed out."}) + end. + +% Standard JSON functions +writejson(OsProc, Data) when is_record(OsProc, os_proc) -> + JsonData = ?JSON_ENCODE(Data), + % ?LOG_DEBUG("OS Process ~p Input :: ~s", [OsProc#os_proc.port, JsonData]), + true = writeline(OsProc, JsonData). + +readjson(#os_proc{} = OsProc) -> + Line = readline(OsProc), + % ?LOG_DEBUG("OS Process ~p Output :: ~s", [OsProc#os_proc.port, Line]), + case ?JSON_DECODE(Line) of + [<<"log">>, Msg] when is_binary(Msg) -> + % we got a message to log. Log it and continue + ?LOG_INFO("OS Process ~p Log :: ~s", [OsProc#os_proc.port, Msg]), + readjson(OsProc); + [<<"error">>, Id, Reason] -> + throw({couch_util:to_existing_atom(Id),Reason}); + [<<"fatal">>, Id, Reason] -> + ?LOG_INFO("OS Process ~p Fatal Error :: ~s ~p",[OsProc#os_proc.port, Id, Reason]), + throw({couch_util:to_existing_atom(Id),Reason}); + Result -> + Result + end. + + +% gen_server API +init([Command, Options, PortOptions]) -> + Spawnkiller = filename:join([code:priv_dir(couch), "couchspawnkillable.sh"]), + BaseProc = #os_proc{ + command=Command, + port=open_port({spawn, Spawnkiller ++ " " ++ Command}, PortOptions), + writer=fun writejson/2, + reader=fun readjson/1 + }, + KillCmd = readline(BaseProc), + Pid = self(), + % ?LOG_DEBUG("OS Process Start :: ~p", [BaseProc#os_proc.port]), + spawn(fun() -> + % this ensure the real os process is killed when this process dies. + erlang:monitor(process, Pid), + receive _ -> ok end, + os:cmd(?b2l(KillCmd)) + end), + OsProc = + lists:foldl(fun(Opt, Proc) -> + case Opt of + {writer, Writer} when is_function(Writer) -> + Proc#os_proc{writer=Writer}; + {reader, Reader} when is_function(Reader) -> + Proc#os_proc{reader=Reader}; + {timeout, TimeOut} when is_integer(TimeOut) -> + Proc#os_proc{timeout=TimeOut} + end + end, BaseProc, Options), + {ok, OsProc}. + +terminate(_Reason, #os_proc{port=Port}) -> + catch port_close(Port), + ok. + +handle_call({set_timeout, TimeOut}, _From, OsProc) -> + {reply, ok, OsProc#os_proc{timeout=TimeOut}}; +handle_call({prompt, Data}, _From, OsProc) -> + #os_proc{writer=Writer, reader=Reader} = OsProc, + try + Writer(OsProc, Data), + {reply, {ok, Reader(OsProc)}, OsProc} + catch + throw:{error, OsError} -> + {reply, OsError, OsProc}; + throw:{fatal, OsError} -> + {stop, normal, OsError, OsProc}; + throw:OtherError -> + {stop, normal, OtherError, OsProc} + end. + +handle_cast({send, Data}, #os_proc{writer=Writer}=OsProc) -> + try + Writer(OsProc, Data), + {noreply, OsProc} + catch + throw:OsError -> + ?LOG_ERROR("Failed sending data: ~p -> ~p", [Data, OsError]), + {stop, normal, OsProc} + end; +handle_cast(stop, OsProc) -> + {stop, normal, OsProc}; +handle_cast(Msg, OsProc) -> + ?LOG_DEBUG("OS Proc: Unknown cast: ~p", [Msg]), + {noreply, OsProc}. + +handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) -> + ?LOG_INFO("OS Process terminated normally", []), + {stop, normal, OsProc}; +handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) -> + ?LOG_ERROR("OS Process died with status: ~p", [Status]), + {stop, {exit_status, Status}, OsProc}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + diff --git a/apps/couch/src/couch_primary_sup.erl b/apps/couch/src/couch_primary_sup.erl new file mode 100644 index 00000000..2aff86ef --- /dev/null +++ b/apps/couch/src/couch_primary_sup.erl @@ -0,0 +1,48 @@ +-module(couch_primary_sup). +-behaviour(supervisor). +-export([init/1, start_link/0]). + +start_link() -> + supervisor:start_link({local,couch_primary_services}, ?MODULE, []). + +init([]) -> + Children = [ + {collation_driver, + {couch_drv, start_link, []}, + permanent, + brutal_kill, + worker, + [couch_drv]}, + {couch_task_status, + {couch_task_status, start_link, []}, + permanent, + brutal_kill, + worker, + [couch_task_status]}, + {couch_server, + {couch_server, sup_start_link, []}, + permanent, + brutal_kill, + worker, + [couch_server]}, + {couch_db_update_event, + {gen_event, start_link, [{local, couch_db_update}]}, + permanent, + brutal_kill, + worker, + dynamic}, + {couch_replication_supervisor, + {couch_rep_sup, start_link, []}, + permanent, + infinity, + supervisor, + [couch_rep_sup]}, + {couch_log, + {couch_log, start_link, []}, + permanent, + brutal_kill, + worker, + [couch_log]} + ], + {ok, {{one_for_one, 10, 3600}, Children}}. + diff --git a/apps/couch/src/couch_proc_manager.erl b/apps/couch/src/couch_proc_manager.erl new file mode 100644 index 00000000..438f7973 --- /dev/null +++ b/apps/couch/src/couch_proc_manager.erl @@ -0,0 +1,184 @@ +-module(couch_proc_manager). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/0, get_proc_count/0]). + +-include("couch_db.hrl"). + +-record(state, {tab}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +get_proc_count() -> + gen_server:call(?MODULE, get_proc_count). + +init([]) -> + process_flag(trap_exit, true), + {ok, #state{tab = ets:new(procs, [{keypos, #proc.pid}])}}. + +handle_call(get_table, _From, State) -> + {reply, State#state.tab, State}; + +handle_call(get_proc_count, _From, State) -> + {reply, ets:info(State#state.tab, size), State}; + +handle_call({get_proc, #doc{body={Props}}=DDoc, DDocKey}, {Client, _}, State) -> + Lang = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + try get_procs(State#state.tab, Lang) of + Procs -> + case proc_with_ddoc(DDoc, DDocKey, Procs) of + {ok, Proc0} -> + Proc = Proc0#proc{client = erlang:monitor(process, Client)}, + ets:insert(State#state.tab, Proc), + {reply, {ok, Proc, get_query_server_config()}, State}; + {error, Reason} -> + {reply, {error, Reason}, State} + end + catch {unknown_query_language, _} -> + {reply, {unknown_query_language, Lang}, State}; + error:Reason -> + ?LOG_ERROR("~p ~p ~p", [?MODULE, Reason, erlang:get_stacktrace()]), + {reply, {error, Reason}, State} + end; + +handle_call({get_proc, Lang}, {Client, _}, State) -> + try get_procs(State#state.tab, Lang) of + [Proc0|_] -> + Proc = Proc0#proc{client = erlang:monitor(process, Client)}, + ets:insert(State#state.tab, Proc), + {reply, {ok, Proc, get_query_server_config()}, State} + catch {unknown_query_language, _} -> + {reply, {unknown_query_language, Lang}, State}; + error:Reason -> + ?LOG_ERROR("~p ~p ~p", [?MODULE, Reason, erlang:get_stacktrace()]), + {reply, {error, Reason}, State} + end; + +handle_call({ret_proc, #proc{client=Ref, pid=Pid} = Proc}, _From, State) -> + erlang:demonitor(Ref, [flush]), + % We need to check if the process is alive here, as the client could be + % handing us a #proc{} with a dead one. We would have already removed the + % #proc{} from our own table, so the alternative is to do a lookup in the + % table before the insert. Don't know which approach is cheaper. + case is_process_alive(Pid) of true -> + maybe_reuse_proc(State#state.tab, Proc); + false -> ok end, + {reply, true, State}; + +handle_call(_Call, _From, State) -> + {reply, ignored, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'EXIT', Pid, Reason}, State) -> + ?LOG_INFO("~p ~p died ~p", [?MODULE, Pid, Reason]), + ets:delete(State#state.tab, Pid), + {noreply, State}; + +handle_info({'DOWN', Ref, _, _, _Reason}, State) -> + case ets:match_object(State#state.tab, #proc{client=Ref, _='_'}) of + [] -> + ok; + [#proc{pid = Pid} = Proc] -> + case is_process_alive(Pid) of true -> + maybe_reuse_proc(State#state.tab, Proc); + false -> ok end + end, + {noreply, State}; + +handle_info(_Msg, State) -> + {noreply, State}. + +terminate(_Reason, #state{tab=Tab}) -> + ets:foldl(fun(#proc{pid=P}, _) -> couch_util:shutdown_sync(P) end, 0, Tab), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +maybe_reuse_proc(Tab, #proc{pid = Pid} = Proc) -> + Limit = couch_config:get("query_server_config", "os_process_soft_limit", "100"), + case ets:info(Tab, size) > list_to_integer(Limit) of + true -> + ets:delete(Tab, Pid), + unlink(Pid), + exit(Pid, kill); + false -> + garbage_collect(Pid), + ets:insert(Tab, Proc#proc{client=nil}) + end. + +get_procs(Tab, Lang) when is_binary(Lang) -> + get_procs(Tab, binary_to_list(Lang)); +get_procs(Tab, Lang) when is_list(Lang) -> + case ets:match_object(Tab, #proc{lang=Lang, client=nil, _='_'}) of + [] -> + {ok, NewProc} = new_proc(Lang), % check OS process limit + [NewProc]; + Procs -> + Procs + end. + +new_proc(Lang) when is_list(Lang) -> + case couch_config:get("query_servers", Lang) of + undefined -> + case couch_config:get("native_query_servers", Lang) of + undefined -> + throw({unknown_query_language, Lang}); + SpecStr -> + {ok, {M,F,A}} = couch_util:parse_term(SpecStr), + {ok, Pid} = apply(M, F, A), + make_proc(Pid, Lang, M) + end; + Command -> + {ok, Pid} = couch_os_process:start_link(Command), + make_proc(Pid, Lang, couch_os_process) + end. + +make_proc(Pid, Lang, Mod) -> + Proc = #proc{ + lang = Lang, + pid = Pid, + prompt_fun = {Mod, prompt}, + set_timeout_fun = {Mod, set_timeout}, + stop_fun = {Mod, stop} + }, + {ok, Proc}. + +get_query_server_config() -> + Limit = couch_config:get("query_server_config", <<"reduce_limit">>, "true"), + {[{<<"reduce_limit">>, list_to_atom(Limit)}]}. + +proc_with_ddoc(DDoc, DDocKey, Procs) -> + Filter = fun(#proc{ddoc_keys=Keys}) -> not lists:member(DDocKey, Keys) end, + case lists:dropwhile(Filter, Procs) of + [DDocProc|_] -> + {ok, DDocProc}; + [] -> + teach_any_proc(DDoc, DDocKey, Procs) + end. + +teach_any_proc(DDoc, DDocKey, [Proc|Rest]) -> + try + teach_ddoc(DDoc, DDocKey, Proc) + catch _:_ -> + teach_any_proc(DDoc, DDocKey, Rest) + end; +teach_any_proc(_, _, []) -> + {error, noproc}. + +teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc{ddoc_keys=Keys}=Proc) -> + % send ddoc over the wire + % we only share the rev with the client we know to update code + % but it only keeps the latest copy, per each ddoc, around. + true = couch_query_servers:proc_prompt(Proc, [<<"ddoc">>, <<"new">>, + DDocId, couch_doc:to_json_obj(DDoc, [])]), + % we should remove any other ddocs keys for this docid + % because the query server overwrites without the rev + Keys2 = [{D,R} || {D,R} <- Keys, D /= DDocId], + % add ddoc to the proc + {ok, Proc#proc{ddoc_keys=[DDocKey|Keys2]}}. diff --git a/apps/couch/src/couch_query_servers.erl b/apps/couch/src/couch_query_servers.erl new file mode 100644 index 00000000..9714a0ca --- /dev/null +++ b/apps/couch/src/couch_query_servers.erl @@ -0,0 +1,284 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_query_servers). + +-export([start_doc_map/3, map_docs/2, stop_doc_map/1]). +-export([reduce/3, rereduce/3,validate_doc_update/5]). +-export([filter_docs/5]). + +-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]). + +% For 210-os-proc-pool.t +-export([get_os_process/1, ret_os_process/1]). + +-include("couch_db.hrl"). + +start_doc_map(Lang, Functions, Lib) -> + Proc = get_os_process(Lang), + case Lib of + {[]} -> ok; + Lib -> + true = proc_prompt(Proc, [<<"add_lib">>, Lib]) + end, + lists:foreach(fun(FunctionSource) -> + true = proc_prompt(Proc, [<<"add_fun">>, FunctionSource]) + end, Functions), + {ok, Proc}. + +map_docs(Proc, Docs) -> + % send the documents + Results = lists:map( + fun(Doc) -> + Json = couch_doc:to_json_obj(Doc, []), + + FunsResults = proc_prompt(Proc, [<<"map_doc">>, Json]), + % the results are a json array of function map yields like this: + % [FunResults1, FunResults2 ...] + % where funresults is are json arrays of key value pairs: + % [[Key1, Value1], [Key2, Value2]] + % Convert the key, value pairs to tuples like + % [{Key1, Value1}, {Key2, Value2}] + lists:map( + fun(FunRs) -> + [list_to_tuple(FunResult) || FunResult <- FunRs] + end, + FunsResults) + end, + Docs), + {ok, Results}. + + +stop_doc_map(nil) -> + ok; +stop_doc_map(Proc) -> + ok = ret_os_process(Proc). + +group_reductions_results([]) -> + []; +group_reductions_results(List) -> + {Heads, Tails} = lists:foldl( + fun([H|T], {HAcc,TAcc}) -> + {[H|HAcc], [T|TAcc]} + end, {[], []}, List), + case Tails of + [[]|_] -> % no tails left + [Heads]; + _ -> + [Heads | group_reductions_results(Tails)] + end. + +rereduce(_Lang, [], _ReducedValues) -> + {ok, []}; +rereduce(Lang, RedSrcs, ReducedValues) -> + Grouped = group_reductions_results(ReducedValues), + Results = lists:zipwith( + fun + (<<"_", _/binary>> = FunSrc, Values) -> + {ok, [Result]} = builtin_reduce(rereduce, [FunSrc], [[[], V] || V <- Values], []), + Result; + (FunSrc, Values) -> + os_rereduce(Lang, [FunSrc], Values) + end, RedSrcs, Grouped), + {ok, Results}. + +reduce(_Lang, [], _KVs) -> + {ok, []}; +reduce(Lang, RedSrcs, KVs) -> + {OsRedSrcs, BuiltinReds} = lists:partition(fun + (<<"_", _/binary>>) -> false; + (_OsFun) -> true + end, RedSrcs), + {ok, OsResults} = os_reduce(Lang, OsRedSrcs, KVs), + {ok, BuiltinResults} = builtin_reduce(reduce, BuiltinReds, KVs, []), + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, []). + +recombine_reduce_results([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +recombine_reduce_results([<<"_", _/binary>>|RedSrcs], OsResults, [BRes|BuiltinResults], Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [BRes|Acc]); +recombine_reduce_results([_OsFun|RedSrcs], [OsR|OsResults], BuiltinResults, Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [OsR|Acc]). + +os_reduce(_Lang, [], _KVs) -> + {ok, []}; +os_reduce(#proc{} = Proc, OsRedSrcs, KVs) -> + [true, Reductions] = proc_prompt(Proc, [<<"reduce">>, OsRedSrcs, KVs]), + {ok, Reductions}; +os_reduce(Lang, OsRedSrcs, KVs) -> + Proc = get_os_process(Lang), + try os_reduce(Proc, OsRedSrcs, KVs) after ok = ret_os_process(Proc) end. + +os_rereduce(#proc{} = Proc, OsRedSrcs, KVs) -> + [true, [Reduction]] = proc_prompt(Proc, [<<"rereduce">>, OsRedSrcs, KVs]), + Reduction; +os_rereduce(Lang, OsRedSrcs, KVs) -> + Proc = get_os_process(Lang), + try os_rereduce(Proc, OsRedSrcs, KVs) after ok = ret_os_process(Proc) end. + +builtin_reduce(_Re, [], _KVs, Acc) -> + {ok, lists:reverse(Acc)}; +builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> + Sum = builtin_sum_rows(KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]); +builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = length(KVs), + builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = builtin_sum_rows(KVs), + builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) -> + Stats = builtin_stats(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]). + +builtin_sum_rows(KVs) -> + lists:foldl(fun + ([_Key, Value], Acc) when is_number(Value), is_number(Acc) -> + Acc + Value; + ([_Key, Value], Acc) when is_list(Value), is_list(Acc) -> + sum_terms(Acc, Value); + ([_Key, Value], Acc) when is_number(Value), is_list(Acc) -> + sum_terms(Acc, [Value]); + ([_Key, Value], Acc) when is_list(Value), is_number(Acc) -> + sum_terms([Acc], Value); + (_Else, _Acc) -> + throw({invalid_value, <<"builtin _sum function requires map values to be numbers or lists of numbers">>}) + end, 0, KVs). + +sum_terms([], []) -> + []; +sum_terms([_|_]=Xs, []) -> + Xs; +sum_terms([], [_|_]=Ys) -> + Ys; +sum_terms([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) -> + [X+Y | sum_terms(Xs,Ys)]; +sum_terms(_, _) -> + throw({invalid_value, <<"builtin _sum function requires map values to be numbers or lists of numbers">>}). + +builtin_stats(_, []) -> + {[{sum,0}, {count,0}, {min,0}, {max,0}, {sumsqr,0}]}; + +builtin_stats(reduce, [[_,First]|Rest]) when is_number(First) -> + Stats = lists:foldl(fun([_K,V], {S,C,Mi,Ma,Sq}) when is_number(V) -> + {S+V, C+1, lists:min([Mi, V]), lists:max([Ma, V]), Sq+(V*V)}; + (_, _) -> + throw({invalid_value, + <<"builtin _stats function requires map values to be numbers">>}) + end, {First,1,First,First,First*First}, Rest), + {Sum, Cnt, Min, Max, Sqr} = Stats, + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]}; + +builtin_stats(rereduce, [[_,First]|Rest]) -> + {[{sum,Sum0}, {count,Cnt0}, {min,Min0}, {max,Max0}, {sumsqr,Sqr0}]} = First, + Stats = lists:foldl(fun([_K,Red], {S,C,Mi,Ma,Sq}) -> + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]} = Red, + {Sum+S, Cnt+C, lists:min([Min, Mi]), lists:max([Max, Ma]), Sqr+Sq} + end, {Sum0,Cnt0,Min0,Max0,Sqr0}, Rest), + {Sum, Cnt, Min, Max, Sqr} = Stats, + {[{sum,Sum}, {count,Cnt}, {min,Min}, {max,Max}, {sumsqr,Sqr}]}. + +% use the function stored in ddoc.validate_doc_update to test an update. +validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> + JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]), + JsonDiskDoc = json_doc(DiskDoc), + case ddoc_prompt(DDoc, [<<"validate_doc_update">>], [JsonEditDoc, JsonDiskDoc, Ctx, SecObj]) of + 1 -> + ok; + {[{<<"forbidden">>, Message}]} -> + throw({forbidden, Message}); + {[{<<"unauthorized">>, Message}]} -> + throw({unauthorized, Message}) + end. + +json_doc(nil) -> null; +json_doc(Doc) -> + couch_doc:to_json_obj(Doc, [revs]). + +filter_docs(Req, Db, DDoc, FName, Docs) -> + JsonReq = case Req of + {json_req, JsonObj} -> + JsonObj; + #httpd{} = HttpReq -> + couch_httpd_external:json_req_obj(HttpReq, Db) + end, + JsonDocs = [couch_doc:to_json_obj(Doc, [revs]) || Doc <- Docs], + [true, Passes] = ddoc_prompt(DDoc, [<<"filters">>, FName], [JsonDocs, JsonReq]), + {ok, Passes}. + +ddoc_proc_prompt({Proc, DDocId}, FunPath, Args) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]). + +ddoc_prompt(DDoc, FunPath, Args) -> + with_ddoc_proc(DDoc, fun({Proc, DDocId}) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]) + end). + +with_ddoc_proc(#doc{id=DDocId,revs={Start, [DiskRev|_]}}=DDoc, Fun) -> + Rev = couch_doc:rev_to_str({Start, DiskRev}), + DDocKey = {DDocId, Rev}, + Proc = get_ddoc_process(DDoc, DDocKey), + try Fun({Proc, DDocId}) + after + ok = ret_os_process(Proc) + end. + +proc_prompt(Proc, Args) -> + {Mod, Func} = Proc#proc.prompt_fun, + apply(Mod, Func, [Proc#proc.pid, Args]). + +proc_stop(Proc) -> + {Mod, Func} = Proc#proc.stop_fun, + apply(Mod, Func, [Proc#proc.pid]). + +proc_set_timeout(Proc, Timeout) -> + {Mod, Func} = Proc#proc.set_timeout_fun, + apply(Mod, Func, [Proc#proc.pid, Timeout]). + +get_ddoc_process(#doc{} = DDoc, DDocKey) -> + % remove this case statement + case gen_server:call(couch_proc_manager, {get_proc, DDoc, DDocKey}) of + {ok, Proc, QueryConfig} -> + % process knows the ddoc + case (catch proc_prompt(Proc, [<<"reset">>, QueryConfig])) of + true -> + proc_set_timeout(Proc, list_to_integer(couch_config:get( + "couchdb", "os_process_timeout", "5000"))), + Proc; + _ -> + catch proc_stop(Proc), + get_ddoc_process(DDoc, DDocKey) + end; + Error -> + throw(Error) + end. + +get_os_process(Lang) -> + case gen_server:call(couch_proc_manager, {get_proc, Lang}) of + {ok, Proc, QueryConfig} -> + case (catch proc_prompt(Proc, [<<"reset">>, QueryConfig])) of + true -> + proc_set_timeout(Proc, list_to_integer(couch_config:get( + "couchdb", "os_process_timeout", "5000"))), + Proc; + _ -> + catch proc_stop(Proc), + get_os_process(Lang) + end; + Error -> + throw(Error) + end. + +ret_os_process(Proc) -> + true = gen_server:call(couch_proc_manager, {ret_proc, Proc}), + catch unlink(Proc#proc.pid), + ok. diff --git a/apps/couch/src/couch_ref_counter.erl b/apps/couch/src/couch_ref_counter.erl new file mode 100644 index 00000000..a774f469 --- /dev/null +++ b/apps/couch/src/couch_ref_counter.erl @@ -0,0 +1,111 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_ref_counter). +-behaviour(gen_server). + +-export([start/1, init/1, terminate/2, handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([drop/1,drop/2,add/1,add/2,count/1]). + +start(ChildProcs) -> + gen_server:start(couch_ref_counter, {self(), ChildProcs}, []). + + +drop(RefCounterPid) -> + drop(RefCounterPid, self()). + +drop(RefCounterPid, Pid) -> + gen_server:call(RefCounterPid, {drop, Pid}, infinity). + + +add(RefCounterPid) -> + add(RefCounterPid, self()). + +add(RefCounterPid, Pid) -> + gen_server:call(RefCounterPid, {add, Pid}, infinity). + +count(RefCounterPid) -> + gen_server:call(RefCounterPid, count). + +% server functions + +-record(srv, + { + referrers=dict:new(), % a dict of each ref counting proc. + child_procs=[] + }). + +init({Pid, ChildProcs}) -> + [link(ChildProc) || ChildProc <- ChildProcs], + Referrers = dict:from_list([{Pid, {erlang:monitor(process, Pid), 1}}]), + {ok, #srv{referrers=Referrers, child_procs=ChildProcs}}. + + +terminate(_Reason, #srv{child_procs=ChildProcs}) -> + [couch_util:shutdown_sync(Pid) || Pid <- ChildProcs], + ok. + + +handle_call({add, Pid},_From, #srv{referrers=Referrers}=Srv) -> + Referrers2 = + case dict:find(Pid, Referrers) of + error -> + dict:store(Pid, {erlang:monitor(process, Pid), 1}, Referrers); + {ok, {MonRef, RefCnt}} -> + dict:store(Pid, {MonRef, RefCnt + 1}, Referrers) + end, + {reply, ok, Srv#srv{referrers=Referrers2}}; +handle_call(count, _From, Srv) -> + {monitors, Monitors} = process_info(self(), monitors), + {reply, length(Monitors), Srv}; +handle_call({drop, Pid}, _From, #srv{referrers=Referrers}=Srv) -> + Referrers2 = + case dict:find(Pid, Referrers) of + {ok, {MonRef, 1}} -> + erlang:demonitor(MonRef, [flush]), + dict:erase(Pid, Referrers); + {ok, {MonRef, Num}} -> + dict:store(Pid, {MonRef, Num-1}, Referrers); + error -> + Referrers + end, + Srv2 = Srv#srv{referrers=Referrers2}, + case should_close() of + true -> + {stop,normal,ok,Srv2}; + false -> + {reply, ok, Srv2} + end. + +handle_cast(Msg, _Srv)-> + exit({unknown_msg,Msg}). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info({'DOWN', MonRef, _, Pid, _}, #srv{referrers=Referrers}=Srv) -> + {ok, {MonRef, _RefCount}} = dict:find(Pid, Referrers), + Srv2 = Srv#srv{referrers=dict:erase(Pid, Referrers)}, + case should_close() of + true -> + {stop,normal,Srv2}; + false -> + {noreply,Srv2} + end. + + +should_close() -> + case process_info(self(), monitors) of + {monitors, []} -> true; + _ -> false + end. diff --git a/apps/couch/src/couch_rep.erl b/apps/couch/src/couch_rep.erl new file mode 100644 index 00000000..8f2508f5 --- /dev/null +++ b/apps/couch/src/couch_rep.erl @@ -0,0 +1,923 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([replicate/2, replicate/3, checkpoint/1]). +-export([make_replication_id/2]). +-export([start_replication/4, end_replication/1, get_result/4]). + +-include("couch_db.hrl"). +-include("couch_js_functions.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-define(REP_ID_VERSION, 2). + +-record(state, { + changes_feed, + missing_revs, + reader, + writer, + + source, + target, + continuous, + create_target, + init_args, + checkpoint_scheduled = nil, + + start_seq, + history, + session_id, + source_log, + target_log, + rep_starttime, + src_starttime, + tgt_starttime, + checkpoint_history = nil, + + listeners = [], + complete = false, + committed_seq = 0, + + stats = nil, + source_db_update_notifier = nil, + target_db_update_notifier = nil +}). + +%% convenience function to do a simple replication from the shell +replicate(Source, Target) when is_list(Source) -> + replicate(?l2b(Source), Target); +replicate(Source, Target) when is_binary(Source), is_list(Target) -> + replicate(Source, ?l2b(Target)); +replicate(Source, Target) when is_binary(Source), is_binary(Target) -> + replicate({[{<<"source">>, Source}, {<<"target">>, Target}]}, #user_ctx{}); + +%% function handling POST to _replicate +replicate(PostBody, UserCtx) -> + replicate(PostBody, UserCtx, couch_replication_manager). + +replicate({Props}=PostBody, UserCtx, Module) -> + RepId = make_replication_id(PostBody, UserCtx), + case couch_util:get_value(<<"cancel">>, Props, false) of + true -> + end_replication(RepId); + false -> + Server = start_replication(PostBody, RepId, UserCtx, Module), + get_result(Server, RepId, PostBody, UserCtx) + end. + +end_replication({BaseId, Extension}) -> + RepId = BaseId ++ Extension, + case supervisor:terminate_child(couch_rep_sup, RepId) of + {error, not_found} = R -> + R; + ok -> + case supervisor:delete_child(couch_rep_sup, RepId) of + ok -> + {ok, {cancelled, ?l2b(BaseId)}}; + {error, not_found} -> + {ok, {cancelled, ?l2b(BaseId)}}; + {error, _} = Error -> + Error + end + end. + +start_replication(RepDoc, {BaseId, Extension} = RepId, UserCtx, Module) -> + Replicator = { + BaseId ++ Extension, + {gen_server, start_link, + [?MODULE, [RepId, RepDoc, UserCtx, Module], []]}, + temporary, + 1, + worker, + [?MODULE] + }, + start_replication_server(Replicator). + +checkpoint(Server) -> + gen_server:cast(Server, do_checkpoint). + +get_result(Server, {BaseId, _Extension}, {Props} = PostBody, UserCtx) -> + case couch_util:get_value(<<"continuous">>, Props, false) of + true -> + {ok, {continuous, ?l2b(BaseId)}}; + false -> + try gen_server:call(Server, get_result, infinity) of + retry -> replicate(PostBody, UserCtx); + Else -> Else + catch + exit:{noproc, {gen_server, call, [Server, get_result, infinity]}} -> + %% oops, this replication just finished -- restart it. + replicate(PostBody, UserCtx); + exit:{normal, {gen_server, call, [Server, get_result, infinity]}} -> + %% we made the call during terminate + replicate(PostBody, UserCtx) + end + end. + +init(InitArgs) -> + try + do_init(InitArgs) + catch + throw:Error -> + {stop, Error} + end. + +do_init([{BaseId, _Ext} = RepId, {PostProps}, UserCtx, Module] = InitArgs) -> + process_flag(trap_exit, true), + + SourceProps = couch_util:get_value(<<"source">>, PostProps), + TargetProps = couch_util:get_value(<<"target">>, PostProps), + + Continuous = couch_util:get_value(<<"continuous">>, PostProps, false), + CreateTarget = couch_util:get_value(<<"create_target">>, PostProps, false), + + ProxyParams = parse_proxy_params( + couch_util:get_value(<<"proxy">>, PostProps, [])), + Source = open_db(SourceProps, UserCtx, ProxyParams), + Target = open_db(TargetProps, UserCtx, ProxyParams, CreateTarget), + + SourceInfo = dbinfo(Source), + TargetInfo = dbinfo(Target), + + [SourceLog, TargetLog] = find_replication_logs( + [Source, Target], BaseId, {PostProps}, UserCtx), + {StartSeq, History} = compare_replication_logs(SourceLog, TargetLog), + + {ok, ChangesFeed} = + couch_rep_changes_feed:start_link(self(), Source, StartSeq, PostProps), + {ok, MissingRevs} = + couch_rep_missing_revs:start_link(self(), Target, ChangesFeed, PostProps), + {ok, Reader} = + couch_rep_reader:start_link(self(), Source, MissingRevs, PostProps), + {ok, Writer} = + couch_rep_writer:start_link(self(), Target, Reader, PostProps), + + Stats = ets:new(replication_stats, [set, private]), + ets:insert(Stats, {total_revs,0}), + ets:insert(Stats, {missing_revs, 0}), + ets:insert(Stats, {docs_read, 0}), + ets:insert(Stats, {docs_written, 0}), + ets:insert(Stats, {doc_write_failures, 0}), + + {ShortId, _} = lists:split(6, BaseId), + couch_task_status:add_task("Replication", io_lib:format("~s: ~s -> ~s", + [ShortId, dbname(Source), dbname(Target)]), "Starting"), + + Module:replication_started(RepId), + + State = #state{ + changes_feed = ChangesFeed, + missing_revs = MissingRevs, + reader = Reader, + writer = Writer, + + source = Source, + target = Target, + continuous = Continuous, + create_target = CreateTarget, + init_args = InitArgs, + stats = Stats, + checkpoint_scheduled = nil, + + start_seq = StartSeq, + history = History, + session_id = couch_uuids:random(), + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = httpd_util:rfc1123_date(), + src_starttime = couch_util:get_value(instance_start_time, SourceInfo), + tgt_starttime = couch_util:get_value(instance_start_time, TargetInfo), + source_db_update_notifier = source_db_update_notifier(Source), + target_db_update_notifier = target_db_update_notifier(Target) + }, + {ok, State}. + +handle_call(get_result, From, #state{complete=true, listeners=[]} = State) -> + {stop, normal, State#state{listeners=[From]}}; +handle_call(get_result, From, State) -> + Listeners = State#state.listeners, + {noreply, State#state{listeners=[From|Listeners]}}; + +handle_call(get_source_db, _From, #state{source = Source} = State) -> + {reply, {ok, Source}, State}; + +handle_call(get_target_db, _From, #state{target = Target} = State) -> + {reply, {ok, Target}, State}. + +handle_cast(reopen_source_db, #state{source = Source} = State) -> + {ok, NewSource} = couch_db:reopen(Source), + {noreply, State#state{source = NewSource}}; + +handle_cast(reopen_target_db, #state{target = Target} = State) -> + {ok, NewTarget} = couch_db:reopen(Target), + {noreply, State#state{target = NewTarget}}; + +handle_cast(do_checkpoint, State) -> + {noreply, do_checkpoint(State)}; + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({missing_revs_checkpoint, SourceSeq}, State) -> + couch_task_status:update("MR Processed source update #~p", [SourceSeq]), + {noreply, schedule_checkpoint(State#state{committed_seq = SourceSeq})}; + +handle_info({writer_checkpoint, SourceSeq}, #state{committed_seq=N} = State) + when SourceSeq > N -> + MissingRevs = State#state.missing_revs, + ok = gen_server:cast(MissingRevs, {update_committed_seq, SourceSeq}), + couch_task_status:update("W Processed source update #~p", [SourceSeq]), + {noreply, schedule_checkpoint(State#state{committed_seq = SourceSeq})}; +handle_info({writer_checkpoint, _}, State) -> + {noreply, State}; + +handle_info({update_stats, Key, N}, State) -> + ets:update_counter(State#state.stats, Key, N), + {noreply, State}; + +handle_info({'DOWN', _, _, _, _}, State) -> + ?LOG_INFO("replication terminating because local DB is shutting down", []), + timer:cancel(State#state.checkpoint_scheduled), + {stop, shutdown, State}; + +handle_info({'EXIT', Writer, normal}, #state{writer=Writer} = State) -> + case State#state.listeners of + [] -> + {noreply, State#state{complete = true}}; + _Else -> + {stop, normal, State} + end; + +handle_info({'EXIT', _, normal}, State) -> + {noreply, State}; +handle_info({'EXIT', _Pid, {Err, Reason}}, State) when Err == source_error; + Err == target_error -> + ?LOG_INFO("replication terminating due to ~p: ~p", [Err, Reason]), + timer:cancel(State#state.checkpoint_scheduled), + {stop, shutdown, State}; +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}. + +terminate(normal, #state{checkpoint_scheduled=nil, init_args=[RepId, _, _, Module]} = State) -> + do_terminate(State), + Module:replication_completed(RepId); + +terminate(normal, #state{init_args=[RepId, _, _, Module]} = State) -> + timer:cancel(State#state.checkpoint_scheduled), + do_terminate(do_checkpoint(State)), + Module:replication_completed(RepId); + +terminate(shutdown, #state{listeners = Listeners} = State) -> + % continuous replication stopped + [gen_server:reply(L, {ok, stopped}) || L <- Listeners], + terminate_cleanup(State); + +terminate(Reason, #state{listeners = Listeners, init_args=[RepId, _, _, Module]} = State) -> + [gen_server:reply(L, {error, Reason}) || L <- Listeners], + terminate_cleanup(State), + Module:replication_error(RepId, Reason). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +% internal funs + +start_replication_server(Replicator) -> + RepId = element(1, Replicator), + case supervisor:start_child(couch_rep_sup, Replicator) of + {ok, Pid} -> + ?LOG_INFO("starting new replication ~p at ~p", [RepId, Pid]), + Pid; + {error, already_present} -> + case supervisor:restart_child(couch_rep_sup, RepId) of + {ok, Pid} -> + ?LOG_INFO("starting replication ~p at ~p", [RepId, Pid]), + Pid; + {error, running} -> + %% this error occurs if multiple replicators are racing + %% each other to start and somebody else won. Just grab + %% the Pid by calling start_child again. + {error, {already_started, Pid}} = + supervisor:start_child(couch_rep_sup, Replicator), + ?LOG_DEBUG("replication ~p already running at ~p", [RepId, Pid]), + Pid; + {error, {db_not_found, DbUrl}} -> + throw({db_not_found, <<"could not open ", DbUrl/binary>>}); + {error, {unauthorized, DbUrl}} -> + throw({unauthorized, + <<"unauthorized to access or create database ", DbUrl/binary>>}); + {error, {'EXIT', {badarg, + [{erlang, apply, [gen_server, start_link, undefined]} | _]}}} -> + % Clause to deal with a change in the supervisor module introduced + % in R14B02. For more details consult the thread at: + % http://erlang.org/pipermail/erlang-bugs/2011-March/002273.html + _ = supervisor:delete_child(couch_rep_sup, RepId), + start_replication_server(Replicator) + end; + {error, {already_started, Pid}} -> + ?LOG_DEBUG("replication ~p already running at ~p", [RepId, Pid]), + Pid; + {error, {{db_not_found, DbUrl}, _}} -> + throw({db_not_found, <<"could not open ", DbUrl/binary>>}); + {error, {{unauthorized, DbUrl}, _}} -> + throw({unauthorized, + <<"unauthorized to access or create database ", DbUrl/binary>>}) + end. + +compare_replication_logs(SrcDoc, TgtDoc) -> + #doc{body={RepRecProps}} = SrcDoc, + #doc{body={RepRecPropsTgt}} = TgtDoc, + case couch_util:get_value(<<"session_id">>, RepRecProps) == + couch_util:get_value(<<"session_id">>, RepRecPropsTgt) of + true -> + % if the records have the same session id, + % then we have a valid replication history + OldSeqNum = couch_util:get_value(<<"source_last_seq">>, RepRecProps, 0), + OldHistory = couch_util:get_value(<<"history">>, RepRecProps, []), + {OldSeqNum, OldHistory}; + false -> + SourceHistory = couch_util:get_value(<<"history">>, RepRecProps, []), + TargetHistory = couch_util:get_value(<<"history">>, RepRecPropsTgt, []), + ?LOG_INFO("Replication records differ. " + "Scanning histories to find a common ancestor.", []), + ?LOG_DEBUG("Record on source:~p~nRecord on target:~p~n", + [RepRecProps, RepRecPropsTgt]), + compare_rep_history(SourceHistory, TargetHistory) + end. + +compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> + ?LOG_INFO("no common ancestry -- performing full replication", []), + {0, []}; +compare_rep_history([{S}|SourceRest], [{T}|TargetRest]=Target) -> + SourceId = couch_util:get_value(<<"session_id">>, S), + case has_session_id(SourceId, Target) of + true -> + RecordSeqNum = couch_util:get_value(<<"recorded_seq">>, S, 0), + ?LOG_INFO("found a common replication record with source_seq ~p", + [RecordSeqNum]), + {RecordSeqNum, SourceRest}; + false -> + TargetId = couch_util:get_value(<<"session_id">>, T), + case has_session_id(TargetId, SourceRest) of + true -> + RecordSeqNum = couch_util:get_value(<<"recorded_seq">>, T, 0), + ?LOG_INFO("found a common replication record with source_seq ~p", + [RecordSeqNum]), + {RecordSeqNum, TargetRest}; + false -> + compare_rep_history(SourceRest, TargetRest) + end + end. + +close_db(#http_db{}) -> + ok; +close_db(Db) -> + couch_db:close(Db). + +dbname(#http_db{url = Url}) -> + couch_util:url_strip_password(Url); +dbname(#db{name = Name}) -> + Name. + +dbinfo(#http_db{} = Db) -> + {DbProps} = couch_rep_httpc:request(Db), + [{couch_util:to_existing_atom(K), V} || {K,V} <- DbProps]; +dbinfo(Db) -> + {ok, Info} = couch_db:get_db_info(Db), + Info. + +do_terminate(State) -> + #state{ + checkpoint_history = CheckpointHistory, + committed_seq = NewSeq, + listeners = Listeners, + source = Source, + continuous = Continuous, + source_log = #doc{body={OldHistory}} + } = State, + + NewRepHistory = case CheckpointHistory of + nil -> + {[{<<"no_changes">>, true} | OldHistory]}; + _Else -> + CheckpointHistory + end, + + %% reply to original requester + OtherListeners = case Continuous of + true -> + []; % continuous replications have no listeners + _ -> + [Original|Rest] = lists:reverse(Listeners), + gen_server:reply(Original, {ok, NewRepHistory}), + Rest + end, + + %% maybe trigger another replication. If this replicator uses a local + %% source Db, changes to that Db since we started will not be included in + %% this pass. + case up_to_date(Source, NewSeq) of + true -> + [gen_server:reply(R, {ok, NewRepHistory}) || R <- OtherListeners]; + false -> + [gen_server:reply(R, retry) || R <- OtherListeners] + end, + couch_task_status:update("Finishing"), + terminate_cleanup(State). + +terminate_cleanup(State) -> + close_db(State#state.source), + close_db(State#state.target), + stop_db_update_notifier(State#state.source_db_update_notifier), + stop_db_update_notifier(State#state.target_db_update_notifier), + ets:delete(State#state.stats). + +stop_db_update_notifier(nil) -> + ok; +stop_db_update_notifier(Notifier) -> + couch_db_update_notifier:stop(Notifier). + +has_session_id(_SessionId, []) -> + false; +has_session_id(SessionId, [{Props} | Rest]) -> + case couch_util:get_value(<<"session_id">>, Props, nil) of + SessionId -> + true; + _Else -> + has_session_id(SessionId, Rest) + end. + +maybe_append_options(Options, {Props}) -> + lists:foldl(fun(Option, Acc) -> + Acc ++ + case couch_util:get_value(Option, Props, false) of + true -> + "+" ++ ?b2l(Option); + false -> + "" + end + end, [], Options). + +make_replication_id(RepProps, UserCtx) -> + BaseId = make_replication_id(RepProps, UserCtx, ?REP_ID_VERSION), + Extension = maybe_append_options( + [<<"continuous">>, <<"create_target">>], RepProps), + {BaseId, Extension}. + +% Versioned clauses for generating replication ids +% If a change is made to how replications are identified +% add a new clause and increase ?REP_ID_VERSION at the top +make_replication_id({Props}, UserCtx, 2) -> + {ok, HostName} = inet:gethostname(), + Port = case (catch mochiweb_socket_server:get(couch_httpd, port)) of + P when is_number(P) -> + P; + _ -> + % On restart we might be called before the couch_httpd process is + % started. + % TODO: we might be under an SSL socket server only, or both under + % SSL and a non-SSL socket. + % ... mochiweb_socket_server:get(https, port) + list_to_integer(couch_config:get("httpd", "port", "5984")) + end, + Src = get_rep_endpoint(UserCtx, couch_util:get_value(<<"source">>, Props)), + Tgt = get_rep_endpoint(UserCtx, couch_util:get_value(<<"target">>, Props)), + maybe_append_filters({Props}, [HostName, Port, Src, Tgt], UserCtx); +make_replication_id({Props}, UserCtx, 1) -> + {ok, HostName} = inet:gethostname(), + Src = get_rep_endpoint(UserCtx, couch_util:get_value(<<"source">>, Props)), + Tgt = get_rep_endpoint(UserCtx, couch_util:get_value(<<"target">>, Props)), + maybe_append_filters({Props}, [HostName, Src, Tgt], UserCtx). + +maybe_append_filters({Props}, Base, UserCtx) -> + Base2 = Base ++ + case couch_util:get_value(<<"filter">>, Props) of + undefined -> + case couch_util:get_value(<<"doc_ids">>, Props) of + undefined -> + []; + DocIds -> + [DocIds] + end; + Filter -> + [filter_code(Filter, Props, UserCtx), + couch_util:get_value(<<"query_params">>, Props, {[]})] + end, + couch_util:to_hex(couch_util:md5(term_to_binary(Base2))). + +filter_code(Filter, Props, UserCtx) -> + {DDocName, FilterName} = + case re:run(Filter, "(.*?)/(.*)", [{capture, [1, 2], binary}]) of + {match, [DDocName0, FilterName0]} -> + {DDocName0, FilterName0}; + _ -> + throw({error, <<"Invalid filter. Must match `ddocname/filtername`.">>}) + end, + ProxyParams = parse_proxy_params( + couch_util:get_value(<<"proxy">>, Props, [])), + DbName = couch_util:get_value(<<"source">>, Props), + Source = try + open_db(DbName, UserCtx, ProxyParams) + catch + _Tag:DbError -> + DbErrorMsg = io_lib:format("Could not open source database `~s`: ~s", + [couch_util:url_strip_password(DbName), couch_util:to_binary(DbError)]), + throw({error, iolist_to_binary(DbErrorMsg)}) + end, + try + Body = case (catch open_doc(Source, <<"_design/", DDocName/binary>>)) of + {ok, #doc{body = Body0}} -> + Body0; + DocError -> + DocErrorMsg = io_lib:format( + "Couldn't open document `_design/~s` from source " + "database `~s`: ~s", + [DDocName, dbname(Source), couch_util:to_binary(DocError)]), + throw({error, iolist_to_binary(DocErrorMsg)}) + end, + Code = couch_util:get_nested_json_value( + Body, [<<"filters">>, FilterName]), + re:replace(Code, "^\s*(.*?)\s*$", "\\1", [{return, binary}]) + after + close_db(Source) + end. + +maybe_add_trailing_slash(Url) -> + re:replace(Url, "[^/]$", "&/", [{return, list}]). + +get_rep_endpoint(_UserCtx, {Props}) -> + Url = maybe_add_trailing_slash(couch_util:get_value(<<"url">>, Props)), + {BinHeaders} = couch_util:get_value(<<"headers">>, Props, {[]}), + {Auth} = couch_util:get_value(<<"auth">>, Props, {[]}), + case couch_util:get_value(<<"oauth">>, Auth) of + undefined -> + {remote, Url, [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders]}; + {OAuth} -> + {remote, Url, [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders], OAuth} + end; +get_rep_endpoint(_UserCtx, <<"http://",_/binary>>=Url) -> + {remote, maybe_add_trailing_slash(Url), []}; +get_rep_endpoint(_UserCtx, <<"https://",_/binary>>=Url) -> + {remote, maybe_add_trailing_slash(Url), []}; +get_rep_endpoint(UserCtx, <<DbName/binary>>) -> + {local, DbName, UserCtx}. + +find_replication_logs(DbList, RepId, RepProps, UserCtx) -> + LogId = ?l2b(?LOCAL_DOC_PREFIX ++ RepId), + fold_replication_logs(DbList, ?REP_ID_VERSION, + LogId, LogId, RepProps, UserCtx, []). + +% Accumulate the replication logs +% Falls back to older log document ids and migrates them +fold_replication_logs([], _Vsn, _LogId, _NewId, _RepProps, _UserCtx, Acc) -> + lists:reverse(Acc); +fold_replication_logs([Db|Rest]=Dbs, Vsn, LogId, NewId, + RepProps, UserCtx, Acc) -> + case open_replication_log(Db, LogId) of + {error, not_found} when Vsn > 1 -> + OldRepId = make_replication_id(RepProps, UserCtx, Vsn - 1), + fold_replication_logs(Dbs, Vsn - 1, + ?l2b(?LOCAL_DOC_PREFIX ++ OldRepId), NewId, RepProps, UserCtx, Acc); + {error, not_found} -> + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, + RepProps, UserCtx, [#doc{id=NewId}|Acc]); + {ok, Doc} when LogId =:= NewId -> + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, + RepProps, UserCtx, [Doc|Acc]); + {ok, Doc} -> + MigratedLog = #doc{id=NewId,body=Doc#doc.body}, + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, + RepProps, UserCtx, [MigratedLog|Acc]) + end. + +open_replication_log(Db, DocId) -> + case open_doc(Db, DocId) of + {ok, Doc} -> + ?LOG_DEBUG("found a replication log for ~s", [dbname(Db)]), + {ok, Doc}; + _ -> + ?LOG_DEBUG("didn't find a replication log for ~s", [dbname(Db)]), + {error, not_found} + end. + +open_doc(#http_db{} = Db, DocId) -> + Req = Db#http_db{resource = couch_util:encode_doc_id(DocId)}, + case couch_rep_httpc:request(Req) of + {[{<<"error">>, _}, {<<"reason">>, _}]} -> + {error, not_found}; + Doc -> + {ok, couch_doc:from_json_obj(Doc)} + end; +open_doc(Db, DocId) -> + couch_db:open_doc(Db, DocId). + +open_db(Props, UserCtx, ProxyParams) -> + open_db(Props, UserCtx, ProxyParams, false). + +open_db({Props}, _UserCtx, ProxyParams, CreateTarget) -> + Url = maybe_add_trailing_slash(couch_util:get_value(<<"url">>, Props)), + {AuthProps} = couch_util:get_value(<<"auth">>, Props, {[]}), + {BinHeaders} = couch_util:get_value(<<"headers">>, Props, {[]}), + Headers = [{?b2l(K),?b2l(V)} || {K,V} <- BinHeaders], + DefaultHeaders = (#http_db{})#http_db.headers, + Db1 = #http_db{ + url = Url, + auth = AuthProps, + headers = lists:ukeymerge(1, Headers, DefaultHeaders) + }, + Db = Db1#http_db{ + options = Db1#http_db.options ++ ProxyParams ++ + couch_rep_httpc:ssl_options(Db1) + }, + couch_rep_httpc:db_exists(Db, CreateTarget); +open_db(<<"http://",_/binary>>=Url, _, ProxyParams, CreateTarget) -> + open_db({[{<<"url">>,Url}]}, [], ProxyParams, CreateTarget); +open_db(<<"https://",_/binary>>=Url, _, ProxyParams, CreateTarget) -> + open_db({[{<<"url">>,Url}]}, [], ProxyParams, CreateTarget); +open_db(<<DbName/binary>>, UserCtx, _ProxyParams, CreateTarget) -> + try + case CreateTarget of + true -> + ok = couch_httpd:verify_is_server_admin(UserCtx), + couch_server:create(DbName, [{user_ctx, UserCtx}]); + false -> + ok + end, + + case couch_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + couch_db:monitor(Db), + Db; + {not_found, no_db_file} -> + throw({db_not_found, DbName}) + end + catch throw:{unauthorized, _} -> + throw({unauthorized, DbName}) + end. + +schedule_checkpoint(#state{checkpoint_scheduled = nil} = State) -> + Server = self(), + case timer:apply_after(5000, couch_rep, checkpoint, [Server]) of + {ok, TRef} -> + State#state{checkpoint_scheduled = TRef}; + Error -> + ?LOG_ERROR("tried to schedule a checkpoint but got ~p", [Error]), + State + end; +schedule_checkpoint(State) -> + State. + +do_checkpoint(State) -> + #state{ + source = Source, + target = Target, + committed_seq = NewSeqNum, + start_seq = StartSeqNum, + history = OldHistory, + session_id = SessionId, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = ReplicationStartTime, + src_starttime = SrcInstanceStartTime, + tgt_starttime = TgtInstanceStartTime, + stats = Stats, + init_args = [_RepId, {RepDoc} | _] + } = State, + case commit_to_both(Source, Target, NewSeqNum) of + {SrcInstanceStartTime, TgtInstanceStartTime} -> + ?LOG_INFO("recording a checkpoint for ~s -> ~s at source update_seq ~p", + [dbname(Source), dbname(Target), NewSeqNum]), + EndTime = ?l2b(httpd_util:rfc1123_date()), + StartTime = ?l2b(ReplicationStartTime), + DocsRead = ets:lookup_element(Stats, docs_read, 2), + DocsWritten = ets:lookup_element(Stats, docs_written, 2), + DocWriteFailures = ets:lookup_element(Stats, doc_write_failures, 2), + NewHistoryEntry = {[ + {<<"session_id">>, SessionId}, + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"start_last_seq">>, StartSeqNum}, + {<<"end_last_seq">>, NewSeqNum}, + {<<"recorded_seq">>, NewSeqNum}, + {<<"missing_checked">>, ets:lookup_element(Stats, total_revs, 2)}, + {<<"missing_found">>, ets:lookup_element(Stats, missing_revs, 2)}, + {<<"docs_read">>, DocsRead}, + {<<"docs_written">>, DocsWritten}, + {<<"doc_write_failures">>, DocWriteFailures} + ]}, + BaseHistory = [ + {<<"session_id">>, SessionId}, + {<<"source_last_seq">>, NewSeqNum}, + {<<"replication_id_version">>, ?REP_ID_VERSION} + ] ++ case couch_util:get_value(<<"doc_ids">>, RepDoc) of + undefined -> + []; + DocIds when is_list(DocIds) -> + % backwards compatibility with the result of a replication by + % doc IDs in versions 0.11.x and 1.0.x + [ + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"docs_read">>, DocsRead}, + {<<"docs_written">>, DocsWritten}, + {<<"doc_write_failures">>, DocWriteFailures} + ] + end, + % limit history to 50 entries + NewRepHistory = { + BaseHistory ++ + [{<<"history">>, lists:sublist([NewHistoryEntry | OldHistory], 50)}] + }, + + try + {SrcRevPos,SrcRevId} = + update_local_doc(Source, SourceLog#doc{body=NewRepHistory}), + {TgtRevPos,TgtRevId} = + update_local_doc(Target, TargetLog#doc{body=NewRepHistory}), + State#state{ + checkpoint_scheduled = nil, + checkpoint_history = NewRepHistory, + source_log = SourceLog#doc{revs={SrcRevPos, [SrcRevId]}}, + target_log = TargetLog#doc{revs={TgtRevPos, [TgtRevId]}} + } + catch throw:conflict -> + ?LOG_ERROR("checkpoint failure: conflict (are you replicating to " + "yourself?)", []), + State + end; + _Else -> + ?LOG_INFO("rebooting ~s -> ~s from last known replication checkpoint", + [dbname(Source), dbname(Target)]), + #state{ + changes_feed = CF, + missing_revs = MR, + reader = Reader, + writer = Writer + } = State, + Pids = [Writer, Reader, MR, CF], + [unlink(Pid) || Pid <- Pids], + [exit(Pid, shutdown) || Pid <- Pids], + close_db(Target), + close_db(Source), + {ok, NewState} = init(State#state.init_args), + NewState#state{listeners=State#state.listeners} + end. + +commit_to_both(Source, Target, RequiredSeq) -> + % commit the src async + ParentPid = self(), + SrcCommitPid = spawn_link(fun() -> + ParentPid ! {self(), ensure_full_commit(Source, RequiredSeq)} end), + + % commit tgt sync + TargetStartTime = ensure_full_commit(Target), + + SourceStartTime = + receive + {SrcCommitPid, Timestamp} -> + Timestamp; + {'EXIT', SrcCommitPid, {http_request_failed, _}} -> + exit(replication_link_failure) + end, + {SourceStartTime, TargetStartTime}. + +ensure_full_commit(#http_db{headers = Headers} = Target) -> + Headers1 = [ + {"Content-Length", 0} | + couch_util:proplist_apply_field( + {"Content-Type", "application/json"}, Headers) + ], + Req = Target#http_db{ + resource = "_ensure_full_commit", + method = post, + headers = Headers1 + }, + {ResultProps} = couch_rep_httpc:request(Req), + true = couch_util:get_value(<<"ok">>, ResultProps), + couch_util:get_value(<<"instance_start_time">>, ResultProps); +ensure_full_commit(Target) -> + {ok, NewDb} = couch_db:open_int(Target#db.name, []), + UpdateSeq = couch_db:get_update_seq(Target), + CommitSeq = couch_db:get_committed_update_seq(NewDb), + InstanceStartTime = NewDb#db.instance_start_time, + couch_db:close(NewDb), + if UpdateSeq > CommitSeq -> + ?LOG_DEBUG("target needs a full commit: update ~p commit ~p", + [UpdateSeq, CommitSeq]), + {ok, DbStartTime} = couch_db:ensure_full_commit(Target), + DbStartTime; + true -> + ?LOG_DEBUG("target doesn't need a full commit", []), + InstanceStartTime + end. + +ensure_full_commit(#http_db{headers = Headers} = Source, RequiredSeq) -> + Headers1 = [ + {"Content-Length", 0} | + couch_util:proplist_apply_field( + {"Content-Type", "application/json"}, Headers) + ], + Req = Source#http_db{ + resource = "_ensure_full_commit", + method = post, + qs = [{seq, iolist_to_binary(?JSON_ENCODE(RequiredSeq))}], + headers = Headers1 + }, + {ResultProps} = couch_rep_httpc:request(Req), + case couch_util:get_value(<<"ok">>, ResultProps) of + true -> + couch_util:get_value(<<"instance_start_time">>, ResultProps); + undefined -> nil end; +ensure_full_commit(Source, RequiredSeq) -> + {ok, NewDb} = couch_db:open_int(Source#db.name, []), + CommitSeq = couch_db:get_committed_update_seq(NewDb), + InstanceStartTime = NewDb#db.instance_start_time, + couch_db:close(NewDb), + if RequiredSeq > CommitSeq -> + ?LOG_DEBUG("source needs a full commit: required ~p committed ~p", + [RequiredSeq, CommitSeq]), + {ok, DbStartTime} = couch_db:ensure_full_commit(Source), + DbStartTime; + true -> + ?LOG_DEBUG("source doesn't need a full commit", []), + InstanceStartTime + end. + +update_local_doc(#http_db{} = Db, Doc) -> + Req = Db#http_db{ + resource = couch_util:encode_doc_id(Doc), + method = put, + body = couch_doc:to_json_obj(Doc, [attachments]), + headers = [{"x-couch-full-commit", "false"} | Db#http_db.headers] + }, + {ResponseMembers} = couch_rep_httpc:request(Req), + Rev = couch_util:get_value(<<"rev">>, ResponseMembers), + couch_doc:parse_rev(Rev); +update_local_doc(Db, Doc) -> + {ok, Result} = couch_db:update_doc(Db, Doc, [delay_commit]), + Result. + +up_to_date(#http_db{}, _Seq) -> + true; +up_to_date(Source, Seq) -> + {ok, NewDb} = couch_db:open_int(Source#db.name, []), + T = NewDb#db.update_seq == Seq, + couch_db:close(NewDb), + T. + +parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl) -> + parse_proxy_params(?b2l(ProxyUrl)); +parse_proxy_params([]) -> + []; +parse_proxy_params(ProxyUrl) -> + #url{ + host = Host, + port = Port, + username = User, + password = Passwd + } = ibrowse_lib:parse_url(ProxyUrl), + [{proxy_host, Host}, {proxy_port, Port}] ++ + case is_list(User) andalso is_list(Passwd) of + false -> + []; + true -> + [{proxy_user, User}, {proxy_password, Passwd}] + end. + +source_db_update_notifier(#db{name = DbName}) -> + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({compacted, DbName1}) when DbName1 =:= DbName -> + ok = gen_server:cast(Server, reopen_source_db); + (_) -> + ok + end), + Notifier; +source_db_update_notifier(_) -> + nil. + +target_db_update_notifier(#db{name = DbName}) -> + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({compacted, DbName1}) when DbName1 =:= DbName -> + ok = gen_server:cast(Server, reopen_target_db); + (_) -> + ok + end), + Notifier; +target_db_update_notifier(_) -> + nil. diff --git a/apps/couch/src/couch_rep_att.erl b/apps/couch/src/couch_rep_att.erl new file mode 100644 index 00000000..9988c5db --- /dev/null +++ b/apps/couch/src/couch_rep_att.erl @@ -0,0 +1,118 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_att). + +-export([convert_stub/2, cleanup/0]). + +-include("couch_db.hrl"). + +convert_stub(#att{data=stub, name=Name} = Attachment, + {#http_db{} = Db, Id, Rev}) -> + {Pos, [RevId|_]} = Rev, + Request = Db#http_db{ + resource = lists:flatten([couch_util:url_encode(Id), "/", + couch_util:url_encode(Name)]), + qs = [{rev, couch_doc:rev_to_str({Pos,RevId})}] + }, + Ref = make_ref(), + RcvFun = fun() -> attachment_receiver(Ref, Request) end, + Attachment#att{data=RcvFun}. + +cleanup() -> + receive + {ibrowse_async_response, _, _} -> + %% TODO maybe log, didn't expect to have data here + cleanup(); + {ibrowse_async_response_end, _} -> + cleanup(); + {ibrowse_async_headers, _, _, _} -> + cleanup() + after 0 -> + erase(), + ok + end. + +% internal funs + +attachment_receiver(Ref, Request) -> + try case get(Ref) of + undefined -> + {ReqId, ContentEncoding} = start_http_request(Request), + put(Ref, {ReqId, ContentEncoding}), + receive_data(Ref, ReqId, ContentEncoding); + {ReqId, ContentEncoding} -> + receive_data(Ref, ReqId, ContentEncoding) + end + catch + throw:{attachment_request_failed, _} -> + case {Request#http_db.retries, Request#http_db.pause} of + {0, _} -> + ?LOG_INFO("request for ~p failed", [Request#http_db.resource]), + throw({attachment_request_failed, max_retries_reached}); + {N, Pause} when N > 0 -> + ?LOG_INFO("request for ~p timed out, retrying in ~p seconds", + [Request#http_db.resource, Pause/1000]), + timer:sleep(Pause), + cleanup(), + attachment_receiver(Ref, Request#http_db{retries = N-1}) + end + end. + +receive_data(Ref, ReqId, ContentEncoding) -> + receive + {ibrowse_async_response, ReqId, {chunk_start,_}} -> + receive_data(Ref, ReqId, ContentEncoding); + {ibrowse_async_response, ReqId, chunk_end} -> + receive_data(Ref, ReqId, ContentEncoding); + {ibrowse_async_response, ReqId, {error, Err}} -> + ?LOG_ERROR("streaming attachment ~p failed with ~p", [ReqId, Err]), + throw({attachment_request_failed, Err}); + {ibrowse_async_response, ReqId, Data} -> + Data; + {ibrowse_async_response_end, ReqId} -> + ?LOG_ERROR("streaming att. ended but more data requested ~p", [ReqId]), + throw({attachment_request_failed, premature_end}) + after 31000 -> + throw({attachment_request_failed, timeout}) + end. + +start_http_request(Req) -> + %% set stream_to here because self() has changed + Req2 = Req#http_db{options = [{stream_to,self()} | Req#http_db.options]}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req2), + receive {ibrowse_async_headers, ReqId, Code, Headers} -> + case validate_headers(Req2, list_to_integer(Code), Headers) of + {ok, ContentEncoding} -> + {ReqId, ContentEncoding}; + {ok, ContentEncoding, NewReqId} -> + {NewReqId, ContentEncoding} + end + after 10000 -> + throw({attachment_request_failed, timeout}) + end. + +validate_headers(_Req, 200, Headers) -> + MochiHeaders = mochiweb_headers:make(Headers), + {ok, mochiweb_headers:get_value("Content-Encoding", MochiHeaders)}; +validate_headers(Req, Code, Headers) when Code > 299, Code < 400 -> + NewReq = couch_rep_httpc:redirected_request(Code, Headers, Req), + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(NewReq), + receive {ibrowse_async_headers, ReqId, NewCode, NewHeaders} -> + {ok, Encoding} = validate_headers(NewReq, list_to_integer(NewCode), + NewHeaders) + end, + {ok, Encoding, ReqId}; +validate_headers(Req, Code, _Headers) -> + #http_db{url=Url, resource=Resource} = Req, + ?LOG_ERROR("got ~p for ~s~s", [Code, Url, Resource]), + throw({attachment_request_failed, {bad_code, Code}}). diff --git a/apps/couch/src/couch_rep_changes_feed.erl b/apps/couch/src/couch_rep_changes_feed.erl new file mode 100644 index 00000000..70db52a0 --- /dev/null +++ b/apps/couch/src/couch_rep_changes_feed.erl @@ -0,0 +1,520 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_changes_feed). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1, stop/1]). + +-define(BUFFER_SIZE, 1000). +-define(DOC_IDS_FILTER_NAME, "_doc_ids"). + +-include("couch_db.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-record (state, { + changes_from = nil, + changes_loop = nil, + init_args, + last_seq, + conn = nil, + reqid = nil, + complete = false, + count = 0, + partial_chunk = <<>>, + reply_to = nil, + rows = queue:new(), + doc_ids = nil +}). + +-import(couch_util, [ + get_value/2, + get_value/3 +]). + +start_link(Parent, Source, StartSeq, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Source, StartSeq, PostProps], []). + +next(Server) -> + gen_server:call(Server, next_changes, infinity). + +stop(Server) -> + catch gen_server:call(Server, stop), + ok. + +init([Parent, #http_db{headers = Headers0} = Source, Since, PostProps]) -> + process_flag(trap_exit, true), + Feed = case get_value(<<"continuous">>, PostProps, false) of + false -> + normal; + true -> + continuous + end, + BaseQS = [ + {"style", all_docs}, + {"heartbeat", 10000}, + {"since", iolist_to_binary(?JSON_ENCODE(Since))}, + {"feed", Feed} + ], + {QS, Method, Body, Headers} = case get_value(<<"doc_ids">>, PostProps) of + undefined -> + {maybe_add_filter_qs_params(PostProps, BaseQS), get, nil, Headers0}; + DocIds when is_list(DocIds) -> + Headers1 = [{"Content-Type", "application/json"} | Headers0], + QS1 = [{"filter", ?l2b(?DOC_IDS_FILTER_NAME)} | BaseQS], + {QS1, post, {[{<<"doc_ids">>, DocIds}]}, Headers1} + end, + Pid = couch_rep_httpc:spawn_link_worker_process(Source), + Req = Source#http_db{ + method = Method, + body = Body, + resource = "_changes", + qs = QS, + conn = Pid, + options = [{stream_to, {self(), once}}] ++ + lists:keydelete(inactivity_timeout, 1, Source#http_db.options), + headers = Headers -- [{"Accept-Encoding", "gzip"}] + }, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req), + Args = [Parent, Req, Since, PostProps], + State = #state{ + conn = Pid, + last_seq = Since, + reqid = ReqId, + init_args = Args, + doc_ids = get_value(<<"doc_ids">>, PostProps, nil) + }, + + receive + {ibrowse_async_headers, ReqId, "200", _} -> + ibrowse:stream_next(ReqId), + {ok, State}; + {ibrowse_async_headers, ReqId, Code, Hdrs} + when Code =:= "301"; Code =:= "302"; Code =:= "303" -> + {ReqId2, Req2} = redirect_req(Req, Code, Hdrs), + receive + {ibrowse_async_headers, ReqId2, "200", _} -> + {ok, State#state{ + conn = Req2#http_db.conn, + reqid = ReqId2, + init_args = [Parent, Req2, Since, PostProps]}}; + {ibrowse_async_headers, ReqId2, "405", _} when Method =:= post -> + {ReqId3, Req3} = req_no_builtin_doc_ids(Req2, ReqId2), + receive + {ibrowse_async_headers, ReqId3, "200", _} -> + {ok, State#state{ + conn = Req3#http_db.conn, + reqid = ReqId3, + init_args = [Parent, Req3, Since, PostProps]}} + after 30000 -> + {stop, changes_timeout} + end + after 30000 -> + {stop, changes_timeout} + end; + {ibrowse_async_headers, ReqId, "404", _} -> + stop_link_worker(Pid), + ?LOG_INFO("source doesn't have _changes, trying _all_docs_by_seq", []), + Self = self(), + BySeqPid = spawn_link(fun() -> by_seq_loop(Self, Source, Since) end), + {ok, State#state{changes_loop = BySeqPid}}; + {ibrowse_async_headers, ReqId, "405", _} when Method =:= post -> + {ReqId2, Req2} = req_no_builtin_doc_ids(Req, ReqId), + receive + {ibrowse_async_headers, ReqId2, "200", _} -> + {ok, State#state{ + conn = Req2#http_db.conn, + reqid = ReqId2, + init_args = [Parent, Req2, Since, PostProps]}}; + {ibrowse_async_headers, ReqId, Code, Hdrs} + when Code =:= "301"; Code =:= "302"; Code =:= "303" -> + {ReqId3, Req3} = redirect_req(Req2, Code, Hdrs), + receive + {ibrowse_async_headers, ReqId3, "200", _} -> + {ok, State#state{ + conn = Req3#http_db.conn, + reqid = ReqId3, + init_args = [Parent, Req3, Since, PostProps]}} + after 30000 -> + {stop, changes_timeout} + end + after 30000 -> + {stop, changes_timeout} + end; + {ibrowse_async_headers, ReqId, Code, _} -> + {stop, {changes_error_code, list_to_integer(Code)}} + after 30000 -> + {stop, changes_timeout} + end; + +init([_Parent, Source, Since, PostProps] = InitArgs) -> + process_flag(trap_exit, true), + Server = self(), + Filter = case get_value(<<"doc_ids">>, PostProps) of + undefined -> + ?b2l(get_value(<<"filter">>, PostProps, <<>>)); + DocIds when is_list(DocIds) -> + ?DOC_IDS_FILTER_NAME + end, + ChangesArgs = #changes_args{ + style = all_docs, + since = Since, + filter = Filter, + feed = case get_value(<<"continuous">>, PostProps, false) of + true -> + "continuous"; + false -> + "normal" + end, + timeout = infinity + }, + ChangesPid = spawn_link(fun() -> + ChangesFeedFun = couch_changes:handle_changes( + ChangesArgs, + {json_req, filter_json_req(Filter, Source, PostProps)}, + Source + ), + ChangesFeedFun(fun({change, Change, _}, _) -> + gen_server:call(Server, {add_change, Change}, infinity); + (_, _) -> + ok + end) + end), + {ok, #state{changes_loop=ChangesPid, init_args=InitArgs}}. + +maybe_add_filter_qs_params(PostProps, BaseQS) -> + case get_value(<<"filter">>, PostProps) of + undefined -> + BaseQS; + FilterName -> + {Params} = get_value(<<"query_params">>, PostProps, {[]}), + lists:foldr( + fun({K, V}, QSAcc) -> + Ks = couch_util:to_list(K), + case proplists:is_defined(Ks, QSAcc) of + true -> + QSAcc; + false -> + [{Ks, V} | QSAcc] + end + end, + [{"filter", FilterName} | BaseQS], + Params + ) + end. + +filter_json_req([], _Db, _PostProps) -> + {[]}; +filter_json_req(?DOC_IDS_FILTER_NAME, _Db, PostProps) -> + {[{<<"doc_ids">>, get_value(<<"doc_ids">>, PostProps)}]}; +filter_json_req(FilterName, Db, PostProps) -> + {Query} = get_value(<<"query_params">>, PostProps, {[]}), + {ok, Info} = couch_db:get_db_info(Db), + % simulate a request to db_name/_changes + {[ + {<<"info">>, {Info}}, + {<<"id">>, null}, + {<<"method">>, 'GET'}, + {<<"path">>, [couch_db:name(Db), <<"_changes">>]}, + {<<"query">>, {[{<<"filter">>, FilterName} | Query]}}, + {<<"headers">>, []}, + {<<"body">>, []}, + {<<"peer">>, <<"replicator">>}, + {<<"form">>, []}, + {<<"cookie">>, []}, + {<<"userCtx">>, couch_util:json_user_ctx(Db)} + ]}. + +handle_call({add_change, Row}, From, State) -> + handle_add_change(Row, From, State); + +handle_call(next_changes, From, State) -> + handle_next_changes(From, State); + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({ibrowse_async_headers, Id, Code, Hdrs}, #state{reqid=Id}=State) -> + handle_headers(list_to_integer(Code), Hdrs, State); + +handle_info({ibrowse_async_response, Id, {error, sel_conn_closed}}, + #state{reqid=Id}=State) -> + handle_retry(State); + +handle_info({ibrowse_async_response, Id, {error, connection_closed}}, + #state{reqid=Id}=State) -> + handle_retry(State); + +handle_info({ibrowse_async_response, Id, {error,E}}, #state{reqid=Id}=State) -> + {stop, {error, E}, State}; + +handle_info({ibrowse_async_response, Id, Chunk}, #state{reqid=Id}=State) -> + Messages = [M || M <- re:split(Chunk, ",?\n", [trim]), M =/= <<>>], + handle_messages(Messages, State); + +handle_info({ibrowse_async_response_end, Id}, #state{reqid=Id} = State) -> + handle_feed_completion(State); + +handle_info({'EXIT', From, normal}, #state{changes_loop=From} = State) -> + handle_feed_completion(State); + +handle_info({'EXIT', From, normal}, #state{conn=From, complete=true} = State) -> + {noreply, State}; + +handle_info({'EXIT', From, Reason}, #state{changes_loop=From} = State) -> + ?LOG_ERROR("changes_loop died with reason ~p", [Reason]), + {stop, changes_loop_died, State}; + +handle_info({'EXIT', From, Reason}, State) -> + ?LOG_ERROR("changes loop, process ~p died with reason ~p", [From, Reason]), + {stop, {From, Reason}, State}; + +handle_info(Msg, #state{init_args = InitArgs} = State) -> + case Msg of + changes_timeout -> + [_, #http_db{url = Url} | _] = InitArgs, + ?LOG_ERROR("changes loop timeout, no data received from ~s", + [couch_util:url_strip_password(Url)]); + _ -> + ?LOG_ERROR("changes loop received unexpected message ~p", [Msg]) + end, + {stop, Msg, State}. + +terminate(_Reason, State) -> + #state{ + changes_loop = ChangesPid, + conn = Conn + } = State, + if is_pid(ChangesPid) -> exit(ChangesPid, stop); true -> ok end, + stop_link_worker(Conn). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_change(Row, From, #state{reply_to=nil} = State) -> + {Rows2, Count2} = queue_changes_row(Row, State), + NewState = State#state{count = Count2, rows = Rows2}, + if Count2 =< ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{changes_from=From}} + end; +handle_add_change(Row, _From, #state{count=0} = State) -> + gen_server:reply(State#state.reply_to, [Row]), + {reply, ok, State#state{reply_to=nil}}. + +handle_next_changes(From, #state{count=0}=State) -> + if State#state.complete -> + {stop, normal, complete, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_changes(_From, State) -> + #state{ + changes_from = ChangesFrom, + rows = Rows + } = State, + NewState = State#state{count=0, changes_from=nil, rows=queue:new()}, + maybe_stream_next(NewState), + if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end, + {reply, queue:to_list(Rows), NewState}. + +handle_headers(200, _, State) -> + maybe_stream_next(State), + {noreply, State}; +handle_headers(Code, Hdrs, #state{init_args = InitArgs} = State) + when Code =:= 301 ; Code =:= 302 ; Code =:= 303 -> + stop_link_worker(State#state.conn), + [Parent, Source, Since, PostProps] = InitArgs, + Source2 = couch_rep_httpc:redirected_request(Code, Hdrs, Source), + Pid2 = couch_rep_httpc:spawn_link_worker_process(Source2), + Source3 = Source2#http_db{conn = Pid2}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Source3), + InitArgs2 = [Parent, Source3, Since, PostProps], + {noreply, State#state{conn=Pid2, reqid=ReqId, init_args=InitArgs2}}; +handle_headers(Code, Hdrs, State) -> + ?LOG_ERROR("replicator changes feed failed with code ~s and Headers ~n~p", + [Code,Hdrs]), + {stop, {error, Code}, State}. + +handle_messages([], State) -> + maybe_stream_next(State), + {noreply, State}; +handle_messages([<<"{\"results\":[">>|Rest], State) -> + handle_messages(Rest, State); +handle_messages([<<"]">>, <<"\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([<<"{\"last_seq\":", _/binary>>], State) -> + handle_feed_completion(State); +handle_messages([Chunk|Rest], #state{partial_chunk = Partial} = State) -> + NewState = try + Row = {Props} = decode_row(<<Partial/binary, Chunk/binary>>), + case State of + #state{reply_to=nil} -> + {Rows2, Count2} = queue_changes_row(Row, State), + State#state{ + last_seq = couch_util:get_value(<<"seq">>, Props), + partial_chunk = <<>>, + rows = Rows2, + count = Count2 + }; + #state{count=0, reply_to=From}-> + gen_server:reply(From, [Row]), + State#state{reply_to = nil, partial_chunk = <<>>} + end + catch + throw:{invalid_json, Bad} -> + State#state{partial_chunk = Bad} + end, + handle_messages(Rest, NewState). + +handle_feed_completion(#state{reply_to=nil} = State)-> + {noreply, State#state{complete=true}}; +handle_feed_completion(#state{count=0} = State) -> + gen_server:reply(State#state.reply_to, complete), + {stop, normal, State}. + +handle_retry(State) -> + ?LOG_DEBUG("retrying changes feed because our connection closed", []), + #state{ + count = Count, + init_args = [_, Source, _, PostProps], + last_seq = Since, + reply_to = ReplyTo, + rows = Rows + } = State, + case init([nil, Source, Since, PostProps]) of + {ok, State1} -> + MergedState = State1#state{ + count = Count, + reply_to = ReplyTo, + rows = Rows + }, + {noreply, MergedState}; + _ -> + {stop, {error, connection_closed}, State} + end. + +by_seq_loop(Server, Source, StartSeq) -> + Req = Source#http_db{ + resource = "_all_docs_by_seq", + qs = [{limit, 1000}, {startkey, StartSeq}] + }, + {Results} = couch_rep_httpc:request(Req), + Rows = couch_util:get_value(<<"rows">>, Results), + if Rows =:= [] -> exit(normal); true -> ok end, + EndSeq = lists:foldl(fun({RowInfoList}, _) -> + Id = couch_util:get_value(<<"id">>, RowInfoList), + Seq = couch_util:get_value(<<"key">>, RowInfoList), + {RowProps} = couch_util:get_value(<<"value">>, RowInfoList), + RawRevs = [ + couch_util:get_value(<<"rev">>, RowProps), + couch_util:get_value(<<"conflicts">>, RowProps, []), + couch_util:get_value(<<"deleted_conflicts">>, RowProps, []) + ], + ParsedRevs = couch_doc:parse_revs(lists:flatten(RawRevs)), + Change = {[ + {<<"seq">>, Seq}, + {<<"id">>, Id}, + {<<"changes">>, [{[{<<"rev">>,R}]} || R <- ParsedRevs]} + ]}, + gen_server:call(Server, {add_change, Change}, infinity), + Seq + end, 0, Rows), + by_seq_loop(Server, Source, EndSeq). + +decode_row(<<",", Rest/binary>>) -> + decode_row(Rest); +decode_row(Row) -> + ?JSON_DECODE(Row). + +maybe_stream_next(#state{reqid=nil}) -> + ok; +maybe_stream_next(#state{complete=false, count=N} = S) when N < ?BUFFER_SIZE -> + timer:cancel(get(timeout)), + {ok, Timeout} = timer:send_after(31000, changes_timeout), + put(timeout, Timeout), + ibrowse:stream_next(S#state.reqid); +maybe_stream_next(_) -> + timer:cancel(get(timeout)). + +stop_link_worker(Conn) when is_pid(Conn) -> + unlink(Conn), + receive {'EXIT', Conn, _} -> ok after 0 -> ok end, + catch ibrowse:stop_worker_process(Conn); +stop_link_worker(_) -> + ok. + +redirect_req(#http_db{conn = WorkerPid} = Req, Code, Headers) -> + stop_link_worker(WorkerPid), + Req2 = couch_rep_httpc:redirected_request(Code, Headers, Req), + WorkerPid2 = couch_rep_httpc:spawn_link_worker_process(Req2), + Req3 = Req2#http_db{conn = WorkerPid2}, + {ibrowse_req_id, ReqId} = couch_rep_httpc:request(Req3), + {ReqId, Req3}. + +req_no_builtin_doc_ids(#http_db{conn = WorkerPid, qs = QS} = Req, ReqId) -> + % CouchDB versions prior to 1.1.0 don't have the builtin filter _doc_ids + % and don't allow POSTing to /database/_changes + purge_req_messages(ReqId), + stop_link_worker(WorkerPid), + Req2 = Req#http_db{method = get, qs = lists:keydelete("filter", 1, QS)}, + WorkerPid2 = couch_rep_httpc:spawn_link_worker_process(Req2), + Req3 = Req2#http_db{conn = WorkerPid2}, + {ibrowse_req_id, ReqId2} = couch_rep_httpc:request(Req3), + {ReqId2, Req3}. + +purge_req_messages(ReqId) -> + ibrowse:stream_next(ReqId), + receive + {ibrowse_async_response, ReqId, {error, _}} -> + ok; + {ibrowse_async_response, ReqId, _Data} -> + purge_req_messages(ReqId); + {ibrowse_async_response_end, ReqId} -> + ok + end. + +queue_changes_row(Row, #state{doc_ids = nil} = State) -> + maybe_queue_row(Row, State); +queue_changes_row({RowProps} = Row, + #state{doc_ids = Ids, count = Count, rows = Rows} = State) -> + case lists:member(get_value(<<"id">>, RowProps), Ids) of + true -> + maybe_queue_row(Row, State); + false -> + {Rows, Count} + end. + +maybe_queue_row({Props} = Row, #state{count = Count, rows = Rows} = State) -> + case get_value(<<"id">>, Props) of + <<>> -> + [_, Db | _] = State#state.init_args, + ?LOG_ERROR("Replicator: ignoring document with empty ID in source " + "database `~s` (_changes sequence ~p)", + [dbname(Db), couch_util:get_value(<<"seq">>, Props)]), + {Rows, Count}; + _ -> + {queue:in(Row, Rows), Count + 1} + end. + +dbname(#http_db{url = Url}) -> + couch_util:url_strip_password(Url); +dbname(#db{name = Name}) -> + Name. diff --git a/apps/couch/src/couch_rep_httpc.erl b/apps/couch/src/couch_rep_httpc.erl new file mode 100644 index 00000000..e22c8f81 --- /dev/null +++ b/apps/couch/src/couch_rep_httpc.erl @@ -0,0 +1,317 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_httpc). +-include("couch_db.hrl"). +-include_lib("ibrowse/include/ibrowse.hrl"). + +-export([db_exists/1, db_exists/2]). +-export([full_url/1, request/1, redirected_request/3]). +-export([spawn_worker_process/1, spawn_link_worker_process/1]). +-export([ssl_options/1]). + +request(#http_db{} = Req) -> + do_request(Req). + +do_request(#http_db{url=Url} = Req) when is_binary(Url) -> + do_request(Req#http_db{url = ?b2l(Url)}); + +do_request(Req) -> + #http_db{ + auth = Auth, + body = B, + conn = Conn, + headers = Headers0, + method = Method, + options = Opts, + qs = QS + } = Req, + Url = full_url(Req), + Headers = case couch_util:get_value(<<"oauth">>, Auth) of + undefined -> + Headers0; + {OAuthProps} -> + [oauth_header(Url, QS, Method, OAuthProps) | Headers0] + end, + Body = case B of + {Fun, InitialState} when is_function(Fun) -> + {Fun, InitialState}; + nil -> + []; + _Else -> + iolist_to_binary(?JSON_ENCODE(B)) + end, + Resp = case Conn of + nil -> + ibrowse:send_req(Url, Headers, Method, Body, Opts, infinity); + _ -> + ibrowse:send_req_direct(Conn, Url, Headers, Method, Body, Opts, infinity) + end, + process_response(Resp, Req). + +db_exists(Req) -> + db_exists(Req, Req#http_db.url). + +db_exists(Req, true) -> + db_exists(Req, Req#http_db.url, true); + +db_exists(Req, false) -> + db_exists(Req, Req#http_db.url, false); + +db_exists(Req, CanonicalUrl) -> + db_exists(Req, CanonicalUrl, false). + +db_exists(Req, CanonicalUrl, CreateDB) -> + #http_db{ + auth = Auth, + headers = Headers0, + options = Options, + url = Url + } = Req, + HeadersFun = fun(Method) -> + case couch_util:get_value(<<"oauth">>, Auth) of + undefined -> + Headers0; + {OAuthProps} -> + [oauth_header(Url, [], Method, OAuthProps) | Headers0] + end + end, + case CreateDB of + true -> + Headers = [{"Content-Length", 0} | HeadersFun(put)], + catch ibrowse:send_req(Url, Headers, put, [], Options); + _Else -> ok + end, + case catch ibrowse:send_req(Url, HeadersFun(head), head, [], Options) of + {ok, "200", _, _} -> + config_http(CanonicalUrl), + Req#http_db{url = CanonicalUrl}; + {ok, "301", RespHeaders, _} -> + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), + db_exists(Req#http_db{url = RedirectUrl}, RedirectUrl); + {ok, "302", RespHeaders, _} -> + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), + db_exists(Req#http_db{url = RedirectUrl}, CanonicalUrl); + {ok, "303", RespHeaders, _} -> + RedirectUrl = redirect_url(RespHeaders, Req#http_db.url), + db_exists(Req#http_db{method = get, url = RedirectUrl}, CanonicalUrl); + {ok, "401", _, _} -> + throw({unauthorized, ?l2b(Url)}); + Error -> + ?LOG_DEBUG("DB at ~s could not be found because ~p", [Url, Error]), + throw({db_not_found, ?l2b(Url)}) + end. + +config_http(Url) -> + #url{host = Host, port = Port} = ibrowse_lib:parse_url(Url), + ok = ibrowse:set_max_sessions(Host, Port, list_to_integer( + couch_config:get("replicator", "max_http_sessions", "20"))), + ok = ibrowse:set_max_pipeline_size(Host, Port, list_to_integer( + couch_config:get("replicator", "max_http_pipeline_size", "50"))), + ok = couch_config:register( + fun("replicator", "max_http_sessions", MaxSessions) -> + ibrowse:set_max_sessions(Host, Port, list_to_integer(MaxSessions)); + ("replicator", "max_http_pipeline_size", PipeSize) -> + ibrowse:set_max_pipeline_size(Host, Port, list_to_integer(PipeSize)) + end). + +redirect_url(RespHeaders, OrigUrl) -> + MochiHeaders = mochiweb_headers:make(RespHeaders), + RedUrl = mochiweb_headers:get_value("Location", MochiHeaders), + #url{ + host = Host, host_type = HostType, port = Port, + path = Path, protocol = Proto + } = ibrowse_lib:parse_url(RedUrl), + #url{username = User, password = Passwd} = ibrowse_lib:parse_url(OrigUrl), + Creds = case is_list(User) andalso is_list(Passwd) of + true -> + User ++ ":" ++ Passwd ++ "@"; + false -> + [] + end, + HostPart = case HostType of + ipv6_address -> + "[" ++ Host ++ "]"; + _ -> + Host + end, + atom_to_list(Proto) ++ "://" ++ Creds ++ HostPart ++ ":" ++ + integer_to_list(Port) ++ Path. + +full_url(#http_db{url=Url} = Req) when is_binary(Url) -> + full_url(Req#http_db{url = ?b2l(Url)}); + +full_url(#http_db{qs=[]} = Req) -> + Req#http_db.url ++ Req#http_db.resource; + +full_url(Req) -> + #http_db{ + url = Url, + resource = Resource, + qs = QS + } = Req, + QStr = lists:map(fun({K,V}) -> io_lib:format("~s=~s", + [couch_util:to_list(K), couch_util:to_list(V)]) end, QS), + lists:flatten([Url, Resource, "?", string:join(QStr, "&")]). + +process_response({ok, Status, Headers, Body}, Req) -> + Code = list_to_integer(Status), + if Code =:= 200; Code =:= 201 -> + ?JSON_DECODE(maybe_decompress(Headers, Body)); + Code =:= 301; Code =:= 302 ; Code =:= 303 -> + do_request(redirected_request(Code, Headers, Req)); + Code =:= 409 -> + throw(conflict); + Code >= 400, Code < 500 -> + ?JSON_DECODE(maybe_decompress(Headers, Body)); + Code =:= 500; Code =:= 502; Code =:= 503 -> + #http_db{pause = Pause, retries = Retries} = Req, + ?LOG_INFO("retrying couch_rep_httpc request in ~p seconds " ++ + % "due to remote server error: ~s~s", [Pause/1000, Req#http_db.url, + "due to remote server error: ~p Body ~s", [Pause/1000, Code, + Body]), + timer:sleep(Pause), + do_request(Req#http_db{retries = Retries-1, pause = 2*Pause}); + true -> + exit({http_request_failed, ?l2b(["unhandled response code ", Status])}) + end; + +process_response({ibrowse_req_id, Id}, _Req) -> + {ibrowse_req_id, Id}; + +process_response({error, _Reason}, #http_db{url=Url, retries=0}) -> + ?LOG_ERROR("couch_rep_httpc request failed after 10 retries: ~s", [Url]), + exit({http_request_failed, ?l2b(["failed to replicate ", Url])}); +process_response({error, Reason}, Req) -> + #http_db{ + method = Method, + retries = Retries, + pause = Pause + } = Req, + ShortReason = case Reason of + sel_conn_closed -> + connection_closed; + {'EXIT', {noproc, _}} -> + noproc; + {'EXIT', {normal, _}} -> + normal; + Else -> + Else + end, + ?LOG_ERROR("~p retry ~p ~s in ~p seconds due to {error, ~p}", + [?MODULE, Method, full_url(Req), Pause/1000, ShortReason]), + timer:sleep(Pause), + if Reason == worker_is_dead -> + C = spawn_link_worker_process(Req), + do_request(Req#http_db{retries = Retries-1, pause = 2*Pause, conn=C}); + true -> + do_request(Req#http_db{retries = Retries-1, pause = 2*Pause}) + end. + +redirected_request(Code, Headers, Req) -> + RedirectUrl = redirect_url(Headers, Req#http_db.url), + {Base, QStr, _} = mochiweb_util:urlsplit_path(RedirectUrl), + QS = mochiweb_util:parse_qs(QStr), + ReqHeaders = case couch_util:get_value(<<"oauth">>, Req#http_db.auth) of + undefined -> + Req#http_db.headers; + _Else -> + lists:keydelete("Authorization", 1, Req#http_db.headers) + end, + Req#http_db{ + method = case couch_util:to_integer(Code) of + 303 -> get; + _ -> Req#http_db.method + end, + url = Base, + resource = "", + qs = QS, + headers = ReqHeaders + }. + +spawn_worker_process(Req) -> + Url = ibrowse_lib:parse_url(Req#http_db.url), + {ok, Pid} = ibrowse_http_client:start(Url), + Pid. + +spawn_link_worker_process(Req) -> + {ok, Pid} = ibrowse:spawn_link_worker_process(Req#http_db.url), + Pid. + +maybe_decompress(Headers, Body) -> + MochiHeaders = mochiweb_headers:make(Headers), + case mochiweb_headers:get_value("Content-Encoding", MochiHeaders) of + "gzip" -> + zlib:gunzip(Body); + _ -> + Body + end. + +oauth_header(Url, QS, Action, Props) -> + % erlang-oauth doesn't like iolists + QSL = [{couch_util:to_list(K), ?b2l(?l2b(couch_util:to_list(V)))} || + {K,V} <- QS], + ConsumerKey = ?b2l(couch_util:get_value(<<"consumer_key">>, Props)), + Token = ?b2l(couch_util:get_value(<<"token">>, Props)), + TokenSecret = ?b2l(couch_util:get_value(<<"token_secret">>, Props)), + ConsumerSecret = ?b2l(couch_util:get_value(<<"consumer_secret">>, Props)), + SignatureMethodStr = ?b2l(couch_util:get_value(<<"signature_method">>, Props, <<"HMAC-SHA1">>)), + SignatureMethodAtom = case SignatureMethodStr of + "PLAINTEXT" -> + plaintext; + "HMAC-SHA1" -> + hmac_sha1; + "RSA-SHA1" -> + rsa_sha1 + end, + Consumer = {ConsumerKey, ConsumerSecret, SignatureMethodAtom}, + Method = case Action of + get -> "GET"; + post -> "POST"; + put -> "PUT"; + head -> "HEAD" + end, + Params = oauth:signed_params(Method, Url, QSL, Consumer, Token, TokenSecret) + -- QSL, + {"Authorization", "OAuth " ++ oauth_uri:params_to_header_string(Params)}. + +ssl_options(#http_db{url = Url}) -> + case ibrowse_lib:parse_url(Url) of + #url{protocol = https} -> + Depth = list_to_integer( + couch_config:get("replicator", "ssl_certificate_max_depth", "3") + ), + SslOpts = [{depth, Depth} | + case couch_config:get("replicator", "verify_ssl_certificates") of + "true" -> + ssl_verify_options(true); + _ -> + ssl_verify_options(false) + end], + [{is_ssl, true}, {ssl_options, SslOpts}]; + #url{protocol = http} -> + [] + end. + +ssl_verify_options(Value) -> + ssl_verify_options(Value, erlang:system_info(otp_release)). + +ssl_verify_options(true, OTPVersion) when OTPVersion >= "R14" -> + CAFile = couch_config:get("replicator", "ssl_trusted_certificates_file"), + [{verify, verify_peer}, {cacertfile, CAFile}]; +ssl_verify_options(false, OTPVersion) when OTPVersion >= "R14" -> + [{verify, verify_none}]; +ssl_verify_options(true, _OTPVersion) -> + CAFile = couch_config:get("replicator", "ssl_trusted_certificates_file"), + [{verify, 2}, {cacertfile, CAFile}]; +ssl_verify_options(false, _OTPVersion) -> + [{verify, 0}]. diff --git a/apps/couch/src/couch_rep_missing_revs.erl b/apps/couch/src/couch_rep_missing_revs.erl new file mode 100644 index 00000000..9809ca5e --- /dev/null +++ b/apps/couch/src/couch_rep_missing_revs.erl @@ -0,0 +1,198 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_missing_revs). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1, stop/1]). + +-define(BUFFER_SIZE, 1000). + +-include("couch_db.hrl"). + +-record (state, { + changes_loop, + changes_from = nil, + parent, + complete = false, + count = 0, + reply_to = nil, + rows = queue:new(), + high_source_seq = 0, + high_missing_seq = 0, + high_committed_seq = 0 +}). + +start_link(Parent, Target, ChangesFeed, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Target, ChangesFeed, PostProps], []). + +next(Server) -> + gen_server:call(Server, next_missing_revs, infinity). + +stop(Server) -> + gen_server:call(Server, stop). + +init([Parent, _Target, ChangesFeed, _PostProps]) -> + process_flag(trap_exit, true), + Self = self(), + Pid = spawn_link(fun() -> changes_loop(Self, ChangesFeed, Parent) end), + {ok, #state{changes_loop=Pid, parent=Parent}}. + +handle_call({add_missing_revs, {HighSeq, Revs}}, From, State) -> + State#state.parent ! {update_stats, missing_revs, length(Revs)}, + handle_add_missing_revs(HighSeq, Revs, From, State); + +handle_call(next_missing_revs, From, State) -> + handle_next_missing_revs(From, State). + +handle_cast({update_committed_seq, N}, State) -> + if State#state.high_committed_seq < N -> + ?LOG_DEBUG("missing_revs updating committed seq to ~p", [N]); + true -> ok end, + {noreply, State#state{high_committed_seq=N}}. + +handle_info({'EXIT', Pid, Reason}, #state{changes_loop=Pid} = State) -> + handle_changes_loop_exit(Reason, State); + +handle_info(Msg, State) -> + ?LOG_INFO("unexpected message ~p", [Msg]), + {noreply, State}. + +terminate(_Reason, #state{changes_loop=Pid}) when is_pid(Pid) -> + exit(Pid, shutdown), + ok; +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_missing_revs(HighSeq, [], _From, State) -> + NewState = State#state{high_source_seq=HighSeq}, + maybe_checkpoint(NewState), + {reply, ok, NewState}; +handle_add_missing_revs(HighSeq, Revs, From, #state{reply_to=nil} = State) -> + #state{rows=Rows, count=Count} = State, + NewState = State#state{ + rows = queue:join(Rows, queue:from_list(Revs)), + count = Count + length(Revs), + high_source_seq = HighSeq, + high_missing_seq = HighSeq + }, + if NewState#state.count < ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{changes_from=From}} + end; +handle_add_missing_revs(HighSeq, Revs, _From, #state{count=0} = State) -> + gen_server:reply(State#state.reply_to, {HighSeq, Revs}), + NewState = State#state{ + high_source_seq = HighSeq, + high_missing_seq = HighSeq, + reply_to = nil + }, + {reply, ok, NewState}. + +handle_next_missing_revs(From, #state{count=0} = State) -> + if State#state.complete -> + {stop, normal, complete, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_missing_revs(_From, State) -> + #state{ + changes_from = ChangesFrom, + high_missing_seq = HighSeq, + rows = Rows + } = State, + if ChangesFrom =/= nil -> gen_server:reply(ChangesFrom, ok); true -> ok end, + NewState = State#state{count=0, changes_from=nil, rows=queue:new()}, + {reply, {HighSeq, queue:to_list(Rows)}, NewState}. + +handle_changes_loop_exit(normal, State) -> + if State#state.reply_to =/= nil -> + gen_server:reply(State#state.reply_to, complete), + {stop, normal, State}; + true -> + {noreply, State#state{complete=true, changes_loop=nil}} + end; +handle_changes_loop_exit(Reason, State) -> + {stop, Reason, State#state{changes_loop=nil}}. + +changes_loop(OurServer, SourceChangesServer, Parent) -> + case couch_rep_changes_feed:next(SourceChangesServer) of + complete -> + exit(normal); + Changes -> + {ok, Target} = gen_server:call(Parent, get_target_db, infinity), + MissingRevs = get_missing_revs(Target, Changes), + gen_server:call(OurServer, {add_missing_revs, MissingRevs}, infinity) + end, + changes_loop(OurServer, SourceChangesServer, Parent). + +get_missing_revs(#http_db{}=Target, Changes) -> + Transform = fun({Props}) -> + C = couch_util:get_value(<<"changes">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + {Id, [R || {[{<<"rev">>, R}]} <- C]} + end, + IdRevsList = [Transform(Change) || Change <- Changes], + SeqDict = changes_dictionary(Changes), + {LastProps} = lists:last(Changes), + HighSeq = couch_util:get_value(<<"seq">>, LastProps), + Request = Target#http_db{ + resource = "_missing_revs", + method = post, + body = {IdRevsList} + }, + {Resp} = couch_rep_httpc:request(Request), + case couch_util:get_value(<<"missing_revs">>, Resp) of + {MissingRevs} -> + X = [{Id, dict:fetch(Id, SeqDict), couch_doc:parse_revs(RevStrs)} || + {Id,RevStrs} <- MissingRevs], + {HighSeq, X}; + _ -> + exit({target_error, couch_util:get_value(<<"error">>, Resp)}) + end; + +get_missing_revs(Target, Changes) -> + Transform = fun({Props}) -> + C = couch_util:get_value(<<"changes">>, Props), + Id = couch_util:get_value(<<"id">>, Props), + {Id, [couch_doc:parse_rev(R) || {[{<<"rev">>, R}]} <- C]} + end, + IdRevsList = [Transform(Change) || Change <- Changes], + SeqDict = changes_dictionary(Changes), + {LastProps} = lists:last(Changes), + HighSeq = couch_util:get_value(<<"seq">>, LastProps), + {ok, Results} = couch_db:get_missing_revs(Target, IdRevsList), + {HighSeq, [{Id, dict:fetch(Id, SeqDict), Revs} || {Id, Revs, _} <- Results]}. + +changes_dictionary(ChangeList) -> + KVs = [{couch_util:get_value(<<"id">>,C), couch_util:get_value(<<"seq">>,C)} + || {C} <- ChangeList], + dict:from_list(KVs). + +%% save a checkpoint if no revs are missing on target so we don't +%% rescan metadata unnecessarily +maybe_checkpoint(#state{high_missing_seq=N, high_committed_seq=N} = State) -> + #state{ + parent = Parent, + high_source_seq = SourceSeq + } = State, + Parent ! {missing_revs_checkpoint, SourceSeq}; +maybe_checkpoint(_State) -> + ok. diff --git a/apps/couch/src/couch_rep_reader.erl b/apps/couch/src/couch_rep_reader.erl new file mode 100644 index 00000000..1e8ca074 --- /dev/null +++ b/apps/couch/src/couch_rep_reader.erl @@ -0,0 +1,277 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_reader). +-behaviour(gen_server). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([start_link/4, next/1]). + +-import(couch_util, [encode_doc_id/1]). + +-define (BUFFER_SIZE, 1000). +-define (MAX_CONCURRENT_REQUESTS, 100). + +-include("couch_db.hrl"). + +-record (state, { + parent, + source, + missing_revs, + reader_loop, + reader_from = [], + count = 0, + docs = queue:new(), + reply_to = nil, + complete = false, + monitor_count = 0, + pending_doc_request = nil, + requested_seqs = [], + opened_seqs = [] +}). + +start_link(Parent, Source, MissingRevs, PostProps) -> + gen_server:start_link(?MODULE, [Parent, Source, MissingRevs, PostProps], []). + +next(Pid) -> + gen_server:call(Pid, next_docs, infinity). + +init([Parent, Source, MissingRevs, _PostProps]) -> + process_flag(trap_exit, true), + Self = self(), + ReaderLoop = spawn_link( + fun() -> reader_loop(Self, Parent, Source, MissingRevs) end), + State = #state{ + parent = Parent, + source = Source, + missing_revs = MissingRevs, + reader_loop = ReaderLoop + }, + {ok, State}. + +handle_call({add_docs, Seq, Docs}, From, State) -> + State#state.parent ! {update_stats, docs_read, length(Docs)}, + handle_add_docs(Seq, lists:flatten(Docs), From, State); + +handle_call({add_request_seqs, Seqs}, _From, State) -> + SeqList = State#state.requested_seqs, + {reply, ok, State#state{requested_seqs = lists:merge(Seqs, SeqList)}}; + +handle_call(next_docs, From, State) -> + handle_next_docs(From, State); + +handle_call({open_remote_doc, Id, Seq, Revs}, From, State) -> + handle_open_remote_doc(Id, Seq, Revs, From, State). + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', _, _, _, Reason}, State) -> + handle_monitor_down(Reason, State); + +handle_info({'EXIT', Loop, complete}, #state{reader_loop=Loop} = State) -> + handle_reader_loop_complete(State). + +terminate(_Reason, _State) -> + % ?LOG_INFO("rep reader terminating with reason ~p", [_Reason]), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%internal funs + +handle_add_docs(_Seq, [], _From, State) -> + {reply, ok, State}; +handle_add_docs(Seq, DocsToAdd, From, #state{reply_to=nil} = State) -> + State1 = update_sequence_lists(Seq, State), + NewState = State1#state{ + docs = queue:join(State1#state.docs, queue:from_list(DocsToAdd)), + count = State1#state.count + length(DocsToAdd) + }, + if NewState#state.count < ?BUFFER_SIZE -> + {reply, ok, NewState}; + true -> + {noreply, NewState#state{reader_from=[From|State#state.reader_from]}} + end; +handle_add_docs(Seq, DocsToAdd, _From, #state{count=0} = State) -> + NewState = update_sequence_lists(Seq, State), + HighSeq = calculate_new_high_seq(NewState), + gen_server:reply(State#state.reply_to, {HighSeq, DocsToAdd}), + {reply, ok, NewState#state{reply_to=nil}}. + +handle_next_docs(From, #state{count=0} = State) -> + if State#state.complete -> + {stop, normal, {complete, calculate_new_high_seq(State)}, State}; + true -> + {noreply, State#state{reply_to=From}} + end; +handle_next_docs(_From, State) -> + #state{ + reader_from = ReaderFrom, + docs = Docs + } = State, + [gen_server:reply(F, ok) || F <- ReaderFrom], + NewState = State#state{count=0, reader_from=[], docs=queue:new()}, + {reply, {calculate_new_high_seq(State), queue:to_list(Docs)}, NewState}. + +handle_open_remote_doc(Id, Seq, Revs, From, #state{monitor_count=N} = State) + when N > ?MAX_CONCURRENT_REQUESTS -> + {noreply, State#state{pending_doc_request={From,Id,Seq,Revs}}}; +handle_open_remote_doc(Id, Seq, Revs, _, #state{source=#http_db{}} = State) -> + #state{ + monitor_count = Count, + source = Source + } = State, + {_, _Ref} = spawn_document_request(Source, Id, Seq, Revs), + {reply, ok, State#state{monitor_count = Count+1}}. + +handle_monitor_down(normal, #state{pending_doc_request=nil, reply_to=nil, + monitor_count=1, complete=waiting_on_monitors} = State) -> + {noreply, State#state{complete=true, monitor_count=0}}; +handle_monitor_down(normal, #state{pending_doc_request=nil, reply_to=From, + monitor_count=1, complete=waiting_on_monitors} = State) -> + gen_server:reply(From, {complete, calculate_new_high_seq(State)}), + {stop, normal, State#state{complete=true, monitor_count=0}}; +handle_monitor_down(normal, #state{pending_doc_request=nil} = State) -> + #state{monitor_count = Count} = State, + {noreply, State#state{monitor_count = Count-1}}; +handle_monitor_down(normal, State) -> + #state{ + source = Source, + pending_doc_request = {From, Id, Seq, Revs} + } = State, + gen_server:reply(From, ok), + {_, _NewRef} = spawn_document_request(Source, Id, Seq, Revs), + {noreply, State#state{pending_doc_request=nil}}; +handle_monitor_down(Reason, State) -> + {stop, Reason, State}. + +handle_reader_loop_complete(#state{reply_to=nil, monitor_count=0} = State) -> + {noreply, State#state{complete = true}}; +handle_reader_loop_complete(#state{monitor_count=0} = State) -> + HighSeq = calculate_new_high_seq(State), + gen_server:reply(State#state.reply_to, {complete, HighSeq}), + {stop, normal, State}; +handle_reader_loop_complete(State) -> + {noreply, State#state{complete = waiting_on_monitors}}. + +calculate_new_high_seq(#state{requested_seqs=[], opened_seqs=[Open|_]}) -> + Open; +calculate_new_high_seq(#state{requested_seqs=[Req|_], opened_seqs=[Open|_]}) + when Req < Open -> + 0; +calculate_new_high_seq(#state{opened_seqs=[]}) -> + 0; +calculate_new_high_seq(State) -> + hd(State#state.opened_seqs). + +split_revlist(Rev, {[CurrentAcc|Rest], BaseLength, Length}) -> + case Length+size(Rev)+3 > 8192 of + false -> + {[[Rev|CurrentAcc] | Rest], BaseLength, Length+size(Rev)+3}; + true -> + {[[Rev],CurrentAcc|Rest], BaseLength, BaseLength} + end. + +% We store outstanding requested sequences and a subset of already opened +% sequences in 2 ordered lists. The subset of opened seqs is a) the largest +% opened seq smaller than the smallest outstanding request seq plus b) all the +% opened seqs greater than the smallest outstanding request. I believe its the +% minimal set of info needed to correctly calculate which seqs have been +% replicated (because remote docs can be opened out-of-order) -- APK +update_sequence_lists(Seq, State) -> + Requested = lists:delete(Seq, State#state.requested_seqs), + AllOpened = lists:merge([Seq], State#state.opened_seqs), + Opened = case Requested of + [] -> + [lists:last(AllOpened)]; + [EarliestReq|_] -> + case lists:splitwith(fun(X) -> X < EarliestReq end, AllOpened) of + {[], Greater} -> + Greater; + {Less, Greater} -> + [lists:last(Less) | Greater] + end + end, + State#state{ + requested_seqs = Requested, + opened_seqs = Opened + }. + +open_doc_revs(#http_db{url = Url} = DbS, DocId, Revs) -> + %% all this logic just splits up revision lists that are too long for + %% MochiWeb into multiple requests + BaseQS = [{revs,true}, {latest,true}, {att_encoding_info,true}], + BaseReq = DbS#http_db{resource=encode_doc_id(DocId), qs=BaseQS}, + BaseLength = length(couch_rep_httpc:full_url(BaseReq) ++ "&open_revs=[]"), + + {RevLists, _, _} = lists:foldl(fun split_revlist/2, + {[[]], BaseLength, BaseLength}, couch_doc:revs_to_strs(Revs)), + + Requests = [BaseReq#http_db{ + qs = [{open_revs, ?JSON_ENCODE(RevList)} | BaseQS] + } || RevList <- RevLists], + JsonResults = lists:flatten([couch_rep_httpc:request(R) || R <- Requests]), + + Transform = + fun({[{<<"ok">>, Json}]}, Acc) -> + #doc{id=Id, revs=Rev, atts=Atts} = Doc = couch_doc:from_json_obj(Json), + Doc1 = Doc#doc{ + atts=[couch_rep_att:convert_stub(A, {DbS,Id,Rev}) || A <- Atts] + }, + [Doc1 | Acc]; + ({ErrorProps}, Acc) -> + Err = couch_util:get_value(<<"error">>, ErrorProps, + ?JSON_ENCODE({ErrorProps})), + ?LOG_ERROR("Replicator: error accessing doc ~s at ~s, reason: ~s", + [DocId, couch_util:url_strip_password(Url), Err]), + Acc + end, + lists:reverse(lists:foldl(Transform, [], JsonResults)). + +reader_loop(ReaderServer, Parent, Source, MissingRevsServer) -> + case couch_rep_missing_revs:next(MissingRevsServer) of + complete -> + exit(complete); + {_HighSeq, IdsRevs} -> + % to be safe, make sure Results are sorted by source_seq + SortedIdsRevs = lists:keysort(2, IdsRevs), + RequestSeqs = [S || {_,S,_} <- SortedIdsRevs], + gen_server:call(ReaderServer, {add_request_seqs, RequestSeqs}, infinity), + case Source of + #http_db{} -> + [gen_server:call(ReaderServer, {open_remote_doc, Id, Seq, Revs}, + infinity) || {Id,Seq,Revs} <- SortedIdsRevs], + reader_loop(ReaderServer, Parent, Source, MissingRevsServer); + _Local -> + {ok, Source2} = couch_db:open( + Source#db.name, [{user_ctx, Source#db.user_ctx}]), + lists:foreach(fun({Id,Seq,Revs}) -> + {ok, Docs} = couch_db:open_doc_revs(Source2, Id, Revs, [latest]), + JustTheDocs = [Doc || {ok, Doc} <- Docs], + gen_server:call(ReaderServer, {add_docs, Seq, JustTheDocs}, + infinity) + end, SortedIdsRevs), + couch_db:close(Source2), + reader_loop(ReaderServer, Parent, Source2, MissingRevsServer) + end + end. + +spawn_document_request(Source, Id, Seq, Revs) -> + Server = self(), + SpawnFun = fun() -> + Results = open_doc_revs(Source, Id, Revs), + gen_server:call(Server, {add_docs, Seq, Results}, infinity) + end, + spawn_monitor(SpawnFun). diff --git a/apps/couch/src/couch_rep_sup.erl b/apps/couch/src/couch_rep_sup.erl new file mode 100644 index 00000000..1318c598 --- /dev/null +++ b/apps/couch/src/couch_rep_sup.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_sup). +-behaviour(supervisor). +-export([init/1, start_link/0]). + +-include("couch_db.hrl"). + +start_link() -> + supervisor:start_link({local,?MODULE}, ?MODULE, []). + +%%============================================================================= +%% supervisor callbacks +%%============================================================================= + +init([]) -> + {ok, {{one_for_one, 3, 10}, []}}. + +%%============================================================================= +%% internal functions +%%============================================================================= diff --git a/apps/couch/src/couch_rep_writer.erl b/apps/couch/src/couch_rep_writer.erl new file mode 100644 index 00000000..40323925 --- /dev/null +++ b/apps/couch/src/couch_rep_writer.erl @@ -0,0 +1,179 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rep_writer). + +-export([start_link/4]). + +-include("couch_db.hrl"). + +start_link(Parent, _Target, Reader, _PostProps) -> + {ok, spawn_link(fun() -> writer_loop(Parent, Reader) end)}. + +writer_loop(Parent, Reader) -> + case couch_rep_reader:next(Reader) of + {complete, FinalSeq} -> + Parent ! {writer_checkpoint, FinalSeq}, + ok; + {HighSeq, Docs} -> + DocCount = length(Docs), + {ok, Target0} = gen_server:call(Parent, get_target_db, infinity), + Target = open_db(Target0), + try write_docs(Target, Docs) of + {ok, []} -> + Parent ! {update_stats, docs_written, DocCount}; + {ok, Errors} -> + ErrorCount = length(Errors), + Parent ! {update_stats, doc_write_failures, ErrorCount}, + Parent ! {update_stats, docs_written, DocCount - ErrorCount} + catch + {attachment_request_failed, Err} -> + ?LOG_DEBUG("writer failed to write an attachment ~p", [Err]), + exit({attachment_request_failed, Err, Docs}) + after + close_db(Target) + end, + Parent ! {writer_checkpoint, HighSeq}, + couch_rep_att:cleanup(), + couch_util:should_flush(), + writer_loop(Parent, Reader) + end. + +write_docs(#http_db{} = Db, Docs) -> + {DocsAtts, DocsNoAtts} = lists:partition( + fun(#doc{atts=[]}) -> false; (_) -> true end, + Docs + ), + ErrorsJson0 = write_bulk_docs(Db, DocsNoAtts), + ErrorsJson = lists:foldl( + fun(Doc, Acc) -> write_multi_part_doc(Db, Doc) ++ Acc end, + ErrorsJson0, + DocsAtts + ), + {ok, ErrorsJson}; +write_docs(Db, Docs) -> + couch_db:update_docs(Db, Docs, [delay_commit], replicated_changes). + +write_bulk_docs(_Db, []) -> + []; +write_bulk_docs(#http_db{headers = Headers} = Db, Docs) -> + JsonDocs = [ + couch_doc:to_json_obj(Doc, [revs]) || Doc <- Docs + ], + Request = Db#http_db{ + resource = "_bulk_docs", + method = post, + body = {[{new_edits, false}, {docs, JsonDocs}]}, + headers = couch_util:proplist_apply_field({"Content-Type", "application/json"}, [{"X-Couch-Full-Commit", "false"} | Headers]) + }, + ErrorsJson = case couch_rep_httpc:request(Request) of + {FailProps} -> + exit({target_error, couch_util:get_value(<<"error">>, FailProps)}); + List when is_list(List) -> + List + end, + [write_docs_1(V) || V <- ErrorsJson]. + +write_multi_part_doc(#http_db{headers=Headers} = Db, #doc{atts=Atts} = Doc) -> + JsonBytes = ?JSON_ENCODE( + couch_doc:to_json_obj( + Doc, + [follows, att_encoding_info, attachments, revs] + ) + ), + Boundary = couch_uuids:random(), + {ContentType, Len} = couch_doc:len_doc_to_multi_part_stream( + Boundary, JsonBytes, Atts, true + ), + StreamerPid = spawn_link( + fun() -> streamer_fun(Boundary, JsonBytes, Atts) end + ), + BodyFun = fun(Acc) -> + DataQueue = case Acc of + nil -> + StreamerPid ! {start, self()}, + receive + {queue, Q} -> + Q + end; + Queue -> + Queue + end, + case couch_work_queue:dequeue(DataQueue) of + closed -> + eof; + {ok, Data} -> + {ok, iolist_to_binary(Data), DataQueue} + end + end, + Request = Db#http_db{ + resource = couch_util:encode_doc_id(Doc), + method = put, + qs = [{new_edits, false}], + body = {BodyFun, nil}, + headers = [ + {"x-couch-full-commit", "false"}, + {"Content-Type", ?b2l(ContentType)}, + {"Content-Length", Len} | Headers + ] + }, + Result = case couch_rep_httpc:request(Request) of + {[{<<"error">>, Error}, {<<"reason">>, Reason}]} -> + {Pos, [RevId | _]} = Doc#doc.revs, + ErrId = couch_util:to_existing_atom(Error), + [{Doc#doc.id, couch_doc:rev_to_str({Pos, RevId})}, {ErrId, Reason}]; + _ -> + [] + end, + StreamerPid ! stop, + Result. + +streamer_fun(Boundary, JsonBytes, Atts) -> + receive + stop -> + ok; + {start, From} -> + % better use a brand new queue, to ensure there's no garbage from + % a previous (failed) iteration + {ok, DataQueue} = couch_work_queue:new( + [{max_size, 1024 * 1024}, {max_items, 1000}]), + From ! {queue, DataQueue}, + couch_doc:doc_to_multi_part_stream( + Boundary, + JsonBytes, + Atts, + fun(Data) -> + couch_work_queue:queue(DataQueue, Data) + end, + true + ), + couch_work_queue:close(DataQueue), + streamer_fun(Boundary, JsonBytes, Atts) + end. + +write_docs_1({Props}) -> + Id = couch_util:get_value(<<"id">>, Props), + Rev = couch_doc:parse_rev(couch_util:get_value(<<"rev">>, Props)), + ErrId = couch_util:to_existing_atom(couch_util:get_value(<<"error">>, Props)), + Reason = couch_util:get_value(<<"reason">>, Props), + {{Id, Rev}, {ErrId, Reason}}. + +open_db(#db{name = Name, user_ctx = UserCtx}) -> + {ok, Db} = couch_db:open(Name, [{user_ctx, UserCtx}]), + Db; +open_db(HttpDb) -> + HttpDb. + +close_db(#db{} = Db) -> + couch_db:close(Db); +close_db(_HttpDb) -> + ok. diff --git a/apps/couch/src/couch_replication_manager.erl b/apps/couch/src/couch_replication_manager.erl new file mode 100644 index 00000000..3715cea1 --- /dev/null +++ b/apps/couch/src/couch_replication_manager.erl @@ -0,0 +1,629 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replication_manager). +-behaviour(gen_server). + +% public API +-export([replication_started/1, replication_completed/1, replication_error/2]). + +% gen_server callbacks +-export([start_link/0, init/1, handle_call/3, handle_info/2, handle_cast/2]). +-export([code_change/3, terminate/2]). + +-include("couch_db.hrl"). +-include("couch_js_functions.hrl"). + +-define(DOC_TO_REP, couch_rep_doc_id_to_rep_id). +-define(REP_TO_STATE, couch_rep_id_to_rep_state). +-define(INITIAL_WAIT, 2.5). % seconds +-define(MAX_WAIT, 600). % seconds + +-record(state, { + changes_feed_loop = nil, + db_notifier = nil, + rep_db_name = nil, + rep_start_pids = [], + max_retries +}). + +-record(rep_state, { + doc_id, + user_ctx, + doc, + starting, + retries_left, + max_retries, + wait = ?INITIAL_WAIT +}). + +-import(couch_util, [ + get_value/2, + get_value/3, + to_binary/1 +]). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +replication_started({BaseId, _} = RepId) -> + case rep_state(RepId) of + nil -> + ok; + #rep_state{doc_id = DocId} -> + update_rep_doc(DocId, [ + {<<"_replication_state">>, <<"triggered">>}, + {<<"_replication_id">>, ?l2b(BaseId)}]), + ok = gen_server:call(?MODULE, {rep_started, RepId}, infinity), + ?LOG_INFO("Document `~s` triggered replication `~s`", + [DocId, pp_rep_id(RepId)]) + end. + + +replication_completed(RepId) -> + case rep_state(RepId) of + nil -> + ok; + #rep_state{doc_id = DocId} -> + update_rep_doc(DocId, [{<<"_replication_state">>, <<"completed">>}]), + ok = gen_server:call(?MODULE, {rep_complete, RepId}, infinity), + ?LOG_INFO("Replication `~s` finished (triggered by document `~s`)", + [pp_rep_id(RepId), DocId]) + end. + + +replication_error({BaseId, _} = RepId, Error) -> + case rep_state(RepId) of + nil -> + ok; + #rep_state{doc_id = DocId} -> + % TODO: maybe add error reason to replication document + update_rep_doc(DocId, [ + {<<"_replication_state">>, <<"error">>}, + {<<"_replication_id">>, ?l2b(BaseId)}]), + ok = gen_server:call(?MODULE, {rep_error, RepId, Error}, infinity) + end. + + +init(_) -> + process_flag(trap_exit, true), + ?DOC_TO_REP = ets:new(?DOC_TO_REP, [named_table, set, protected]), + ?REP_TO_STATE = ets:new(?REP_TO_STATE, [named_table, set, protected]), + Server = self(), + ok = couch_config:register( + fun("replicator", "db", NewName) -> + ok = gen_server:cast(Server, {rep_db_changed, ?l2b(NewName)}); + ("replicator", "max_replication_retry_count", V) -> + ok = gen_server:cast(Server, {set_max_retries, retries_value(V)}) + end + ), + {Loop, RepDbName} = changes_feed_loop(), + {ok, #state{ + changes_feed_loop = Loop, + rep_db_name = RepDbName, + db_notifier = db_update_notifier(), + max_retries = retries_value( + couch_config:get("replicator", "max_replication_retry_count", "10")) + }}. + + +handle_call({rep_db_update, {ChangeProps} = Change}, _From, State) -> + NewState = try + process_update(State, Change) + catch + _Tag:Error -> + {RepProps} = get_value(doc, ChangeProps), + DocId = get_value(<<"_id">>, RepProps), + rep_db_update_error(Error, DocId), + State + end, + {reply, ok, NewState}; + +handle_call({rep_started, RepId}, _From, State) -> + case rep_state(RepId) of + nil -> + ok; + RepState -> + NewRepState = RepState#rep_state{ + starting = false, + retries_left = State#state.max_retries, + max_retries = State#state.max_retries, + wait = ?INITIAL_WAIT + }, + true = ets:insert(?REP_TO_STATE, {RepId, NewRepState}) + end, + {reply, ok, State}; + +handle_call({rep_complete, RepId}, _From, State) -> + true = ets:delete(?REP_TO_STATE, RepId), + {reply, ok, State}; + +handle_call({rep_error, RepId, Error}, _From, State) -> + {reply, ok, replication_error(State, RepId, Error)}; + +handle_call(Msg, From, State) -> + ?LOG_ERROR("Replication manager received unexpected call ~p from ~p", + [Msg, From]), + {stop, {error, {unexpected_call, Msg}}, State}. + + +handle_cast({rep_db_changed, NewName}, #state{rep_db_name = NewName} = State) -> + {noreply, State}; + +handle_cast({rep_db_changed, _NewName}, State) -> + {noreply, restart(State)}; + +handle_cast({rep_db_created, NewName}, #state{rep_db_name = NewName} = State) -> + {noreply, State}; + +handle_cast({rep_db_created, _NewName}, State) -> + {noreply, restart(State)}; + +handle_cast({set_max_retries, MaxRetries}, State) -> + {noreply, State#state{max_retries = MaxRetries}}; + +handle_cast(Msg, State) -> + ?LOG_ERROR("Replication manager received unexpected cast ~p", [Msg]), + {stop, {error, {unexpected_cast, Msg}}, State}. + + +handle_info({'EXIT', From, normal}, #state{changes_feed_loop = From} = State) -> + % replicator DB deleted + {noreply, State#state{changes_feed_loop = nil, rep_db_name = nil}}; + +handle_info({'EXIT', From, Reason}, #state{db_notifier = From} = State) -> + ?LOG_ERROR("Database update notifier died. Reason: ~p", [Reason]), + {stop, {db_update_notifier_died, Reason}, State}; + +handle_info({'EXIT', From, normal}, #state{rep_start_pids = Pids} = State) -> + % one of the replication start processes terminated successfully + {noreply, State#state{rep_start_pids = Pids -- [From]}}; + +handle_info({'DOWN', _Ref, _, _, _}, State) -> + % From a db monitor created by a replication process. Ignore. + {noreply, State}; + +handle_info(Msg, State) -> + ?LOG_ERROR("Replication manager received unexpected message ~p", [Msg]), + {stop, {unexpected_msg, Msg}, State}. + + +terminate(_Reason, State) -> + #state{ + rep_start_pids = StartPids, + changes_feed_loop = Loop, + db_notifier = Notifier + } = State, + stop_all_replications(), + lists:foreach( + fun(Pid) -> + catch unlink(Pid), + catch exit(Pid, stop) + end, + [Loop | StartPids]), + true = ets:delete(?REP_TO_STATE), + true = ets:delete(?DOC_TO_REP), + couch_db_update_notifier:stop(Notifier). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +changes_feed_loop() -> + {ok, RepDb} = ensure_rep_db_exists(), + Server = self(), + Pid = spawn_link( + fun() -> + ChangesFeedFun = couch_changes:handle_changes( + #changes_args{ + include_docs = true, + feed = "continuous", + timeout = infinity, + db_open_options = [sys_db] + }, + {json_req, null}, + RepDb + ), + ChangesFeedFun( + fun({change, Change, _}, _) -> + case has_valid_rep_id(Change) of + true -> + ok = gen_server:call( + Server, {rep_db_update, Change}, infinity); + false -> + ok + end; + (_, _) -> + ok + end + ) + end + ), + couch_db:close(RepDb), + {Pid, couch_db:name(RepDb)}. + + +has_valid_rep_id({Change}) -> + has_valid_rep_id(get_value(<<"id">>, Change)); +has_valid_rep_id(<<?DESIGN_DOC_PREFIX, _Rest/binary>>) -> + false; +has_valid_rep_id(_Else) -> + true. + + +db_update_notifier() -> + Server = self(), + {ok, Notifier} = couch_db_update_notifier:start_link( + fun({created, DbName}) -> + case ?l2b(couch_config:get("replicator", "db", "_replicator")) of + DbName -> + ok = gen_server:cast(Server, {rep_db_created, DbName}); + _ -> + ok + end; + (_) -> + % no need to handle the 'deleted' event - the changes feed loop + % dies when the database is deleted + ok + end + ), + Notifier. + + +restart(#state{changes_feed_loop = Loop, rep_start_pids = StartPids} = State) -> + stop_all_replications(), + lists:foreach( + fun(Pid) -> + catch unlink(Pid), + catch exit(Pid, rep_db_changed) + end, + [Loop | StartPids]), + {NewLoop, NewRepDbName} = changes_feed_loop(), + State#state{ + changes_feed_loop = NewLoop, + rep_db_name = NewRepDbName, + rep_start_pids = [] + }. + + +process_update(State, {Change}) -> + {RepProps} = JsonRepDoc = get_value(doc, Change), + DocId = get_value(<<"_id">>, RepProps), + case get_value(<<"deleted">>, Change, false) of + true -> + rep_doc_deleted(DocId), + State; + false -> + case get_value(<<"_replication_state">>, RepProps) of + undefined -> + maybe_start_replication(State, DocId, JsonRepDoc); + <<"triggered">> -> + maybe_start_replication(State, DocId, JsonRepDoc); + <<"completed">> -> + replication_complete(DocId), + State; + <<"error">> -> + case ets:lookup(?DOC_TO_REP, DocId) of + [] -> + maybe_start_replication(State, DocId, JsonRepDoc); + _ -> + State + end + end + end. + + +rep_db_update_error(Error, DocId) -> + case Error of + {bad_rep_doc, Reason} -> + ok; + _ -> + Reason = to_binary(Error) + end, + ?LOG_ERROR("Replication manager, error processing document `~s`: ~s", + [DocId, Reason]), + update_rep_doc(DocId, [{<<"_replication_state">>, <<"error">>}]). + + +rep_user_ctx({RepDoc}) -> + case get_value(<<"user_ctx">>, RepDoc) of + undefined -> + #user_ctx{}; + {UserCtx} -> + #user_ctx{ + name = get_value(<<"name">>, UserCtx, null), + roles = get_value(<<"roles">>, UserCtx, []) + } + end. + + +maybe_start_replication(State, DocId, RepDoc) -> + UserCtx = rep_user_ctx(RepDoc), + {BaseId, _} = RepId = make_rep_id(RepDoc, UserCtx), + case rep_state(RepId) of + nil -> + RepState = #rep_state{ + doc_id = DocId, + user_ctx = UserCtx, + doc = RepDoc, + starting = true, + retries_left = State#state.max_retries, + max_retries = State#state.max_retries + }, + true = ets:insert(?REP_TO_STATE, {RepId, RepState}), + true = ets:insert(?DOC_TO_REP, {DocId, RepId}), + ?LOG_INFO("Attempting to start replication `~s` (document `~s`).", + [pp_rep_id(RepId), DocId]), + Server = self(), + Pid = spawn_link(fun() -> + start_replication(Server, RepDoc, RepId, UserCtx, 0) + end), + State#state{rep_start_pids = [Pid | State#state.rep_start_pids]}; + #rep_state{doc_id = DocId} -> + State; + #rep_state{starting = false, doc_id = OtherDocId} -> + ?LOG_INFO("The replication specified by the document `~s` was already" + " triggered by the document `~s`", [DocId, OtherDocId]), + maybe_tag_rep_doc(DocId, RepDoc, ?l2b(BaseId)), + State; + #rep_state{starting = true, doc_id = OtherDocId} -> + ?LOG_INFO("The replication specified by the document `~s` is already" + " being triggered by the document `~s`", [DocId, OtherDocId]), + maybe_tag_rep_doc(DocId, RepDoc, ?l2b(BaseId)), + State + end. + + +make_rep_id(RepDoc, UserCtx) -> + try + couch_rep:make_replication_id(RepDoc, UserCtx) + catch + throw:{error, Reason} -> + throw({bad_rep_doc, Reason}); + Tag:Err -> + throw({bad_rep_doc, to_binary({Tag, Err})}) + end. + + +maybe_tag_rep_doc(DocId, {RepProps}, RepId) -> + case get_value(<<"_replication_id">>, RepProps) of + RepId -> + ok; + _ -> + update_rep_doc(DocId, [{<<"_replication_id">>, RepId}]) + end. + + +start_replication(Server, RepDoc, RepId, UserCtx, Wait) -> + ok = timer:sleep(Wait * 1000), + case (catch couch_rep:start_replication(RepDoc, RepId, UserCtx, ?MODULE)) of + Pid when is_pid(Pid) -> + ok = gen_server:call(Server, {rep_started, RepId}, infinity), + couch_rep:get_result(Pid, RepId, RepDoc, UserCtx); + Error -> + replication_error(RepId, Error) + end. + + +replication_complete(DocId) -> + case ets:lookup(?DOC_TO_REP, DocId) of + [{DocId, RepId}] -> + case rep_state(RepId) of + nil -> + couch_rep:end_replication(RepId); + #rep_state{} -> + ok + end, + true = ets:delete(?DOC_TO_REP, DocId); + _ -> + ok + end. + + +rep_doc_deleted(DocId) -> + case ets:lookup(?DOC_TO_REP, DocId) of + [{DocId, RepId}] -> + couch_rep:end_replication(RepId), + true = ets:delete(?REP_TO_STATE, RepId), + true = ets:delete(?DOC_TO_REP, DocId), + ?LOG_INFO("Stopped replication `~s` because replication document `~s`" + " was deleted", [pp_rep_id(RepId), DocId]); + [] -> + ok + end. + + +replication_error(State, RepId, Error) -> + case rep_state(RepId) of + nil -> + State; + RepState -> + maybe_retry_replication(RepId, RepState, Error, State) + end. + +maybe_retry_replication(RepId, #rep_state{retries_left = 0} = RepState, Error, State) -> + #rep_state{ + doc_id = DocId, + max_retries = MaxRetries + } = RepState, + couch_rep:end_replication(RepId), + true = ets:delete(?REP_TO_STATE, RepId), + true = ets:delete(?DOC_TO_REP, DocId), + ?LOG_ERROR("Error in replication `~s` (triggered by document `~s`): ~s" + "~nReached maximum retry attempts (~p).", + [pp_rep_id(RepId), DocId, to_binary(error_reason(Error)), MaxRetries]), + State; + +maybe_retry_replication(RepId, RepState, Error, State) -> + #rep_state{ + doc_id = DocId, + user_ctx = UserCtx, + doc = RepDoc + } = RepState, + #rep_state{wait = Wait} = NewRepState = state_after_error(RepState), + true = ets:insert(?REP_TO_STATE, {RepId, NewRepState}), + ?LOG_ERROR("Error in replication `~s` (triggered by document `~s`): ~s" + "~nRestarting replication in ~p seconds.", + [pp_rep_id(RepId), DocId, to_binary(error_reason(Error)), Wait]), + Server = self(), + Pid = spawn_link(fun() -> + start_replication(Server, RepDoc, RepId, UserCtx, Wait) + end), + State#state{rep_start_pids = [Pid | State#state.rep_start_pids]}. + + +stop_all_replications() -> + ?LOG_INFO("Stopping all ongoing replications because the replicator" + " database was deleted or changed", []), + ets:foldl( + fun({_, RepId}, _) -> + couch_rep:end_replication(RepId) + end, + ok, ?DOC_TO_REP), + true = ets:delete_all_objects(?REP_TO_STATE), + true = ets:delete_all_objects(?DOC_TO_REP). + + +update_rep_doc(RepDocId, KVs) -> + {ok, RepDb} = ensure_rep_db_exists(), + try + case couch_db:open_doc(RepDb, RepDocId, []) of + {ok, LatestRepDoc} -> + update_rep_doc(RepDb, LatestRepDoc, KVs); + _ -> + ok + end + catch throw:conflict -> + % Shouldn't happen, as by default only the role _replicator can + % update replication documents. + ?LOG_ERROR("Conflict error when updating replication document `~s`." + " Retrying.", [RepDocId]), + ok = timer:sleep(5), + update_rep_doc(RepDocId, KVs) + after + couch_db:close(RepDb) + end. + +update_rep_doc(RepDb, #doc{body = {RepDocBody}} = RepDoc, KVs) -> + NewRepDocBody = lists:foldl( + fun({<<"_replication_state">> = K, State} = KV, Body) -> + case get_value(K, Body) of + State -> + Body; + _ -> + Body1 = lists:keystore(K, 1, Body, KV), + lists:keystore( + <<"_replication_state_time">>, 1, Body1, + {<<"_replication_state_time">>, timestamp()}) + end; + ({K, _V} = KV, Body) -> + lists:keystore(K, 1, Body, KV) + end, + RepDocBody, KVs), + case NewRepDocBody of + RepDocBody -> + ok; + _ -> + % Might not succeed - when the replication doc is deleted right + % before this update (not an error, ignore). + couch_db:update_doc(RepDb, RepDoc#doc{body = {NewRepDocBody}}, []) + end. + + +% RFC3339 timestamps. +% Note: doesn't include the time seconds fraction (RFC3339 says it's optional). +timestamp() -> + {{Year, Month, Day}, {Hour, Min, Sec}} = calendar:now_to_local_time(now()), + UTime = erlang:universaltime(), + LocalTime = calendar:universal_time_to_local_time(UTime), + DiffSecs = calendar:datetime_to_gregorian_seconds(LocalTime) - + calendar:datetime_to_gregorian_seconds(UTime), + zone(DiffSecs div 3600, (DiffSecs rem 3600) div 60), + iolist_to_binary( + io_lib:format("~4..0w-~2..0w-~2..0wT~2..0w:~2..0w:~2..0w~s", + [Year, Month, Day, Hour, Min, Sec, + zone(DiffSecs div 3600, (DiffSecs rem 3600) div 60)])). + +zone(Hr, Min) when Hr >= 0, Min >= 0 -> + io_lib:format("+~2..0w:~2..0w", [Hr, Min]); +zone(Hr, Min) -> + io_lib:format("-~2..0w:~2..0w", [abs(Hr), abs(Min)]). + + +ensure_rep_db_exists() -> + DbName = ?l2b(couch_config:get("replicator", "db", "_replicator")), + Opts = [ + {user_ctx, #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}}, + sys_db + ], + case couch_db:open(DbName, Opts) of + {ok, Db} -> + Db; + _Error -> + {ok, Db} = couch_db:create(DbName, Opts) + end, + ok = ensure_rep_ddoc_exists(Db, <<"_design/_replicator">>), + {ok, Db}. + + +ensure_rep_ddoc_exists(RepDb, DDocID) -> + case couch_db:open_doc(RepDb, DDocID, []) of + {ok, _Doc} -> + ok; + _ -> + DDoc = couch_doc:from_json_obj({[ + {<<"_id">>, DDocID}, + {<<"language">>, <<"javascript">>}, + {<<"validate_doc_update">>, ?REP_DB_DOC_VALIDATE_FUN} + ]}), + {ok, _Rev} = couch_db:update_doc(RepDb, DDoc, []) + end, + ok. + + +% pretty-print replication id +pp_rep_id({Base, Extension}) -> + Base ++ Extension. + + +rep_state(RepId) -> + case ets:lookup(?REP_TO_STATE, RepId) of + [{RepId, RepState}] -> + RepState; + [] -> + nil + end. + + +error_reason({error, Reason}) -> + Reason; +error_reason(Reason) -> + Reason. + + +retries_value("infinity") -> + infinity; +retries_value(Value) -> + list_to_integer(Value). + + +state_after_error(#rep_state{retries_left = Left, wait = Wait} = State) -> + Wait2 = erlang:min(trunc(Wait * 2), ?MAX_WAIT), + case Left of + infinity -> + State#rep_state{wait = Wait2}; + _ -> + State#rep_state{retries_left = Left - 1, wait = Wait2} + end. diff --git a/apps/couch/src/couch_secondary_sup.erl b/apps/couch/src/couch_secondary_sup.erl new file mode 100644 index 00000000..8ccbd799 --- /dev/null +++ b/apps/couch/src/couch_secondary_sup.erl @@ -0,0 +1,35 @@ +-module(couch_secondary_sup). +-behaviour(supervisor). +-export([init/1, start_link/0]). + +start_link() -> + supervisor:start_link({local,couch_secondary_services}, ?MODULE, []). +init([]) -> + SecondarySupervisors = [ + {couch_db_update_notifier_sup, + {couch_db_update_notifier_sup, start_link, []}, + permanent, + infinity, + supervisor, + [couch_db_update_notifier_sup]}, + {couch_metrics_event_manager, + {gen_event, start_link, [{local, couch_metrics_event_manager}]}, + permanent, + brutal_kill, + worker, + dynamic} + ], + Children = SecondarySupervisors ++ [ + begin + {ok, {Module, Fun, Args}} = couch_util:parse_term(SpecStr), + + {list_to_atom(Name), + {Module, Fun, Args}, + permanent, + brutal_kill, + worker, + [Module]} + end + || {Name, SpecStr} + <- couch_config:get("daemons"), SpecStr /= ""], + {ok, {{one_for_one, 10, 3600}, Children}}. diff --git a/apps/couch/src/couch_server.erl b/apps/couch/src/couch_server.erl new file mode 100644 index 00000000..f9c960c5 --- /dev/null +++ b/apps/couch/src/couch_server.erl @@ -0,0 +1,379 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_server). +-behaviour(gen_server). + +-export([open/2,create/2,delete/2,all_databases/0,all_databases/1]). +-export([init/1, handle_call/3,sup_start_link/0]). +-export([handle_cast/2,code_change/3,handle_info/2,terminate/2]). +-export([dev_start/0,is_admin/2,has_admins/0,get_stats/0,config_change/4]). +-export([close_lru/0]). + +-include("couch_db.hrl"). + +-record(server,{ + root_dir = [], + dbname_regexp, + max_dbs_open=100, + dbs_open=0, + start_time="" + }). + +dev_start() -> + couch:stop(), + up_to_date = make:all([load, debug_info]), + couch:start(). + +get_stats() -> + {ok, #server{start_time=Time,dbs_open=Open}} = + gen_server:call(couch_server, get_server), + [{start_time, ?l2b(Time)}, {dbs_open, Open}]. + +sup_start_link() -> + gen_server:start_link({local, couch_server}, couch_server, [], []). + + +open(DbName, Options) -> + Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + case ets:lookup(couch_dbs, DbName) of + [#db{fd=Fd, fd_monitor=Lock} = Db] when Lock =/= locked -> + ets:insert(couch_lru, {DbName, now()}), + {ok, Db#db{user_ctx=Ctx, fd_monitor=erlang:monitor(process,Fd)}}; + _ -> + Timeout = couch_util:get_value(timeout, Options, infinity), + case gen_server:call(couch_server, {open, DbName, Options}, Timeout) of + {ok, #db{fd=Fd} = Db} -> + ets:insert(couch_lru, {DbName, now()}), + {ok, Db#db{user_ctx=Ctx, fd_monitor=erlang:monitor(process,Fd)}}; + Error -> + Error + end + end. + +close_lru() -> + gen_server:call(couch_server, close_lru). + +create(DbName, Options) -> + case gen_server:call(couch_server, {create, DbName, Options}, infinity) of + {ok, #db{fd=Fd} = Db} -> + Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + {ok, Db#db{user_ctx=Ctx, fd_monitor=erlang:monitor(process,Fd)}}; + Error -> + Error + end. + +delete(DbName, Options) -> + gen_server:call(couch_server, {delete, DbName, Options}, infinity). + +check_dbname(#server{dbname_regexp=RegExp}, DbName) -> + case re:run(DbName, RegExp, [{capture, none}]) of + nomatch -> + case DbName of + "_users" -> ok; + "_replicator" -> ok; + _Else -> + {error, illegal_database_name} + end; + match -> + ok + end. + +is_admin(User, ClearPwd) -> + case couch_config:get("admins", User) of + "-hashed-" ++ HashedPwdAndSalt -> + [HashedPwd, Salt] = string:tokens(HashedPwdAndSalt, ","), + couch_util:to_hex(crypto:sha(ClearPwd ++ Salt)) == HashedPwd; + _Else -> + false + end. + +has_admins() -> + couch_config:get("admins") /= []. + +get_full_filename(Server, DbName) -> + filename:join([Server#server.root_dir, "./" ++ DbName ++ ".couch"]). + +hash_admin_passwords() -> + hash_admin_passwords(true). + +hash_admin_passwords(Persist) -> + lists:foreach( + fun({_User, "-hashed-" ++ _}) -> + ok; % already hashed + ({User, ClearPassword}) -> + Salt = ?b2l(couch_uuids:random()), + Hashed = couch_util:to_hex(crypto:sha(ClearPassword ++ Salt)), + couch_config:set("admins", + User, "-hashed-" ++ Hashed ++ "," ++ Salt, Persist) + end, couch_config:get("admins")). + +init([]) -> + % read config and register for configuration changes + + % just stop if one of the config settings change. couch_server_sup + % will restart us and then we will pick up the new settings. + + RootDir = couch_config:get("couchdb", "database_dir", "."), + MaxDbsOpen = list_to_integer( + couch_config:get("couchdb", "max_dbs_open")), + ok = couch_config:register(fun ?MODULE:config_change/4), + ok = couch_file:init_delete_dir(RootDir), + hash_admin_passwords(), + {ok, RegExp} = re:compile( + "^[a-z][a-z0-9\\_\\$()\\+\\-\\/]*" % use the stock CouchDB regex + "(\\.[0-9]{10,})?$" % but allow an optional shard timestamp at the end + ), + ets:new(couch_dbs, [set, protected, named_table, {keypos, #db.name}]), + ets:new(couch_lru, [set, public, named_table]), + process_flag(trap_exit, true), + {ok, #server{root_dir=RootDir, + dbname_regexp=RegExp, + max_dbs_open=MaxDbsOpen, + start_time=httpd_util:rfc1123_date()}}. + +terminate(_Reason, _Srv) -> + ets:foldl(fun(#db{main_pid=Pid}, _) -> couch_util:shutdown_sync(Pid) end, + nil, couch_dbs), + ok. + +config_change("couchdb", "database_dir", _, _) -> + exit(whereis(couch_server), config_change); +config_change("couchdb", "max_dbs_open", Max, _) -> + gen_server:call(couch_server, {set_max_dbs_open, list_to_integer(Max)}); +config_change("admins", _, _, Persist) -> + % spawn here so couch_config doesn't try to call itself + spawn(fun() -> hash_admin_passwords(Persist) end). + +all_databases() -> + all_databases(""). + +all_databases(Prefix) -> + {ok, #server{root_dir=Root}} = gen_server:call(couch_server, get_server), + NormRoot = couch_util:normpath(Root), + Filenames = + filelib:fold_files(Root++Prefix, + "^[a-z0-9\\_\\$()\\+\\-]*" % stock CouchDB name regex + "(\\.[0-9]{10,})?" % optional shard timestamp + "\\.couch$", % filename extenstion + true, + fun(Filename, AccIn) -> + NormFilename = couch_util:normpath(Filename), + case NormFilename -- NormRoot of + [$/ | RelativeFilename] -> ok; + RelativeFilename -> ok + end, + [list_to_binary(filename:rootname(RelativeFilename, ".couch")) | AccIn] + end, []), + {ok, lists:usort(Filenames)}. + + +maybe_close_lru_db(#server{dbs_open=NumOpen, max_dbs_open=MaxOpen}=Server) + when NumOpen < MaxOpen -> + {ok, Server}; +maybe_close_lru_db(#server{dbs_open=NumOpen}=Server) -> + % must free up the lru db. + case try_close_lru(now()) of + ok -> + {ok, Server#server{dbs_open=NumOpen - 1}}; + Error -> Error + end. + +find_oldest_db({DbName, Lru}, Acc) -> + erlang:min({Lru, DbName}, Acc). + +try_close_lru(StartTime) -> + case ets:foldl(fun find_oldest_db/2, {StartTime, nil}, couch_lru) of + {StartTime, nil} -> + {error, all_dbs_active}; + {_, DbName} -> + % There may exist an extremely small possibility of a race + % condition here, if a process could lookup the DB before the lock, + % but fail to monitor the fd before the is_idle check. + true = ets:update_element(couch_dbs, DbName, {#db.fd_monitor, locked}), + [#db{main_pid = Pid} = Db] = ets:lookup(couch_dbs, DbName), + case couch_db:is_idle(Db) of true -> + true = ets:delete(couch_dbs, DbName), + true = ets:delete(couch_lru, DbName), + exit(Pid, kill), + ok; + false -> + true = ets:update_element(couch_dbs, DbName, {#db.fd_monitor, nil}), + true = ets:insert(couch_lru, {DbName, now()}), + try_close_lru(StartTime) + end + end. + +open_async(Server, From, DbName, Filepath, Options) -> + Parent = self(), + Opener = spawn_link(fun() -> + Res = couch_db:start_link(DbName, Filepath, Options), + gen_server:call(Parent, {open_result, DbName, Res, Options}, infinity), + unlink(Parent) + end), + % icky hack of field values - compactor_pid used to store clients + true = ets:insert(couch_dbs, #db{ + name = DbName, + main_pid = Opener, + compactor_pid = [From], + fd_monitor = locked + }), + Server#server{dbs_open=Server#server.dbs_open + 1}. + +handle_call(close_lru, _From, #server{dbs_open=N} = Server) -> + case try_close_lru(now()) of + ok -> + {reply, ok, Server#server{dbs_open = N-1}}; + Error -> + {reply, Error, Server} + end; +handle_call(open_dbs_count, _From, Server) -> + {reply, Server#server.dbs_open, Server}; +handle_call({set_dbname_regexp, RegExp}, _From, Server) -> + {reply, ok, Server#server{dbname_regexp=RegExp}}; +handle_call({set_max_dbs_open, Max}, _From, Server) -> + {reply, ok, Server#server{max_dbs_open=Max}}; +handle_call(get_server, _From, Server) -> + {reply, {ok, Server}, Server}; +handle_call({open_result, DbName, {ok, Db}, Options}, _From, Server) -> + link(Db#db.main_pid), + % icky hack of field values - compactor_pid used to store clients + [#db{compactor_pid=Froms}] = ets:lookup(couch_dbs, DbName), + [gen_server:reply(From, {ok, Db}) || From <- Froms], + true = ets:insert(couch_dbs, Db), + true = ets:insert(couch_lru, {DbName, now()}), + case lists:member(create, Options) of + true -> + couch_db_update_notifier:notify({created, DbName}); + false -> + ok + end, + {reply, ok, Server}; +handle_call({open_result, DbName, Error, _Options}, _From, Server) -> + % icky hack of field values - compactor_pid used to store clients + [#db{compactor_pid=Froms}] = ets:lookup(couch_dbs, DbName), + [gen_server:reply(From, Error) || From <- Froms], + true = ets:delete(couch_dbs, DbName), + {reply, ok, Server#server{dbs_open=Server#server.dbs_open - 1}}; +handle_call({open, DbName, Options}, From, Server) -> + case ets:lookup(couch_dbs, DbName) of + [] -> + DbNameList = binary_to_list(DbName), + case check_dbname(Server, DbNameList) of + ok -> + case maybe_close_lru_db(Server) of + {ok, Server2} -> + Filepath = get_full_filename(Server, DbNameList), + {noreply, open_async(Server2, From, DbName, Filepath, Options)}; + CloseError -> + {reply, CloseError, Server} + end; + Error -> + {reply, Error, Server} + end; + [#db{compactor_pid = Froms} = Db] when is_list(Froms) -> + % icky hack of field values - compactor_pid used to store clients + true = ets:insert(couch_dbs, Db#db{compactor_pid = [From|Froms]}), + {noreply, Server}; + [#db{} = Db] -> + {reply, {ok, Db}, Server} + end; +handle_call({create, DbName, Options}, From, Server) -> + DbNameList = binary_to_list(DbName), + case check_dbname(Server, DbNameList) of + ok -> + case ets:lookup(couch_dbs, DbName) of + [] -> + case maybe_close_lru_db(Server) of + {ok, Server2} -> + Filepath = get_full_filename(Server, DbNameList), + {noreply, open_async(Server2, From, DbName, Filepath, + [create | Options])}; + CloseError -> + {reply, CloseError, Server} + end; + [_AlreadyRunningDb] -> + {reply, file_exists, Server} + end; + Error -> + {reply, Error, Server} + end; +handle_call({delete, DbName, _Options}, _From, Server) -> + DbNameList = binary_to_list(DbName), + case check_dbname(Server, DbNameList) of + ok -> + FullFilepath = get_full_filename(Server, DbNameList), + Server2 = + case ets:lookup(couch_dbs, DbName) of + [] -> Server; + [#db{main_pid=Pid, compactor_pid=Froms}] when is_list(Froms) -> + % icky hack of field values - compactor_pid used to store clients + true = ets:delete(couch_dbs, DbName), + true = ets:delete(couch_lru, DbName), + exit(Pid, kill), + [gen_server:reply(F, not_found) || F <- Froms], + Server#server{dbs_open=Server#server.dbs_open - 1}; + [#db{main_pid=Pid}] -> + true = ets:delete(couch_dbs, DbName), + true = ets:delete(couch_lru, DbName), + exit(Pid, kill), + Server#server{dbs_open=Server#server.dbs_open - 1} + end, + + %% Delete any leftover .compact files. If we don't do this a subsequent + %% request for this DB will try to open the .compact file and use it. + couch_file:delete(Server#server.root_dir, FullFilepath ++ ".compact"), + + case couch_file:delete(Server#server.root_dir, FullFilepath) of + ok -> + couch_db_update_notifier:notify({deleted, DbName}), + {reply, ok, Server2}; + {error, enoent} -> + {reply, not_found, Server2}; + Else -> + {reply, Else, Server2} + end; + Error -> + {reply, Error, Server} + end; +handle_call({db_updated, #db{name = DbName} = Db}, _From, Server) -> + true = ets:insert(couch_dbs, Db), + true = ets:insert(couch_lru, {DbName, now()}), + {reply, ok, Server}. + + +handle_cast(Msg, Server) -> + {stop, {unknown_cast_message, Msg}, Server}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info({'EXIT', _Pid, config_change}, Server) -> + {stop, config_change, Server}; +handle_info({'EXIT', Pid, Reason}, #server{dbs_open=DbsOpen}=Server) -> + case ets:match_object(couch_dbs, #db{main_pid=Pid, _='_'}) of + [#db{name = DbName, compactor_pid=Froms}] -> + ?LOG_INFO("db ~s died with reason ~p", [DbName, Reason]), + % icky hack of field values - compactor_pid used to store clients + if is_list(Froms) -> + [gen_server:reply(From, Reason) || From <- Froms]; + true -> + ok + end, + true = ets:delete(couch_dbs, DbName), + true = ets:delete(couch_lru, DbName), + {noreply, Server#server{dbs_open=DbsOpen - 1}}; + [] -> + {noreply, Server} + end; +handle_info(Info, Server) -> + {stop, {unknown_message, Info}, Server}. diff --git a/apps/couch/src/couch_server_sup.erl b/apps/couch/src/couch_server_sup.erl new file mode 100644 index 00000000..726e397f --- /dev/null +++ b/apps/couch/src/couch_server_sup.erl @@ -0,0 +1,155 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_server_sup). +-behaviour(supervisor). + + +-export([start_link/1, couch_config_start_link_wrapper/2, + restart_core_server/0, config_change/2]). + +-include("couch_db.hrl"). + +%% supervisor callbacks +-export([init/1]). + +start_link(IniFiles) -> + case whereis(couch_server_sup) of + undefined -> + start_server(IniFiles); + _Else -> + {error, already_started} + end. + +restart_core_server() -> + init:restart(). + +couch_config_start_link_wrapper(IniFiles, FirstConfigPid) -> + case is_process_alive(FirstConfigPid) of + true -> + link(FirstConfigPid), + {ok, FirstConfigPid}; + false -> couch_config:start_link(IniFiles) + end. + +start_server(IniFiles) -> + case init:get_argument(pidfile) of + {ok, [PidFile]} -> + case file:write_file(PidFile, os:getpid()) of + ok -> ok; + Error -> io:format("Failed to write PID file ~s, error: ~p", [PidFile, Error]) + end; + _ -> ok + end, + + {ok, ConfigPid} = couch_config:start_link(IniFiles), + + LogLevel = couch_config:get("log", "level", "info"), + % announce startup + io:format("Apache CouchDB ~s (LogLevel=~s) is starting.~n", [ + couch:version(), + LogLevel + ]), + case LogLevel of + "debug" -> + io:format("Configuration Settings ~p:~n", [IniFiles]), + [io:format(" [~s] ~s=~p~n", [Module, Variable, Value]) + || {{Module, Variable}, Value} <- couch_config:all()]; + _ -> ok + end, + + BaseChildSpecs = + {{one_for_all, 10, 60}, + [{couch_config, + {couch_server_sup, couch_config_start_link_wrapper, [IniFiles, ConfigPid]}, + permanent, + brutal_kill, + worker, + [couch_config]}, + {couch_config_event, + {couch_config_event, start_link, []}, + permanent, + 1000, + worker, + dynamic}, + {couch_primary_services, + {couch_primary_sup, start_link, []}, + permanent, + infinity, + supervisor, + [couch_primary_sup]}, + {couch_secondary_services, + {couch_secondary_sup, start_link, []}, + permanent, + infinity, + supervisor, + [couch_secondary_sup]} + ]}, + + {ok, Pid} = supervisor:start_link( + {local, couch_server_sup}, couch_server_sup, BaseChildSpecs), + + % just restart if one of the config settings change. + couch_config:register(fun ?MODULE:config_change/2, Pid), + + unlink(ConfigPid), + + Ip = couch_config:get("httpd", "bind_address"), + io:format("Apache CouchDB has started. Time to relax.~n"), + Uris = [get_uri(Name, Ip) || Name <- [couch_httpd, https]], + [begin + case Uri of + undefined -> ok; + Uri -> ?LOG_INFO("Apache CouchDB has started on ~s", [Uri]) + end + end + || Uri <- Uris], + case couch_config:get("couchdb", "uri_file", null) of + null -> ok; + UriFile -> + Lines = [begin case Uri of + undefined -> []; + Uri -> io_lib:format("~s~n", [Uri]) + end end || Uri <- Uris], + ok = file:write_file(UriFile, Lines) + end, + + {ok, Pid}. + +config_change("daemons", _) -> + exit(whereis(couch_server_sup), shutdown); +config_change("couchdb", "util_driver_dir") -> + [Pid] = [P || {collation_driver,P,_,_} + <- supervisor:which_children(couch_primary_services)], + Pid ! reload_driver. + +init(ChildSpecs) -> + {ok, ChildSpecs}. + +get_uri(Name, Ip) -> + case get_port(Name) of + undefined -> + undefined; + Port -> + io_lib:format("~s://~s:~w/", [get_scheme(Name), Ip, Port]) + end. + +get_scheme(couch_httpd) -> "http"; +get_scheme(https) -> "https". + +get_port(Name) -> + try + mochiweb_socket_server:get(Name, port) + catch + exit:{noproc, _}-> + undefined + end. diff --git a/apps/couch/src/couch_stats_aggregator.erl b/apps/couch/src/couch_stats_aggregator.erl new file mode 100644 index 00000000..7dac1124 --- /dev/null +++ b/apps/couch/src/couch_stats_aggregator.erl @@ -0,0 +1,302 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_stats_aggregator). +-behaviour(gen_server). + +-export([start/0, start/1, stop/0]). +-export([all/0, all/1, get/1, get/2, get_json/1, get_json/2, collect_sample/0]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-record(aggregate, { + description = <<"">>, + seconds = 0, + count = 0, + current = null, + sum = null, + mean = null, + variance = null, + stddev = null, + min = null, + max = null, + samples = [] +}). + + +start() -> + PrivDir = couch_util:priv_dir(), + start(filename:join(PrivDir, "stat_descriptions.cfg")). + +start(FileName) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [FileName], []). + +stop() -> + gen_server:cast(?MODULE, stop). + +all() -> + ?MODULE:all(0). +all(Time) when is_binary(Time) -> + ?MODULE:all(list_to_integer(binary_to_list(Time))); +all(Time) when is_atom(Time) -> + ?MODULE:all(list_to_integer(atom_to_list(Time))); +all(Time) when is_integer(Time) -> + Aggs = ets:match(?MODULE, {{'$1', Time}, '$2'}), + Stats = lists:map(fun([Key, Agg]) -> {Key, Agg} end, Aggs), + case Stats of + [] -> + {[]}; + _ -> + Ret = lists:foldl(fun({{Mod, Key}, Agg}, Acc) -> + CurrKeys = case proplists:lookup(Mod, Acc) of + none -> []; + {Mod, {Keys}} -> Keys + end, + NewMod = {[{Key, to_json_term(Agg)} | CurrKeys]}, + [{Mod, NewMod} | proplists:delete(Mod, Acc)] + end, [], Stats), + {Ret} + end. + +get(Key) -> + ?MODULE:get(Key, 0). +get(Key, Time) when is_binary(Time) -> + ?MODULE:get(Key, list_to_integer(binary_to_list(Time))); +get(Key, Time) when is_atom(Time) -> + ?MODULE:get(Key, list_to_integer(atom_to_list(Time))); +get(Key, Time) when is_integer(Time) -> + case ets:lookup(?MODULE, {make_key(Key), Time}) of + [] -> #aggregate{seconds=Time}; + [{_, Agg}] -> Agg + end. + +get_json(Key) -> + get_json(Key, 0). +get_json(Key, Time) -> + to_json_term(?MODULE:get(Key, Time)). + +collect_sample() -> + gen_server:call(?MODULE, collect_sample, infinity). + + +init(StatDescsFileName) -> + % Create an aggregate entry for each {description, rate} pair. + ets:new(?MODULE, [named_table, set, protected]), + SampleStr = couch_config:get("stats", "samples", "[0]"), + {ok, Samples} = couch_util:parse_term(SampleStr), + case file:consult(StatDescsFileName) of + {ok, Descs} -> + ok; + {error, _} -> + Descs = [] + end, + lists:foreach(fun({Sect, Key, Value}) -> + lists:foreach(fun(Secs) -> + Agg = #aggregate{ + description=list_to_binary(Value), + seconds=Secs + }, + ets:insert(?MODULE, {{{Sect, Key}, Secs}, Agg}) + end, Samples) + end, Descs), + + Self = self(), + ok = couch_config:register( + fun("stats", _) -> exit(Self, config_change) end + ), + + Rate = list_to_integer(couch_config:get("stats", "rate", "1000")), + % TODO: Add timer_start to kernel start options. + {ok, TRef} = timer:apply_after(Rate, ?MODULE, collect_sample, []), + {ok, {TRef, Rate}}. + +terminate(_Reason, {TRef, _Rate}) -> + timer:cancel(TRef), + ok. + +handle_call(collect_sample, _, {OldTRef, SampleInterval}) -> + timer:cancel(OldTRef), + {ok, TRef} = timer:apply_after(SampleInterval, ?MODULE, collect_sample, []), + % Gather new stats values to add. + Incs = lists:map(fun({Key, Value}) -> + {Key, {incremental, Value}} + end, couch_stats_collector:all(incremental)), + Abs = lists:map(fun({Key, Values}) -> + couch_stats_collector:clear(Key), + Values2 = case Values of + X when is_list(X) -> X; + Else -> [Else] + end, + {_, Mean} = lists:foldl(fun(Val, {Count, Curr}) -> + {Count+1, Curr + (Val - Curr) / (Count+1)} + end, {0, 0}, Values2), + {Key, {absolute, Mean}} + end, couch_stats_collector:all(absolute)), + + Values = Incs ++ Abs, + Now = erlang:now(), + lists:foreach(fun({{Key, Rate}, Agg}) -> + NewAgg = case proplists:lookup(Key, Values) of + none -> + rem_values(Now, Agg); + {Key, {Type, Value}} -> + NewValue = new_value(Type, Value, Agg#aggregate.current), + Agg2 = add_value(Now, NewValue, Agg), + rem_values(Now, Agg2) + end, + ets:insert(?MODULE, {{Key, Rate}, NewAgg}) + end, ets:tab2list(?MODULE)), + {reply, ok, {TRef, SampleInterval}}. + +handle_cast(stop, State) -> + {stop, normal, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +code_change(_OldVersion, State, _Extra) -> + {ok, State}. + + +new_value(incremental, Value, null) -> + Value; +new_value(incremental, Value, Current) -> + Value - Current; +new_value(absolute, Value, _Current) -> + Value. + +add_value(Time, Value, #aggregate{count=Count, seconds=Secs}=Agg) when Count < 1 -> + Samples = case Secs of + 0 -> []; + _ -> [{Time, Value}] + end, + Agg#aggregate{ + count=1, + current=Value, + sum=Value, + mean=Value, + variance=0.0, + stddev=null, + min=Value, + max=Value, + samples=Samples + }; +add_value(Time, Value, Agg) -> + #aggregate{ + count=Count, + current=Current, + sum=Sum, + mean=Mean, + variance=Variance, + samples=Samples + } = Agg, + + NewCount = Count + 1, + NewMean = Mean + (Value - Mean) / NewCount, + NewVariance = Variance + (Value - Mean) * (Value - NewMean), + StdDev = case NewCount > 1 of + false -> null; + _ -> math:sqrt(NewVariance / (NewCount - 1)) + end, + Agg2 = Agg#aggregate{ + count=NewCount, + current=Current + Value, + sum=Sum + Value, + mean=NewMean, + variance=NewVariance, + stddev=StdDev, + min=lists:min([Agg#aggregate.min, Value]), + max=lists:max([Agg#aggregate.max, Value]) + }, + case Agg2#aggregate.seconds of + 0 -> Agg2; + _ -> Agg2#aggregate{samples=[{Time, Value} | Samples]} + end. + +rem_values(Time, Agg) -> + Seconds = Agg#aggregate.seconds, + Samples = Agg#aggregate.samples, + Pred = fun({When, _Value}) -> + timer:now_diff(Time, When) =< (Seconds * 1000000) + end, + {Keep, Remove} = lists:splitwith(Pred, Samples), + Agg2 = lists:foldl(fun({_, Value}, Acc) -> + rem_value(Value, Acc) + end, Agg, Remove), + Agg2#aggregate{samples=Keep}. + +rem_value(_Value, #aggregate{count=Count, seconds=Secs}) when Count =< 1 -> + #aggregate{seconds=Secs}; +rem_value(Value, Agg) -> + #aggregate{ + count=Count, + sum=Sum, + mean=Mean, + variance=Variance + } = Agg, + + OldMean = (Mean * Count - Value) / (Count - 1), + OldVariance = Variance - (Value - OldMean) * (Value - Mean), + OldCount = Count - 1, + StdDev = case OldCount > 1 of + false -> null; + _ -> math:sqrt(clamp_value(OldVariance / (OldCount - 1))) + end, + Agg#aggregate{ + count=OldCount, + sum=Sum-Value, + mean=clamp_value(OldMean), + variance=clamp_value(OldVariance), + stddev=StdDev + }. + +to_json_term(Agg) -> + {Min, Max} = case Agg#aggregate.seconds > 0 of + false -> + {Agg#aggregate.min, Agg#aggregate.max}; + _ -> + case length(Agg#aggregate.samples) > 0 of + true -> + Extract = fun({_Time, Value}) -> Value end, + Samples = lists:map(Extract, Agg#aggregate.samples), + {lists:min(Samples), lists:max(Samples)}; + _ -> + {null, null} + end + end, + {[ + {description, Agg#aggregate.description}, + {current, round_value(Agg#aggregate.sum)}, + {sum, round_value(Agg#aggregate.sum)}, + {mean, round_value(Agg#aggregate.mean)}, + {stddev, round_value(Agg#aggregate.stddev)}, + {min, Min}, + {max, Max} + ]}. + +make_key({Mod, Val}) when is_integer(Val) -> + {Mod, list_to_atom(integer_to_list(Val))}; +make_key(Key) -> + Key. + +round_value(Val) when not is_number(Val) -> + Val; +round_value(Val) when Val == 0 -> + Val; +round_value(Val) -> + erlang:round(Val * 1000.0) / 1000.0. + +clamp_value(Val) when Val > 0.00000000000001 -> + Val; +clamp_value(_) -> + 0.0. diff --git a/apps/couch/src/couch_stats_collector.erl b/apps/couch/src/couch_stats_collector.erl new file mode 100644 index 00000000..74238fc8 --- /dev/null +++ b/apps/couch/src/couch_stats_collector.erl @@ -0,0 +1,131 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% todo +% - remove existance check on increment(), decrement() and record(). have +% modules initialize counters on startup. + +-module(couch_stats_collector). + +-behaviour(gen_server). + +-export([start/0, stop/0]). +-export([all/0, all/1, get/1, increment/1, decrement/1, record/2, clear/1]). +-export([track_process_count/1, track_process_count/2]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-define(HIT_TABLE, stats_hit_table). +-define(ABS_TABLE, stats_abs_table). + +start() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +stop() -> + gen_server:call(?MODULE, stop). + +all() -> + ets:tab2list(?HIT_TABLE) ++ abs_to_list(). + +all(Type) -> + case Type of + incremental -> ets:tab2list(?HIT_TABLE); + absolute -> abs_to_list() + end. + +get(Key) -> + case ets:lookup(?HIT_TABLE, Key) of + [] -> + case ets:lookup(?ABS_TABLE, Key) of + [] -> + nil; + AbsVals -> + lists:map(fun({_, Value}) -> Value end, AbsVals) + end; + [{_, Counter}] -> + Counter + end. + +increment(Key) -> + Key2 = make_key(Key), + case catch ets:update_counter(?HIT_TABLE, Key2, 1) of + {'EXIT', {badarg, _}} -> + catch ets:insert(?HIT_TABLE, {Key2, 1}), + ok; + _ -> + ok + end. + +decrement(Key) -> + Key2 = make_key(Key), + case catch ets:update_counter(?HIT_TABLE, Key2, -1) of + {'EXIT', {badarg, _}} -> + catch ets:insert(?HIT_TABLE, {Key2, -1}), + ok; + _ -> ok + end. + +record(Key, Value) -> + catch ets:insert(?ABS_TABLE, {make_key(Key), Value}). + +clear(Key) -> + catch ets:delete(?ABS_TABLE, make_key(Key)). + +track_process_count(Stat) -> + track_process_count(self(), Stat). + +track_process_count(Pid, Stat) -> + gen_server:cast(?MODULE, {track_process_count, Stat, Pid}). + +init(_) -> + ets:new(?HIT_TABLE, [named_table, set, public]), + ets:new(?ABS_TABLE, [named_table, duplicate_bag, public]), + {ok, []}. + +terminate(_Reason, _State) -> + ok. + +handle_call(stop, _, State) -> + {stop, normal, stopped, State}. + +handle_cast({track_process_count, Stat, Pid}, State) -> + ok = couch_stats_collector:increment(Stat), + Ref = erlang:monitor(process, Pid), + {noreply, [{Ref,Stat} | State]}. + +handle_info({'DOWN', Ref, _, _, _}, State) -> + {Ref, Stat} = lists:keyfind(Ref, 1, State), + ok = couch_stats_collector:decrement(Stat), + {noreply, lists:keydelete(Ref, 1, State)}. + +code_change(_OldVersion, State, _Extra) -> + {ok, State}. + + +make_key({Module, Key}) when is_integer(Key) -> + {Module, list_to_atom(integer_to_list(Key))}; +make_key(Key) -> + Key. + +abs_to_list() -> + SortedKVs = lists:sort(ets:tab2list(?ABS_TABLE)), + lists:foldl(fun({Key, Val}, Acc) -> + case Acc of + [] -> + [{Key, [Val]}]; + [{Key, Prev} | Rest] -> + [{Key, [Val | Prev]} | Rest]; + Others -> + [{Key, [Val]} | Others] + end + end, [], SortedKVs).
\ No newline at end of file diff --git a/apps/couch/src/couch_stream.erl b/apps/couch/src/couch_stream.erl new file mode 100644 index 00000000..60af1c2b --- /dev/null +++ b/apps/couch/src/couch_stream.erl @@ -0,0 +1,357 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_stream). +-behaviour(gen_server). + + +-define(FILE_POINTER_BYTES, 8). +-define(FILE_POINTER_BITS, 8*(?FILE_POINTER_BYTES)). + +-define(STREAM_OFFSET_BYTES, 4). +-define(STREAM_OFFSET_BITS, 8*(?STREAM_OFFSET_BYTES)). + +-define(HUGE_CHUNK, 1000000000). % Huge chuck size when reading all in one go + +-define(DEFAULT_STREAM_CHUNK, 16#00100000). % 1 meg chunks when streaming data + +-export([open/1, open/3, close/1, write/2, foldl/4, foldl/5, range_foldl/6, foldl_decode/6, + old_foldl/5,old_copy_to_new_stream/4]). +-export([copy_to_new_stream/3,old_read_term/2]). +-export([init/1, terminate/2, handle_call/3]). +-export([handle_cast/2,code_change/3,handle_info/2]). + +-include("couch_db.hrl"). + +-record(stream, + {fd = 0, + written_pointers=[], + buffer_list = [], + buffer_len = 0, + max_buffer = 4096, + written_len = 0, + md5, + % md5 of the content without any transformation applied (e.g. compression) + % needed for the attachment upload integrity check (ticket 558) + identity_md5, + identity_len = 0, + encoding_fun, + end_encoding_fun + }). + + +%%% Interface functions %%% + +open(Fd) -> + open(Fd, identity, []). + +open(Fd, Encoding, Options) -> + gen_server:start_link(couch_stream, {Fd, Encoding, Options}, []). + +close(Pid) -> + gen_server:call(Pid, close, infinity). + +copy_to_new_stream(Fd, PosList, DestFd) -> + {ok, Dest} = open(DestFd), + foldl(Fd, PosList, + fun(Bin, _) -> + ok = write(Dest, Bin) + end, ok), + close(Dest). + + +% 09 UPGRADE CODE +old_copy_to_new_stream(Fd, Pos, Len, DestFd) -> + {ok, Dest} = open(DestFd), + old_foldl(Fd, Pos, Len, + fun(Bin, _) -> + ok = write(Dest, Bin) + end, ok), + close(Dest). + +% 09 UPGRADE CODE +old_foldl(_Fd, null, 0, _Fun, Acc) -> + Acc; +old_foldl(Fd, OldPointer, Len, Fun, Acc) when is_tuple(OldPointer)-> + {ok, Acc2, _} = old_stream_data(Fd, OldPointer, Len, ?DEFAULT_STREAM_CHUNK, Fun, Acc), + Acc2. + +foldl(_Fd, [], _Fun, Acc) -> + Acc; +foldl(Fd, [Pos|Rest], Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + foldl(Fd, Rest, Fun, Fun(Bin, Acc)). + +foldl(Fd, PosList, <<>>, Fun, Acc) -> + foldl(Fd, PosList, Fun, Acc); +foldl(Fd, PosList, Md5, Fun, Acc) -> + foldl(Fd, PosList, Md5, couch_util:md5_init(), Fun, Acc). + +foldl_decode(Fd, PosList, Md5, Enc, Fun, Acc) -> + {DecDataFun, DecEndFun} = case Enc of + gzip -> + ungzip_init(); + identity -> + identity_enc_dec_funs() + end, + Result = foldl_decode( + DecDataFun, Fd, PosList, Md5, couch_util:md5_init(), Fun, Acc + ), + DecEndFun(), + Result. + +foldl(_Fd, [], Md5, Md5Acc, _Fun, Acc) -> + Md5 = couch_util:md5_final(Md5Acc), + Acc; +foldl(Fd, [{Pos, _Size}], Md5, Md5Acc, Fun, Acc) -> % 0110 UPGRADE CODE + foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc); +foldl(Fd, [Pos], Md5, Md5Acc, Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + Md5 = couch_util:md5_final(couch_util:md5_update(Md5Acc, Bin)), + Fun(Bin, Acc); +foldl(Fd, [{Pos, _Size}|Rest], Md5, Md5Acc, Fun, Acc) -> + foldl(Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc); +foldl(Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + foldl(Fd, Rest, Md5, couch_util:md5_update(Md5Acc, Bin), Fun, Fun(Bin, Acc)). + +range_foldl(Fd, PosList, From, To, Fun, Acc) -> + range_foldl(Fd, PosList, From, To, 0, Fun, Acc). + +range_foldl(_Fd, _PosList, _From, To, Off, _Fun, Acc) when Off >= To -> + Acc; +range_foldl(Fd, [Pos|Rest], From, To, Off, Fun, Acc) when is_integer(Pos) -> % old-style attachment + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + range_foldl(Fd, [{Pos, iolist_size(Bin)}] ++ Rest, From, To, Off, Fun, Acc); +range_foldl(Fd, [{_Pos, Size}|Rest], From, To, Off, Fun, Acc) when From > Off + Size -> + range_foldl(Fd, Rest, From, To, Off + Size, Fun, Acc); +range_foldl(Fd, [{Pos, Size}|Rest], From, To, Off, Fun, Acc) -> + {ok, Bin} = couch_file:pread_iolist(Fd, Pos), + Bin1 = if + From =< Off andalso To >= Off + Size -> Bin; %% the whole block is covered + true -> + PrefixLen = clip(From - Off, 0, Size), + PostfixLen = clip(Off + Size - To, 0, Size), + MatchLen = Size - PrefixLen - PostfixLen, + <<_Prefix:PrefixLen/binary,Match:MatchLen/binary,_Postfix:PostfixLen/binary>> = iolist_to_binary(Bin), + Match + end, + range_foldl(Fd, Rest, From, To, Off + Size, Fun, Fun(Bin1, Acc)). + +clip(Value, Lo, Hi) -> + if + Value < Lo -> Lo; + Value > Hi -> Hi; + true -> Value + end. + +foldl_decode(_DecFun, _Fd, [], Md5, Md5Acc, _Fun, Acc) -> + Md5 = couch_util:md5_final(Md5Acc), + Acc; +foldl_decode(DecFun, Fd, [{Pos, _Size}], Md5, Md5Acc, Fun, Acc) -> + foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc); +foldl_decode(DecFun, Fd, [Pos], Md5, Md5Acc, Fun, Acc) -> + {ok, EncBin} = couch_file:pread_iolist(Fd, Pos), + Md5 = couch_util:md5_final(couch_util:md5_update(Md5Acc, EncBin)), + Bin = DecFun(EncBin), + Fun(Bin, Acc); +foldl_decode(DecFun, Fd, [{Pos, _Size}|Rest], Md5, Md5Acc, Fun, Acc) -> + foldl_decode(DecFun, Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc); +foldl_decode(DecFun, Fd, [Pos|Rest], Md5, Md5Acc, Fun, Acc) -> + {ok, EncBin} = couch_file:pread_iolist(Fd, Pos), + Bin = DecFun(EncBin), + Md5Acc2 = couch_util:md5_update(Md5Acc, EncBin), + foldl_decode(DecFun, Fd, Rest, Md5, Md5Acc2, Fun, Fun(Bin, Acc)). + +gzip_init(Options) -> + case couch_util:get_value(compression_level, Options, 0) of + Lvl when Lvl >= 1 andalso Lvl =< 9 -> + Z = zlib:open(), + % 15 = ?MAX_WBITS (defined in the zlib module) + % the 16 + ?MAX_WBITS formula was obtained by inspecting zlib:gzip/1 + ok = zlib:deflateInit(Z, Lvl, deflated, 16 + 15, 8, default), + { + fun(Data) -> + zlib:deflate(Z, Data) + end, + fun() -> + Last = zlib:deflate(Z, [], finish), + ok = zlib:deflateEnd(Z), + ok = zlib:close(Z), + Last + end + }; + _ -> + identity_enc_dec_funs() + end. + +ungzip_init() -> + Z = zlib:open(), + zlib:inflateInit(Z, 16 + 15), + { + fun(Data) -> + zlib:inflate(Z, Data) + end, + fun() -> + ok = zlib:inflateEnd(Z), + ok = zlib:close(Z) + end + }. + +identity_enc_dec_funs() -> + { + fun(Data) -> Data end, + fun() -> [] end + }. + +write(_Pid, <<>>) -> + ok; +write(Pid, Bin) -> + gen_server:call(Pid, {write, Bin}, infinity). + + +init({Fd, Encoding, Options}) -> + {EncodingFun, EndEncodingFun} = case Encoding of + identity -> + identity_enc_dec_funs(); + gzip -> + gzip_init(Options) + end, + {ok, #stream{ + fd=Fd, + md5=couch_util:md5_init(), + identity_md5=couch_util:md5_init(), + encoding_fun=EncodingFun, + end_encoding_fun=EndEncodingFun + } + }. + +terminate(_Reason, _Stream) -> + ok. + +handle_call({write, Bin}, _From, Stream) -> + BinSize = iolist_size(Bin), + #stream{ + fd = Fd, + written_len = WrittenLen, + written_pointers = Written, + buffer_len = BufferLen, + buffer_list = Buffer, + max_buffer = Max, + md5 = Md5, + identity_md5 = IdenMd5, + identity_len = IdenLen, + encoding_fun = EncodingFun} = Stream, + if BinSize + BufferLen > Max -> + WriteBin = lists:reverse(Buffer, [Bin]), + IdenMd5_2 = couch_util:md5_update(IdenMd5, WriteBin), + case EncodingFun(WriteBin) of + [] -> + % case where the encoder did some internal buffering + % (zlib does it for example) + WrittenLen2 = WrittenLen, + Md5_2 = Md5, + Written2 = Written; + WriteBin2 -> + {ok, Pos} = couch_file:append_binary(Fd, WriteBin2), + WrittenLen2 = WrittenLen + iolist_size(WriteBin2), + Md5_2 = couch_util:md5_update(Md5, WriteBin2), + Written2 = [{Pos, iolist_size(WriteBin2)}|Written] + end, + + {reply, ok, Stream#stream{ + written_len=WrittenLen2, + written_pointers=Written2, + buffer_list=[], + buffer_len=0, + md5=Md5_2, + identity_md5=IdenMd5_2, + identity_len=IdenLen + BinSize}}; + true -> + {reply, ok, Stream#stream{ + buffer_list=[Bin|Buffer], + buffer_len=BufferLen + BinSize, + identity_len=IdenLen + BinSize}} + end; +handle_call(close, _From, Stream) -> + #stream{ + fd = Fd, + written_len = WrittenLen, + written_pointers = Written, + buffer_list = Buffer, + md5 = Md5, + identity_md5 = IdenMd5, + identity_len = IdenLen, + encoding_fun = EncodingFun, + end_encoding_fun = EndEncodingFun} = Stream, + + WriteBin = lists:reverse(Buffer), + IdenMd5Final = couch_util:md5_final(couch_util:md5_update(IdenMd5, WriteBin)), + WriteBin2 = EncodingFun(WriteBin) ++ EndEncodingFun(), + Md5Final = couch_util:md5_final(couch_util:md5_update(Md5, WriteBin2)), + Result = case WriteBin2 of + [] -> + {lists:reverse(Written), WrittenLen, IdenLen, Md5Final, IdenMd5Final}; + _ -> + {ok, Pos} = couch_file:append_binary(Fd, WriteBin2), + StreamInfo = lists:reverse(Written, [{Pos, iolist_size(WriteBin2)}]), + StreamLen = WrittenLen + iolist_size(WriteBin2), + {StreamInfo, StreamLen, IdenLen, Md5Final, IdenMd5Final} + end, + {stop, normal, Result, Stream}. + +handle_cast(_Msg, State) -> + {noreply,State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info(_Info, State) -> + {noreply, State}. + + + +% 09 UPGRADE CODE +old_read_term(Fd, Sp) -> + {ok, <<TermLen:(?STREAM_OFFSET_BITS)>>, Sp2} + = old_read(Fd, Sp, ?STREAM_OFFSET_BYTES), + {ok, Bin, _Sp3} = old_read(Fd, Sp2, TermLen), + {ok, binary_to_term(Bin)}. + +old_read(Fd, Sp, Num) -> + {ok, RevBin, Sp2} = old_stream_data(Fd, Sp, Num, ?HUGE_CHUNK, fun(Bin, Acc) -> [Bin | Acc] end, []), + Bin = list_to_binary(lists:reverse(RevBin)), + {ok, Bin, Sp2}. + +% 09 UPGRADE CODE +old_stream_data(_Fd, Sp, 0, _MaxChunk, _Fun, Acc) -> + {ok, Acc, Sp}; +old_stream_data(Fd, {Pos, 0}, Num, MaxChunk, Fun, Acc) -> + {ok, <<NextPos:(?FILE_POINTER_BITS), NextOffset:(?STREAM_OFFSET_BITS)>>} + = couch_file:old_pread(Fd, Pos, ?FILE_POINTER_BYTES + ?STREAM_OFFSET_BYTES), + Sp = {NextPos, NextOffset}, + % Check NextPos is past current Pos (this is always true in a stream) + % Guards against potential infinite loops caused by corruption. + case NextPos > Pos of + true -> ok; + false -> throw({error, stream_corruption}) + end, + old_stream_data(Fd, Sp, Num, MaxChunk, Fun, Acc); +old_stream_data(Fd, {Pos, Offset}, Num, MaxChunk, Fun, Acc) -> + ReadAmount = lists:min([MaxChunk, Num, Offset]), + {ok, Bin} = couch_file:old_pread(Fd, Pos, ReadAmount), + Sp = {Pos + ReadAmount, Offset - ReadAmount}, + old_stream_data(Fd, Sp, Num - ReadAmount, MaxChunk, Fun, Fun(Bin, Acc)). + + +% Tests moved to tests/etap/050-stream.t + diff --git a/apps/couch/src/couch_task_status.erl b/apps/couch/src/couch_task_status.erl new file mode 100644 index 00000000..639515c7 --- /dev/null +++ b/apps/couch/src/couch_task_status.erl @@ -0,0 +1,123 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_task_status). +-behaviour(gen_server). + +% This module allows is used to track the status of long running tasks. +% Long running tasks register (add_task/3) then update their status (update/1) +% and the task and status is added to tasks list. When the tracked task dies +% it will be automatically removed the tracking. To get the tasks list, use the +% all/0 function + +-export([start_link/0, stop/0]). +-export([all/0, add_task/3, update/1, update/2, set_update_frequency/1]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-import(couch_util, [to_binary/1]). + +-include("couch_db.hrl"). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +stop() -> + gen_server:cast(?MODULE, stop). + + +all() -> + gen_server:call(?MODULE, all). + + +add_task(Type, TaskName, StatusText) -> + put(task_status_update, {{0, 0, 0}, 0}), + Msg = { + add_task, + to_binary(Type), + to_binary(TaskName), + to_binary(StatusText) + }, + gen_server:call(?MODULE, Msg). + + +set_update_frequency(Msecs) -> + put(task_status_update, {{0, 0, 0}, Msecs * 1000}). + + +update(StatusText) -> + update("~s", [StatusText]). + +update(Format, Data) -> + {LastUpdateTime, Frequency} = get(task_status_update), + case timer:now_diff(Now = now(), LastUpdateTime) >= Frequency of + true -> + put(task_status_update, {Now, Frequency}), + Msg = ?l2b(io_lib:format(Format, Data)), + gen_server:cast(?MODULE, {update_status, self(), Msg}); + false -> + ok + end. + + +init([]) -> + % read configuration settings and register for configuration changes + ets:new(?MODULE, [ordered_set, protected, named_table]), + {ok, nil}. + + +terminate(_Reason,_State) -> + ok. + + +handle_call({add_task, Type, TaskName, StatusText}, {From, _}, Server) -> + case ets:lookup(?MODULE, From) of + [] -> + true = ets:insert(?MODULE, {From, {Type, TaskName, StatusText}}), + erlang:monitor(process, From), + {reply, ok, Server}; + [_] -> + {reply, {add_task_error, already_registered}, Server} + end; +handle_call(all, _, Server) -> + All = [ + [ + {type, Type}, + {task, Task}, + {status, Status}, + {pid, ?l2b(pid_to_list(Pid))} + ] + || + {Pid, {Type, Task, Status}} <- ets:tab2list(?MODULE) + ], + {reply, All, Server}. + + +handle_cast({update_status, Pid, StatusText}, Server) -> + [{Pid, {Type, TaskName, _StatusText}}] = ets:lookup(?MODULE, Pid), + true = ets:insert(?MODULE, {Pid, {Type, TaskName, StatusText}}), + {noreply, Server}; +handle_cast(stop, State) -> + {stop, normal, State}. + +handle_info({'DOWN', _MonitorRef, _Type, Pid, _Info}, Server) -> + %% should we also erlang:demonitor(_MonitorRef), ? + ets:delete(?MODULE, Pid), + {noreply, Server}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + diff --git a/apps/couch/src/couch_util.erl b/apps/couch/src/couch_util.erl new file mode 100644 index 00000000..839f5956 --- /dev/null +++ b/apps/couch/src/couch_util.erl @@ -0,0 +1,478 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_util). + +-export([priv_dir/0, start_driver/1, normpath/1]). +-export([should_flush/0, should_flush/1, to_existing_atom/1]). +-export([rand32/0, implode/2, collate/2, collate/3]). +-export([abs_pathname/1,abs_pathname/2, trim/1]). +-export([encodeBase64Url/1, decodeBase64Url/1]). +-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]). +-export([get_nested_json_value/2, json_user_ctx/1]). +-export([proplist_apply_field/2, json_apply_field/2]). +-export([to_binary/1, to_integer/1, to_list/1, url_encode/1]). +-export([json_encode/1, json_decode/1]). +-export([verify/2,simple_call/2,shutdown_sync/1]). +-export([compressible_att_type/1]). +-export([get_value/2, get_value/3]). +-export([md5/1, md5_init/0, md5_update/2, md5_final/1]). +-export([reorder_results/2]). +-export([url_strip_password/1]). +-export([encode_doc_id/1]). + +-include("couch_db.hrl"). + +% arbitrarily chosen amount of memory to use before flushing to disk +-define(FLUSH_MAX_MEM, 10000000). + +priv_dir() -> + case code:priv_dir(couch) of + {error, bad_name} -> + % small hack, in dev mode "app" is couchdb. Fixing requires + % renaming src/couch to src/couch. Not really worth the hassle. + % -Damien + code:priv_dir(couchdb); + Dir -> Dir + end. + +start_driver(LibDir) -> + case erl_ddll:load_driver(LibDir, "couch_icu_driver") of + ok -> + ok; + {error, already_loaded} -> + ok = erl_ddll:reload_driver(LibDir, "couch_icu_driver"); + {error, Error} -> + exit(erl_ddll:format_error(Error)) + end. + +% Normalize a pathname by removing .. and . components. +normpath(Path) -> + normparts(filename:split(Path), []). + +normparts([], Acc) -> + filename:join(lists:reverse(Acc)); +normparts([".." | RestParts], [_Drop | RestAcc]) -> + normparts(RestParts, RestAcc); +normparts(["." | RestParts], Acc) -> + normparts(RestParts, Acc); +normparts([Part | RestParts], Acc) -> + normparts(RestParts, [Part | Acc]). + +% works like list_to_existing_atom, except can be list or binary and it +% gives you the original value instead of an error if no existing atom. +to_existing_atom(V) when is_list(V) -> + try list_to_existing_atom(V) catch _:_ -> V end; +to_existing_atom(V) when is_binary(V) -> + try list_to_existing_atom(?b2l(V)) catch _:_ -> V end; +to_existing_atom(V) when is_atom(V) -> + V. + +shutdown_sync(Pid) when not is_pid(Pid)-> + ok; +shutdown_sync(Pid) -> + MRef = erlang:monitor(process, Pid), + try + catch unlink(Pid), + catch exit(Pid, shutdown), + receive + {'DOWN', MRef, _, _, _} -> + ok + end + after + erlang:demonitor(MRef, [flush]) + end. + + +simple_call(Pid, Message) -> + MRef = erlang:monitor(process, Pid), + try + Pid ! {self(), Message}, + receive + {Pid, Result} -> + Result; + {'DOWN', MRef, _, _, Reason} -> + exit(Reason) + end + after + erlang:demonitor(MRef, [flush]) + end. + +validate_utf8(Data) when is_list(Data) -> + validate_utf8(?l2b(Data)); +validate_utf8(Bin) when is_binary(Bin) -> + validate_utf8_fast(Bin, 0). + +validate_utf8_fast(B, O) -> + case B of + <<_:O/binary>> -> + true; + <<_:O/binary, C1, _/binary>> when + C1 < 128 -> + validate_utf8_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when + C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + validate_utf8_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when + C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + validate_utf8_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when + C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + validate_utf8_fast(B, 4 + O); + _ -> + false + end. + +to_hex([]) -> + []; +to_hex(Bin) when is_binary(Bin) -> + to_hex(binary_to_list(Bin)); +to_hex([H|T]) -> + [to_digit(H div 16), to_digit(H rem 16) | to_hex(T)]. + +to_digit(N) when N < 10 -> $0 + N; +to_digit(N) -> $a + N-10. + + +parse_term(Bin) when is_binary(Bin) -> + parse_term(binary_to_list(Bin)); +parse_term(List) -> + {ok, Tokens, _} = erl_scan:string(List ++ "."), + erl_parse:parse_term(Tokens). + +get_value(Key, List) -> + get_value(Key, List, undefined). + +get_value(Key, List, Default) -> + case lists:keysearch(Key, 1, List) of + {value, {Key,Value}} -> + Value; + false -> + Default + end. + +get_nested_json_value({Props}, [Key|Keys]) -> + case couch_util:get_value(Key, Props, nil) of + nil -> throw({not_found, <<"missing json key: ", Key/binary>>}); + Value -> get_nested_json_value(Value, Keys) + end; +get_nested_json_value(Value, []) -> + Value; +get_nested_json_value(_NotJSONObj, _) -> + throw({not_found, json_mismatch}). + +proplist_apply_field(H, L) -> + {R} = json_apply_field(H, {L}), + R. + +json_apply_field(H, {L}) -> + json_apply_field(H, L, []). +json_apply_field({Key, NewValue}, [{Key, _OldVal} | Headers], Acc) -> + json_apply_field({Key, NewValue}, Headers, Acc); +json_apply_field({Key, NewValue}, [{OtherKey, OtherVal} | Headers], Acc) -> + json_apply_field({Key, NewValue}, Headers, [{OtherKey, OtherVal} | Acc]); +json_apply_field({Key, NewValue}, [], Acc) -> + {[{Key, NewValue}|Acc]}. + +json_user_ctx(#db{name=ShardName, user_ctx=Ctx}) -> + {[{<<"db">>, mem3:dbname(ShardName)}, + {<<"name">>,Ctx#user_ctx.name}, + {<<"roles">>,Ctx#user_ctx.roles}]}. + + +% returns a random integer +rand32() -> + crypto:rand_uniform(0, 16#100000000). + +% given a pathname "../foo/bar/" it gives back the fully qualified +% absolute pathname. +abs_pathname(" " ++ Filename) -> + % strip leading whitspace + abs_pathname(Filename); +abs_pathname([$/ |_]=Filename) -> + Filename; +abs_pathname(Filename) -> + {ok, Cwd} = file:get_cwd(), + {Filename2, Args} = separate_cmd_args(Filename, ""), + abs_pathname(Filename2, Cwd) ++ Args. + +abs_pathname(Filename, Dir) -> + Name = filename:absname(Filename, Dir ++ "/"), + OutFilename = filename:join(fix_path_list(filename:split(Name), [])), + % If the filename is a dir (last char slash, put back end slash + case string:right(Filename,1) of + "/" -> + OutFilename ++ "/"; + "\\" -> + OutFilename ++ "/"; + _Else-> + OutFilename + end. + +% if this as an executable with arguments, seperate out the arguments +% ""./foo\ bar.sh -baz=blah" -> {"./foo\ bar.sh", " -baz=blah"} +separate_cmd_args("", CmdAcc) -> + {lists:reverse(CmdAcc), ""}; +separate_cmd_args("\\ " ++ Rest, CmdAcc) -> % handle skipped value + separate_cmd_args(Rest, " \\" ++ CmdAcc); +separate_cmd_args(" " ++ Rest, CmdAcc) -> + {lists:reverse(CmdAcc), " " ++ Rest}; +separate_cmd_args([Char|Rest], CmdAcc) -> + separate_cmd_args(Rest, [Char | CmdAcc]). + +% Is a character whitespace? +is_whitespace($\s) -> true; +is_whitespace($\t) -> true; +is_whitespace($\n) -> true; +is_whitespace($\r) -> true; +is_whitespace(_Else) -> false. + + +% removes leading and trailing whitespace from a string +trim(String) -> + String2 = lists:dropwhile(fun is_whitespace/1, String), + lists:reverse(lists:dropwhile(fun is_whitespace/1, lists:reverse(String2))). + +% takes a heirarchical list of dirs and removes the dots ".", double dots +% ".." and the corresponding parent dirs. +fix_path_list([], Acc) -> + lists:reverse(Acc); +fix_path_list([".."|Rest], [_PrevAcc|RestAcc]) -> + fix_path_list(Rest, RestAcc); +fix_path_list(["."|Rest], Acc) -> + fix_path_list(Rest, Acc); +fix_path_list([Dir | Rest], Acc) -> + fix_path_list(Rest, [Dir | Acc]). + + +implode(List, Sep) -> + implode(List, Sep, []). + +implode([], _Sep, Acc) -> + lists:flatten(lists:reverse(Acc)); +implode([H], Sep, Acc) -> + implode([], Sep, [H|Acc]); +implode([H|T], Sep, Acc) -> + implode(T, Sep, [Sep,H|Acc]). + + +drv_port() -> + case get(couch_drv_port) of + undefined -> + Port = open_port({spawn, "couch_icu_driver"}, []), + put(couch_drv_port, Port), + Port; + Port -> + Port + end. + +collate(A, B) -> + collate(A, B, []). + +collate(A, B, Options) when is_binary(A), is_binary(B) -> + Operation = + case lists:member(nocase, Options) of + true -> 1; % Case insensitive + false -> 0 % Case sensitive + end, + SizeA = byte_size(A), + SizeB = byte_size(B), + Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>, + [Result] = erlang:port_control(drv_port(), Operation, Bin), + % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the + % expected typical -1, 0, 1 + Result - 1. + +should_flush() -> + should_flush(?FLUSH_MAX_MEM). + +should_flush(MemThreshHold) -> + {memory, ProcMem} = process_info(self(), memory), + BinMem = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end, + 0, element(2,process_info(self(), binary))), + if ProcMem+BinMem > 2*MemThreshHold -> + garbage_collect(), + {memory, ProcMem2} = process_info(self(), memory), + BinMem2 = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end, + 0, element(2,process_info(self(), binary))), + ProcMem2+BinMem2 > MemThreshHold; + true -> false end. + +encodeBase64Url(Url) -> + Url1 = iolist_to_binary(re:replace(base64:encode(Url), "=+$", "")), + Url2 = iolist_to_binary(re:replace(Url1, "/", "_", [global])), + iolist_to_binary(re:replace(Url2, "\\+", "-", [global])). + +decodeBase64Url(Url64) -> + Url1 = re:replace(iolist_to_binary(Url64), "-", "+", [global]), + Url2 = iolist_to_binary( + re:replace(iolist_to_binary(Url1), "_", "/", [global]) + ), + Padding = ?l2b(lists:duplicate((4 - size(Url2) rem 4) rem 4, $=)), + base64:decode(<<Url2/binary, Padding/binary>>). + +dict_find(Key, Dict, DefaultValue) -> + case dict:find(Key, Dict) of + {ok, Value} -> + Value; + error -> + DefaultValue + end. + +to_binary(V) when is_binary(V) -> + V; +to_binary(V) when is_list(V) -> + try + list_to_binary(V) + catch + _:_ -> + list_to_binary(io_lib:format("~p", [V])) + end; +to_binary(V) when is_atom(V) -> + list_to_binary(atom_to_list(V)); +to_binary(V) -> + list_to_binary(io_lib:format("~p", [V])). + +to_integer(V) when is_integer(V) -> + V; +to_integer(V) when is_list(V) -> + erlang:list_to_integer(V); +to_integer(V) when is_binary(V) -> + erlang:list_to_integer(binary_to_list(V)). + +to_list(V) when is_list(V) -> + V; +to_list(V) when is_binary(V) -> + binary_to_list(V); +to_list(V) when is_atom(V) -> + atom_to_list(V); +to_list(V) -> + lists:flatten(io_lib:format("~p", [V])). + +url_encode(Bin) when is_binary(Bin) -> + url_encode(binary_to_list(Bin)); +url_encode([H|T]) -> + if + H >= $a, $z >= H -> + [H|url_encode(T)]; + H >= $A, $Z >= H -> + [H|url_encode(T)]; + H >= $0, $9 >= H -> + [H|url_encode(T)]; + H == $_; H == $.; H == $-; H == $: -> + [H|url_encode(T)]; + true -> + case lists:flatten(io_lib:format("~.16.0B", [H])) of + [X, Y] -> + [$%, X, Y | url_encode(T)]; + [X] -> + [$%, $0, X | url_encode(T)] + end + end; +url_encode([]) -> + []. + +json_encode(V) -> + Handler = + fun({L}) when is_list(L) -> + {struct,L}; + (Bad) -> + exit({json_encode, {bad_term, Bad}}) + end, + (mochijson2:encoder([{handler, Handler}]))(V). + +json_decode(V) -> + try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V) + catch + _Type:_Error -> + throw({invalid_json,V}) + end. + +verify([X|RestX], [Y|RestY], Result) -> + verify(RestX, RestY, (X bxor Y) bor Result); +verify([], [], Result) -> + Result == 0. + +verify(<<X/binary>>, <<Y/binary>>) -> + verify(?b2l(X), ?b2l(Y)); +verify(X, Y) when is_list(X) and is_list(Y) -> + case length(X) == length(Y) of + true -> + verify(X, Y, 0); + false -> + false + end; +verify(_X, _Y) -> false. + +compressible_att_type(MimeType) when is_binary(MimeType) -> + compressible_att_type(?b2l(MimeType)); +compressible_att_type(MimeType) -> + TypeExpList = re:split( + couch_config:get("attachments", "compressible_types", ""), + "\\s*,\\s*", + [{return, list}] + ), + lists:any( + fun(TypeExp) -> + Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"), + "(?:\\s*;.*?)?\\s*", $$], + re:run(MimeType, Regexp, [caseless]) =/= nomatch + end, + [T || T <- TypeExpList, T /= []] + ). + +-spec md5(Data::(iolist() | binary())) -> Digest::binary(). +md5(Data) -> + try crypto:md5(Data) catch error:_ -> erlang:md5(Data) end. + +-spec md5_init() -> Context::binary(). +md5_init() -> + try crypto:md5_init() catch error:_ -> erlang:md5_init() end. + +-spec md5_update(Context::binary(), Data::(iolist() | binary())) -> + NewContext::binary(). +md5_update(Ctx, D) -> + try crypto:md5_update(Ctx,D) catch error:_ -> erlang:md5_update(Ctx,D) end. + +-spec md5_final(Context::binary()) -> Digest::binary(). +md5_final(Ctx) -> + try crypto:md5_final(Ctx) catch error:_ -> erlang:md5_final(Ctx) end. + +% linear search is faster for small lists, length() is 0.5 ms for 100k list +reorder_results(Keys, SortedResults) when length(Keys) < 100 -> + [couch_util:get_value(Key, SortedResults) || Key <- Keys]; +reorder_results(Keys, SortedResults) -> + KeyDict = dict:from_list(SortedResults), + [dict:fetch(Key, KeyDict) || Key <- Keys]. + +url_strip_password(Url) -> + re:replace(Url, + "http(s)?://([^:]+):[^@]+@(.*)$", + "http\\1://\\2:*****@\\3", + [{return, list}]). + +encode_doc_id(#doc{id = Id}) -> + encode_doc_id(Id); +encode_doc_id(Id) when is_list(Id) -> + encode_doc_id(?l2b(Id)); +encode_doc_id(<<"_design/", Rest/binary>>) -> + "_design/" ++ url_encode(Rest); +encode_doc_id(<<"_local/", Rest/binary>>) -> + "_local/" ++ url_encode(Rest); +encode_doc_id(Id) -> + url_encode(Id). diff --git a/apps/couch/src/couch_uuids.erl b/apps/couch/src/couch_uuids.erl new file mode 100644 index 00000000..e1851e1d --- /dev/null +++ b/apps/couch/src/couch_uuids.erl @@ -0,0 +1,95 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_uuids). +-include("couch_db.hrl"). + +-behaviour(gen_server). + +-export([start/0, stop/0]). +-export([new/0, random/0, utc_random/0]). + +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +start() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +stop() -> + gen_server:cast(?MODULE, stop). + +new() -> + gen_server:call(?MODULE, create). + +random() -> + list_to_binary(couch_util:to_hex(crypto:rand_bytes(16))). + +utc_random() -> + Now = {_, _, Micro} = now(), + Nowish = calendar:now_to_universal_time(Now), + Nowsecs = calendar:datetime_to_gregorian_seconds(Nowish), + Then = calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}), + Prefix = io_lib:format("~14.16.0b", [(Nowsecs - Then) * 1000000 + Micro]), + list_to_binary(Prefix ++ couch_util:to_hex(crypto:rand_bytes(9))). + +init([]) -> + ok = couch_config:register( + fun("uuids", _) -> gen_server:cast(?MODULE, change) end + ), + {ok, state()}. + +terminate(_Reason, _State) -> + ok. + +handle_call(create, _From, random) -> + {reply, random(), random}; +handle_call(create, _From, utc_random) -> + {reply, utc_random(), utc_random}; +handle_call(create, _From, {sequential, Pref, Seq}) -> + Result = ?l2b(Pref ++ io_lib:format("~6.16.0b", [Seq])), + case Seq >= 16#fff000 of + true -> + {reply, Result, {sequential, new_prefix(), inc()}}; + _ -> + {reply, Result, {sequential, Pref, Seq + inc()}} + end. + +handle_cast(change, _State) -> + {noreply, state()}; +handle_cast(stop, State) -> + {stop, normal, State}; +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +new_prefix() -> + couch_util:to_hex((crypto:rand_bytes(13))). + +inc() -> + crypto:rand_uniform(1, 16#ffe). + +state() -> + AlgoStr = couch_config:get("uuids", "algorithm", "random"), + case couch_util:to_existing_atom(AlgoStr) of + random -> + random; + utc_random -> + utc_random; + sequential -> + {sequential, new_prefix(), inc()}; + Unknown -> + throw({unknown_uuid_algorithm, Unknown}) + end. diff --git a/apps/couch/src/couch_view.erl b/apps/couch/src/couch_view.erl new file mode 100644 index 00000000..8d479d7e --- /dev/null +++ b/apps/couch/src/couch_view.erl @@ -0,0 +1,480 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view). +-behaviour(gen_server). + +-export([start_link/0,fold/4,less_json/2,less_json_ids/2,expand_dups/2, + detuple_kvs/2,init/1,terminate/2,handle_call/3,handle_cast/2,handle_info/2, + code_change/3,get_reduce_view/4,get_temp_reduce_view/5,get_temp_map_view/4, + get_map_view/4,get_row_count/1,reduce_to_count/1,fold_reduce/4, + extract_map_view/1,get_group_server/2,get_group_info/2, + cleanup_index_files/1,config_change/2, data_size/2]). + +-include("couch_db.hrl"). + + +-record(server,{ + root_dir = []}). + +start_link() -> + gen_server:start_link({local, couch_view}, couch_view, [], []). + +get_temp_updater(DbName, Language, DesignOptions, MapSrc, RedSrc) -> + {ok, Group} = + couch_view_group:open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc), + case gen_server:call(couch_view, {get_group_server, DbName, Group}, infinity) of + {ok, Pid} -> + Pid; + Error -> + throw(Error) + end. + +get_group_server(DbName, GroupId) -> + case couch_view_group:open_db_group(DbName, GroupId) of + {ok, Group} -> + case gen_server:call(couch_view, {get_group_server, DbName, Group}, infinity) of + {ok, Pid} -> + Pid; + Error -> + throw(Error) + end; + Error -> + throw(Error) + end. + +get_group(Db, GroupId, Stale) -> + MinUpdateSeq = case Stale of + ok -> 0; + update_after -> 0; + _Else -> couch_db:get_update_seq(Db) + end, + GroupPid = get_group_server(couch_db:name(Db), GroupId), + Result = couch_view_group:request_group(GroupPid, MinUpdateSeq), + case Stale of + update_after -> + % best effort, process might die + spawn(fun() -> + LastSeq = couch_db:get_update_seq(Db), + couch_view_group:request_group(GroupPid, LastSeq) + end); + _ -> + ok + end, + Result. + +get_temp_group(Db, Language, DesignOptions, MapSrc, RedSrc) -> + couch_view_group:request_group( + get_temp_updater(couch_db:name(Db), Language, DesignOptions, MapSrc, RedSrc), + couch_db:get_update_seq(Db)). + +get_group_info(Db, GroupId) -> + couch_view_group:request_group_info( + get_group_server(couch_db:name(Db), GroupId)). + +cleanup_index_files(Db) -> + % load all ddocs + {ok, DesignDocs} = couch_db:get_design_docs(Db), + + % make unique list of group sigs + Sigs = lists:map(fun(#doc{id = GroupId}) -> + {ok, Info} = get_group_info(Db, GroupId), + ?b2l(couch_util:get_value(signature, Info)) + end, [DD||DD <- DesignDocs, DD#doc.deleted == false]), + + FileList = list_index_files(Db), + + DeleteFiles = + if length(Sigs) =:= 0 -> + FileList; + true -> + % regex that matches all ddocs + RegExp = "("++ string:join(Sigs, "|") ++")", + + % filter out the ones in use + [FilePath || FilePath <- FileList, + re:run(FilePath, RegExp, [{capture, none}]) =:= nomatch] + end, + + % delete unused files + ?LOG_DEBUG("deleting unused view index files: ~p",[DeleteFiles]), + RootDir = couch_config:get("couchdb", "view_index_dir"), + [couch_file:delete(RootDir,File,false)||File <- DeleteFiles], + ok. + +list_index_files(Db) -> + % call server to fetch the index files + RootDir = couch_config:get("couchdb", "view_index_dir"), + filelib:wildcard(RootDir ++ "/." ++ ?b2l(couch_db:name(Db)) ++ "_design"++"/*"). + + +get_row_count(#view{btree=Bt}) -> + {ok, {Count, _, _}} = couch_btree:full_reduce(Bt), + {ok, Count}. + +get_temp_reduce_view(Db, Language, DesignOptions, MapSrc, RedSrc) -> + {ok, #group{views=[View]}=Group} = + get_temp_group(Db, Language, DesignOptions, MapSrc, RedSrc), + {ok, {temp_reduce, View}, Group}. + + +get_reduce_view(Db, GroupId, Name, Update) -> + case get_group(Db, GroupId, Update) of + {ok, #group{views=Views,def_lang=Lang}=Group} -> + case get_reduce_view0(Name, Lang, Views) of + {ok, View} -> + {ok, View, Group}; + Else -> + Else + end; + Error -> + Error + end. + +get_reduce_view0(_Name, _Lang, []) -> + {not_found, missing_named_view}; +get_reduce_view0(Name, Lang, [#view{reduce_funs=RedFuns}=View|Rest]) -> + case get_key_pos(Name, RedFuns, 0) of + 0 -> get_reduce_view0(Name, Lang, Rest); + N -> {ok, {reduce, N, Lang, View}} + end. + +extract_map_view({reduce, _N, _Lang, View}) -> + View. + +detuple_kvs([], Acc) -> + lists:reverse(Acc); +detuple_kvs([KV | Rest], Acc) -> + {{Key,Id},Value} = KV, + NKV = [[Key, Id], Value], + detuple_kvs(Rest, [NKV | Acc]). + +expand_dups([], Acc) -> + lists:reverse(Acc); +expand_dups([{Key, {dups, Vals}} | Rest], Acc) -> + Expanded = [{Key, Val} || Val <- Vals], + expand_dups(Rest, Expanded ++ Acc); +expand_dups([KV | Rest], Acc) -> + expand_dups(Rest, [KV | Acc]). + +data_size(KVList, Reduction) -> + lists:foldl(fun([[Key, _], Value], Acc) -> + size(term_to_binary(Key)) + + size(term_to_binary(Value)) + + Acc + end,size(term_to_binary(Reduction)),KVList). + +fold_reduce({temp_reduce, #view{btree=Bt}}, Fun, Acc, Options) -> + WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) -> + {_, [Red]} = couch_btree:final_reduce(Bt, PartialReds), + Fun(GroupedKey, Red, Acc0) + end, + couch_btree:fold_reduce(Bt, WrapperFun, Acc, Options); + +fold_reduce({reduce, NthRed, Lang, #view{btree=Bt, reduce_funs=RedFuns}}, Fun, Acc, Options) -> + PreResultPadding = lists:duplicate(NthRed - 1, []), + PostResultPadding = lists:duplicate(length(RedFuns) - NthRed, []), + {_Name, FunSrc} = lists:nth(NthRed,RedFuns), + ReduceFun = + fun(reduce, KVs) -> + {ok, Reduced} = couch_query_servers:reduce(Lang, [FunSrc], detuple_kvs(expand_dups(KVs, []),[])), + {0, PreResultPadding ++ Reduced ++ PostResultPadding}; + (rereduce, Reds) -> + UserReds = [[lists:nth(NthRed, element(2, R))] || R <- Reds], + {ok, Reduced} = couch_query_servers:rereduce(Lang, [FunSrc], UserReds), + {0, PreResultPadding ++ Reduced ++ PostResultPadding} + end, + WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) -> + {_, Reds} = couch_btree:final_reduce(ReduceFun, PartialReds), + Fun(GroupedKey, lists:nth(NthRed, Reds), Acc0) + end, + couch_btree:fold_reduce(Bt, WrapperFun, Acc, Options). + +get_key_pos(_Key, [], _N) -> + 0; +get_key_pos(Key, [{Key1,_Value}|_], N) when Key == Key1 -> + N + 1; +get_key_pos(Key, [_|Rest], N) -> + get_key_pos(Key, Rest, N+1). + + +get_temp_map_view(Db, Language, DesignOptions, Src) -> + {ok, #group{views=[View]}=Group} = get_temp_group(Db, Language, DesignOptions, Src, []), + {ok, View, Group}. + +get_map_view(Db, GroupId, Name, Stale) -> + case get_group(Db, GroupId, Stale) of + {ok, #group{views=Views}=Group} -> + case get_map_view0(Name, Views) of + {ok, View} -> + {ok, View, Group}; + Else -> + Else + end; + Error -> + Error + end. + +get_map_view0(_Name, []) -> + {not_found, missing_named_view}; +get_map_view0(Name, [#view{map_names=MapNames}=View|Rest]) -> + case lists:member(Name, MapNames) of + true -> {ok, View}; + false -> get_map_view0(Name, Rest) + end. + +reduce_to_count(Reductions) -> + {Count, _} = + couch_btree:final_reduce( + fun(reduce, KVs) -> + Count = lists:sum( + [case V of {dups, Vals} -> length(Vals); _ -> 1 end + || {_,V} <- KVs]), + {Count, []}; + (rereduce, Reds) -> + {lists:sum([Count0 || {Count0, _} <- Reds]), []} + end, Reductions), + Count. + + + +fold_fun(_Fun, [], _, Acc) -> + {ok, Acc}; +fold_fun(Fun, [KV|Rest], {KVReds, Reds}, Acc) -> + case Fun(KV, {KVReds, Reds}, Acc) of + {ok, Acc2} -> + fold_fun(Fun, Rest, {[KV|KVReds], Reds}, Acc2); + {stop, Acc2} -> + {stop, Acc2} + end. + + +fold(#view{btree=Btree}, Fun, Acc, Options) -> + WrapperFun = + fun(visit, KV, Reds, Acc2) -> + fold_fun(Fun, expand_dups([KV],[]), Reds, Acc2); + (traverse, LK, Red, Acc2) + when is_function(Fun, 4) -> + Fun(traverse, LK, Red, Acc2); + (traverse, _LK, Red, {_, Skip, _, _} = Acc2) + when Skip >= element(1, Red) -> + {skip, setelement(2, Acc2, Skip - element(1, Red))}; + (traverse, _, _, Acc2) -> + {ok, Acc2} + end, + {ok, _LastReduce, _AccResult} = couch_btree:fold(Btree, WrapperFun, Acc, Options). + + +init([]) -> + % read configuration settings and register for configuration changes + RootDir = couch_config:get("couchdb", "view_index_dir"), + ok = couch_config:register(fun ?MODULE:config_change/2), + + couch_db_update_notifier:start_link( + fun({deleted, DbName}) -> + gen_server:cast(couch_view, {reset_indexes, DbName}); + ({created, DbName}) -> + gen_server:cast(couch_view, {reset_indexes, DbName}); + (_Else) -> + ok + end), + ets:new(couch_groups_by_db, [bag, private, named_table]), + ets:new(group_servers_by_sig, [set, protected, named_table]), + ets:new(couch_groups_by_updater, [set, private, named_table]), + process_flag(trap_exit, true), + ok = couch_file:init_delete_dir(RootDir), + {ok, #server{root_dir=RootDir}}. + + +terminate(_Reason, _Srv) -> + [couch_util:shutdown_sync(Pid) || {Pid, _} <- + ets:tab2list(couch_groups_by_updater)], + ok. + + +handle_call({get_group_server, DbName, #group{sig=Sig}=Group}, From, + #server{root_dir=Root}=Server) -> + case ets:lookup(group_servers_by_sig, {DbName, Sig}) of + [] -> + spawn_monitor(fun() -> new_group(Root, DbName, Group) end), + ets:insert(group_servers_by_sig, {{DbName, Sig}, [From]}), + {noreply, Server}; + [{_, WaitList}] when is_list(WaitList) -> + ets:insert(group_servers_by_sig, {{DbName, Sig}, [From | WaitList]}), + {noreply, Server}; + [{_, ExistingPid}] -> + {reply, {ok, ExistingPid}, Server} + end; + +handle_call({reset_indexes, DbName}, _From, #server{root_dir=Root}=Server) -> + do_reset_indexes(DbName, Root), + {reply, ok, Server}. + +handle_cast({reset_indexes, DbName}, #server{root_dir=Root}=Server) -> + do_reset_indexes(DbName, Root), + {noreply, Server}. + +new_group(Root, DbName, #group{name=GroupId, sig=Sig} = Group) -> + ?LOG_DEBUG("Spawning new group server for view group ~s in database ~s.", + [GroupId, DbName]), + case (catch couch_view_group:start_link({Root, DbName, Group})) of + {ok, NewPid} -> + unlink(NewPid), + exit({DbName, Sig, {ok, NewPid}}); + {error, invalid_view_seq} -> + ok = gen_server:call(couch_view, {reset_indexes, DbName}), + new_group(Root, DbName, Group); + Error -> + exit({DbName, Sig, Error}) + end. + +do_reset_indexes(DbName, Root) -> + % shutdown all the updaters and clear the files, the db got changed + Names = ets:lookup(couch_groups_by_db, DbName), + lists:foreach( + fun({_DbName, Sig}) -> + ?LOG_DEBUG("Killing update process for view group ~s. in database ~s.", [Sig, DbName]), + [{_, Pid}] = ets:lookup(group_servers_by_sig, {DbName, Sig}), + couch_util:shutdown_sync(Pid), + delete_from_ets(Pid, DbName, Sig) + end, Names), + delete_index_dir(Root, DbName), + RootDelDir = couch_config:get("couchdb", "view_index_dir"), + couch_file:delete(RootDelDir, Root ++ "/." ++ ?b2l(DbName) ++ "_temp"). + +handle_info({'EXIT', FromPid, Reason}, Server) -> + case ets:lookup(couch_groups_by_updater, FromPid) of + [] -> + if Reason =/= normal, Reason =/= no_db_file -> + % non-updater linked process died, we propagate the error + ?LOG_ERROR("Exit on non-updater process: ~p", [Reason]), + exit(Reason); + true -> ok + end; + [{_, {DbName, GroupId}}] -> + delete_from_ets(FromPid, DbName, GroupId) + end, + {noreply, Server}; + +handle_info({'DOWN', _, _, _, {DbName, Sig, Reply}}, Server) -> + [{_, WaitList}] = ets:lookup(group_servers_by_sig, {DbName, Sig}), + [gen_server:reply(From, Reply) || From <- WaitList], + case Reply of {ok, NewPid} -> + link(NewPid), + add_to_ets(NewPid, DbName, Sig); + _ -> ok end, + {noreply, Server}. + +config_change("couchdb", "view_index_dir") -> + exit(whereis(couch_view), config_change). + +add_to_ets(Pid, DbName, Sig) -> + true = ets:insert(couch_groups_by_updater, {Pid, {DbName, Sig}}), + true = ets:insert(group_servers_by_sig, {{DbName, Sig}, Pid}), + true = ets:insert(couch_groups_by_db, {DbName, Sig}). + +delete_from_ets(Pid, DbName, Sig) -> + true = ets:delete(couch_groups_by_updater, Pid), + true = ets:delete(group_servers_by_sig, {DbName, Sig}), + true = ets:delete_object(couch_groups_by_db, {DbName, Sig}). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +delete_index_dir(RootDir, DbName) -> + nuke_dir(RootDir, RootDir ++ "/." ++ ?b2l(DbName) ++ "_design"). + +nuke_dir(RootDelDir, Dir) -> + case file:list_dir(Dir) of + {error, enoent} -> ok; % doesn't exist + {ok, Files} -> + lists:foreach( + fun(File)-> + Full = Dir ++ "/" ++ File, + case couch_file:delete(RootDelDir, Full, false) of + ok -> ok; + {error, eperm} -> + ok = nuke_dir(RootDelDir, Full) + end + end, + Files), + ok = file:del_dir(Dir) + end. + + +% keys come back in the language of btree - tuples. +less_json_ids({JsonA, IdA}, {JsonB, IdB}) -> + case less_json0(JsonA, JsonB) of + 0 -> + IdA < IdB; + Result -> + Result < 0 + end. + +less_json(A,B) -> + less_json0(A,B) < 0. + +less_json0(A,A) -> 0; + +less_json0(A,B) when is_atom(A), is_atom(B) -> atom_sort(A) - atom_sort(B); +less_json0(A,_) when is_atom(A) -> -1; +less_json0(_,B) when is_atom(B) -> 1; + +less_json0(A,B) when is_number(A), is_number(B) -> A - B; +less_json0(A,_) when is_number(A) -> -1; +less_json0(_,B) when is_number(B) -> 1; + +less_json0(A,B) when is_binary(A), is_binary(B) -> couch_util:collate(A,B); +less_json0(A,_) when is_binary(A) -> -1; +less_json0(_,B) when is_binary(B) -> 1; + +less_json0(A,B) when is_list(A), is_list(B) -> less_list(A,B); +less_json0(A,_) when is_list(A) -> -1; +less_json0(_,B) when is_list(B) -> 1; + +less_json0({A},{B}) when is_list(A), is_list(B) -> less_props(A,B); +less_json0({A},_) when is_list(A) -> -1; +less_json0(_,{B}) when is_list(B) -> 1. + +atom_sort(null) -> 1; +atom_sort(false) -> 2; +atom_sort(true) -> 3. + +less_props([], [_|_]) -> + -1; +less_props(_, []) -> + 1; +less_props([{AKey, AValue}|RestA], [{BKey, BValue}|RestB]) -> + case couch_util:collate(AKey, BKey) of + 0 -> + case less_json0(AValue, BValue) of + 0 -> + less_props(RestA, RestB); + Result -> + Result + end; + Result -> + Result + end. + +less_list([], [_|_]) -> + -1; +less_list(_, []) -> + 1; +less_list([A|RestA], [B|RestB]) -> + case less_json0(A,B) of + 0 -> + less_list(RestA, RestB); + Result -> + Result + end. diff --git a/apps/couch/src/couch_view_compactor.erl b/apps/couch/src/couch_view_compactor.erl new file mode 100644 index 00000000..8ea1dca2 --- /dev/null +++ b/apps/couch/src/couch_view_compactor.erl @@ -0,0 +1,124 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_compactor). + +-include ("couch_db.hrl"). + +-export([start_compact/2]). + +%% @spec start_compact(DbName::binary(), GroupId:binary()) -> ok +%% @doc Compacts the views. GroupId must not include the _design/ prefix +start_compact(DbName, GroupId) -> + Pid = couch_view:get_group_server(DbName, <<"_design/",GroupId/binary>>), + gen_server:call(Pid, {start_compact, fun compact_group/3}). + +%%============================================================================= +%% internal functions +%%============================================================================= + +%% @spec compact_group(Group, NewGroup) -> ok +compact_group(Group, EmptyGroup, DbName) -> + #group{ + current_seq = Seq, + id_btree = IdBtree, + name = GroupId, + views = Views + } = Group, + + #group{ + fd = Fd, + id_btree = EmptyIdBtree, + views = EmptyViews + } = EmptyGroup, + + erlang:monitor(process, Fd), + + {ok, Db} = couch_db:open(DbName, []), + + {ok, Count} = couch_db:get_doc_count(Db), + + <<"_design", ShortName/binary>> = GroupId, + TaskName = <<DbName/binary, ShortName/binary>>, + couch_task_status:add_task(<<"View Group Compaction">>, TaskName, <<"">>), + + Fun = fun({DocId, _ViewIdKeys} = KV, {Bt, Acc, TotalCopied, LastId}) -> + if DocId =:= LastId -> % COUCHDB-999 + ?LOG_ERROR("Duplicates of document `~s` detected in view group `~s`" + ", database `~s` - view rebuild, from scratch, is required", + [DocId, GroupId, DbName]), + exit({view_duplicated_id, DocId}); + true -> ok end, + if TotalCopied rem 10000 =:= 0 -> + couch_task_status:update("Copied ~p of ~p Ids (~p%)", + [TotalCopied, Count, (TotalCopied*100) div Count]), + {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), + {ok, {Bt2, [], TotalCopied+1, DocId}}; + true -> + {ok, {Bt, [KV|Acc], TotalCopied+1, DocId}} + end + end, + {ok, _, {Bt3, Uncopied, _Total, _LastId}} = couch_btree:foldl(IdBtree, Fun, + {EmptyIdBtree, [], 0, nil}), + {ok, NewIdBtree} = couch_btree:add(Bt3, lists:reverse(Uncopied)), + + NewViews = lists:map(fun({View, EmptyView}) -> + compact_view(View, EmptyView) + end, lists:zip(Views, EmptyViews)), + + NewGroup = EmptyGroup#group{ + id_btree=NewIdBtree, + views=NewViews, + current_seq=Seq + }, + maybe_retry_compact(Db, GroupId, NewGroup). + +maybe_retry_compact(#db{name = DbName} = Db, GroupId, NewGroup) -> + #group{sig = Sig, fd = NewFd} = NewGroup, + Header = {Sig, couch_view_group:get_index_header_data(NewGroup)}, + ok = couch_file:write_header(NewFd, Header), + Pid = ets:lookup_element(group_servers_by_sig, {DbName, Sig}, 2), + case gen_server:call(Pid, {compact_done, NewGroup}) of + ok -> + couch_db:close(Db); + update -> + {ok, Db2} = couch_db:reopen(Db), + {_, Ref} = erlang:spawn_monitor(fun() -> + couch_view_updater:update(nil, NewGroup, Db2) + end), + receive + {'DOWN', Ref, _, _, {new_group, NewGroup2}} -> + maybe_retry_compact(Db2, GroupId, NewGroup2) + end + end. + +%% @spec compact_view(View, EmptyView) -> CompactView +compact_view(View, EmptyView) -> + {ok, Count} = couch_view:get_row_count(View), + + %% Key is {Key,DocId} + Fun = fun(KV, {Bt, Acc, TotalCopied}) -> + if TotalCopied rem 10000 =:= 0 -> + couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)", + [View#view.id_num, TotalCopied, Count, (TotalCopied*100) div Count]), + {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), + {ok, {Bt2, [], TotalCopied + 1}}; + true -> + {ok, {Bt, [KV|Acc], TotalCopied + 1}} + end + end, + + {ok, _, {Bt3, Uncopied, _Total}} = couch_btree:foldl(View#view.btree, Fun, + {EmptyView#view.btree, [], 0}), + {ok, NewBt} = couch_btree:add(Bt3, lists:reverse(Uncopied)), + EmptyView#view{btree = NewBt}. + diff --git a/apps/couch/src/couch_view_group.erl b/apps/couch/src/couch_view_group.erl new file mode 100644 index 00000000..75644d6b --- /dev/null +++ b/apps/couch/src/couch_view_group.erl @@ -0,0 +1,641 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_group). +-behaviour(gen_server). + +%% API +-export([start_link/1, request_group/2, trigger_group_update/2, request_group_info/1]). +-export([open_db_group/2, open_temp_group/5, design_doc_to_view_group/1,design_root/2]). + +%% Exports for the compactor +-export([get_index_header_data/1]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-include("couch_db.hrl"). + +-record(group_state, { + type, + db_name, + init_args, + group, + updater_pid=nil, + compactor_pid=nil, + waiting_commit=false, + waiting_list=[], + ref_counter=nil +}). + +% api methods +request_group(Pid, Seq) -> + ?LOG_DEBUG("request_group {Pid, Seq} ~p", [{Pid, Seq}]), + case gen_server:call(Pid, {request_group, Seq}, infinity) of + {ok, Group, _RefCounter} -> + {ok, Group}; + Error -> + ?LOG_DEBUG("request_group Error ~p", [Error]), + throw(Error) + end. + +request_group_info(Pid) -> + case gen_server:call(Pid, request_group_info) of + {ok, GroupInfoList} -> + {ok, GroupInfoList}; + Error -> + throw(Error) + end. + +trigger_group_update(Pid, RequestSeq) -> + gen_server:cast(Pid, {update_group, RequestSeq}). + +% from template +start_link(InitArgs) -> + case gen_server:start_link(couch_view_group, + {InitArgs, self(), Ref = make_ref()}, []) of + {ok, Pid} -> + {ok, Pid}; + ignore -> + receive + {Ref, Pid, Error} -> + case process_info(self(), trap_exit) of + {trap_exit, true} -> receive {'EXIT', Pid, _} -> ok end; + {trap_exit, false} -> ok + end, + Error + end; + Error -> + Error + end. + +% init creates a closure which spawns the appropriate view_updater. +init({{_, DbName, _} = InitArgs, ReturnPid, Ref}) -> + process_flag(trap_exit, true), + case prepare_group(InitArgs, false) of + {ok, Db, #group{fd=Fd, current_seq=Seq}=Group} -> + case Seq > couch_db:get_update_seq(Db) of + true -> + ReturnPid ! {Ref, self(), {error, invalid_view_seq}}, + couch_db:close(Db), + ignore; + _ -> + try couch_db:monitor(Db) after couch_db:close(Db) end, + {ok, #group_state{ + db_name=DbName, + init_args=InitArgs, + group=Group, + ref_counter=erlang:monitor(process,Fd)}} + end; + Error -> + ReturnPid ! {Ref, self(), Error}, + ignore + end. + + + + +% There are two sources of messages: couch_view, which requests an up to date +% view group, and the couch_view_updater, which when spawned, updates the +% group and sends it back here. We employ a caching mechanism, so that between +% database writes, we don't have to spawn a couch_view_updater with every view +% request. + +% The caching mechanism: each request is submitted with a seq_id for the +% database at the time it was read. We guarantee to return a view from that +% sequence or newer. + +% If the request sequence is higher than our current high_target seq, we set +% that as the highest seqence. If the updater is not running, we launch it. + +handle_call({request_group, RequestSeq}, From, + #group_state{ + db_name=DbName, + group=#group{current_seq=Seq}=Group, + updater_pid=nil, + waiting_list=WaitList + }=State) when RequestSeq > Seq -> + Owner = self(), + Pid = spawn_link(fun()-> couch_view_updater:update(Owner, Group, DbName) end), + + {noreply, State#group_state{ + updater_pid=Pid, + waiting_list=[{From,RequestSeq}|WaitList] + }, infinity}; + + +% If the request seqence is less than or equal to the seq_id of a known Group, +% we respond with that Group. +handle_call({request_group, RequestSeq}, _From, #group_state{ + group = #group{current_seq=GroupSeq} = Group, + ref_counter = RefCounter + } = State) when RequestSeq =< GroupSeq -> + {reply, {ok, Group, RefCounter}, State}; + +% Otherwise: TargetSeq => RequestSeq > GroupSeq +% We've already initiated the appropriate action, so just hold the response until the group is up to the RequestSeq +handle_call({request_group, RequestSeq}, From, + #group_state{waiting_list=WaitList}=State) -> + {noreply, State#group_state{ + waiting_list=[{From, RequestSeq}|WaitList] + }, infinity}; + +handle_call(request_group_info, _From, State) -> + GroupInfo = get_group_info(State), + {reply, {ok, GroupInfo}, State}; + +handle_call({start_compact, CompactFun}, _From, #group_state{compactor_pid=nil} + = State) -> + #group_state{ + group = #group{name = GroupId, sig = GroupSig} = Group, + init_args = {RootDir, DbName, _} + } = State, + ?LOG_INFO("View index compaction starting for ~s ~s", [DbName, GroupId]), + {ok, Db} = couch_db:open_int(DbName, []), + {ok, Fd} = open_index_file(compact, RootDir, DbName, GroupSig), + NewGroup = reset_file(Db, Fd, DbName, Group), + couch_db:close(Db), + Pid = spawn_link(fun() -> CompactFun(Group, NewGroup, DbName) end), + {reply, {ok, Pid}, State#group_state{compactor_pid = Pid}}; +handle_call({start_compact, _}, _From, #group_state{compactor_pid=Pid} = State) -> + %% compact already running, this is a no-op + {reply, {ok, Pid}, State}; + +handle_call({compact_done, #group{fd=NewFd, current_seq=NewSeq} = NewGroup}, _From, + #group_state{group = #group{current_seq=OldSeq}} = State) + when NewSeq >= OldSeq -> + #group_state{ + group = #group{name=GroupId, fd=OldFd, sig=GroupSig}, + init_args = {RootDir, DbName, _}, + updater_pid = UpdaterPid, + compactor_pid = CompactorPid, + ref_counter = RefCounter + } = State, + + ?LOG_INFO("View index compaction complete for ~s ~s", [DbName, GroupId]), + FileName = index_file_name(RootDir, DbName, GroupSig), + CompactName = index_file_name(compact, RootDir, DbName, GroupSig), + ok = couch_file:delete(RootDir, FileName), + ok = file:rename(CompactName, FileName), + + %% if an updater is running, kill it and start a new one + NewUpdaterPid = + if is_pid(UpdaterPid) -> + unlink(UpdaterPid), + exit(UpdaterPid, view_compaction_complete), + Owner = self(), + spawn_link(fun()-> couch_view_updater:update(Owner, NewGroup, DbName) end); + true -> + nil + end, + + %% cleanup old group + unlink(CompactorPid), + receive {'EXIT', CompactorPid, normal} -> ok after 0 -> ok end, + unlink(OldFd), + erlang:demonitor(RefCounter), + + self() ! delayed_commit, + {reply, ok, State#group_state{ + group=NewGroup, + ref_counter=erlang:monitor(process,NewFd), + compactor_pid=nil, + updater_pid=NewUpdaterPid + }}; +handle_call({compact_done, NewGroup}, _From, State) -> + #group_state{ + group = #group{name = GroupId, current_seq = CurrentSeq}, + init_args={_RootDir, DbName, _} + } = State, + ?LOG_INFO("View index compaction still behind for ~s ~s -- current: ~p " ++ + "compact: ~p", [DbName, GroupId, CurrentSeq, NewGroup#group.current_seq]), + {reply, update, State}. + +handle_cast({update_group, RequestSeq}, + #group_state{ + group=#group{current_seq=Seq}=Group, + updater_pid=nil}=State) when RequestSeq > Seq -> + Owner = self(), + Pid = spawn_link(fun()-> couch_view_updater:update(Owner, Group) end), + {noreply, State#group_state{updater_pid=Pid}}; +handle_cast({update_group, _RequestSeq}, State) -> + {noreply, State}; + +handle_cast({partial_update, Pid, NewGroup}, #group_state{updater_pid=Pid} + = State) -> + #group_state{ + db_name = DbName, + waiting_commit = WaitingCommit + } = State, + NewSeq = NewGroup#group.current_seq, + ?LOG_DEBUG("checkpointing view update at seq ~p for ~s ~s", [NewSeq, + DbName, NewGroup#group.name]), + if not WaitingCommit -> + erlang:send_after(1000, self(), delayed_commit); + true -> ok + end, + {noreply, State#group_state{group=NewGroup, waiting_commit=true}}; +handle_cast({partial_update, _, _}, State) -> + %% message from an old (probably pre-compaction) updater; ignore + {noreply, State}. + +handle_info(delayed_commit, #group_state{db_name=DbName,group=Group}=State) -> + {ok, Db} = couch_db:open_int(DbName, []), + CommittedSeq = couch_db:get_committed_update_seq(Db), + couch_db:close(Db), + if CommittedSeq >= Group#group.current_seq -> + % save the header + Header = {Group#group.sig, get_index_header_data(Group)}, + ok = couch_file:write_header(Group#group.fd, Header), + {noreply, State#group_state{waiting_commit=false}}; + true -> + % We can't commit the header because the database seq that's fully + % committed to disk is still behind us. If we committed now and the + % database lost those changes our view could be forever out of sync + % with the database. But a crash before we commit these changes, no big + % deal, we only lose incremental changes since last committal. + erlang:send_after(1000, self(), delayed_commit), + {noreply, State#group_state{waiting_commit=true}} + end; + +handle_info({'EXIT', FromPid, {new_group, Group}}, + #group_state{db_name=DbName, + updater_pid=UpPid, + ref_counter=RefCounter, + waiting_list=WaitList, + waiting_commit=WaitingCommit}=State) when UpPid == FromPid -> + if not WaitingCommit -> + erlang:send_after(1000, self(), delayed_commit); + true -> ok + end, + case reply_with_group(Group, WaitList, [], RefCounter) of + [] -> + {noreply, State#group_state{waiting_commit=true, waiting_list=[], + group=Group, updater_pid=nil}}; + StillWaiting -> + % we still have some waiters, reopen the database and reupdate the index + Owner = self(), + Pid = spawn_link(fun() -> couch_view_updater:update(Owner, Group, DbName) end), + {noreply, State#group_state{waiting_commit=true, + waiting_list=StillWaiting, updater_pid=Pid}} + end; +handle_info({'EXIT', _, {new_group, _}}, State) -> + %% message from an old (probably pre-compaction) updater; ignore + {noreply, State}; + +handle_info({'EXIT', UpPid, reset}, + #group_state{init_args=InitArgs, updater_pid=UpPid} = State) -> + case prepare_group(InitArgs, true) of + {ok, Db, ResetGroup} -> + Owner = self(), + couch_db:close(Db), + Pid = spawn_link(fun() -> + couch_view_updater:update(Owner, ResetGroup, Db#db.name) + end), + {noreply, State#group_state{ + updater_pid=Pid, + group=ResetGroup}}; + Error -> + {stop, normal, reply_all(State, Error)} + end; +handle_info({'EXIT', _, reset}, State) -> + %% message from an old (probably pre-compaction) updater; ignore + {noreply, State}; + +handle_info({'EXIT', _FromPid, normal}, State) -> + {noreply, State}; + +handle_info({'EXIT', FromPid, {{nocatch, Reason}, _Trace}}, State) -> + ?LOG_DEBUG("Uncaught throw() in linked pid: ~p", [{FromPid, Reason}]), + {stop, Reason, State}; + +handle_info({'EXIT', FromPid, Reason}, State) -> + ?LOG_DEBUG("Exit from linked pid: ~p", [{FromPid, Reason}]), + {stop, Reason, State}; + +handle_info({'DOWN',_,_,Pid,Reason}, #group_state{group=G}=State) -> + ?LOG_INFO("Shutting down group server ~p, db ~p closing w/ reason~n~p", + [G#group.name, Pid, Reason]), + {stop, normal, reply_all(State, shutdown)}. + + +terminate(Reason, #group_state{updater_pid=Update, compactor_pid=Compact}=S) -> + reply_all(S, Reason), + couch_util:shutdown_sync(Update), + couch_util:shutdown_sync(Compact), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% Local Functions + +% reply_with_group/3 +% for each item in the WaitingList {Pid, Seq} +% if the Seq is =< GroupSeq, reply +reply_with_group(Group=#group{current_seq=GroupSeq}, [{Pid, Seq}|WaitList], + StillWaiting, RefCounter) when Seq =< GroupSeq -> + gen_server:reply(Pid, {ok, Group, RefCounter}), + reply_with_group(Group, WaitList, StillWaiting, RefCounter); + +% else +% put it in the continuing waiting list +reply_with_group(Group, [{Pid, Seq}|WaitList], StillWaiting, RefCounter) -> + reply_with_group(Group, WaitList, [{Pid, Seq}|StillWaiting], RefCounter); + +% return the still waiting list +reply_with_group(_Group, [], StillWaiting, _RefCounter) -> + StillWaiting. + +reply_all(#group_state{waiting_list=WaitList}=State, Reply) -> + [catch gen_server:reply(Pid, Reply) || {Pid, _} <- WaitList], + State#group_state{waiting_list=[]}. + +prepare_group({RootDir, DbName, #group{sig=Sig}=Group}, ForceReset)-> + case couch_db:open_int(DbName, []) of + {ok, Db} -> + case open_index_file(RootDir, DbName, Sig) of + {ok, Fd} -> + if ForceReset -> + % this can happen if we missed a purge + {ok, Db, reset_file(Db, Fd, DbName, Group)}; + true -> + % 09 UPGRADE CODE + ok = couch_file:upgrade_old_header(Fd, <<$r, $c, $k, 0>>), + case (catch couch_file:read_header(Fd)) of + {ok, {Sig, HeaderInfo}} -> + % sigs match! + {ok, Db, init_group(Db, Fd, Group, HeaderInfo)}; + _ -> + % this happens on a new file + {ok, Db, reset_file(Db, Fd, DbName, Group)} + end + end; + Error -> + catch delete_index_file(RootDir, DbName, Sig), + Error + end; + Else -> + Else + end. + +get_index_header_data(#group{current_seq=Seq, purge_seq=PurgeSeq, + id_btree=IdBtree,views=Views}) -> + ViewStates = [ + {couch_btree:get_state(V#view.btree), V#view.update_seq, V#view.purge_seq} || V <- Views + ], + #index_header{ + seq=Seq, + purge_seq=PurgeSeq, + id_btree_state=couch_btree:get_state(IdBtree), + view_states=ViewStates + }. + +hex_sig(GroupSig) -> + couch_util:to_hex(?b2l(GroupSig)). + +design_root(RootDir, DbName) -> + RootDir ++ "/." ++ ?b2l(DbName) ++ "_design/". + +index_file_name(RootDir, DbName, GroupSig) -> + design_root(RootDir, DbName) ++ hex_sig(GroupSig) ++".view". + +index_file_name(compact, RootDir, DbName, GroupSig) -> + design_root(RootDir, DbName) ++ hex_sig(GroupSig) ++".compact.view". + + +open_index_file(RootDir, DbName, GroupSig) -> + FileName = index_file_name(RootDir, DbName, GroupSig), + case couch_file:open(FileName) of + {ok, Fd} -> {ok, Fd}; + {error, enoent} -> couch_file:open(FileName, [create]); + Error -> Error + end. + +open_index_file(compact, RootDir, DbName, GroupSig) -> + FileName = index_file_name(compact, RootDir, DbName, GroupSig), + case couch_file:open(FileName) of + {ok, Fd} -> {ok, Fd}; + {error, enoent} -> couch_file:open(FileName, [create]); + Error -> Error + end. + +open_temp_group(DbName, Language, DesignOptions, MapSrc, RedSrc) -> + case couch_db:open_int(DbName, []) of + {ok, Db} -> + View = #view{map_names=[<<"_temp">>], + id_num=0, + btree=nil, + def=MapSrc, + reduce_funs= if RedSrc==[] -> []; true -> [{<<"_temp">>, RedSrc}] end, + options=DesignOptions}, + couch_db:close(Db), + {ok, set_view_sig(#group{name = <<"_temp">>,lib={[]}, views=[View], + def_lang=Language, design_options=DesignOptions})}; + Error -> + Error + end. + +set_view_sig(#group{ + views=Views, + lib={[]}, + def_lang=Language, + design_options=DesignOptions}=G) -> + ViewInfo = [old_view_format(V) || V <- Views], + G#group{sig=couch_util:md5(term_to_binary({ViewInfo, Language, DesignOptions}))}; +set_view_sig(#group{ + views=Views, + lib=Lib, + def_lang=Language, + design_options=DesignOptions}=G) -> + ViewInfo = [old_view_format(V) || V <- Views], + G#group{sig=couch_util:md5(term_to_binary({ViewInfo, Language, DesignOptions, sort_lib(Lib)}))}. + +% Use the old view record format so group sig's don't change +old_view_format(View) -> + { + view, + View#view.id_num, + View#view.map_names, + View#view.def, + View#view.btree, + View#view.reduce_funs, + View#view.options + }. + +sort_lib({Lib}) -> + sort_lib(Lib, []). +sort_lib([], LAcc) -> + lists:keysort(1, LAcc); +sort_lib([{LName, {LObj}}|Rest], LAcc) -> + LSorted = sort_lib(LObj, []), % descend into nested object + sort_lib(Rest, [{LName, LSorted}|LAcc]); +sort_lib([{LName, LCode}|Rest], LAcc) -> + sort_lib(Rest, [{LName, LCode}|LAcc]). + +open_db_group(DbName, GroupId) -> + {Pid, Ref} = spawn_monitor(fun() -> + exit(try + fabric:open_doc(mem3:dbname(DbName), GroupId, []) + catch error:database_does_not_exist -> + {ok, Db} = couch_db:open(DbName, []), + couch_db:open_doc(Db, GroupId) + end) + end), + receive {'DOWN', Ref, process, Pid, {ok, Doc}} -> + {ok, design_doc_to_view_group(Doc)}; + {'DOWN', Ref, process, Pid, Error} -> + Error + end. + +get_group_info(State) -> + #group_state{ + group=Group, + updater_pid=UpdaterPid, + compactor_pid=CompactorPid, + waiting_commit=WaitingCommit, + waiting_list=WaitersList + } = State, + #group{ + fd = Fd, + sig = GroupSig, + def_lang = Lang, + views = Views, + current_seq=CurrentSeq, + purge_seq=PurgeSeq + } = Group, + {ok, Size} = couch_file:bytes(Fd), + [ + {signature, ?l2b(hex_sig(GroupSig))}, + {language, Lang}, + {disk_size, Size}, + {data_size, compute_data_size(Views)}, + {updater_running, UpdaterPid /= nil}, + {compact_running, CompactorPid /= nil}, + {waiting_commit, WaitingCommit}, + {waiting_clients, length(WaitersList)}, + {update_seq, CurrentSeq}, + {purge_seq, PurgeSeq} + ]. + +compute_data_size(ViewList) -> + lists:foldl(fun(#view{btree=Btree}, Acc) -> + {ok, {_, _, Size}} = couch_btree:full_reduce(Btree), + Size + Acc + end, 0, ViewList). + + +% maybe move to another module +design_doc_to_view_group(#doc{id=Id,body={Fields}}) -> + Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>), + {DesignOptions} = couch_util:get_value(<<"options">>, Fields, {[]}), + {RawViews} = couch_util:get_value(<<"views">>, Fields, {[]}), + Lib = couch_util:get_value(<<"lib">>, RawViews, {[]}), + % add the views to a dictionary object, with the map source as the key + DictBySrc = + lists:foldl( + fun({Name, {MRFuns}}, DictBySrcAcc) -> + case couch_util:get_value(<<"map">>, MRFuns) of + undefined -> DictBySrcAcc; + MapSrc -> + RedSrc = couch_util:get_value(<<"reduce">>, MRFuns, null), + {ViewOptions} = couch_util:get_value(<<"options">>, MRFuns, {[]}), + View = + case dict:find({MapSrc, ViewOptions}, DictBySrcAcc) of + {ok, View0} -> View0; + error -> #view{def=MapSrc, options=ViewOptions} % create new view object + end, + View2 = + if RedSrc == null -> + View#view{map_names=[Name|View#view.map_names]}; + true -> + View#view{reduce_funs=[{Name,RedSrc}|View#view.reduce_funs]} + end, + dict:store({MapSrc, ViewOptions}, View2, DictBySrcAcc) + end + end, dict:new(), RawViews), + % number the views + {Views, _N} = lists:mapfoldl( + fun({_Src, View}, N) -> + {View#view{id_num=N},N+1} + end, 0, lists:sort(dict:to_list(DictBySrc))), + set_view_sig(#group{name=Id, lib=Lib, views=Views, def_lang=Language, design_options=DesignOptions}). + +reset_group(#group{views=Views}=Group) -> + Views2 = [View#view{btree=nil} || View <- Views], + Group#group{fd=nil,query_server=nil,current_seq=0, + id_btree=nil,views=Views2}. + +reset_file(Db, Fd, DbName, #group{sig=Sig,name=Name} = Group) -> + ?LOG_DEBUG("Resetting group index \"~s\" in db ~s", [Name, DbName]), + ok = couch_file:truncate(Fd, 0), + ok = couch_file:write_header(Fd, {Sig, nil}), + init_group(Db, Fd, reset_group(Group), nil). + +delete_index_file(RootDir, DbName, GroupSig) -> + couch_file:delete(RootDir, index_file_name(RootDir, DbName, GroupSig)). + +init_group(Db, Fd, #group{views=Views}=Group, nil) -> + init_group(Db, Fd, Group, + #index_header{seq=0, purge_seq=couch_db:get_purge_seq(Db), + id_btree_state=nil, view_states=[{nil, 0, 0} || _ <- Views]}); +init_group(_Db, Fd, #group{def_lang=Lang,views=Views}= + Group, IndexHeader) -> + #index_header{seq=Seq, purge_seq=PurgeSeq, + id_btree_state=IdBtreeState, view_states=ViewStates} = IndexHeader, + StateUpdate = fun + ({_, _, _}=State) -> State; + (State) -> {State, 0, 0} + end, + ViewStates2 = lists:map(StateUpdate, ViewStates), + {ok, IdBtree} = couch_btree:open(IdBtreeState, Fd), + Views2 = lists:zipwith( + fun({BTState, USeq, PSeq}, #view{reduce_funs=RedFuns,options=Options}=View) -> + FunSrcs = [FunSrc || {_Name, FunSrc} <- RedFuns], + ReduceFun = + fun(reduce, KVs) -> + KVs2 = couch_view:expand_dups(KVs,[]), + KVs3 = couch_view:detuple_kvs(KVs2,[]), + {ok, Reduced} = couch_query_servers:reduce(Lang, FunSrcs, + KVs3), + {length(KVs3), Reduced, couch_view:data_size(KVs3, Reduced)}; + (rereduce, Reds) -> + Count = lists:sum(extract(Reds, counts)), + DataSize = lists:sum(extract(Reds, data_size)), + UserReds = extract(Reds, user_reds), + {ok, Reduced} = couch_query_servers:rereduce(Lang, FunSrcs, + UserReds), + {Count, Reduced, DataSize} + end, + + case couch_util:get_value(<<"collation">>, Options, <<"default">>) of + <<"default">> -> + Less = fun couch_view:less_json_ids/2; + <<"raw">> -> + Less = fun(A,B) -> A < B end + end, + {ok, Btree} = couch_btree:open(BTState, Fd, + [{less, Less}, {reduce, ReduceFun}] + ), + View#view{btree=Btree, update_seq=USeq, purge_seq=PSeq} + end, + ViewStates2, Views), + Group#group{fd=Fd, current_seq=Seq, purge_seq=PurgeSeq, id_btree=IdBtree, + views=Views2}. + +extract(Reds, counts) -> + [element(1, R) || R <- Reds]; +extract(Reds, user_reds) -> + [element(2, R) || R <- Reds]; +extract(Reds, data_size) -> + lists:map(fun({_, _}) -> 0; ({_, _, Size}) -> Size end, Reds). diff --git a/apps/couch/src/couch_view_updater.erl b/apps/couch/src/couch_view_updater.erl new file mode 100644 index 00000000..8238e3e5 --- /dev/null +++ b/apps/couch/src/couch_view_updater.erl @@ -0,0 +1,296 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_view_updater). + +-export([update/3, do_maps/4, do_writes/5, load_docs/3]). + +-include("couch_db.hrl"). + +-spec update(_, #group{}, Dbname::binary()) -> no_return(). + +update(Owner, Group, DbName) when is_binary(DbName) -> + {ok, Db} = couch_db:open_int(DbName, []), + try + update(Owner, Group, Db) + after + couch_db:close(Db) + end; + +update(Owner, Group, #db{name = DbName} = Db) -> + #group{ + name = GroupName, + current_seq = Seq, + purge_seq = PurgeSeq + } = Group, + couch_task_status:add_task(<<"View Group Indexer">>, <<DbName/binary," ",GroupName/binary>>, <<"Starting index update">>), + + DbPurgeSeq = couch_db:get_purge_seq(Db), + Group2 = + if DbPurgeSeq == PurgeSeq -> + Group; + DbPurgeSeq == PurgeSeq + 1 -> + couch_task_status:update(<<"Removing purged entries from view index.">>), + purge_index(Group, Db); + true -> + couch_task_status:update(<<"Resetting view index due to lost purge entries.">>), + exit(reset) + end, + {ok, MapQueue} = couch_work_queue:new( + [{max_size, 100000}, {max_items, 500}]), + {ok, WriteQueue} = couch_work_queue:new( + [{max_size, 100000}, {max_items, 500}]), + Self = self(), + ViewEmptyKVs = [{View, []} || View <- Group2#group.views], + spawn_link(?MODULE, do_maps, [Group, MapQueue, WriteQueue, ViewEmptyKVs]), + spawn_link(?MODULE, do_writes, [Self, Owner, Group2, WriteQueue, Seq == 0]), + % compute on all docs modified since we last computed. + TotalChanges = couch_db:count_changes_since(Db, Seq), + % update status every half second + couch_task_status:set_update_frequency(500), + #group{ design_options = DesignOptions } = Group, + IncludeDesign = couch_util:get_value(<<"include_design">>, + DesignOptions, false), + LocalSeq = couch_util:get_value(<<"local_seq">>, DesignOptions, false), + DocOpts = + case LocalSeq of + true -> [conflicts, deleted_conflicts, local_seq]; + _ -> [conflicts, deleted_conflicts] + end, + EnumFun = fun ?MODULE:load_docs/3, + Acc0 = {0, Db, MapQueue, DocOpts, IncludeDesign, TotalChanges}, + {ok, _, _} = couch_db:enum_docs_since(Db, Seq, EnumFun, Acc0, []), + couch_task_status:set_update_frequency(0), + couch_task_status:update("Finishing."), + couch_work_queue:close(MapQueue), + receive {new_group, NewGroup} -> + exit({new_group, + NewGroup#group{current_seq=couch_db:get_update_seq(Db)}}) + end. + +load_docs(DocInfo, _, {I, Db, MapQueue, DocOpts, IncludeDesign, Total} = Acc) -> + couch_task_status:update("Processed ~p of ~p changes (~p%)", [I, Total, + (I*100) div Total]), + load_doc(Db, DocInfo, MapQueue, DocOpts, IncludeDesign), + {ok, setelement(1, Acc, I+1)}. + +purge_index(#group{views=Views, id_btree=IdBtree}=Group, Db) -> + {ok, PurgedIdsRevs} = couch_db:get_last_purged(Db), + Ids = [Id || {Id, _Revs} <- PurgedIdsRevs], + {ok, Lookups, IdBtree2} = couch_btree:query_modify(IdBtree, Ids, [], Ids), + + % now populate the dictionary with all the keys to delete + ViewKeysToRemoveDict = lists:foldl( + fun({ok,{DocId,ViewNumRowKeys}}, ViewDictAcc) -> + lists:foldl( + fun({ViewNum, RowKey}, ViewDictAcc2) -> + dict:append(ViewNum, {RowKey, DocId}, ViewDictAcc2) + end, ViewDictAcc, ViewNumRowKeys); + ({not_found, _}, ViewDictAcc) -> + ViewDictAcc + end, dict:new(), Lookups), + + % Now remove the values from the btrees + PurgeSeq = couch_db:get_purge_seq(Db), + Views2 = lists:map( + fun(#view{id_num=Num,btree=Btree}=View) -> + case dict:find(Num, ViewKeysToRemoveDict) of + {ok, RemoveKeys} -> + {ok, ViewBtree2} = couch_btree:add_remove(Btree, [], RemoveKeys), + case ViewBtree2 =/= Btree of + true -> + View#view{btree=ViewBtree2, purge_seq=PurgeSeq}; + _ -> + View#view{btree=ViewBtree2} + end; + error -> % no keys to remove in this view + View + end + end, Views), + Group#group{id_btree=IdBtree2, + views=Views2, + purge_seq=PurgeSeq}. + +-spec load_doc(#db{}, #doc_info{}, pid(), [atom()], boolean()) -> ok. +load_doc(Db, DI, MapQueue, DocOpts, IncludeDesign) -> + DocInfo = case DI of + #full_doc_info{id=DocId, update_seq=Seq, deleted=Deleted} -> + couch_doc:to_doc_info(DI); + #doc_info{id=DocId, high_seq=Seq, revs=[#rev_info{deleted=Deleted}|_]} -> + DI + end, + case {IncludeDesign, DocId} of + {false, <<?DESIGN_DOC_PREFIX, _/binary>>} -> % we skip design docs + ok; + _ -> + if Deleted -> + couch_work_queue:queue(MapQueue, {Seq, #doc{id=DocId, deleted=true}}); + true -> + {ok, Doc} = couch_db:open_doc_int(Db, DocInfo, DocOpts), + couch_work_queue:queue(MapQueue, {Seq, Doc}) + end + end. + +-spec do_maps(#group{}, pid(), pid(), any()) -> any(). +do_maps(Group, MapQueue, WriteQueue, ViewEmptyKVs) -> + case couch_work_queue:dequeue(MapQueue) of + closed -> + couch_work_queue:close(WriteQueue), + couch_query_servers:stop_doc_map(Group#group.query_server); + {ok, Queue} -> + Docs = [Doc || {_,#doc{deleted=false}=Doc} <- Queue], + DelKVs = [{Id, []} || {_, #doc{deleted=true,id=Id}} <- Queue], + LastSeq = lists:max([Seq || {Seq, _Doc} <- Queue]), + {Group1, Results} = view_compute(Group, Docs), + {ViewKVs, DocIdViewIdKeys} = view_insert_query_results(Docs, + Results, ViewEmptyKVs, DelKVs), + couch_work_queue:queue(WriteQueue, {LastSeq, ViewKVs, DocIdViewIdKeys}), + ?MODULE:do_maps(Group1, MapQueue, WriteQueue, ViewEmptyKVs) + end. + +-spec do_writes(pid(), pid() | nil, #group{}, pid(), boolean()) -> any(). +do_writes(Parent, Owner, Group, WriteQueue, InitialBuild) -> + case accumulate_writes(WriteQueue, couch_work_queue:dequeue(WriteQueue), nil) of + stop -> + Parent ! {new_group, Group}; + {Go, {NewSeq, ViewKeyValues, DocIdViewIdKeys}} -> + Group2 = write_changes(Group, ViewKeyValues, DocIdViewIdKeys, NewSeq, + InitialBuild), + if Go =:= stop -> + Parent ! {new_group, Group2}; + true -> + case Owner of + nil -> ok; + _ -> ok = gen_server:cast(Owner, {partial_update, Parent, Group2}) + end, + ?MODULE:do_writes(Parent, Owner, Group2, WriteQueue, InitialBuild) + end + end. + +accumulate_writes(_, closed, nil) -> + stop; +accumulate_writes(_, closed, Acc) -> + {stop, Acc}; +accumulate_writes(W, {ok, Queue}, Acc0) -> + {_, _, DocIdViewIdKeys} = NewAcc = lists:foldl( + fun(First, nil) -> First; ({Seq, ViewKVs, DocIdViewIdKeys}, Acc) -> + {Seq2, AccViewKVs, AccDocIdViewIdKeys} = Acc, + AccViewKVs2 = lists:zipwith( + fun({View, KVsIn}, {_View, KVsAcc}) -> + {View, KVsIn ++ KVsAcc} + end, ViewKVs, AccViewKVs), + {erlang:max(Seq, Seq2), AccViewKVs2, + DocIdViewIdKeys ++ AccDocIdViewIdKeys} + end, Acc0, Queue), + % check if we have enough items now + MinItems = couch_config:get("view_updater", "min_writer_items", "100"), + MinSize = couch_config:get("view_updater", "min_writer_size", "16777216"), + case length(DocIdViewIdKeys) >= list_to_integer(MinItems) orelse + process_info(self(), memory) >= list_to_integer(MinSize) of + true -> + {ok, NewAcc}; + false -> + accumulate_writes(W, couch_work_queue:dequeue(W), NewAcc) + end. + +-spec view_insert_query_results([#doc{}], list(), any(), any()) -> any(). +view_insert_query_results([], [], ViewKVs, DocIdViewIdKeysAcc) -> + {ViewKVs, DocIdViewIdKeysAcc}; +view_insert_query_results([Doc|RestDocs], [QueryResults | RestResults], ViewKVs, DocIdViewIdKeysAcc) -> + {NewViewKVs, NewViewIdKeys} = view_insert_doc_query_results(Doc, QueryResults, ViewKVs, [], []), + NewDocIdViewIdKeys = [{Doc#doc.id, NewViewIdKeys} | DocIdViewIdKeysAcc], + view_insert_query_results(RestDocs, RestResults, NewViewKVs, NewDocIdViewIdKeys). + +-spec view_insert_doc_query_results(#doc{}, list(), list(), any(), any()) -> + any(). +view_insert_doc_query_results(_Doc, [], [], ViewKVsAcc, ViewIdKeysAcc) -> + {lists:reverse(ViewKVsAcc), lists:reverse(ViewIdKeysAcc)}; +view_insert_doc_query_results(#doc{id=DocId}=Doc, [ResultKVs|RestResults], [{View, KVs}|RestViewKVs], ViewKVsAcc, ViewIdKeysAcc) -> + % Take any identical keys and combine the values + ResultKVs2 = lists:foldl( + fun({Key,Value}, [{PrevKey,PrevVal}|AccRest]) -> + case Key == PrevKey of + true -> + case PrevVal of + {dups, Dups} -> + [{PrevKey, {dups, [Value|Dups]}} | AccRest]; + _ -> + [{PrevKey, {dups, [Value,PrevVal]}} | AccRest] + end; + false -> + [{Key,Value},{PrevKey,PrevVal}|AccRest] + end; + (KV, []) -> + [KV] + end, [], lists:sort(ResultKVs)), + NewKVs = [{{Key, DocId}, Value} || {Key, Value} <- ResultKVs2], + NewViewKVsAcc = [{View, NewKVs ++ KVs} | ViewKVsAcc], + NewViewIdKeys = [{View#view.id_num, Key} || {Key, _Value} <- ResultKVs2], + NewViewIdKeysAcc = NewViewIdKeys ++ ViewIdKeysAcc, + view_insert_doc_query_results(Doc, RestResults, RestViewKVs, NewViewKVsAcc, NewViewIdKeysAcc). + +-spec view_compute(#group{}, [#doc{}]) -> {#group{}, any()}. +view_compute(Group, []) -> + {Group, []}; +view_compute(#group{def_lang=DefLang, lib=Lib, query_server=QueryServerIn}=Group, Docs) -> + {ok, QueryServer} = + case QueryServerIn of + nil -> % doc map not started + Definitions = [View#view.def || View <- Group#group.views], + couch_query_servers:start_doc_map(DefLang, Definitions, Lib); + _ -> + {ok, QueryServerIn} + end, + {ok, Results} = couch_query_servers:map_docs(QueryServer, Docs), + {Group#group{query_server=QueryServer}, Results}. + + +write_changes(Group, ViewKeyValuesToAdd, DocIdViewIdKeys, NewSeq, InitialBuild) -> + #group{id_btree=IdBtree} = Group, + + AddDocIdViewIdKeys = [{DocId, ViewIdKeys} || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys /= []], + if InitialBuild -> + RemoveDocIds = [], + LookupDocIds = []; + true -> + RemoveDocIds = [DocId || {DocId, ViewIdKeys} <- DocIdViewIdKeys, ViewIdKeys == []], + LookupDocIds = [DocId || {DocId, _ViewIdKeys} <- DocIdViewIdKeys] + end, + {ok, LookupResults, IdBtree2} + = couch_btree:query_modify(IdBtree, LookupDocIds, AddDocIdViewIdKeys, RemoveDocIds), + KeysToRemoveByView = lists:foldl( + fun(LookupResult, KeysToRemoveByViewAcc) -> + case LookupResult of + {ok, {DocId, ViewIdKeys}} -> + lists:foldl( + fun({ViewId, Key}, KeysToRemoveByViewAcc2) -> + dict:append(ViewId, {Key, DocId}, KeysToRemoveByViewAcc2) + end, + KeysToRemoveByViewAcc, ViewIdKeys); + {not_found, _} -> + KeysToRemoveByViewAcc + end + end, + dict:new(), LookupResults), + Views2 = lists:zipwith(fun(View, {_View, AddKeyValues}) -> + KeysToRemove = couch_util:dict_find(View#view.id_num, KeysToRemoveByView, []), + {ok, ViewBtree2} = couch_btree:add_remove(View#view.btree, AddKeyValues, KeysToRemove), + case ViewBtree2 =/= View#view.btree of + true -> + View#view{btree=ViewBtree2, update_seq=NewSeq}; + _ -> + View#view{btree=ViewBtree2} + end + end, Group#group.views, ViewKeyValuesToAdd), + Group#group{views=Views2, current_seq=NewSeq, id_btree=IdBtree2}. + + diff --git a/apps/couch/src/couch_work_queue.erl b/apps/couch/src/couch_work_queue.erl new file mode 100644 index 00000000..13ec7335 --- /dev/null +++ b/apps/couch/src/couch_work_queue.erl @@ -0,0 +1,155 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_work_queue). +-behaviour(gen_server). + +% public API +-export([new/1, queue/2, dequeue/1, dequeue/2, close/1]). + +% gen_server callbacks +-export([init/1, terminate/2]). +-export([handle_call/3, handle_cast/2, code_change/3, handle_info/2]). + +-record(q, { + queue = queue:new(), + blocked = [], + max_size, + max_items, + items = 0, + size = 0, + work_waiters = [], + close_on_dequeue = false, + multi_workers = false +}). + + +new(Options) -> + gen_server:start_link(couch_work_queue, Options, []). + + +queue(Wq, Item) -> + gen_server:call(Wq, {queue, Item}, infinity). + + +dequeue(Wq) -> + dequeue(Wq, all). + + +dequeue(Wq, MaxItems) -> + try + gen_server:call(Wq, {dequeue, MaxItems}, infinity) + catch + _:_ -> closed + end. + + +close(Wq) -> + gen_server:cast(Wq, close). + + +init(Options) -> + Q = #q{ + max_size = couch_util:get_value(max_size, Options), + max_items = couch_util:get_value(max_items, Options), + multi_workers = couch_util:get_value(multi_workers, Options, false) + }, + {ok, Q}. + + +terminate(_Reason, #q{work_waiters=Workers}) -> + lists:foreach(fun({W, _}) -> gen_server:reply(W, closed) end, Workers). + + +handle_call({queue, Item}, From, #q{work_waiters = []} = Q0) -> + Q = Q0#q{size = Q0#q.size + byte_size(term_to_binary(Item)), + items = Q0#q.items + 1, + queue = queue:in(Item, Q0#q.queue)}, + case (Q#q.size >= Q#q.max_size) orelse + (Q#q.items >= Q#q.max_items) of + true -> + {noreply, Q#q{blocked = [From | Q#q.blocked]}}; + false -> + {reply, ok, Q} + end; + +handle_call({queue, Item}, _From, #q{work_waiters = [{W, _Max} | Rest]} = Q) -> + gen_server:reply(W, {ok, [Item]}), + {reply, ok, Q#q{work_waiters = Rest}}; + +handle_call({dequeue, Max}, From, Q) -> + #q{work_waiters = Workers, multi_workers = Multi, items = Count} = Q, + case {Workers, Multi} of + {[_ | _], false} -> + exit("Only one caller allowed to wait for this work at a time"); + {[_ | _], true} -> + {noreply, Q#q{work_waiters=Workers ++ [{From, Max}]}}; + _ -> + case Count of + 0 -> + {noreply, Q#q{work_waiters=Workers ++ [{From, Max}]}}; + C when C > 0 -> + deliver_queue_items(Max, Q) + end + end. + + +deliver_queue_items(Max, Q) -> + #q{ + queue = Queue, + items = Count, + close_on_dequeue = Close, + blocked = Blocked + } = Q, + case (Max =:= all) orelse (Max >= Count) of + false -> + {Items, Queue2, Blocked2} = dequeue_items(Max, Queue, Blocked, []), + Q2 = Q#q{items = Count - Max, blocked = Blocked2, queue = Queue2}, + {reply, {ok, Items}, Q2}; + true -> + lists:foreach(fun(F) -> gen_server:reply(F, ok) end, Blocked), + Q2 = Q#q{items = 0, size = 0, blocked = [], queue = queue:new()}, + case Close of + false -> + {reply, {ok, queue:to_list(Queue)}, Q2}; + true -> + {stop, normal, {ok, queue:to_list(Queue)}, Q2} + end + end. + + +dequeue_items(0, Queue, Blocked, DequeuedAcc) -> + {lists:reverse(DequeuedAcc), Queue, Blocked}; + +dequeue_items(NumItems, Queue, Blocked, DequeuedAcc) -> + {{value, Item}, Queue2} = queue:out(Queue), + case Blocked of + [] -> + Blocked2 = Blocked; + [From | Blocked2] -> + gen_server:reply(From, ok) + end, + dequeue_items(NumItems - 1, Queue2, Blocked2, [Item | DequeuedAcc]). + + +handle_cast(close, #q{items = 0} = Q) -> + {stop, normal, Q}; + +handle_cast(close, Q) -> + {noreply, Q#q{close_on_dequeue = true}}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_info(X, Q) -> + {stop, X, Q}. diff --git a/apps/couch/src/test_util.erl b/apps/couch/src/test_util.erl new file mode 100644 index 00000000..f086bf94 --- /dev/null +++ b/apps/couch/src/test_util.erl @@ -0,0 +1,61 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(test_util). + +-export([init_code_path/0]). +-export([source_file/1, build_file/1, config_files/0]). +-export([request/3, request/4]). + +init_code_path() -> + code:load_abs("apps/couch/test/etap/etap"). + +source_file(Name) -> + filename:join(["apps/couch", Name]). + +build_file(Name) -> + filename:join(["rel/overlay", Name]). + +config_files() -> + [ + build_file("etc/default.ini"), + build_file("etc/local.ini"), + source_file("test/etap/random_port.ini") + ]. + +request(Url, Headers, Method) -> + request(Url, Headers, Method, []). + +request(Url, Headers, Method, Body) -> + request(Url, Headers, Method, Body, 3). + +request(_Url, _Headers, _Method, _Body, 0) -> + {error, request_failed}; +request(Url, Headers, Method, Body, N) -> + case code:is_loaded(ibrowse) of + false -> + {ok, _} = ibrowse:start(); + _ -> + ok + end, + case ibrowse:send_req(Url, Headers, Method, Body) of + {ok, Code0, RespHeaders, RespBody0} -> + Code = list_to_integer(Code0), + RespBody = iolist_to_binary(RespBody0), + {ok, Code, RespHeaders, RespBody}; + {error, {'EXIT', {normal, _}}} -> + % Connection closed right after a successful request that + % used the same connection. + request(Url, Headers, Method, Body, N - 1); + Error -> + Error + end. diff --git a/apps/couch/test/bench/bench_marks.js b/apps/couch/test/bench/bench_marks.js new file mode 100644 index 00000000..4025adbb --- /dev/null +++ b/apps/couch/test/bench/bench_marks.js @@ -0,0 +1,103 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +var NUM_DOCS = 2000; +var NUM_BATCHES = 20; + +var init = function() { + var db = new CouchDB("bench_mark_db", {"X-Couch-Full-Commit": "false"}); + db.deleteDb(); + db.createDb(); + return db; +}; + +var timeit = function(func) { + var startTime = (new Date()).getTime(); + func(); + return ((new Date()).getTime() - startTime) / 1000; +}; + +var report = function(name, rate) { + rate = Math.round(parseFloat(rate) * 100) / 100; + console.log("" + name + ": " + rate + " docs/second"); +}; + +var makeDocs = function(n) { + docs = []; + for (var i=0; i < n; i++) { + docs.push({"foo":"bar"}); + }; + return docs; +}; + +var couchTests = {}; + +couchTests.single_doc_insert = function() { + var db = init(); + var len = timeit(function() { + for(var i = 0; i < NUM_DOCS; i++) { + db.save({"foo": "bar"}); + } + }); + report("Single doc inserts", NUM_DOCS/len); +}; + +couchTests.batch_ok_doc_insert = function() { + var db = init(); + var len = timeit(function() { + for(var i = 0; i < NUM_DOCS; i++) { + db.save({"foo":"bar"}, {"batch":"ok"}); + } + }); + report("Single doc inserts with batch=ok", NUM_DOCS/len); +}; + +couchTests.bulk_doc_100 = function() { + var db = init(); + var len = timeit(function() { + for(var i = 0; i < NUM_BATCHES; i++) { + db.bulkSave(makeDocs(100)); + } + }); + report("Bulk docs - 100", (NUM_BATCHES*100)/len); +}; + +couchTests.bulk_doc_1000 = function() { + var db = init(); + var len = timeit(function() { + for(var i = 0; i < NUM_BATCHES; i++) { + db.bulkSave(makeDocs(1000)); + } + }); + report("Bulk docs - 1000", (NUM_BATCHES*1000)/len); +}; + + +couchTests.bulk_doc_5000 = function() { + var db = init(); + var len = timeit(function() { + for(var i = 0; i < NUM_BATCHES; i++) { + db.bulkSave(makeDocs(5000)); + } + }); + report("Bulk docs - 5000", (NUM_BATCHES*5000)/len); +}; + +couchTests.bulk_doc_10000 = function() { + var db = init(); + var len = timeit(function() { + for(var i = 0; i < NUM_BATCHES; i++) { + db.bulkSave(makeDocs(10000)); + } + }); + report("Bulk docs - 10000", (NUM_BATCHES*10000)/len); +}; diff --git a/apps/couch/test/bench/benchbulk.sh b/apps/couch/test/bench/benchbulk.sh new file mode 100755 index 00000000..22804c64 --- /dev/null +++ b/apps/couch/test/bench/benchbulk.sh @@ -0,0 +1,69 @@ +#!/bin/sh -e +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# + +# usage: time benchbulk.sh +# it takes about 30 seconds to run on my old MacBook with bulksize 1000 + +BULKSIZE=100 +DOCSIZE=10 +INSERTS=10 +ROUNDS=10 +DBURL="http://127.0.0.1:5984/benchbulk" +POSTURL="$DBURL/_bulk_docs" + +function make_bulk_docs() { + ROW=0 + SIZE=$(($1-1)) + START=$2 + BODYSIZE=$3 + + BODY=$(printf "%0${BODYSIZE}d") + + echo '{"docs":[' + while [ $ROW -lt $SIZE ]; do + printf '{"_id":"%020d", "body":"'$BODY'"},' $(($ROW + $START)) + let ROW=ROW+1 + done + printf '{"_id":"%020d", "body":"'$BODY'"}' $(($ROW + $START)) + echo ']}' +} + +echo "Making $INSERTS bulk inserts of $BULKSIZE docs each" + +echo "Attempt to delete db at $DBURL" +curl -X DELETE $DBURL -w\\n + +echo "Attempt to create db at $DBURL" +curl -X PUT $DBURL -w\\n + +echo "Running $ROUNDS rounds of $INSERTS concurrent inserts to $POSTURL" +RUN=0 +while [ $RUN -lt $ROUNDS ]; do + + POSTS=0 + while [ $POSTS -lt $INSERTS ]; do + STARTKEY=$[ POSTS * BULKSIZE + RUN * BULKSIZE * INSERTS ] + echo "startkey $STARTKEY bulksize $BULKSIZE" + DOCS=$(make_bulk_docs $BULKSIZE $STARTKEY $DOCSIZE) + # echo $DOCS + echo $DOCS | curl -T - -X POST $POSTURL -w%{http_code}\ %{time_total}\ sec\\n >/dev/null 2>&1 & + let POSTS=POSTS+1 + done + + echo "waiting" + wait + let RUN=RUN+1 +done + +curl $DBURL -w\\n diff --git a/apps/couch/test/bench/run.tpl b/apps/couch/test/bench/run.tpl new file mode 100755 index 00000000..9307863f --- /dev/null +++ b/apps/couch/test/bench/run.tpl @@ -0,0 +1,28 @@ +#!/bin/sh -e + +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +SRC_DIR=%abs_top_srcdir% +SCRIPT_DIR=$SRC_DIR/share/www/script +JS_TEST_DIR=$SRC_DIR/test/javascript +JS_BENCH_DIR=$SRC_DIR/test/bench + +COUCHJS=%abs_top_builddir%/src/couchdb/priv/couchjs + +cat $SCRIPT_DIR/json2.js \ + $SCRIPT_DIR/couch.js \ + $JS_TEST_DIR/couch_http.js \ + $JS_BENCH_DIR/bench_marks.js \ + $JS_TEST_DIR/cli_runner.js \ + | $COUCHJS - + diff --git a/apps/couch/test/etap/001-load.t b/apps/couch/test/etap/001-load.t new file mode 100755 index 00000000..73bf66d3 --- /dev/null +++ b/apps/couch/test/etap/001-load.t @@ -0,0 +1,68 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% Test that we can load each module. + +main(_) -> + test_util:init_code_path(), + etap:plan(37), + Modules = [ + couch_btree, + couch_config, + couch_config_writer, + couch_db, + couch_db_update_notifier, + couch_db_update_notifier_sup, + couch_db_updater, + couch_doc, + couch_event_sup, + couch_external_manager, + couch_external_server, + couch_file, + couch_httpd, + couch_httpd_db, + couch_httpd_external, + couch_httpd_misc_handlers, + couch_httpd_show, + couch_httpd_stats_handlers, + couch_httpd_view, + couch_key_tree, + couch_log, + couch_os_process, + couch_query_servers, + couch_ref_counter, + couch_rep, + couch_rep_sup, + couch_server, + couch_server_sup, + couch_stats_aggregator, + couch_stats_collector, + couch_stream, + couch_task_status, + couch_util, + couch_view, + couch_view_compactor, + couch_view_group, + couch_view_updater + ], + + lists:foreach( + fun(Module) -> + etap:loaded_ok( + Module, + lists:concat(["Loaded: ", Module]) + ) + end, Modules), + etap:end_tests(). diff --git a/apps/couch/test/etap/002-icu-driver.t b/apps/couch/test/etap/002-icu-driver.t new file mode 100644 index 00000000..4167aeeb --- /dev/null +++ b/apps/couch/test/etap/002-icu-driver.t @@ -0,0 +1,33 @@ +#!/usr/bin/env escript +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +main(_) -> + test_util:init_code_path(), + etap:plan(3), + etap:is( + erl_ddll:load(code:priv_dir(couch), "couch_icu_driver"), + ok, + "Started couch_icu_driver." + ), + etap:is( + couch_util:collate(<<"foo">>, <<"bar">>), + 1, + "Can collate stuff" + ), + etap:is( + couch_util:collate(<<"A">>, <<"aa">>), + -1, + "Collate's non-ascii style." + ), + etap:end_tests(). diff --git a/apps/couch/test/etap/010-file-basics.t b/apps/couch/test/etap/010-file-basics.t new file mode 100755 index 00000000..ed71f5e8 --- /dev/null +++ b/apps/couch/test/etap/010-file-basics.t @@ -0,0 +1,108 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +filename() -> test_util:build_file("test/etap/temp.010"). + +main(_) -> + test_util:init_code_path(), + etap:plan(19), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test() -> + etap:is({error, enoent}, couch_file:open("not a real file"), + "Opening a non-existant file should return an enoent error."), + + etap:fun_is( + fun({ok, _}) -> true; (_) -> false end, + couch_file:open(filename() ++ ".1", [create, invalid_option]), + "Invalid flags to open are ignored." + ), + + {ok, Fd} = couch_file:open(filename() ++ ".0", [create, overwrite]), + etap:ok(is_pid(Fd), + "Returned file descriptor is a Pid"), + + etap:is({ok, 0}, couch_file:bytes(Fd), + "Newly created files have 0 bytes."), + + etap:is({ok, 0}, couch_file:append_term(Fd, foo), + "Appending a term returns the previous end of file position."), + + {ok, Size} = couch_file:bytes(Fd), + etap:is_greater(Size, 0, + "Writing a term increased the file size."), + + etap:is({ok, Size}, couch_file:append_binary(Fd, <<"fancy!">>), + "Appending a binary returns the current file size."), + + etap:is({ok, foo}, couch_file:pread_term(Fd, 0), + "Reading the first term returns what we wrote: foo"), + + etap:is({ok, <<"fancy!">>}, couch_file:pread_binary(Fd, Size), + "Reading back the binary returns what we wrote: <<\"fancy\">>."), + + etap:is({ok, <<131, 100, 0, 3, 102, 111, 111>>}, + couch_file:pread_binary(Fd, 0), + "Reading a binary at a term position returns the term as binary." + ), + + {ok, BinPos} = couch_file:append_binary(Fd, <<131,100,0,3,102,111,111>>), + etap:is({ok, foo}, couch_file:pread_term(Fd, BinPos), + "Reading a term from a written binary term representation succeeds."), + + BigBin = list_to_binary(lists:duplicate(100000, 0)), + {ok, BigBinPos} = couch_file:append_binary(Fd, BigBin), + etap:is({ok, BigBin}, couch_file:pread_binary(Fd, BigBinPos), + "Reading a large term from a written representation succeeds."), + + ok = couch_file:write_header(Fd, hello), + etap:is({ok, hello}, couch_file:read_header(Fd), + "Reading a header succeeds."), + + {ok, BigBinPos2} = couch_file:append_binary(Fd, BigBin), + etap:is({ok, BigBin}, couch_file:pread_binary(Fd, BigBinPos2), + "Reading a large term from a written representation succeeds 2."), + + % append_binary == append_iolist? + % Possible bug in pread_iolist or iolist() -> append_binary + {ok, IOLPos} = couch_file:append_binary(Fd, ["foo", $m, <<"bam">>]), + {ok, IoList} = couch_file:pread_iolist(Fd, IOLPos), + etap:is(<<"foombam">>, iolist_to_binary(IoList), + "Reading an results in a binary form of the written iolist()"), + + % XXX: How does on test fsync? + etap:is(ok, couch_file:sync(Fd), + "Syncing does not cause an error."), + + etap:is(ok, couch_file:truncate(Fd, Size), + "Truncating a file succeeds."), + + %etap:is(eof, (catch couch_file:pread_binary(Fd, Size)), + % "Reading data that was truncated fails.") + etap:skip(fun() -> ok end, + "No idea how to test reading beyond EOF"), + + etap:is({ok, foo}, couch_file:pread_term(Fd, 0), + "Truncating does not affect data located before the truncation mark."), + + etap:is(ok, couch_file:close(Fd), + "Files close properly."), + ok. diff --git a/apps/couch/test/etap/011-file-headers.t b/apps/couch/test/etap/011-file-headers.t new file mode 100755 index 00000000..764b10df --- /dev/null +++ b/apps/couch/test/etap/011-file-headers.t @@ -0,0 +1,152 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa ./src/couchdb -sasl errlog_type error -boot start_sasl -noshell + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +filename() -> test_util:build_file("test/etap/temp.011"). +sizeblock() -> 4096. % Need to keep this in sync with couch_file.erl + +main(_) -> + test_util:init_code_path(), + {S1, S2, S3} = now(), + random:seed(S1, S2, S3), + + etap:plan(18), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test() -> + {ok, Fd} = couch_file:open(filename(), [create,overwrite]), + + etap:is({ok, 0}, couch_file:bytes(Fd), + "File should be initialized to contain zero bytes."), + + etap:is(ok, couch_file:write_header(Fd, {<<"some_data">>, 32}), + "Writing a header succeeds."), + + {ok, Size1} = couch_file:bytes(Fd), + etap:is_greater(Size1, 0, + "Writing a header allocates space in the file."), + + etap:is({ok, {<<"some_data">>, 32}}, couch_file:read_header(Fd), + "Reading the header returns what we wrote."), + + etap:is(ok, couch_file:write_header(Fd, [foo, <<"more">>]), + "Writing a second header succeeds."), + + {ok, Size2} = couch_file:bytes(Fd), + etap:is_greater(Size2, Size1, + "Writing a second header allocates more space."), + + etap:is({ok, [foo, <<"more">>]}, couch_file:read_header(Fd), + "Reading the second header does not return the first header."), + + % Delete the second header. + ok = couch_file:truncate(Fd, Size1), + + etap:is({ok, {<<"some_data">>, 32}}, couch_file:read_header(Fd), + "Reading the header after a truncation returns a previous header."), + + couch_file:write_header(Fd, [foo, <<"more">>]), + etap:is({ok, Size2}, couch_file:bytes(Fd), + "Rewriting the same second header returns the same second size."), + + couch_file:write_header(Fd, erlang:make_tuple(5000, <<"CouchDB">>)), + etap:is( + couch_file:read_header(Fd), + {ok, erlang:make_tuple(5000, <<"CouchDB">>)}, + "Headers larger than the block size can be saved (COUCHDB-1319)" + ), + + ok = couch_file:close(Fd), + + % Now for the fun stuff. Try corrupting the second header and see + % if we recover properly. + + % Destroy the 0x1 byte that marks a header + check_header_recovery(fun(CouchFd, RawFd, Expect, HeaderPos) -> + etap:isnt(Expect, couch_file:read_header(CouchFd), + "Should return a different header before corruption."), + file:pwrite(RawFd, HeaderPos, <<0>>), + etap:is(Expect, couch_file:read_header(CouchFd), + "Corrupting the byte marker should read the previous header.") + end), + + % Corrupt the size. + check_header_recovery(fun(CouchFd, RawFd, Expect, HeaderPos) -> + etap:isnt(Expect, couch_file:read_header(CouchFd), + "Should return a different header before corruption."), + % +1 for 0x1 byte marker + file:pwrite(RawFd, HeaderPos+1, <<10/integer>>), + etap:is(Expect, couch_file:read_header(CouchFd), + "Corrupting the size should read the previous header.") + end), + + % Corrupt the MD5 signature + check_header_recovery(fun(CouchFd, RawFd, Expect, HeaderPos) -> + etap:isnt(Expect, couch_file:read_header(CouchFd), + "Should return a different header before corruption."), + % +5 = +1 for 0x1 byte and +4 for term size. + file:pwrite(RawFd, HeaderPos+5, <<"F01034F88D320B22">>), + etap:is(Expect, couch_file:read_header(CouchFd), + "Corrupting the MD5 signature should read the previous header.") + end), + + % Corrupt the data + check_header_recovery(fun(CouchFd, RawFd, Expect, HeaderPos) -> + etap:isnt(Expect, couch_file:read_header(CouchFd), + "Should return a different header before corruption."), + % +21 = +1 for 0x1 byte, +4 for term size and +16 for MD5 sig + file:pwrite(RawFd, HeaderPos+21, <<"some data goes here!">>), + etap:is(Expect, couch_file:read_header(CouchFd), + "Corrupting the header data should read the previous header.") + end), + + ok. + +check_header_recovery(CheckFun) -> + {ok, Fd} = couch_file:open(filename(), [create,overwrite]), + {ok, RawFd} = file:open(filename(), [read, write, raw, binary]), + + {ok, _} = write_random_data(Fd), + ExpectHeader = {some_atom, <<"a binary">>, 756}, + ok = couch_file:write_header(Fd, ExpectHeader), + + {ok, HeaderPos} = write_random_data(Fd), + ok = couch_file:write_header(Fd, {2342, <<"corruption! greed!">>}), + + CheckFun(Fd, RawFd, {ok, ExpectHeader}, HeaderPos), + + ok = file:close(RawFd), + ok = couch_file:close(Fd), + ok. + +write_random_data(Fd) -> + write_random_data(Fd, 100 + random:uniform(1000)). + +write_random_data(Fd, 0) -> + {ok, Bytes} = couch_file:bytes(Fd), + {ok, (1 + Bytes div sizeblock()) * sizeblock()}; +write_random_data(Fd, N) -> + Choices = [foo, bar, <<"bizzingle">>, "bank", ["rough", stuff]], + Term = lists:nth(random:uniform(4) + 1, Choices), + {ok, _} = couch_file:append_term(Fd, Term), + write_random_data(Fd, N-1). + diff --git a/apps/couch/test/etap/020-btree-basics.t b/apps/couch/test/etap/020-btree-basics.t new file mode 100755 index 00000000..c65d79c2 --- /dev/null +++ b/apps/couch/test/etap/020-btree-basics.t @@ -0,0 +1,219 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa ./src/couchdb -sasl errlog_type error -boot start_sasl -noshell + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +filename() -> test_util:build_file("test/etap/temp.020"). +rows() -> 250. + +-record(btree, {fd, root, extract_kv, assemble_kv, less, reduce}). + +main(_) -> + test_util:init_code_path(), + couch_config:start_link([]), + etap:plan(48), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +%% @todo Determine if this number should be greater to see if the btree was +%% broken into multiple nodes. AKA "How do we appropiately detect if multiple +%% nodes were created." +test()-> + Sorted = [{Seq, random:uniform()} || Seq <- lists:seq(1, rows())], + etap:ok(test_kvs(Sorted), "Testing sorted keys"), + etap:ok(test_kvs(lists:reverse(Sorted)), "Testing reversed sorted keys"), + etap:ok(test_kvs(shuffle(Sorted)), "Testing shuffled keys."), + ok. + +test_kvs(KeyValues) -> + ReduceFun = fun + (reduce, KVs) -> + length(KVs); + (rereduce, Reds) -> + lists:sum(Reds) + end, + + Keys = [K || {K, _} <- KeyValues], + + {ok, Fd} = couch_file:open(filename(), [create,overwrite]), + {ok, Btree} = couch_btree:open(nil, Fd), + etap:ok(is_record(Btree, btree), "Created btree is really a btree record"), + etap:is(Btree#btree.fd, Fd, "Btree#btree.fd is set correctly."), + etap:is(Btree#btree.root, nil, "Btree#btree.root is set correctly."), + + Btree1 = couch_btree:set_options(Btree, [{reduce, ReduceFun}]), + etap:is(Btree1#btree.reduce, ReduceFun, "Reduce function was set"), + {ok, _, EmptyRes} = couch_btree:foldl(Btree1, fun(_, X) -> {ok, X+1} end, 0), + etap:is(EmptyRes, 0, "Folding over an empty btree"), + + {ok, Btree2} = couch_btree:add_remove(Btree1, KeyValues, []), + etap:ok(test_btree(Btree2, KeyValues), + "Adding all keys at once returns a complete btree."), + + etap:fun_is( + fun + ({ok, {kp_node, _}}) -> true; + (_) -> false + end, + couch_file:pread_term(Fd, element(1, Btree2#btree.root)), + "Btree root pointer is a kp_node." + ), + + {ok, Btree3} = couch_btree:add_remove(Btree2, [], Keys), + etap:ok(test_btree(Btree3, []), + "Removing all keys at once returns an empty btree."), + + Btree4 = lists:foldl(fun(KV, BtAcc) -> + {ok, BtAcc2} = couch_btree:add_remove(BtAcc, [KV], []), + BtAcc2 + end, Btree3, KeyValues), + etap:ok(test_btree(Btree4, KeyValues), + "Adding all keys one at a time returns a complete btree."), + + Btree5 = lists:foldl(fun({K, _}, BtAcc) -> + {ok, BtAcc2} = couch_btree:add_remove(BtAcc, [], [K]), + BtAcc2 + end, Btree4, KeyValues), + etap:ok(test_btree(Btree5, []), + "Removing all keys one at a time returns an empty btree."), + + KeyValuesRev = lists:reverse(KeyValues), + Btree6 = lists:foldl(fun(KV, BtAcc) -> + {ok, BtAcc2} = couch_btree:add_remove(BtAcc, [KV], []), + BtAcc2 + end, Btree5, KeyValuesRev), + etap:ok(test_btree(Btree6, KeyValues), + "Adding all keys in reverse order returns a complete btree."), + + {_, Rem2Keys0, Rem2Keys1} = lists:foldl(fun(X, {Count, Left, Right}) -> + case Count rem 2 == 0 of + true-> {Count+1, [X | Left], Right}; + false -> {Count+1, Left, [X | Right]} + end + end, {0, [], []}, KeyValues), + + etap:ok(test_add_remove(Btree6, Rem2Keys0, Rem2Keys1), + "Add/Remove every other key."), + + etap:ok(test_add_remove(Btree6, Rem2Keys1, Rem2Keys0), + "Add/Remove opposite every other key."), + + {ok, Btree7} = couch_btree:add_remove(Btree6, [], [K||{K,_}<-Rem2Keys1]), + {ok, Btree8} = couch_btree:add_remove(Btree7, [], [K||{K,_}<-Rem2Keys0]), + etap:ok(test_btree(Btree8, []), + "Removing both halves of every other key returns an empty btree."), + + %% Third chunk (close out) + etap:is(couch_file:close(Fd), ok, "closing out"), + true. + +test_btree(Btree, KeyValues) -> + ok = test_key_access(Btree, KeyValues), + ok = test_lookup_access(Btree, KeyValues), + ok = test_final_reductions(Btree, KeyValues), + ok = test_traversal_callbacks(Btree, KeyValues), + true. + +test_add_remove(Btree, OutKeyValues, RemainingKeyValues) -> + Btree2 = lists:foldl(fun({K, _}, BtAcc) -> + {ok, BtAcc2} = couch_btree:add_remove(BtAcc, [], [K]), + BtAcc2 + end, Btree, OutKeyValues), + true = test_btree(Btree2, RemainingKeyValues), + + Btree3 = lists:foldl(fun(KV, BtAcc) -> + {ok, BtAcc2} = couch_btree:add_remove(BtAcc, [KV], []), + BtAcc2 + end, Btree2, OutKeyValues), + true = test_btree(Btree3, OutKeyValues ++ RemainingKeyValues). + +test_key_access(Btree, List) -> + FoldFun = fun(Element, {[HAcc|TAcc], Count}) -> + case Element == HAcc of + true -> {ok, {TAcc, Count + 1}}; + _ -> {ok, {TAcc, Count + 1}} + end + end, + Length = length(List), + Sorted = lists:sort(List), + {ok, _, {[], Length}} = couch_btree:foldl(Btree, FoldFun, {Sorted, 0}), + {ok, _, {[], Length}} = couch_btree:fold(Btree, FoldFun, {Sorted, 0}, [{dir, rev}]), + ok. + +test_lookup_access(Btree, KeyValues) -> + FoldFun = fun({Key, Value}, {Key, Value}) -> {stop, true} end, + lists:foreach(fun({Key, Value}) -> + [{ok, {Key, Value}}] = couch_btree:lookup(Btree, [Key]), + {ok, _, true} = couch_btree:foldl(Btree, FoldFun, {Key, Value}, [{start_key, Key}]) + end, KeyValues). + +test_final_reductions(Btree, KeyValues) -> + KVLen = length(KeyValues), + FoldLFun = fun(_X, LeadingReds, Acc) -> + CountToStart = KVLen div 3 + Acc, + CountToStart = couch_btree:final_reduce(Btree, LeadingReds), + {ok, Acc+1} + end, + FoldRFun = fun(_X, LeadingReds, Acc) -> + CountToEnd = KVLen - KVLen div 3 + Acc, + CountToEnd = couch_btree:final_reduce(Btree, LeadingReds), + {ok, Acc+1} + end, + {LStartKey, _} = case KVLen of + 0 -> {nil, nil}; + _ -> lists:nth(KVLen div 3 + 1, lists:sort(KeyValues)) + end, + {RStartKey, _} = case KVLen of + 0 -> {nil, nil}; + _ -> lists:nth(KVLen div 3, lists:sort(KeyValues)) + end, + {ok, _, FoldLRed} = couch_btree:foldl(Btree, FoldLFun, 0, [{start_key, LStartKey}]), + {ok, _, FoldRRed} = couch_btree:fold(Btree, FoldRFun, 0, [{dir, rev}, {start_key, RStartKey}]), + KVLen = FoldLRed + FoldRRed, + ok. + +test_traversal_callbacks(Btree, KeyValues) -> + FoldFun = + fun + (visit, GroupedKey, Unreduced, Acc) -> + {ok, Acc andalso false}; + (traverse, _LK, _Red, Acc) -> + {skip, Acc andalso true} + end, + % With 250 items the root is a kp. Always skipping should reduce to true. + {ok, _, true} = couch_btree:fold(Btree, FoldFun, true, [{dir, fwd}]), + ok. + +shuffle(List) -> + randomize(round(math:log(length(List)) + 0.5), List). + +randomize(1, List) -> + randomize(List); +randomize(T, List) -> + lists:foldl(fun(_E, Acc) -> + randomize(Acc) + end, randomize(List), lists:seq(1, (T - 1))). + +randomize(List) -> + D = lists:map(fun(A) -> + {random:uniform(), A} + end, List), + {_, D1} = lists:unzip(lists:keysort(1, D)), + D1. diff --git a/apps/couch/test/etap/021-btree-reductions.t b/apps/couch/test/etap/021-btree-reductions.t new file mode 100755 index 00000000..6362594e --- /dev/null +++ b/apps/couch/test/etap/021-btree-reductions.t @@ -0,0 +1,142 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa ./src/couchdb -sasl errlog_type error -boot start_sasl -noshell + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +filename() -> "./apps/couch/test/etap/temp.021". +rows() -> 1000. + +main(_) -> + test_util:init_code_path(), + couch_config:start_link([]), + etap:plan(8), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test()-> + ReduceFun = fun + (reduce, KVs) -> length(KVs); + (rereduce, Reds) -> lists:sum(Reds) + end, + + {ok, Fd} = couch_file:open(filename(), [create,overwrite]), + {ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}]), + + % Create a list, of {"even", Value} or {"odd", Value} pairs. + {_, EvenOddKVs} = lists:foldl(fun(Idx, {Key, Acc}) -> + case Key of + "even" -> {"odd", [{{Key, Idx}, 1} | Acc]}; + _ -> {"even", [{{Key, Idx}, 1} | Acc]} + end + end, {"odd", []}, lists:seq(1, rows())), + + {ok, Btree2} = couch_btree:add_remove(Btree, EvenOddKVs, []), + + GroupFun = fun({K1, _}, {K2, _}) -> K1 == K2 end, + FoldFun = fun(GroupedKey, Unreduced, Acc) -> + {ok, [{GroupedKey, couch_btree:final_reduce(Btree2, Unreduced)} | Acc]} + end, + + {SK1, EK1} = {{"even", -1}, {"even", foo}}, + {SK2, EK2} = {{"odd", -1}, {"odd", foo}}, + + etap:fun_is( + fun + ({ok, [{{"odd", _}, 500}, {{"even", _}, 500}]}) -> + true; + (_) -> + false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{key_group_fun, GroupFun}]), + "Reduction works with no specified direction, startkey, or endkey." + ), + + etap:fun_is( + fun + ({ok, [{{"odd", _}, 500}, {{"even", _}, 500}]}) -> + true; + (_) -> + false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{key_group_fun, GroupFun}, {dir, fwd}]), + "Reducing forward works with no startkey or endkey." + ), + + etap:fun_is( + fun + ({ok, [{{"even", _}, 500}, {{"odd", _}, 500}]}) -> + true; + (_) -> + false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{key_group_fun, GroupFun}, {dir, rev}]), + "Reducing backwards works with no startkey or endkey." + ), + + etap:fun_is( + fun + ({ok, [{{"odd", _}, 500}, {{"even", _}, 500}]}) -> + true; + (_) -> + false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{dir, fwd}, {key_group_fun, GroupFun}, {start_key, SK1}, {end_key, EK2}]), + "Reducing works over the entire range with startkey and endkey set." + ), + + etap:fun_is( + fun + ({ok, [{{"even", _}, 500}]}) -> true; + (_) -> false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{dir, fwd}, {key_group_fun, GroupFun}, {start_key, SK1}, {end_key, EK1}]), + "Reducing forward over first half works with a startkey and endkey." + ), + + etap:fun_is( + fun + ({ok, [{{"odd", _}, 500}]}) -> true; + (_) -> false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{dir, fwd}, {key_group_fun, GroupFun}, {start_key, SK2}, {end_key, EK2}]), + "Reducing forward over second half works with second startkey and endkey" + ), + + etap:fun_is( + fun + ({ok, [{{"odd", _}, 500}]}) -> true; + (_) -> false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{dir, rev}, {key_group_fun, GroupFun}, {start_key, EK2}, {end_key, SK2}]), + "Reducing in reverse works after swapping the startkey and endkey." + ), + + etap:fun_is( + fun + ({ok, [{{"even", _}, 500}, {{"odd", _}, 500}]}) -> + true; + (_) -> + false + end, + couch_btree:fold_reduce(Btree2, FoldFun, [], [{dir, rev}, {key_group_fun, GroupFun}, {start_key, EK2}, {end_key, SK1}]), + "Reducing in reverse results in reversed accumulator." + ), + + couch_file:close(Fd). diff --git a/apps/couch/test/etap/030-doc-from-json.t b/apps/couch/test/etap/030-doc-from-json.t new file mode 100755 index 00000000..0a1e6ab3 --- /dev/null +++ b/apps/couch/test/etap/030-doc-from-json.t @@ -0,0 +1,237 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa ./src/couchdb -pa ./src/mochiweb -sasl errlog_type false -noshell + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% XXX: Figure out how to -include("couch_db.hrl") +-record(doc, {id= <<"">>, revs={0, []}, body={[]}, + atts=[], deleted=false, meta=[]}). +-record(att, {name, type, att_len, disk_len, md5= <<>>, revpos=0, data, + encoding=identity}). + +main(_) -> + test_util:init_code_path(), + etap:plan(26), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test() -> + couch_config:start_link(test_util:config_files()), + couch_config_event:start_link(), + couch_config:set("attachments", "compression_level", "0", false), + ok = test_from_json_success(), + ok = test_from_json_errors(), + ok. + +test_from_json_success() -> + Cases = [ + { + {[]}, + #doc{}, + "Return an empty document for an empty JSON object." + }, + { + {[{<<"_id">>, <<"zing!">>}]}, + #doc{id= <<"zing!">>}, + "Parses document ids." + }, + { + {[{<<"_id">>, <<"_design/foo">>}]}, + #doc{id= <<"_design/foo">>}, + "_design/document ids." + }, + { + {[{<<"_id">>, <<"_local/bam">>}]}, + #doc{id= <<"_local/bam">>}, + "_local/document ids." + }, + { + {[{<<"_rev">>, <<"4-230234">>}]}, + #doc{revs={4, [<<"230234">>]}}, + "_rev stored in revs." + }, + { + {[{<<"soap">>, 35}]}, + #doc{body={[{<<"soap">>, 35}]}}, + "Non underscore prefixed fields stored in body." + }, + { + {[{<<"_attachments">>, {[ + {<<"my_attachment.fu">>, {[ + {<<"stub">>, true}, + {<<"content_type">>, <<"application/awesome">>}, + {<<"length">>, 45} + ]}}, + {<<"noahs_private_key.gpg">>, {[ + {<<"data">>, <<"SSBoYXZlIGEgcGV0IGZpc2gh">>}, + {<<"content_type">>, <<"application/pgp-signature">>} + ]}} + ]}}]}, + #doc{atts=[ + #att{ + name = <<"my_attachment.fu">>, + data = stub, + type = <<"application/awesome">>, + att_len = 45, + disk_len = 45, + revpos = nil + }, + #att{ + name = <<"noahs_private_key.gpg">>, + data = <<"I have a pet fish!">>, + type = <<"application/pgp-signature">>, + att_len = 18, + disk_len = 18, + revpos = 0 + } + ]}, + "Attachments are parsed correctly." + }, + { + {[{<<"_deleted">>, true}]}, + #doc{deleted=true}, + "_deleted controls the deleted field." + }, + { + {[{<<"_deleted">>, false}]}, + #doc{}, + "{\"_deleted\": false} is ok." + }, + { + {[ + {<<"_revisions">>, {[ + {<<"start">>, 4}, + {<<"ids">>, [<<"foo1">>, <<"phi3">>, <<"omega">>]} + ]}}, + {<<"_rev">>, <<"6-something">>} + ]}, + #doc{revs={4, [<<"foo1">>, <<"phi3">>, <<"omega">>]}}, + "_revisions attribute are preferred to _rev." + }, + { + {[{<<"_revs_info">>, dropping}]}, + #doc{}, + "Drops _revs_info." + }, + { + {[{<<"_local_seq">>, dropping}]}, + #doc{}, + "Drops _local_seq." + }, + { + {[{<<"_conflicts">>, dropping}]}, + #doc{}, + "Drops _conflicts." + }, + { + {[{<<"_deleted_conflicts">>, dropping}]}, + #doc{}, + "Drops _deleted_conflicts." + } + ], + + lists:foreach(fun({EJson, Expect, Mesg}) -> + etap:is(couch_doc:from_json_obj(EJson), Expect, Mesg) + end, Cases), + ok. + +test_from_json_errors() -> + Cases = [ + { + [], + {bad_request, "Document must be a JSON object"}, + "arrays are invalid" + }, + { + 4, + {bad_request, "Document must be a JSON object"}, + "integers are invalid" + }, + { + true, + {bad_request, "Document must be a JSON object"}, + "literals are invalid" + }, + { + {[{<<"_id">>, {[{<<"foo">>, 5}]}}]}, + {bad_request, <<"Document id must be a string">>}, + "Document id must be a string." + }, + { + {[{<<"_id">>, <<"_random">>}]}, + {bad_request, + <<"Only reserved document ids may start with underscore.">>}, + "Disallow arbitrary underscore prefixed docids." + }, + { + {[{<<"_rev">>, 5}]}, + {bad_request, <<"Invalid rev format">>}, + "_rev must be a string" + }, + { + {[{<<"_rev">>, "foobar"}]}, + {bad_request, <<"Invalid rev format">>}, + "_rev must be %d-%s" + }, + { + {[{<<"_rev">>, "foo-bar"}]}, + "Error if _rev's integer expection is broken." + }, + { + {[{<<"_revisions">>, {[{<<"start">>, true}]}}]}, + {doc_validation, "_revisions.start isn't an integer."}, + "_revisions.start must be an integer." + }, + { + {[{<<"_revisions">>, {[ + {<<"start">>, 0}, + {<<"ids">>, 5} + ]}}]}, + {doc_validation, "_revisions.ids isn't a array."}, + "_revions.ids must be a list." + }, + { + {[{<<"_revisions">>, {[ + {<<"start">>, 0}, + {<<"ids">>, [5]} + ]}}]}, + {doc_validation, "RevId isn't a string"}, + "Revision ids must be strings." + }, + { + {[{<<"_something">>, 5}]}, + {doc_validation, <<"Bad special document member: _something">>}, + "Underscore prefix fields are reserved." + } + ], + + lists:foreach(fun + ({EJson, Expect, Mesg}) -> + Error = (catch couch_doc:from_json_obj(EJson)), + etap:is(Error, Expect, Mesg); + ({EJson, Mesg}) -> + try + couch_doc:from_json_obj(EJson), + etap:ok(false, "Conversion failed to raise an exception.") + catch + _:_ -> etap:ok(true, Mesg) + end + end, Cases), + ok. diff --git a/apps/couch/test/etap/031-doc-to-json.t b/apps/couch/test/etap/031-doc-to-json.t new file mode 100755 index 00000000..6ceae344 --- /dev/null +++ b/apps/couch/test/etap/031-doc-to-json.t @@ -0,0 +1,197 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa ./src/couchdb -pa ./src/mochiweb -sasl errlog_type false -noshell + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% XXX: Figure out how to -include("couch_db.hrl") +-record(doc, {id= <<"">>, revs={0, []}, body={[]}, + atts=[], deleted=false, meta=[]}). +-record(att, {name, type, att_len, disk_len, md5= <<>>, revpos=0, data, + encoding=identity}). + +main(_) -> + test_util:init_code_path(), + etap:plan(12), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test() -> + couch_config:start_link(test_util:config_files()), + couch_config_event:start_link(), + couch_config:set("attachments", "compression_level", "0", false), + ok = test_to_json_success(), + ok. + +test_to_json_success() -> + Cases = [ + { + #doc{}, + {[{<<"_id">>, <<"">>}]}, + "Empty docs are {\"_id\": \"\"}" + }, + { + #doc{id= <<"foo">>}, + {[{<<"_id">>, <<"foo">>}]}, + "_id is added." + }, + { + #doc{revs={5, ["foo"]}}, + {[{<<"_id">>, <<>>}, {<<"_rev">>, <<"5-foo">>}]}, + "_rev is added." + }, + { + [revs], + #doc{revs={5, [<<"first">>, <<"second">>]}}, + {[ + {<<"_id">>, <<>>}, + {<<"_rev">>, <<"5-first">>}, + {<<"_revisions">>, {[ + {<<"start">>, 5}, + {<<"ids">>, [<<"first">>, <<"second">>]} + ]}} + ]}, + "_revisions include with revs option" + }, + { + #doc{body={[{<<"foo">>, <<"bar">>}]}}, + {[{<<"_id">>, <<>>}, {<<"foo">>, <<"bar">>}]}, + "Arbitrary fields are added." + }, + { + #doc{deleted=true, body={[{<<"foo">>, <<"bar">>}]}}, + {[{<<"_id">>, <<>>}, {<<"foo">>, <<"bar">>}, {<<"_deleted">>, true}]}, + "Deleted docs no longer drop body members." + }, + { + #doc{meta=[ + {revs_info, 4, [{<<"fin">>, deleted}, {<<"zim">>, missing}]} + ]}, + {[ + {<<"_id">>, <<>>}, + {<<"_revs_info">>, [ + {[{<<"rev">>, <<"4-fin">>}, {<<"status">>, <<"deleted">>}]}, + {[{<<"rev">>, <<"3-zim">>}, {<<"status">>, <<"missing">>}]} + ]} + ]}, + "_revs_info field is added correctly." + }, + { + #doc{meta=[{local_seq, 5}]}, + {[{<<"_id">>, <<>>}, {<<"_local_seq">>, 5}]}, + "_local_seq is added as an integer." + }, + { + #doc{meta=[{conflicts, [{3, <<"yep">>}, {1, <<"snow">>}]}]}, + {[ + {<<"_id">>, <<>>}, + {<<"_conflicts">>, [<<"3-yep">>, <<"1-snow">>]} + ]}, + "_conflicts is added as an array of strings." + }, + { + #doc{meta=[{deleted_conflicts, [{10923, <<"big_cowboy_hat">>}]}]}, + {[ + {<<"_id">>, <<>>}, + {<<"_deleted_conflicts">>, [<<"10923-big_cowboy_hat">>]} + ]}, + "_deleted_conflicsts is added as an array of strings." + }, + { + #doc{atts=[ + #att{ + name = <<"big.xml">>, + type = <<"xml/sucks">>, + data = fun() -> ok end, + revpos = 1, + att_len = 400, + disk_len = 400 + }, + #att{ + name = <<"fast.json">>, + type = <<"json/ftw">>, + data = <<"{\"so\": \"there!\"}">>, + revpos = 1, + att_len = 16, + disk_len = 16 + } + ]}, + {[ + {<<"_id">>, <<>>}, + {<<"_attachments">>, {[ + {<<"big.xml">>, {[ + {<<"content_type">>, <<"xml/sucks">>}, + {<<"revpos">>, 1}, + {<<"length">>, 400}, + {<<"stub">>, true} + ]}}, + {<<"fast.json">>, {[ + {<<"content_type">>, <<"json/ftw">>}, + {<<"revpos">>, 1}, + {<<"length">>, 16}, + {<<"stub">>, true} + ]}} + ]}} + ]}, + "Attachments attached as stubs only include a length." + }, + { + [attachments], + #doc{atts=[ + #att{ + name = <<"stuff.txt">>, + type = <<"text/plain">>, + data = fun() -> <<"diet pepsi">> end, + revpos = 1, + att_len = 10, + disk_len = 10 + }, + #att{ + name = <<"food.now">>, + type = <<"application/food">>, + revpos = 1, + data = <<"sammich">> + } + ]}, + {[ + {<<"_id">>, <<>>}, + {<<"_attachments">>, {[ + {<<"stuff.txt">>, {[ + {<<"content_type">>, <<"text/plain">>}, + {<<"revpos">>, 1}, + {<<"data">>, <<"ZGlldCBwZXBzaQ==">>} + ]}}, + {<<"food.now">>, {[ + {<<"content_type">>, <<"application/food">>}, + {<<"revpos">>, 1}, + {<<"data">>, <<"c2FtbWljaA==">>} + ]}} + ]}} + ]}, + "Attachments included inline with attachments option." + } + ], + + lists:foreach(fun + ({Doc, EJson, Mesg}) -> + etap:is(couch_doc:to_json_obj(Doc, []), EJson, Mesg); + ({Options, Doc, EJson, Mesg}) -> + etap:is(couch_doc:to_json_obj(Doc, Options), EJson, Mesg) + end, Cases), + ok. diff --git a/apps/couch/test/etap/040-util.t b/apps/couch/test/etap/040-util.t new file mode 100755 index 00000000..8f80db87 --- /dev/null +++ b/apps/couch/test/etap/040-util.t @@ -0,0 +1,80 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + application:start(crypto), + + etap:plan(14), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + % to_existing_atom + etap:is(true, couch_util:to_existing_atom(true), "An atom is an atom."), + etap:is(foo, couch_util:to_existing_atom(<<"foo">>), + "A binary foo is the atom foo."), + etap:is(foobarbaz, couch_util:to_existing_atom("foobarbaz"), + "A list of atoms is one munged atom."), + + % implode + etap:is([1, 38, 2, 38, 3], couch_util:implode([1,2,3],"&"), + "use & as separator in list."), + + % trim + Strings = [" foo", "foo ", "\tfoo", " foo ", "foo\t", "foo\n", "\nfoo"], + etap:ok(lists:all(fun(S) -> couch_util:trim(S) == "foo" end, Strings), + "everything here trimmed should be foo."), + + % abs_pathname + {ok, Cwd} = file:get_cwd(), + etap:is(Cwd ++ "/foo", couch_util:abs_pathname("./foo"), + "foo is in this directory."), + + % should_flush + etap:ok(not couch_util:should_flush(), + "Not using enough memory to flush."), + AcquireMem = fun() -> + IntsToAGazillion = lists:seq(1, 200000), + LotsOfData = lists:map( + fun(Int) -> {Int, <<"foobar">>} end, + lists:seq(1, 500000)), + etap:ok(couch_util:should_flush(), + "Allocation 200K tuples puts us above the memory threshold.") + end, + AcquireMem(), + + etap:ok(not couch_util:should_flush(), + "Checking to flush invokes GC."), + + % verify + etap:is(true, couch_util:verify("It4Vooya", "It4Vooya"), + "String comparison."), + etap:is(false, couch_util:verify("It4VooyaX", "It4Vooya"), + "String comparison (unequal lengths)."), + etap:is(true, couch_util:verify(<<"ahBase3r">>, <<"ahBase3r">>), + "Binary comparison."), + etap:is(false, couch_util:verify(<<"ahBase3rX">>, <<"ahBase3r">>), + "Binary comparison (unequal lengths)."), + etap:is(false, couch_util:verify(nil, <<"ahBase3r">>), + "Binary comparison with atom."), + + ok. diff --git a/apps/couch/test/etap/041-uuid-gen-seq.ini b/apps/couch/test/etap/041-uuid-gen-seq.ini new file mode 100644 index 00000000..94cebc6f --- /dev/null +++ b/apps/couch/test/etap/041-uuid-gen-seq.ini @@ -0,0 +1,19 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. + +[uuids] +algorithm = sequential diff --git a/apps/couch/test/etap/041-uuid-gen-utc.ini b/apps/couch/test/etap/041-uuid-gen-utc.ini new file mode 100644 index 00000000..c2b83831 --- /dev/null +++ b/apps/couch/test/etap/041-uuid-gen-utc.ini @@ -0,0 +1,19 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. + +[uuids] +algorithm = utc_random diff --git a/apps/couch/test/etap/041-uuid-gen.t b/apps/couch/test/etap/041-uuid-gen.t new file mode 100755 index 00000000..84c93a44 --- /dev/null +++ b/apps/couch/test/etap/041-uuid-gen.t @@ -0,0 +1,116 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +seq_alg_config() -> + filename:join([code:lib_dir(couch, test), "etap/041-uuid-gen-seq.ini"]). + +utc_alg_config() -> + filename:join([code:lib_dir(couch, test), "etap/041-uuid-gen-utc.ini"]). + +% Run tests and wait for the gen_servers to shutdown +run_test(IniFiles, Test) -> + couch_config_event:start_link(), + {ok, Pid} = couch_config:start_link(IniFiles), + erlang:monitor(process, Pid), + couch_uuids:start(), + Test(), + couch_uuids:stop(), + couch_config:stop(), + receive + {'DOWN', _, _, Pid, _} -> ok; + _Other -> etap:diag("OTHER: ~p~n", [_Other]) + after + 1000 -> throw({timeout_error, config_stop}) + end. + +main(_) -> + test_util:init_code_path(), + application:start(crypto), + etap:plan(5), + + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + + TestUnique = fun() -> + etap:is( + test_unique(10000, couch_uuids:new()), + true, + "Can generate 10K unique IDs" + ) + end, + %run_test([default_config()], TestUnique), + run_test([seq_alg_config()], TestUnique), + run_test([utc_alg_config()], TestUnique), + + TestMonotonic = fun () -> + etap:is( + couch_uuids:new() < couch_uuids:new(), + true, + "should produce monotonically increasing ids" + ) + end, + run_test([seq_alg_config()], TestMonotonic), + run_test([utc_alg_config()], TestMonotonic), + + % Pretty sure that the average of a uniform distribution is the + % midpoint of the range. Thus, to exceed a threshold, we need + % approximately Total / (Range/2 + RangeMin) samples. + % + % In our case this works out to be 8194. (0xFFF000 / 0x7FF) + % These tests just fudge the limits for a good generator at 25% + % in either direction. Technically it should be possible to generate + % bounds that will show if your random number generator is not + % sufficiently random but I hated statistics in school. + TestRollOver = fun() -> + UUID = binary_to_list(couch_uuids:new()), + Prefix = element(1, lists:split(26, UUID)), + N = gen_until_pref_change(Prefix,0), + etap:diag("N is: ~p~n",[N]), + etap:is( + N >= 5000 andalso N =< 11000, + true, + "should roll over every so often." + ) + end, + run_test([seq_alg_config()], TestRollOver). + +test_unique(0, _) -> + true; +test_unique(N, UUID) -> + case couch_uuids:new() of + UUID -> + etap:diag("N: ~p~n", [N]), + false; + Else -> test_unique(N-1, Else) + end. + +get_prefix(UUID) -> + element(1, lists:split(26, binary_to_list(UUID))). + +gen_until_pref_change(_, Count) when Count > 8251 -> + Count; +gen_until_pref_change(Prefix, N) -> + case get_prefix(couch_uuids:new()) of + Prefix -> gen_until_pref_change(Prefix, N+1); + _ -> N + end. diff --git a/apps/couch/test/etap/050-stream.t b/apps/couch/test/etap/050-stream.t new file mode 100755 index 00000000..16f4f0c6 --- /dev/null +++ b/apps/couch/test/etap/050-stream.t @@ -0,0 +1,88 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(13), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +read_all(Fd, PosList) -> + Data = couch_stream:foldl(Fd, PosList, fun(Bin, Acc) -> [Bin, Acc] end, []), + iolist_to_binary(Data). + +test() -> + {ok, Fd} = couch_file:open("apps/couch/test/etap/temp.050", + [create,overwrite]), + {ok, Stream} = couch_stream:open(Fd), + + etap:is(ok, couch_stream:write(Stream, <<"food">>), + "Writing to streams works."), + + etap:is(ok, couch_stream:write(Stream, <<"foob">>), + "Consecutive writing to streams works."), + + etap:is(ok, couch_stream:write(Stream, <<>>), + "Writing an empty binary does nothing."), + + {Ptrs, Length, _, _, _} = couch_stream:close(Stream), + etap:is(Ptrs, [{0, 8}], "Close returns the file pointers."), + etap:is(Length, 8, "Close also returns the number of bytes written."), + etap:is(<<"foodfoob">>, read_all(Fd, Ptrs), "Returned pointers are valid."), + + % Remeber where we expect the pointer to be. + {ok, ExpPtr} = couch_file:bytes(Fd), + {ok, Stream2} = couch_stream:open(Fd), + OneBits = <<1:(8*10)>>, + etap:is(ok, couch_stream:write(Stream2, OneBits), + "Successfully wrote 80 1 bits."), + + ZeroBits = <<0:(8*10)>>, + etap:is(ok, couch_stream:write(Stream2, ZeroBits), + "Successfully wrote 80 0 bits."), + + {Ptrs2, Length2, _, _, _} = couch_stream:close(Stream2), + etap:is(Ptrs2, [{ExpPtr, 20}], "Closing stream returns the file pointers."), + etap:is(Length2, 20, "Length written is 160 bytes."), + + AllBits = iolist_to_binary([OneBits,ZeroBits]), + etap:is(AllBits, read_all(Fd, Ptrs2), "Returned pointers are valid."), + + % Stream more the 4K chunk size. + {ok, ExpPtr2} = couch_file:bytes(Fd), + {ok, Stream3} = couch_stream:open(Fd), + Acc2 = lists:foldl(fun(_, Acc) -> + Data = <<"a1b2c">>, + couch_stream:write(Stream3, Data), + [Data | Acc] + end, [], lists:seq(1, 1024)), + {Ptrs3, Length3, _, _, _} = couch_stream:close(Stream3), + + % 4095 because of 5 * 4096 rem 5 (last write before exceeding threshold) + % + 5 puts us over the threshold + % + 4 bytes for the term_to_binary adding a length header + % + 1 byte every 4K for tail append headers + SecondPtr = ExpPtr2 + 4095 + 5 + 4 + 1, + etap:is(Ptrs3, [{ExpPtr2, 4100}, {SecondPtr, 1020}], "Pointers every 4K bytes."), + etap:is(Length3, 5120, "Wrote the expected 5K bytes."), + + couch_file:close(Fd), + ok. diff --git a/apps/couch/test/etap/060-kt-merging.t b/apps/couch/test/etap/060-kt-merging.t new file mode 100755 index 00000000..efbdbf69 --- /dev/null +++ b/apps/couch/test/etap/060-kt-merging.t @@ -0,0 +1,176 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(16), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + One = {1, {"1","foo",[]}}, + + etap:is( + {[One], no_conflicts}, + couch_key_tree:merge([], One, 10), + "The empty tree is the identity for merge." + ), + etap:is( + {[One], no_conflicts}, + couch_key_tree:merge([One], One, 10), + "Merging is reflexive." + ), + + TwoSibs = [{1, {"1","foo",[]}}, + {1, {"2","foo",[]}}], + + etap:is( + {TwoSibs, no_conflicts}, + couch_key_tree:merge(TwoSibs, One, 10), + "Merging a prefix of a tree with the tree yields the tree." + ), + + Three = {1, {"3","foo",[]}}, + ThreeSibs = [{1, {"1","foo",[]}}, + {1, {"2","foo",[]}}, + {1, {"3","foo",[]}}], + + etap:is( + {ThreeSibs, conflicts}, + couch_key_tree:merge(TwoSibs, Three, 10), + "Merging a third unrelated branch leads to a conflict." + ), + + + TwoChild = {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}}, + + etap:is( + {[TwoChild], no_conflicts}, + couch_key_tree:merge([TwoChild], TwoChild, 10), + "Merging two children is still reflexive." + ), + + TwoChildSibs = {1, {"1","foo", [{"1a", "bar", []}, + {"1b", "bar", []}]}}, + etap:is( + {[TwoChildSibs], no_conflicts}, + couch_key_tree:merge([TwoChildSibs], TwoChildSibs, 10), + "Merging a tree to itself is itself."), + + TwoChildPlusSibs = + {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}, + {"1b", "bar", []}]}}, + + etap:is( + {[TwoChildPlusSibs], no_conflicts}, + couch_key_tree:merge([TwoChild], TwoChildSibs, 10), + "Merging tree of uneven length at node 2."), + + Stemmed1b = {2, {"1a", "bar", []}}, + etap:is( + {[TwoChildSibs], no_conflicts}, + couch_key_tree:merge([TwoChildSibs], Stemmed1b, 10), + "Merging a tree with a stem." + ), + + TwoChildSibs2 = {1, {"1","foo", [{"1a", "bar", []}, + {"1b", "bar", [{"1bb", "boo", []}]}]}}, + Stemmed1bb = {3, {"1bb", "boo", []}}, + etap:is( + {[TwoChildSibs2], no_conflicts}, + couch_key_tree:merge([TwoChildSibs2], Stemmed1bb, 10), + "Merging a stem at a deeper level." + ), + + StemmedTwoChildSibs2 = [{2,{"1a", "bar", []}}, + {2,{"1b", "bar", [{"1bb", "boo", []}]}}], + + etap:is( + {StemmedTwoChildSibs2, no_conflicts}, + couch_key_tree:merge(StemmedTwoChildSibs2, Stemmed1bb, 10), + "Merging a stem at a deeper level against paths at deeper levels." + ), + + Stemmed1aa = {3, {"1aa", "bar", []}}, + etap:is( + {[TwoChild], no_conflicts}, + couch_key_tree:merge([TwoChild], Stemmed1aa, 10), + "Merging a single tree with a deeper stem." + ), + + Stemmed1a = {2, {"1a", "bar", [{"1aa", "bar", []}]}}, + etap:is( + {[TwoChild], no_conflicts}, + couch_key_tree:merge([TwoChild], Stemmed1a, 10), + "Merging a larger stem." + ), + + etap:is( + {[Stemmed1a], no_conflicts}, + couch_key_tree:merge([Stemmed1a], Stemmed1aa, 10), + "More merging." + ), + + OneChild = {1, {"1","foo",[{"1a", "bar", []}]}}, + Expect1 = [OneChild, Stemmed1aa], + etap:is( + {Expect1, conflicts}, + couch_key_tree:merge([OneChild], Stemmed1aa, 10), + "Merging should create conflicts." + ), + + etap:is( + {[TwoChild], no_conflicts}, + couch_key_tree:merge(Expect1, TwoChild, 10), + "Merge should have no conflicts." + ), + + %% this test is based on couch-902-test-case2.py + %% foo has conflicts from replication at depth two + %% foo3 is the current value + Foo = {1, {"foo", + "val1", + [{"foo2","val2",[]}, + {"foo3", "val3", []} + ]}}, + %% foo now has an attachment added, which leads to foo4 and val4 + %% off foo3 + Bar = {1, {"foo", + [], + [{"foo3", + [], + [{"foo4","val4",[]} + ]}]}}, + %% this is what the merge returns + %% note that it ignore the conflicting branch as there's no match + FooBar = {1, {"foo", + "val1", + [{"foo2","val2",[]}, + {"foo3", "val3", [{"foo4","val4",[]}]} + ]}}, + + etap:is( + {[FooBar], no_conflicts}, + couch_key_tree:merge([Foo],Bar,10), + "Merging trees with conflicts ought to behave." + ), + + ok. diff --git a/apps/couch/test/etap/061-kt-missing-leaves.t b/apps/couch/test/etap/061-kt-missing-leaves.t new file mode 100755 index 00000000..d60b4db8 --- /dev/null +++ b/apps/couch/test/etap/061-kt-missing-leaves.t @@ -0,0 +1,65 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(4), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + TwoChildSibs = [{0, {"1","foo", [{"1a", "bar", []}, {"1b", "bar", []}]}}], + Stemmed1 = [{1, {"1a", "bar", [{"1aa", "bar", []}]}}], + Stemmed2 = [{2, {"1aa", "bar", []}}], + + etap:is( + [], + couch_key_tree:find_missing(TwoChildSibs, [{0,"1"}, {1,"1a"}]), + "Look for missing keys." + ), + + etap:is( + [{0, "10"}, {100, "x"}], + couch_key_tree:find_missing( + TwoChildSibs, + [{0,"1"}, {0, "10"}, {1,"1a"}, {100, "x"}] + ), + "Look for missing keys." + ), + + etap:is( + [{0, "1"}, {100, "x"}], + couch_key_tree:find_missing( + Stemmed1, + [{0,"1"}, {1,"1a"}, {100, "x"}] + ), + "Look for missing keys." + ), + etap:is( + [{0, "1"}, {1,"1a"}, {100, "x"}], + couch_key_tree:find_missing( + Stemmed2, + [{0,"1"}, {1,"1a"}, {100, "x"}] + ), + "Look for missing keys." + ), + + ok. diff --git a/apps/couch/test/etap/062-kt-remove-leaves.t b/apps/couch/test/etap/062-kt-remove-leaves.t new file mode 100755 index 00000000..745a00be --- /dev/null +++ b/apps/couch/test/etap/062-kt-remove-leaves.t @@ -0,0 +1,69 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(6), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + OneChild = [{0, {"1","foo",[{"1a", "bar", []}]}}], + TwoChildSibs = [{0, {"1","foo", [{"1a", "bar", []}, {"1b", "bar", []}]}}], + Stemmed = [{1, {"1a", "bar", [{"1aa", "bar", []}]}}], + + etap:is( + {TwoChildSibs, []}, + couch_key_tree:remove_leafs(TwoChildSibs, []), + "Removing no leaves has no effect on the tree." + ), + + etap:is( + {TwoChildSibs, []}, + couch_key_tree:remove_leafs(TwoChildSibs, [{0, "1"}]), + "Removing a non-existant branch has no effect." + ), + + etap:is( + {OneChild, [{1, "1b"}]}, + couch_key_tree:remove_leafs(TwoChildSibs, [{1, "1b"}]), + "Removing a leaf removes the leaf." + ), + + etap:is( + {[], [{1, "1b"},{1, "1a"}]}, + couch_key_tree:remove_leafs(TwoChildSibs, [{1, "1a"}, {1, "1b"}]), + "Removing all leaves returns an empty tree." + ), + + etap:is( + {Stemmed, []}, + couch_key_tree:remove_leafs(Stemmed, [{1, "1a"}]), + "Removing a non-existant node has no effect." + ), + + etap:is( + {[], [{2, "1aa"}]}, + couch_key_tree:remove_leafs(Stemmed, [{2, "1aa"}]), + "Removing the last leaf returns an empty tree." + ), + + ok. diff --git a/apps/couch/test/etap/063-kt-get-leaves.t b/apps/couch/test/etap/063-kt-get-leaves.t new file mode 100755 index 00000000..6d4e8007 --- /dev/null +++ b/apps/couch/test/etap/063-kt-get-leaves.t @@ -0,0 +1,98 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(11), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + TwoChildSibs = [{0, {"1","foo", [{"1a", "bar", []}, {"1b", "bar", []}]}}], + Stemmed = [{1, {"1a", "bar", [{"1aa", "bar", []}]}}], + + etap:is( + {[{"foo", {0, ["1"]}}],[]}, + couch_key_tree:get(TwoChildSibs, [{0, "1"}]), + "extract a subtree." + ), + + etap:is( + {[{"bar", {1, ["1a", "1"]}}],[]}, + couch_key_tree:get(TwoChildSibs, [{1, "1a"}]), + "extract a subtree." + ), + + etap:is( + {[],[{0,"x"}]}, + couch_key_tree:get_key_leafs(TwoChildSibs, [{0, "x"}]), + "gather up the leaves." + ), + + etap:is( + {[{"bar", {1, ["1a","1"]}}],[]}, + couch_key_tree:get_key_leafs(TwoChildSibs, [{1, "1a"}]), + "gather up the leaves." + ), + + etap:is( + {[{"bar", {1, ["1a","1"]}},{"bar",{1, ["1b","1"]}}],[]}, + couch_key_tree:get_key_leafs(TwoChildSibs, [{0, "1"}]), + "gather up the leaves." + ), + + etap:is( + {[{0,[{"1", "foo"}]}],[]}, + couch_key_tree:get_full_key_paths(TwoChildSibs, [{0, "1"}]), + "retrieve full key paths." + ), + + etap:is( + {[{1,[{"1a", "bar"},{"1", "foo"}]}],[]}, + couch_key_tree:get_full_key_paths(TwoChildSibs, [{1, "1a"}]), + "retrieve full key paths." + ), + + etap:is( + [{2, [{"1aa", "bar"},{"1a", "bar"}]}], + couch_key_tree:get_all_leafs_full(Stemmed), + "retrieve all leaves." + ), + + etap:is( + [{1, [{"1a", "bar"},{"1", "foo"}]}, {1, [{"1b", "bar"},{"1", "foo"}]}], + couch_key_tree:get_all_leafs_full(TwoChildSibs), + "retrieve all the leaves." + ), + + etap:is( + [{"bar", {2, ["1aa","1a"]}}], + couch_key_tree:get_all_leafs(Stemmed), + "retrieve all leaves." + ), + + etap:is( + [{"bar", {1, ["1a", "1"]}}, {"bar", {1, ["1b","1"]}}], + couch_key_tree:get_all_leafs(TwoChildSibs), + "retrieve all the leaves." + ), + + ok. diff --git a/apps/couch/test/etap/064-kt-counting.t b/apps/couch/test/etap/064-kt-counting.t new file mode 100755 index 00000000..f182d287 --- /dev/null +++ b/apps/couch/test/etap/064-kt-counting.t @@ -0,0 +1,46 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(4), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + EmptyTree = [], + One = [{0, {"1","foo",[]}}], + TwoChildSibs = [{0, {"1","foo", [{"1a", "bar", []}, {"1b", "bar", []}]}}], + Stemmed = [{2, {"1bb", "boo", []}}], + + etap:is(0, couch_key_tree:count_leafs(EmptyTree), + "Empty trees have no leaves."), + + etap:is(1, couch_key_tree:count_leafs(One), + "Single node trees have a single leaf."), + + etap:is(2, couch_key_tree:count_leafs(TwoChildSibs), + "Two children siblings counted as two leaves."), + + etap:is(1, couch_key_tree:count_leafs(Stemmed), + "Stemming does not affect leaf counting."), + + ok. diff --git a/apps/couch/test/etap/065-kt-stemming.t b/apps/couch/test/etap/065-kt-stemming.t new file mode 100755 index 00000000..6e781c1d --- /dev/null +++ b/apps/couch/test/etap/065-kt-stemming.t @@ -0,0 +1,42 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(3), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + TwoChild = [{0, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}}], + Stemmed1 = [{1, {"1a", "bar", [{"1aa", "bar", []}]}}], + Stemmed2 = [{2, {"1aa", "bar", []}}], + + etap:is(TwoChild, couch_key_tree:stem(TwoChild, 3), + "Stemming more levels than what exists does nothing."), + + etap:is(Stemmed1, couch_key_tree:stem(TwoChild, 2), + "Stemming with a depth of two returns the deepest two nodes."), + + etap:is(Stemmed2, couch_key_tree:stem(TwoChild, 1), + "Stemming to a depth of one returns the deepest node."), + + ok. diff --git a/apps/couch/test/etap/080-config-get-set.ini b/apps/couch/test/etap/080-config-get-set.ini new file mode 100644 index 00000000..e72e08db --- /dev/null +++ b/apps/couch/test/etap/080-config-get-set.ini @@ -0,0 +1,8 @@ +[daemons] +something=somevalue + +[httpd_design_handlers] +_view = {couch_httpd_view, handle_view_req} + +[httpd] +port = 5984 diff --git a/apps/couch/test/etap/080-config-get-set.t b/apps/couch/test/etap/080-config-get-set.t new file mode 100755 index 00000000..c457c7f0 --- /dev/null +++ b/apps/couch/test/etap/080-config-get-set.t @@ -0,0 +1,129 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +default_config() -> + filename:join([code:lib_dir(couch, test), "etap/080-config-get-set.ini"]). + +main(_) -> + test_util:init_code_path(), + etap:plan(12), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + % start couch_config with default + couch_config_event:start_link(), + couch_config:start_link([default_config()]), + + + % Check that we can get values + + + etap:fun_is( + fun(List) -> length(List) > 0 end, + couch_config:all(), + "Data was loaded from the INI file." + ), + + etap:fun_is( + fun(List) -> length(List) > 0 end, + couch_config:get("daemons"), + "There are settings in the [daemons] section of the INI file." + ), + + etap:is( + couch_config:get("httpd_design_handlers", "_view"), + "{couch_httpd_view, handle_view_req}", + "The {httpd_design_handlers, view} is the expected default." + ), + + etap:is( + couch_config:get("httpd", "foo", "bar"), + "bar", + "Returns the default when key doesn't exist in config." + ), + + etap:is( + couch_config:get("httpd", "foo"), + undefined, + "The default default is the atom 'undefined'." + ), + + etap:is( + couch_config:get("httpd", "port", "bar"), + "5984", + "Only returns the default when the config setting does not exist." + ), + + + % Check that setting values works. + + + ok = couch_config:set("log", "level", "severe", false), + + etap:is( + couch_config:get("log", "level"), + "severe", + "Non persisted changes take effect." + ), + + etap:is( + couch_config:get("new_section", "bizzle"), + undefined, + "Section 'new_section' does not exist." + ), + + ok = couch_config:set("new_section", "bizzle", "bang", false), + + etap:is( + couch_config:get("new_section", "bizzle"), + "bang", + "New section 'new_section' was created for a new key/value pair." + ), + + + % Check that deleting works + + + ok = couch_config:delete("new_section", "bizzle", false), + etap:is( + couch_config:get("new_section", "bizzle"), + undefined, + "Deleting sets the value to \"\"" + ), + + + % Check ge/set/delete binary strings + + ok = couch_config:set(<<"foo">>, <<"bar">>, <<"baz">>, false), + etap:is( + couch_config:get(<<"foo">>, <<"bar">>), + <<"baz">>, + "Can get and set with binary section and key values." + ), + ok = couch_config:delete(<<"foo">>, <<"bar">>, false), + etap:is( + couch_config:get(<<"foo">>, <<"bar">>), + undefined, + "Deleting with binary section/key pairs sets the value to \"\"" + ), + + ok. diff --git a/apps/couch/test/etap/081-config-default.ini b/apps/couch/test/etap/081-config-default.ini new file mode 100644 index 00000000..abdf79bc --- /dev/null +++ b/apps/couch/test/etap/081-config-default.ini @@ -0,0 +1,6 @@ +[couchdb] +max_dbs_open=100 + +[httpd] +port=5984 +bind_address = 127.0.0.1 diff --git a/apps/couch/test/etap/081-config-override.1.ini b/apps/couch/test/etap/081-config-override.1.ini new file mode 100644 index 00000000..55451dad --- /dev/null +++ b/apps/couch/test/etap/081-config-override.1.ini @@ -0,0 +1,22 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. + +[couchdb] +max_dbs_open=10 + +[httpd] +port=4895 diff --git a/apps/couch/test/etap/081-config-override.2.ini b/apps/couch/test/etap/081-config-override.2.ini new file mode 100644 index 00000000..5f46357f --- /dev/null +++ b/apps/couch/test/etap/081-config-override.2.ini @@ -0,0 +1,22 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. + +[httpd] +port = 80 + +[fizbang] +unicode = normalized diff --git a/apps/couch/test/etap/081-config-override.t b/apps/couch/test/etap/081-config-override.t new file mode 100755 index 00000000..16a6b6f6 --- /dev/null +++ b/apps/couch/test/etap/081-config-override.t @@ -0,0 +1,213 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +default_config() -> + filename:join([code:lib_dir(couch, test),"etap/081-config-default.ini"]). + +local_config_1() -> + filename:join([code:lib_dir(couch, test),"etap/081-config-override.1.ini"]). + +local_config_2() -> + filename:join([code:lib_dir(couch, test),"etap/081-config-override.2.ini"]). + +local_config_write() -> + test_util:build_file("test/etap/temp.081"). + +% Run tests and wait for the config gen_server to shutdown. +run_tests(IniFiles, Tests) -> + couch_config_event:start_link(), + {ok, Pid} = couch_config:start_link(IniFiles), + erlang:monitor(process, Pid), + Tests(), + couch_config:stop(), + receive + {'DOWN', _, _, Pid, _} -> ok; + _Other -> etap:diag("OTHER: ~p~n", [_Other]) + after + 1000 -> throw({timeout_error, config_stop}) + end. + +main(_) -> + test_util:init_code_path(), + etap:plan(17), + + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + + CheckStartStop = fun() -> ok end, + run_tests([default_config()], CheckStartStop), + + CheckDefaults = fun() -> + etap:is( + couch_config:get("couchdb", "max_dbs_open"), + "100", + "{couchdb, max_dbs_open} is 100 by defualt." + ), + + etap:is( + couch_config:get("httpd","port"), + "5984", + "{httpd, port} is 5984 by default" + ), + + etap:is( + couch_config:get("fizbang", "unicode"), + undefined, + "{fizbang, unicode} is undefined by default" + ) + end, + + run_tests([default_config()], CheckDefaults), + + + % Check that subsequent files override values appropriately + + CheckOverride = fun() -> + etap:is( + couch_config:get("couchdb", "max_dbs_open"), + "10", + "{couchdb, max_dbs_open} was overriden with the value 10" + ), + + etap:is( + couch_config:get("httpd", "port"), + "4895", + "{httpd, port} was overriden with the value 4895" + ) + end, + + run_tests([default_config(), local_config_1()], CheckOverride), + + + % Check that overrides can create new sections + + CheckOverride2 = fun() -> + etap:is( + couch_config:get("httpd", "port"), + "80", + "{httpd, port} is overriden with the value 80" + ), + + etap:is( + couch_config:get("fizbang", "unicode"), + "normalized", + "{fizbang, unicode} was created by override INI file" + ) + end, + + run_tests([default_config(), local_config_2()], CheckOverride2), + + + % Check that values can be overriden multiple times + + CheckOverride3 = fun() -> + etap:is( + couch_config:get("httpd", "port"), + "80", + "{httpd, port} value was taken from the last specified INI file." + ) + end, + + run_tests( + [default_config(), local_config_1(), local_config_2()], + CheckOverride3 + ), + + % Check persistence to last file. + + % Empty the file in case it exists. + {ok, Fd} = file:open(local_config_write(), write), + ok = file:truncate(Fd), + ok = file:close(Fd), + + % Open and write a value + CheckCanWrite = fun() -> + etap:is( + couch_config:get("httpd", "port"), + "5984", + "{httpd, port} is still 5984 by default" + ), + + etap:is( + couch_config:set("httpd", "port", "8080"), + ok, + "Writing {httpd, port} is kosher." + ), + + etap:is( + couch_config:get("httpd", "port"), + "8080", + "{httpd, port} was updated to 8080 successfully." + ), + + etap:is( + couch_config:delete("httpd", "bind_address"), + ok, + "Deleting {httpd, bind_address} succeeds" + ), + + etap:is( + couch_config:get("httpd", "bind_address"), + undefined, + "{httpd, bind_address} was actually deleted." + ) + end, + + run_tests([default_config(), local_config_write()], CheckCanWrite), + + % Open and check where we don't expect persistence. + + CheckDidntWrite = fun() -> + etap:is( + couch_config:get("httpd", "port"), + "5984", + "{httpd, port} was not persisted to the primary INI file." + ), + + etap:is( + couch_config:get("httpd", "bind_address"), + "127.0.0.1", + "{httpd, bind_address} was not deleted form the primary INI file." + ) + end, + + run_tests([default_config()], CheckDidntWrite), + + % Open and check we have only the persistence we expect. + CheckDidWrite = fun() -> + etap:is( + couch_config:get("httpd", "port"), + "8080", + "{httpd, port} is still 8080 after reopening the config." + ), + + etap:is( + couch_config:get("httpd", "bind_address"), + undefined, + "{httpd, bind_address} is still \"\" after reopening." + ) + end, + + run_tests([local_config_write()], CheckDidWrite), + + ok. diff --git a/apps/couch/test/etap/082-config-default.ini b/apps/couch/test/etap/082-config-default.ini new file mode 100644 index 00000000..e46f158f --- /dev/null +++ b/apps/couch/test/etap/082-config-default.ini @@ -0,0 +1,6 @@ +[couchdb] +max_dbs_open=500 + +[httpd] +port=5984 +bind_address = 127.0.0.1 diff --git a/apps/couch/test/etap/082-config-register.t b/apps/couch/test/etap/082-config-register.t new file mode 100755 index 00000000..36b48cf8 --- /dev/null +++ b/apps/couch/test/etap/082-config-register.t @@ -0,0 +1,95 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +default_config() -> + filename:join([code:lib_dir(couch, test), "etap/082-config-default.ini"]). + +main(_) -> + test_util:init_code_path(), + etap:plan(5), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + couch_config_event:start_link(), + couch_config:start_link([default_config()]), + + etap:is( + couch_config:get("httpd", "port"), + "5984", + "{httpd, port} is 5984 by default." + ), + + ok = couch_config:set("httpd", "port", "4895", false), + + etap:is( + couch_config:get("httpd", "port"), + "4895", + "{httpd, port} changed to 4895" + ), + + SentinelFunc = fun() -> + % Ping/Pong to make sure we wait for this + % process to die + receive {ping, From} -> From ! pong end + end, + SentinelPid = spawn(SentinelFunc), + + couch_config:register( + fun("httpd", "port", Value) -> + etap:is(Value, "8080", "Registered function got notification.") + end, + SentinelPid + ), + + ok = couch_config:set("httpd", "port", "8080", false), + + % Implicitly checking that we *don't* call the function + etap:is( + couch_config:get("httpd", "bind_address"), + "127.0.0.1", + "{httpd, bind_address} is not '0.0.0.0'" + ), + ok = couch_config:set("httpd", "bind_address", "0.0.0.0", false), + + % Ping-Pong kill process + SentinelPid ! {ping, self()}, + receive + _Any -> ok + after 1000 -> + throw({timeout_error, registered_pid}) + end, + + ok = couch_config:set("httpd", "port", "80", false), + etap:is( + couch_config:get("httpd", "port"), + "80", + "Implicitly test that the function got de-registered" + ), + + % test passing of Persist flag + couch_config:register( + fun("httpd", _, _, Persist) -> + etap:is(Persist, false) + end), + ok = couch_config:set("httpd", "port", "80", false), + + ok. diff --git a/apps/couch/test/etap/083-config-no-files.t b/apps/couch/test/etap/083-config-no-files.t new file mode 100755 index 00000000..fe2b75c4 --- /dev/null +++ b/apps/couch/test/etap/083-config-no-files.t @@ -0,0 +1,56 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +default_config() -> + "rel/overlay/etc/default.ini". + +main(_) -> + test_util:init_code_path(), + etap:plan(3), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +test() -> + couch_config_event:start_link(), + couch_config:start_link([]), + + etap:fun_is( + fun(KVPairs) -> length(KVPairs) == 0 end, + couch_config:all(), + "No INI files specified returns 0 key/value pairs." + ), + + ok = couch_config:set("httpd", "port", "80", false), + + etap:is( + couch_config:get("httpd", "port"), + "80", + "Created a new non-persisted k/v pair." + ), + + ok = couch_config:set("httpd", "bind_address", "127.0.0.1", false), + etap:is( + couch_config:get("httpd", "bind_address"), + "127.0.0.1", + "Asking for a persistent key/value pair doesn't choke." + ), + + ok. diff --git a/apps/couch/test/etap/090-task-status.t b/apps/couch/test/etap/090-task-status.t new file mode 100755 index 00000000..b278de7f --- /dev/null +++ b/apps/couch/test/etap/090-task-status.t @@ -0,0 +1,209 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(16), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +check_status(Pid,ListPropLists) -> + From = list_to_binary(pid_to_list(Pid)), + Element = lists:foldl( + fun(PropList,Acc) -> + case couch_util:get_value(pid,PropList) of + From -> + [PropList | Acc]; + _ -> + [] + end + end, + [], ListPropLists + ), + couch_util:get_value(status,hd(Element)). + +loop() -> + receive + {add, From} -> + Resp = couch_task_status:add_task("type", "task", "init"), + From ! {ok, self(), Resp}, + loop(); + {update, Status, From} -> + Resp = couch_task_status:update(Status), + From ! {ok, self(), Resp}, + loop(); + {update_frequency, Msecs, From} -> + Resp = couch_task_status:set_update_frequency(Msecs), + From ! {ok, self(), Resp}, + loop(); + {done, From} -> + From ! {ok, self(), ok} + end. + +call(Pid, Command) -> + Pid ! {Command, self()}, + wait(Pid). + +call(Pid, Command, Arg) -> + Pid ! {Command, Arg, self()}, + wait(Pid). + +wait(Pid) -> + receive + {ok, Pid, Msg} -> Msg + after 1000 -> + throw(timeout_error) + end. + +test() -> + {ok, TaskStatusPid} = couch_task_status:start_link(), + + TaskUpdater = fun() -> loop() end, + % create three updaters + Pid1 = spawn(TaskUpdater), + Pid2 = spawn(TaskUpdater), + Pid3 = spawn(TaskUpdater), + + ok = call(Pid1, add), + etap:is( + length(couch_task_status:all()), + 1, + "Started a task" + ), + + etap:is( + call(Pid1, add), + {add_task_error, already_registered}, + "Unable to register multiple tasks for a single Pid." + ), + + etap:is( + check_status(Pid1, couch_task_status:all()), + <<"init">>, + "Task status was set to 'init'." + ), + + call(Pid1,update,"running"), + etap:is( + check_status(Pid1,couch_task_status:all()), + <<"running">>, + "Status updated to 'running'." + ), + + + call(Pid2,add), + etap:is( + length(couch_task_status:all()), + 2, + "Started a second task." + ), + + etap:is( + check_status(Pid2, couch_task_status:all()), + <<"init">>, + "Second tasks's status was set to 'init'." + ), + + call(Pid2, update, "running"), + etap:is( + check_status(Pid2, couch_task_status:all()), + <<"running">>, + "Second task's status updated to 'running'." + ), + + + call(Pid3, add), + etap:is( + length(couch_task_status:all()), + 3, + "Registered a third task." + ), + + etap:is( + check_status(Pid3, couch_task_status:all()), + <<"init">>, + "Third tasks's status was set to 'init'." + ), + + call(Pid3, update, "running"), + etap:is( + check_status(Pid3, couch_task_status:all()), + <<"running">>, + "Third task's status updated to 'running'." + ), + + + call(Pid3, update_frequency, 500), + call(Pid3, update, "still running"), + etap:is( + check_status(Pid3, couch_task_status:all()), + <<"still running">>, + "Third task's status updated to 'still running'." + ), + + call(Pid3, update, "skip this update"), + etap:is( + check_status(Pid3, couch_task_status:all()), + <<"still running">>, + "Status update dropped because of frequency limit." + ), + + call(Pid3, update_frequency, 0), + call(Pid3, update, "don't skip"), + etap:is( + check_status(Pid3, couch_task_status:all()), + <<"don't skip">>, + "Status updated after reseting frequency limit." + ), + + + call(Pid1, done), + etap:is( + length(couch_task_status:all()), + 2, + "First task finished." + ), + + call(Pid2, done), + etap:is( + length(couch_task_status:all()), + 1, + "Second task finished." + ), + + call(Pid3, done), + etap:is( + length(couch_task_status:all()), + 0, + "Third task finished." + ), + + erlang:monitor(process, TaskStatusPid), + couch_task_status:stop(), + receive + {'DOWN', _, _, TaskStatusPid, _} -> + ok + after + 1000 -> + throw(timeout_error) + end, + + ok. diff --git a/apps/couch/test/etap/100-ref-counter.t b/apps/couch/test/etap/100-ref-counter.t new file mode 100755 index 00000000..8f996d04 --- /dev/null +++ b/apps/couch/test/etap/100-ref-counter.t @@ -0,0 +1,114 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(8), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + +loop() -> + receive + close -> ok + end. + +wait() -> + receive + {'DOWN', _, _, _, _} -> ok + after 1000 -> + throw(timeout_error) + end. + +test() -> + {ok, RefCtr} = couch_ref_counter:start([]), + + etap:is( + couch_ref_counter:count(RefCtr), + 1, + "A ref_counter is initialized with the calling process as a referer." + ), + + ChildPid1 = spawn(fun() -> loop() end), + + % This is largely implicit in that nothing else breaks + % as ok is just returned from gen_server:cast() + etap:is( + couch_ref_counter:drop(RefCtr, ChildPid1), + ok, + "Dropping an unknown Pid is ignored." + ), + + couch_ref_counter:add(RefCtr, ChildPid1), + etap:is( + couch_ref_counter:count(RefCtr), + 2, + "Adding a Pid to the ref_counter increases it's count." + ), + + couch_ref_counter:add(RefCtr, ChildPid1), + etap:is( + couch_ref_counter:count(RefCtr), + 2, + "Readding the same Pid maintains the count but increments it's refs." + ), + + couch_ref_counter:drop(RefCtr, ChildPid1), + etap:is( + couch_ref_counter:count(RefCtr), + 2, + "Droping the doubly added Pid only removes a ref, not a referer." + ), + + couch_ref_counter:drop(RefCtr, ChildPid1), + etap:is( + couch_ref_counter:count(RefCtr), + 1, + "Dropping the second ref drops the referer." + ), + + couch_ref_counter:add(RefCtr, ChildPid1), + etap:is( + couch_ref_counter:count(RefCtr), + 2, + "Sanity checking that the Pid was re-added." + ), + + erlang:monitor(process, ChildPid1), + ChildPid1 ! close, + wait(), + + CheckFun = fun + (Iter, nil) -> + case couch_ref_counter:count(RefCtr) of + 1 -> Iter; + _ -> nil + end; + (_, Acc) -> + Acc + end, + Result = lists:foldl(CheckFun, nil, lists:seq(1, 10000)), + etap:isnt( + Result, + nil, + "The referer count was decremented automatically on process exit." + ), + + ok. diff --git a/apps/couch/test/etap/120-stats-collect.inactive b/apps/couch/test/etap/120-stats-collect.inactive new file mode 100755 index 00000000..dee88765 --- /dev/null +++ b/apps/couch/test/etap/120-stats-collect.inactive @@ -0,0 +1,150 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + etap:plan(11), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test() -> + couch_stats_collector:start(), + ok = test_counters(), + ok = test_abs_values(), + ok = test_proc_counting(), + ok = test_all(), + ok. + +test_counters() -> + AddCount = fun() -> couch_stats_collector:increment(foo) end, + RemCount = fun() -> couch_stats_collector:decrement(foo) end, + repeat(AddCount, 100), + repeat(RemCount, 25), + repeat(AddCount, 10), + repeat(RemCount, 5), + etap:is( + couch_stats_collector:get(foo), + 80, + "Incrememnt tracks correctly." + ), + + repeat(RemCount, 80), + etap:is( + couch_stats_collector:get(foo), + 0, + "Decremented to zaro." + ), + ok. + +test_abs_values() -> + lists:map(fun(Val) -> + couch_stats_collector:record(bar, Val) + end, lists:seq(1, 15)), + etap:is( + couch_stats_collector:get(bar), + lists:seq(1, 15), + "Absolute values are recorded correctly." + ), + + couch_stats_collector:clear(bar), + etap:is( + couch_stats_collector:get(bar), + nil, + "Absolute values are cleared correctly." + ), + ok. + +test_proc_counting() -> + Self = self(), + OnePid = spawn(fun() -> + couch_stats_collector:track_process_count(hoopla), + Self ! reporting, + receive sepuku -> ok end + end), + R1 = erlang:monitor(process, OnePid), + receive reporting -> ok end, + etap:is( + couch_stats_collector:get(hoopla), + 1, + "track_process_count incrememnts the counter." + ), + + TwicePid = spawn(fun() -> + couch_stats_collector:track_process_count(hoopla), + couch_stats_collector:track_process_count(hoopla), + Self ! reporting, + receive sepuku -> ok end + end), + R2 = erlang:monitor(process, TwicePid), + receive reporting -> ok end, + etap:is( + couch_stats_collector:get(hoopla), + 3, + "track_process_count allows more than one incrememnt per Pid" + ), + + OnePid ! sepuku, + receive {'DOWN', R1, _, _, _} -> ok end, + timer:sleep(250), + etap:is( + couch_stats_collector:get(hoopla), + 2, + "Process count is decremented when process exits." + ), + + TwicePid ! sepuku, + receive {'DOWN', R2, _, _, _} -> ok end, + timer:sleep(250), + etap:is( + couch_stats_collector:get(hoopla), + 0, + "Process count is decremented for each call to track_process_count." + ), + ok. + +test_all() -> + couch_stats_collector:record(bar, 0.0), + couch_stats_collector:record(bar, 1.0), + etap:is( + couch_stats_collector:all(), + [{foo, 0}, {hoopla, 0}, {bar, [1.0, 0.0]}], + "all/0 returns all counters and absolute values." + ), + + etap:is( + couch_stats_collector:all(incremental), + [{foo, 0}, {hoopla, 0}], + "all/1 returns only the specified type." + ), + + couch_stats_collector:record(zing, 90), + etap:is( + couch_stats_collector:all(absolute), + [{zing, [90]}, {bar, [1.0, 0.0]}], + "all/1 returns only the specified type." + ), + ok. + +repeat(_, 0) -> + ok; +repeat(Fun, Count) -> + Fun(), + repeat(Fun, Count-1). diff --git a/apps/couch/test/etap/121-stats-aggregates.cfg b/apps/couch/test/etap/121-stats-aggregates.cfg new file mode 100644 index 00000000..30e475da --- /dev/null +++ b/apps/couch/test/etap/121-stats-aggregates.cfg @@ -0,0 +1,19 @@ +% Licensed to the Apache Software Foundation (ASF) under one +% or more contributor license agreements. See the NOTICE file +% distributed with this work for additional information +% regarding copyright ownership. The ASF licenses this file +% to you under the Apache License, Version 2.0 (the +% "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +% KIND, either express or implied. See the License for the +% specific language governing permissions and limitations +% under the License. + +{testing, stuff, "yay description"}. +{number, '11', "randomosity"}. diff --git a/apps/couch/test/etap/121-stats-aggregates.inactive b/apps/couch/test/etap/121-stats-aggregates.inactive new file mode 100755 index 00000000..d678aa9d --- /dev/null +++ b/apps/couch/test/etap/121-stats-aggregates.inactive @@ -0,0 +1,171 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +ini_file() -> + test_util:source_file("test/etap/121-stats-aggregates.ini"). + +cfg_file() -> + test_util:source_file("test/etap/121-stats-aggregates.cfg"). + +main(_) -> + test_util:init_code_path(), + etap:plan(17), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail() + end, + ok. + +test() -> + couch_config:start_link([ini_file()]), + couch_stats_collector:start(), + couch_stats_aggregator:start(cfg_file()), + ok = test_all_empty(), + ok = test_get_empty(), + ok = test_count_stats(), + ok = test_abs_stats(), + ok. + +test_all_empty() -> + {Aggs} = couch_stats_aggregator:all(), + + etap:is(length(Aggs), 2, "There are only two aggregate types in testing."), + etap:is( + couch_util:get_value(testing, Aggs), + {[{stuff, make_agg(<<"yay description">>, + null, null, null, null, null)}]}, + "{testing, stuff} is empty at start." + ), + etap:is( + couch_util:get_value(number, Aggs), + {[{'11', make_agg(<<"randomosity">>, + null, null, null, null, null)}]}, + "{number, '11'} is empty at start." + ), + ok. + +test_get_empty() -> + etap:is( + couch_stats_aggregator:get_json({testing, stuff}), + make_agg(<<"yay description">>, null, null, null, null, null), + "Getting {testing, stuff} returns an empty aggregate." + ), + etap:is( + couch_stats_aggregator:get_json({number, '11'}), + make_agg(<<"randomosity">>, null, null, null, null, null), + "Getting {number, '11'} returns an empty aggregate." + ), + ok. + +test_count_stats() -> + lists:foreach(fun(_) -> + couch_stats_collector:increment({testing, stuff}) + end, lists:seq(1, 100)), + couch_stats_aggregator:collect_sample(), + etap:is( + couch_stats_aggregator:get_json({testing, stuff}), + make_agg(<<"yay description">>, 100, 100, null, 100, 100), + "COUNT: Adding values changes the stats." + ), + etap:is( + couch_stats_aggregator:get_json({testing, stuff}, 1), + make_agg(<<"yay description">>, 100, 100, null, 100, 100), + "COUNT: Adding values changes stats for all times." + ), + + timer:sleep(500), + couch_stats_aggregator:collect_sample(), + etap:is( + couch_stats_aggregator:get_json({testing, stuff}), + make_agg(<<"yay description">>, 100, 50, 70.711, 0, 100), + "COUNT: Removing values changes stats." + ), + etap:is( + couch_stats_aggregator:get_json({testing, stuff}, 1), + make_agg(<<"yay description">>, 100, 50, 70.711, 0, 100), + "COUNT: Removing values changes stats for all times." + ), + + timer:sleep(600), + couch_stats_aggregator:collect_sample(), + etap:is( + couch_stats_aggregator:get_json({testing, stuff}), + make_agg(<<"yay description">>, 100, 33.333, 57.735, 0, 100), + "COUNT: Letting time passes doesn't remove data from time 0 aggregates" + ), + etap:is( + couch_stats_aggregator:get_json({testing, stuff}, 1), + make_agg(<<"yay description">>, 0, 0, 0, 0, 0), + "COUNT: Letting time pass removes data from other time aggregates." + ), + ok. + +test_abs_stats() -> + lists:foreach(fun(X) -> + couch_stats_collector:record({number, 11}, X) + end, lists:seq(0, 10)), + couch_stats_aggregator:collect_sample(), + etap:is( + couch_stats_aggregator:get_json({number, 11}), + make_agg(<<"randomosity">>, 5, 5, null, 5, 5), + "ABS: Adding values changes the stats." + ), + etap:is( + couch_stats_aggregator:get_json({number, 11}, 1), + make_agg(<<"randomosity">>, 5, 5, null, 5, 5), + "ABS: Adding values changes stats for all times." + ), + + timer:sleep(500), + couch_stats_collector:record({number, 11}, 15), + couch_stats_aggregator:collect_sample(), + etap:is( + couch_stats_aggregator:get_json({number, 11}), + make_agg(<<"randomosity">>, 20, 10, 7.071, 5, 15), + "ABS: New values changes stats" + ), + etap:is( + couch_stats_aggregator:get_json({number, 11}, 1), + make_agg(<<"randomosity">>, 20, 10, 7.071, 5, 15), + "ABS: Removing values changes stats for all times." + ), + + timer:sleep(600), + couch_stats_aggregator:collect_sample(), + etap:is( + couch_stats_aggregator:get_json({number, 11}), + make_agg(<<"randomosity">>, 20, 10, 7.071, 5, 15), + "ABS: Letting time passes doesn't remove data from time 0 aggregates" + ), + etap:is( + couch_stats_aggregator:get_json({number, 11}, 1), + make_agg(<<"randomosity">>, 15, 15, null, 15, 15), + "ABS: Letting time pass removes data from other time aggregates." + ), + ok. + +make_agg(Desc, Sum, Mean, StdDev, Min, Max) -> + {[ + {description, Desc}, + {current, Sum}, + {sum, Sum}, + {mean, Mean}, + {stddev, StdDev}, + {min, Min}, + {max, Max} + ]}. diff --git a/apps/couch/test/etap/121-stats-aggregates.ini b/apps/couch/test/etap/121-stats-aggregates.ini new file mode 100644 index 00000000..cc5cd218 --- /dev/null +++ b/apps/couch/test/etap/121-stats-aggregates.ini @@ -0,0 +1,20 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. + +[stats] +rate = 10000000 ; We call collect_sample in testing +samples = [0, 1] diff --git a/apps/couch/test/etap/etap.beam b/apps/couch/test/etap/etap.beam Binary files differnew file mode 100644 index 00000000..da77f1d6 --- /dev/null +++ b/apps/couch/test/etap/etap.beam diff --git a/apps/couch/test/etap/random_port.ini b/apps/couch/test/etap/random_port.ini new file mode 100644 index 00000000..ada3c13d --- /dev/null +++ b/apps/couch/test/etap/random_port.ini @@ -0,0 +1,19 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. + +[httpd] +port = 0 diff --git a/apps/couch/test/etap/run.tpl b/apps/couch/test/etap/run.tpl new file mode 100644 index 00000000..faf0f456 --- /dev/null +++ b/apps/couch/test/etap/run.tpl @@ -0,0 +1,27 @@ +#!/bin/sh -e + +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +SRCDIR="%abs_top_srcdir%" +BUILDIR="%abs_top_builddir%" + +export ERL_FLAGS="$ERL_FLAGS -pa $BUILDIR/test/etap/" + +if test $# -gt 0; then + while [ $# -gt 0 ]; do + $1 + shift + done +else + prove $SRCDIR/test/etap/*.t +fi diff --git a/apps/couch/test/etap/test_cfg_register.c b/apps/couch/test/etap/test_cfg_register.c new file mode 100644 index 00000000..7161eb55 --- /dev/null +++ b/apps/couch/test/etap/test_cfg_register.c @@ -0,0 +1,30 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include <stdio.h> + +int +main(int argc, const char * argv[]) +{ + char c = '\0'; + size_t num = 1; + + fprintf(stdout, "[\"register\", \"s1\"]\n"); + fprintf(stdout, "[\"register\", \"s2\", \"k\"]\n"); + fflush(stdout); + + while(c != '\n' && num > 0) { + num = fread(&c, 1, 1, stdin); + } + + exit(0); +} diff --git a/apps/couch/test/etap/test_web.erl b/apps/couch/test/etap/test_web.erl new file mode 100644 index 00000000..ed78651f --- /dev/null +++ b/apps/couch/test/etap/test_web.erl @@ -0,0 +1,99 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(test_web). +-behaviour(gen_server). + +-export([start_link/0, loop/1, get_port/0, set_assert/1, check_last/0]). +-export([init/1, terminate/2, code_change/3]). +-export([handle_call/3, handle_cast/2, handle_info/2]). + +-define(SERVER, test_web_server). +-define(HANDLER, test_web_handler). + +start_link() -> + gen_server:start({local, ?HANDLER}, ?MODULE, [], []), + mochiweb_http:start([ + {name, ?SERVER}, + {loop, {?MODULE, loop}}, + {port, 0} + ]). + +loop(Req) -> + %etap:diag("Handling request: ~p", [Req]), + case gen_server:call(?HANDLER, {check_request, Req}) of + {ok, RespInfo} -> + {ok, Req:respond(RespInfo)}; + {raw, {Status, Headers, BodyChunks}} -> + Resp = Req:start_response({Status, Headers}), + lists:foreach(fun(C) -> Resp:send(C) end, BodyChunks), + erlang:put(mochiweb_request_force_close, true), + {ok, Resp}; + {chunked, {Status, Headers, BodyChunks}} -> + Resp = Req:respond({Status, Headers, chunked}), + timer:sleep(500), + lists:foreach(fun(C) -> Resp:write_chunk(C) end, BodyChunks), + Resp:write_chunk([]), + {ok, Resp}; + {error, Reason} -> + etap:diag("Error: ~p", [Reason]), + Body = lists:flatten(io_lib:format("Error: ~p", [Reason])), + {ok, Req:respond({200, [], Body})} + end. + +get_port() -> + mochiweb_socket_server:get(?SERVER, port). + +set_assert(Fun) -> + ok = gen_server:call(?HANDLER, {set_assert, Fun}). + +check_last() -> + gen_server:call(?HANDLER, last_status). + +init(_) -> + {ok, nil}. + +terminate(_Reason, _State) -> + ok. + +handle_call({check_request, Req}, _From, State) when is_function(State, 1) -> + Resp2 = case (catch State(Req)) of + {ok, Resp} -> {reply, {ok, Resp}, was_ok}; + {raw, Resp} -> {reply, {raw, Resp}, was_ok}; + {chunked, Resp} -> {reply, {chunked, Resp}, was_ok}; + Error -> {reply, {error, Error}, not_ok} + end, + Req:cleanup(), + Resp2; +handle_call({check_request, _Req}, _From, _State) -> + {reply, {error, no_assert_function}, not_ok}; +handle_call(last_status, _From, State) when is_atom(State) -> + {reply, State, nil}; +handle_call(last_status, _From, State) -> + {reply, {error, not_checked}, State}; +handle_call({set_assert, Fun}, _From, nil) -> + {reply, ok, Fun}; +handle_call({set_assert, _}, _From, State) -> + {reply, {error, assert_function_set}, State}; +handle_call(Msg, _From, State) -> + {reply, {ignored, Msg}, State}. + +handle_cast(Msg, State) -> + etap:diag("Ignoring cast message: ~p", [Msg]), + {noreply, State}. + +handle_info(Msg, State) -> + etap:diag("Ignoring info message: ~p", [Msg]), + {noreply, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/couch/test/javascript/cli_runner.js b/apps/couch/test/javascript/cli_runner.js new file mode 100644 index 00000000..cdbe2e73 --- /dev/null +++ b/apps/couch/test/javascript/cli_runner.js @@ -0,0 +1,52 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +var console = { + log: function(arg) { + var msg = (arg.toString()).replace(/\n/g, "\n "); + print("# " + msg); + } +}; + +function T(arg1, arg2) { + if(!arg1) { + throw((arg2 ? arg2 : arg1).toString()); + } +} + +function runTestConsole(num, name, func) { + try { + func(); + print("ok " + num + " " + name); + } catch(e) { + msg = e.toString(); + msg = msg.replace(/\n/g, "\n "); + print("not ok " + num + " " + name + " " + msg); + } +} + +function runAllTestsConsole() { + var numTests = 0; + for(var t in couchTests) { numTests += 1; } + print("1.." + numTests); + var testId = 0; + for(var t in couchTests) { + testId += 1; + runTestConsole(testId, t, couchTests[t]); + } +}; + +try { + runAllTestsConsole(); +} catch (e) { + p("# " + e.toString()); +} diff --git a/apps/couch/test/javascript/couch_http.js b/apps/couch/test/javascript/couch_http.js new file mode 100644 index 00000000..5f4716d2 --- /dev/null +++ b/apps/couch/test/javascript/couch_http.js @@ -0,0 +1,62 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +(function() { + CouchHTTP.prototype.base_url = "http://127.0.0.1:5984" + + if(typeof(CouchHTTP) != "undefined") { + CouchHTTP.prototype.open = function(method, url, async) { + if(!/^\s*http:\/\//.test(url)) { + if(/^[^\/]/.test(url)) { + url = this.base_url + "/" + url; + } else { + url = this.base_url + url; + } + } + + return this._open(method, url, async); + }; + + CouchHTTP.prototype.setRequestHeader = function(name, value) { + // Drop content-length headers because cURL will set it for us + // based on body length + if(name.toLowerCase().replace(/^\s+|\s+$/g, '') != "content-length") { + this._setRequestHeader(name, value); + } + } + + CouchHTTP.prototype.send = function(body) { + this._send(body || ""); + var headers = {}; + this._headers.forEach(function(hdr) { + var pair = hdr.split(":"); + var name = pair.shift(); + headers[name] = pair.join(":").replace(/^\s+|\s+$/g, ""); + }); + this.headers = headers; + }; + + CouchHTTP.prototype.getResponseHeader = function(name) { + for(var hdr in this.headers) { + if(hdr.toLowerCase() == name.toLowerCase()) { + return this.headers[hdr]; + } + } + return null; + }; + } +})(); + +CouchDB.urlPrefix = ""; +CouchDB.newXhr = function() { + return new CouchHTTP(); +}; diff --git a/apps/couch/test/javascript/run.tpl b/apps/couch/test/javascript/run.tpl new file mode 100644 index 00000000..5f971581 --- /dev/null +++ b/apps/couch/test/javascript/run.tpl @@ -0,0 +1,30 @@ +#!/bin/sh -e + +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +SRC_DIR=%abs_top_srcdir% +SCRIPT_DIR=$SRC_DIR/share/www/script +JS_TEST_DIR=$SRC_DIR/test/javascript + +COUCHJS=%abs_top_builddir%/src/couchdb/priv/couchjs + +cat $SCRIPT_DIR/json2.js \ + $SCRIPT_DIR/sha1.js \ + $SCRIPT_DIR/oauth.js \ + $SCRIPT_DIR/couch.js \ + $SCRIPT_DIR/couch_test_runner.js \ + $SCRIPT_DIR/couch_tests.js \ + $SCRIPT_DIR/test/*.js \ + $JS_TEST_DIR/couch_http.js \ + $JS_TEST_DIR/cli_runner.js \ + | $COUCHJS -H - diff --git a/apps/couch/test/view_server/query_server_spec.rb b/apps/couch/test/view_server/query_server_spec.rb new file mode 100644 index 00000000..de1df5c1 --- /dev/null +++ b/apps/couch/test/view_server/query_server_spec.rb @@ -0,0 +1,824 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# to run (requires ruby and rspec): +# spec test/view_server/query_server_spec.rb -f specdoc --color +# +# environment options: +# QS_TRACE=true +# shows full output from the query server +# QS_LANG=lang +# run tests on the query server (for now, one of: js, erlang) +# + +COUCH_ROOT = "#{File.dirname(__FILE__)}/../.." unless defined?(COUCH_ROOT) +LANGUAGE = ENV["QS_LANG"] || "js" + +puts "Running query server specs for #{LANGUAGE} query server" + +require 'spec' +require 'json' + +class OSProcessRunner + def self.run + trace = ENV["QS_TRACE"] || false + puts "launching #{run_command}" if trace + if block_given? + IO.popen(run_command, "r+") do |io| + qs = QueryServerRunner.new(io, trace) + yield qs + end + else + io = IO.popen(run_command, "r+") + QueryServerRunner.new(io, trace) + end + end + def initialize io, trace = false + @qsio = io + @trace = trace + end + def close + @qsio.close + end + def reset! + run(["reset"]) + end + def add_fun(fun) + run(["add_fun", fun]) + end + def teach_ddoc(ddoc) + run(["ddoc", "new", ddoc_id(ddoc), ddoc]) + end + def ddoc_run(ddoc, fun_path, args) + run(["ddoc", ddoc_id(ddoc), fun_path, args]) + end + def ddoc_id(ddoc) + d_id = ddoc["_id"] + raise 'ddoc must have _id' unless d_id + d_id + end + def get_chunks + resp = jsgets + raise "not a chunk" unless resp.first == "chunks" + return resp[1] + end + def run json + rrun json + jsgets + end + def rrun json + line = json.to_json + puts "run: #{line}" if @trace + @qsio.puts line + end + def rgets + resp = @qsio.gets + puts "got: #{resp}" if @trace + resp + end + def jsgets + resp = rgets + # err = @qserr.gets + # puts "err: #{err}" if err + if resp + begin + rj = JSON.parse("[#{resp.chomp}]")[0] + rescue JSON::ParserError + puts "JSON ERROR (dump under trace mode)" + # puts resp.chomp + while resp = rgets + # puts resp.chomp + end + end + if rj.respond_to?(:[]) && rj.is_a?(Array) + if rj[0] == "log" + log = rj[1] + puts "log: #{log}" if @trace + rj = jsgets + end + end + rj + else + raise "no response" + end + end +end + +class QueryServerRunner < OSProcessRunner + + COMMANDS = { + "js" => "#{COUCH_ROOT}/bin/couchjs_dev #{COUCH_ROOT}/share/server/main.js", + "erlang" => "#{COUCH_ROOT}/test/view_server/run_native_process.es" + } + + def self.run_command + COMMANDS[LANGUAGE] + end +end + +class ExternalRunner < OSProcessRunner + def self.run_command + "#{COUCH_ROOT}/src/couchdb/couchjs #{COUCH_ROOT}/share/server/echo.js" + end +end + +# we could organize this into a design document per language. +# that would make testing future languages really easy. + +functions = { + "emit-twice" => { + "js" => %{function(doc){emit("foo",doc.a); emit("bar",doc.a)}}, + "erlang" => <<-ERLANG + fun({Doc}) -> + A = couch_util:get_value(<<"a">>, Doc, null), + Emit(<<"foo">>, A), + Emit(<<"bar">>, A) + end. + ERLANG + }, + "emit-once" => { + "js" => <<-JS, + function(doc){ + emit("baz",doc.a) + } + JS + "erlang" => <<-ERLANG + fun({Doc}) -> + A = couch_util:get_value(<<"a">>, Doc, null), + Emit(<<"baz">>, A) + end. + ERLANG + }, + "reduce-values-length" => { + "js" => %{function(keys, values, rereduce) { return values.length; }}, + "erlang" => %{fun(Keys, Values, ReReduce) -> length(Values) end.} + }, + "reduce-values-sum" => { + "js" => %{function(keys, values, rereduce) { return sum(values); }}, + "erlang" => %{fun(Keys, Values, ReReduce) -> lists:sum(Values) end.} + }, + "validate-forbidden" => { + "js" => <<-JS, + function(newDoc, oldDoc, userCtx) { + if(newDoc.bad) + throw({forbidden:"bad doc"}); "foo bar"; + } + JS + "erlang" => <<-ERLANG + fun({NewDoc}, _OldDoc, _UserCtx) -> + case couch_util:get_value(<<"bad">>, NewDoc) of + undefined -> 1; + _ -> {[{forbidden, <<"bad doc">>}]} + end + end. + ERLANG + }, + "show-simple" => { + "js" => <<-JS, + function(doc, req) { + log("ok"); + return [doc.title, doc.body].join(' - '); + } + JS + "erlang" => <<-ERLANG + fun({Doc}, Req) -> + Title = couch_util:get_value(<<"title">>, Doc), + Body = couch_util:get_value(<<"body">>, Doc), + Resp = <<Title/binary, " - ", Body/binary>>, + {[{<<"body">>, Resp}]} + end. + ERLANG + }, + "show-headers" => { + "js" => <<-JS, + function(doc, req) { + var resp = {"code":200, "headers":{"X-Plankton":"Rusty"}}; + resp.body = [doc.title, doc.body].join(' - '); + return resp; + } + JS + "erlang" => <<-ERLANG + fun({Doc}, Req) -> + Title = couch_util:get_value(<<"title">>, Doc), + Body = couch_util:get_value(<<"body">>, Doc), + Resp = <<Title/binary, " - ", Body/binary>>, + {[ + {<<"code">>, 200}, + {<<"headers">>, {[{<<"X-Plankton">>, <<"Rusty">>}]}}, + {<<"body">>, Resp} + ]} + end. + ERLANG + }, + "show-sends" => { + "js" => <<-JS, + function(head, req) { + start({headers:{"Content-Type" : "text/plain"}}); + send("first chunk"); + send('second "chunk"'); + return "tail"; + }; + JS + "erlang" => <<-ERLANG + fun(Head, Req) -> + Resp = {[ + {<<"headers">>, {[{<<"Content-Type">>, <<"text/plain">>}]}} + ]}, + Start(Resp), + Send(<<"first chunk">>), + Send(<<"second \\\"chunk\\\"">>), + <<"tail">> + end. + ERLANG + }, + "show-while-get-rows" => { + "js" => <<-JS, + function(head, req) { + send("first chunk"); + send(req.q); + var row; + log("about to getRow " + typeof(getRow)); + while(row = getRow()) { + send(row.key); + }; + return "tail"; + }; + JS + "erlang" => <<-ERLANG, + fun(Head, {Req}) -> + Send(<<"first chunk">>), + Send(couch_util:get_value(<<"q">>, Req)), + Fun = fun({Row}, _) -> + Send(couch_util:get_value(<<"key">>, Row)), + {ok, nil} + end, + {ok, _} = FoldRows(Fun, nil), + <<"tail">> + end. + ERLANG + }, + "show-while-get-rows-multi-send" => { + "js" => <<-JS, + function(head, req) { + send("bacon"); + var row; + log("about to getRow " + typeof(getRow)); + while(row = getRow()) { + send(row.key); + send("eggs"); + }; + return "tail"; + }; + JS + "erlang" => <<-ERLANG, + fun(Head, Req) -> + Send(<<"bacon">>), + Fun = fun({Row}, _) -> + Send(couch_util:get_value(<<"key">>, Row)), + Send(<<"eggs">>), + {ok, nil} + end, + FoldRows(Fun, nil), + <<"tail">> + end. + ERLANG + }, + "list-simple" => { + "js" => <<-JS, + function(head, req) { + send("first chunk"); + send(req.q); + var row; + while(row = getRow()) { + send(row.key); + }; + return "early"; + }; + JS + "erlang" => <<-ERLANG, + fun(Head, {Req}) -> + Send(<<"first chunk">>), + Send(couch_util:get_value(<<"q">>, Req)), + Fun = fun({Row}, _) -> + Send(couch_util:get_value(<<"key">>, Row)), + {ok, nil} + end, + FoldRows(Fun, nil), + <<"early">> + end. + ERLANG + }, + "list-chunky" => { + "js" => <<-JS, + function(head, req) { + send("first chunk"); + send(req.q); + var row, i=0; + while(row = getRow()) { + send(row.key); + i += 1; + if (i > 2) { + return('early tail'); + } + }; + }; + JS + "erlang" => <<-ERLANG, + fun(Head, {Req}) -> + Send(<<"first chunk">>), + Send(couch_util:get_value(<<"q">>, Req)), + Fun = fun + ({Row}, Count) when Count < 2 -> + Send(couch_util:get_value(<<"key">>, Row)), + {ok, Count+1}; + ({Row}, Count) when Count == 2 -> + Send(couch_util:get_value(<<"key">>, Row)), + {stop, <<"early tail">>} + end, + {ok, Tail} = FoldRows(Fun, 0), + Tail + end. + ERLANG + }, + "list-old-style" => { + "js" => <<-JS, + function(head, req, foo, bar) { + return "stuff"; + } + JS + "erlang" => <<-ERLANG, + fun(Head, Req, Foo, Bar) -> + <<"stuff">> + end. + ERLANG + }, + "list-capped" => { + "js" => <<-JS, + function(head, req) { + send("bacon") + var row, i = 0; + while(row = getRow()) { + send(row.key); + i += 1; + if (i > 2) { + return('early'); + } + }; + } + JS + "erlang" => <<-ERLANG, + fun(Head, Req) -> + Send(<<"bacon">>), + Fun = fun + ({Row}, Count) when Count < 2 -> + Send(couch_util:get_value(<<"key">>, Row)), + {ok, Count+1}; + ({Row}, Count) when Count == 2 -> + Send(couch_util:get_value(<<"key">>, Row)), + {stop, <<"early">>} + end, + {ok, Tail} = FoldRows(Fun, 0), + Tail + end. + ERLANG + }, + "list-raw" => { + "js" => <<-JS, + function(head, req) { + // log(this.toSource()); + // log(typeof send); + send("first chunk"); + send(req.q); + var row; + while(row = getRow()) { + send(row.key); + }; + return "tail"; + }; + JS + "erlang" => <<-ERLANG, + fun(Head, {Req}) -> + Send(<<"first chunk">>), + Send(couch_util:get_value(<<"q">>, Req)), + Fun = fun({Row}, _) -> + Send(couch_util:get_value(<<"key">>, Row)), + {ok, nil} + end, + FoldRows(Fun, nil), + <<"tail">> + end. + ERLANG + }, + "filter-basic" => { + "js" => <<-JS, + function(doc, req) { + if (doc.good) { + return true; + } + } + JS + "erlang" => <<-ERLANG, + fun({Doc}, Req) -> + couch_util:get_value(<<"good">>, Doc) + end. + ERLANG + }, + "update-basic" => { + "js" => <<-JS, + function(doc, req) { + doc.world = "hello"; + var resp = [doc, "hello doc"]; + return resp; + } + JS + "erlang" => <<-ERLANG, + fun({Doc}, Req) -> + Doc2 = [{<<"world">>, <<"hello">>}|Doc], + [{Doc2}, {[{<<"body">>, <<"hello doc">>}]}] + end. + ERLANG + }, + "error" => { + "js" => <<-JS, + function() { + throw(["error","error_key","testing"]); + } + JS + "erlang" => <<-ERLANG + fun(A, B) -> + throw([<<"error">>,<<"error_key">>,<<"testing">>]) + end. + ERLANG + }, + "fatal" => { + "js" => <<-JS, + function() { + throw(["fatal","error_key","testing"]); + } + JS + "erlang" => <<-ERLANG + fun(A, B) -> + throw([<<"fatal">>,<<"error_key">>,<<"testing">>]) + end. + ERLANG + } +} + +def make_ddoc(fun_path, fun_str) + doc = {"_id"=>"foo"} + d = doc + while p = fun_path.shift + l = p + if !fun_path.empty? + d[p] = {} + d = d[p] + end + end + d[l] = fun_str + doc +end + +describe "query server normal case" do + before(:all) do + `cd #{COUCH_ROOT} && make` + @qs = QueryServerRunner.run + end + after(:all) do + @qs.close + end + it "should reset" do + @qs.run(["reset"]).should == true + end + it "should not erase ddocs on reset" do + @fun = functions["show-simple"][LANGUAGE] + @ddoc = make_ddoc(["shows","simple"], @fun) + @qs.teach_ddoc(@ddoc) + @qs.run(["reset"]).should == true + @qs.ddoc_run(@ddoc, + ["shows","simple"], + [{:title => "Best ever", :body => "Doc body"}, {}]).should == + ["resp", {"body" => "Best ever - Doc body"}] + end + + it "should run map funs" do + @qs.reset! + @qs.run(["add_fun", functions["emit-twice"][LANGUAGE]]).should == true + @qs.run(["add_fun", functions["emit-once"][LANGUAGE]]).should == true + rows = @qs.run(["map_doc", {:a => "b"}]) + rows[0][0].should == ["foo", "b"] + rows[0][1].should == ["bar", "b"] + rows[1][0].should == ["baz", "b"] + end + describe "reduce" do + before(:all) do + @fun = functions["reduce-values-length"][LANGUAGE] + @qs.reset! + end + it "should reduce" do + kvs = (0...10).collect{|i|[i,i*2]} + @qs.run(["reduce", [@fun], kvs]).should == [true, [10]] + end + end + describe "rereduce" do + before(:all) do + @fun = functions["reduce-values-sum"][LANGUAGE] + @qs.reset! + end + it "should rereduce" do + vs = (0...10).collect{|i|i} + @qs.run(["rereduce", [@fun], vs]).should == [true, [45]] + end + end + + describe "design docs" do + before(:all) do + @ddoc = { + "_id" => "foo" + } + @qs.reset! + end + it "should learn design docs" do + @qs.teach_ddoc(@ddoc).should == true + end + end + + # it "should validate" + describe "validation" do + before(:all) do + @fun = functions["validate-forbidden"][LANGUAGE] + @ddoc = make_ddoc(["validate_doc_update"], @fun) + @qs.teach_ddoc(@ddoc) + end + it "should allow good updates" do + @qs.ddoc_run(@ddoc, + ["validate_doc_update"], + [{"good" => true}, {}, {}]).should == 1 + end + it "should reject invalid updates" do + @qs.ddoc_run(@ddoc, + ["validate_doc_update"], + [{"bad" => true}, {}, {}]).should == {"forbidden"=>"bad doc"} + end + end + + describe "show" do + before(:all) do + @fun = functions["show-simple"][LANGUAGE] + @ddoc = make_ddoc(["shows","simple"], @fun) + @qs.teach_ddoc(@ddoc) + end + it "should show" do + @qs.ddoc_run(@ddoc, + ["shows","simple"], + [{:title => "Best ever", :body => "Doc body"}, {}]).should == + ["resp", {"body" => "Best ever - Doc body"}] + end + end + + describe "show with headers" do + before(:all) do + # TODO we can make real ddocs up there. + @fun = functions["show-headers"][LANGUAGE] + @ddoc = make_ddoc(["shows","headers"], @fun) + @qs.teach_ddoc(@ddoc) + end + it "should show headers" do + @qs.ddoc_run( + @ddoc, + ["shows","headers"], + [{:title => "Best ever", :body => "Doc body"}, {}] + ). + should == ["resp", {"code"=>200,"headers" => {"X-Plankton"=>"Rusty"}, "body" => "Best ever - Doc body"}] + end + end + + describe "recoverable error" do + before(:all) do + @fun = functions["error"][LANGUAGE] + @ddoc = make_ddoc(["shows","error"], @fun) + @qs.teach_ddoc(@ddoc) + end + it "should not exit" do + @qs.ddoc_run(@ddoc, ["shows","error"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["error", "error_key", "testing"] + # still running + @qs.run(["reset"]).should == true + end + end + + describe "changes filter" do + before(:all) do + @fun = functions["filter-basic"][LANGUAGE] + @ddoc = make_ddoc(["filters","basic"], @fun) + @qs.teach_ddoc(@ddoc) + end + it "should only return true for good docs" do + @qs.ddoc_run(@ddoc, + ["filters","basic"], + [[{"key"=>"bam", "good" => true}, {"foo" => "bar"}, {"good" => true}], {"req" => "foo"}] + ). + should == [true, [true, false, true]] + end + end + + describe "update" do + before(:all) do + # in another patch we can remove this duplication + # by setting up the design doc for each language ahead of time. + @fun = functions["update-basic"][LANGUAGE] + @ddoc = make_ddoc(["updates","basic"], @fun) + @qs.teach_ddoc(@ddoc) + end + it "should return a doc and a resp body" do + up, doc, resp = @qs.ddoc_run(@ddoc, + ["updates","basic"], + [{"foo" => "gnarly"}, {"method" => "POST"}] + ) + up.should == "up" + doc.should == {"foo" => "gnarly", "world" => "hello"} + resp["body"].should == "hello doc" + end + end + +# end +# LIST TESTS +# __END__ + + describe "ddoc list" do + before(:all) do + @ddoc = { + "_id" => "foo", + "lists" => { + "simple" => functions["list-simple"][LANGUAGE], + "headers" => functions["show-sends"][LANGUAGE], + "rows" => functions["show-while-get-rows"][LANGUAGE], + "buffer-chunks" => functions["show-while-get-rows-multi-send"][LANGUAGE], + "chunky" => functions["list-chunky"][LANGUAGE] + } + } + @qs.teach_ddoc(@ddoc) + end + + describe "example list" do + it "should run normal" do + @qs.ddoc_run(@ddoc, + ["lists","simple"], + [{"foo"=>"bar"}, {"q" => "ok"}] + ).should == ["start", ["first chunk", "ok"], {"headers"=>{}}] + @qs.run(["list_row", {"key"=>"baz"}]).should == ["chunks", ["baz"]] + @qs.run(["list_row", {"key"=>"bam"}]).should == ["chunks", ["bam"]] + @qs.run(["list_row", {"key"=>"foom"}]).should == ["chunks", ["foom"]] + @qs.run(["list_row", {"key"=>"fooz"}]).should == ["chunks", ["fooz"]] + @qs.run(["list_row", {"key"=>"foox"}]).should == ["chunks", ["foox"]] + @qs.run(["list_end"]).should == ["end" , ["early"]] + end + end + + describe "headers" do + it "should do headers proper" do + @qs.ddoc_run(@ddoc, ["lists","headers"], + [{"total_rows"=>1000}, {"q" => "ok"}] + ).should == ["start", ["first chunk", 'second "chunk"'], + {"headers"=>{"Content-Type"=>"text/plain"}}] + @qs.rrun(["list_end"]) + @qs.jsgets.should == ["end", ["tail"]] + end + end + + describe "with rows" do + it "should list em" do + @qs.ddoc_run(@ddoc, ["lists","rows"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["start", ["first chunk", "ok"], {"headers"=>{}}] + @qs.rrun(["list_row", {"key"=>"baz"}]) + @qs.get_chunks.should == ["baz"] + @qs.rrun(["list_row", {"key"=>"bam"}]) + @qs.get_chunks.should == ["bam"] + @qs.rrun(["list_end"]) + @qs.jsgets.should == ["end", ["tail"]] + end + it "should work with zero rows" do + @qs.ddoc_run(@ddoc, ["lists","rows"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["start", ["first chunk", "ok"], {"headers"=>{}}] + @qs.rrun(["list_end"]) + @qs.jsgets.should == ["end", ["tail"]] + end + end + + describe "should buffer multiple chunks sent for a single row." do + it "should should buffer em" do + @qs.ddoc_run(@ddoc, ["lists","buffer-chunks"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["start", ["bacon"], {"headers"=>{}}] + @qs.rrun(["list_row", {"key"=>"baz"}]) + @qs.get_chunks.should == ["baz", "eggs"] + @qs.rrun(["list_row", {"key"=>"bam"}]) + @qs.get_chunks.should == ["bam", "eggs"] + @qs.rrun(["list_end"]) + @qs.jsgets.should == ["end", ["tail"]] + end + end + it "should end after 2" do + @qs.ddoc_run(@ddoc, ["lists","chunky"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["start", ["first chunk", "ok"], {"headers"=>{}}] + + @qs.run(["list_row", {"key"=>"baz"}]). + should == ["chunks", ["baz"]] + + @qs.run(["list_row", {"key"=>"bam"}]). + should == ["chunks", ["bam"]] + + @qs.run(["list_row", {"key"=>"foom"}]). + should == ["end", ["foom", "early tail"]] + # here's where js has to discard quit properly + @qs.run(["reset"]). + should == true + end + end + end + + + +def should_have_exited qs + begin + qs.run(["reset"]) + "raise before this (except Erlang)".should == true + rescue RuntimeError => e + e.message.should == "no response" + rescue Errno::EPIPE + true.should == true + end +end + +describe "query server that exits" do + before(:each) do + @qs = QueryServerRunner.run + @ddoc = { + "_id" => "foo", + "lists" => { + "capped" => functions["list-capped"][LANGUAGE], + "raw" => functions["list-raw"][LANGUAGE] + }, + "shows" => { + "fatal" => functions["fatal"][LANGUAGE] + } + } + @qs.teach_ddoc(@ddoc) + end + after(:each) do + @qs.close + end + + describe "only goes to 2 list" do + it "should exit if erlang sends too many rows" do + @qs.ddoc_run(@ddoc, ["lists","capped"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["start", ["bacon"], {"headers"=>{}}] + @qs.run(["list_row", {"key"=>"baz"}]).should == ["chunks", ["baz"]] + @qs.run(["list_row", {"key"=>"foom"}]).should == ["chunks", ["foom"]] + @qs.run(["list_row", {"key"=>"fooz"}]).should == ["end", ["fooz", "early"]] + e = @qs.run(["list_row", {"key"=>"foox"}]) + e[0].should == "error" + e[1].should == "unknown_command" + should_have_exited @qs + end + end + + describe "raw list" do + it "should exit if it gets a non-row in the middle" do + @qs.ddoc_run(@ddoc, ["lists","raw"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["start", ["first chunk", "ok"], {"headers"=>{}}] + e = @qs.run(["reset"]) + e[0].should == "error" + e[1].should == "list_error" + should_have_exited @qs + end + end + + describe "fatal error" do + it "should exit" do + @qs.ddoc_run(@ddoc, ["shows","fatal"], + [{"foo"=>"bar"}, {"q" => "ok"}]). + should == ["error", "error_key", "testing"] + should_have_exited @qs + end + end +end + +describe "thank you for using the tests" do + it "for more info run with QS_TRACE=true or see query_server_spec.rb file header" do + end +end
\ No newline at end of file diff --git a/apps/couch/test/view_server/run_native_process.es b/apps/couch/test/view_server/run_native_process.es new file mode 100755 index 00000000..fcf16d75 --- /dev/null +++ b/apps/couch/test/view_server/run_native_process.es @@ -0,0 +1,59 @@ +#! /usr/bin/env escript + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +read() -> + case io:get_line('') of + eof -> stop; + Data -> couch_util:json_decode(Data) + end. + +send(Data) when is_binary(Data) -> + send(binary_to_list(Data)); +send(Data) when is_list(Data) -> + io:format(Data ++ "\n", []). + +write(Data) -> + % log("~p", [Data]), + case (catch couch_util:json_encode(Data)) of + % when testing, this is what prints your errors + {json_encode, Error} -> write({[{<<"error">>, Error}]}); + Json -> send(Json) + end. + +% log(Mesg) -> +% log(Mesg, []). +% log(Mesg, Params) -> +% io:format(standard_error, Mesg, Params). +% jlog(Mesg) -> +% write([<<"log">>, list_to_binary(io_lib:format("~p",[Mesg]))]). + +loop(Pid) -> + case read() of + stop -> ok; + Json -> + case (catch couch_native_process:prompt(Pid, Json)) of + {error, Reason} -> + ok = write([error, Reason, Reason]); + Resp -> + ok = write(Resp), + loop(Pid) + end + end. + +main([]) -> + code:add_pathz("src/couchdb"), + code:add_pathz("src/mochiweb"), + {ok, Pid} = couch_native_process:start_link(), + loop(Pid). + |