From dbe5e37ef742617c93c7975a612582a77c7724a8 Mon Sep 17 00:00:00 2001 From: drebs Date: Sun, 16 Jun 2013 21:45:16 -0300 Subject: Split client and server in two different packages and refactor. --- soledad_server/pkg/soledad | 72 ++++ soledad_server/setup.py | 79 ++++ soledad_server/src/leap/__init__.py | 6 + soledad_server/src/leap/soledad_server/__init__.py | 393 +++++++++++++++++ soledad_server/src/leap/soledad_server/couch.py | 480 +++++++++++++++++++++ .../src/leap/soledad_server/objectstore.py | 296 +++++++++++++ 6 files changed, 1326 insertions(+) create mode 100644 soledad_server/pkg/soledad create mode 100644 soledad_server/setup.py create mode 100644 soledad_server/src/leap/__init__.py create mode 100644 soledad_server/src/leap/soledad_server/__init__.py create mode 100644 soledad_server/src/leap/soledad_server/couch.py create mode 100644 soledad_server/src/leap/soledad_server/objectstore.py (limited to 'soledad_server') diff --git a/soledad_server/pkg/soledad b/soledad_server/pkg/soledad new file mode 100644 index 00000000..c640a94d --- /dev/null +++ b/soledad_server/pkg/soledad @@ -0,0 +1,72 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: soledad +# Required-Start: $network $named $remote_fs $syslog $time +# Required-Stop: $network $named $remote_fs $syslog +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Start soledad daemon at boot time +# Description: Synchronization of locally encrypted data among devices +### END INIT INFO + +PATH=/sbin:/bin:/usr/sbin:/usr/bin +PIDFILE=/var/run/soledad.pid +RUNDIR=/var/lib/soledad/ +OBJ=leap.soledad.server.application +LOGFILE=/var/log/soledad.log +HTTPS_PORT=2424 +PLAIN_PORT=65534 +CERT_PATH=/etc/leap/soledad-server.pem +PRIVKEY_PATH=/etc/leap/soledad-server.pem +TWISTD_PATH=/usr/bin/twistd +HOME=/var/lib/soledad/ + +[ -r /etc/default/soledad ] && . /etc/default/soledad + +test -r /etc/leap/ || exit 0 + +. /lib/lsb/init-functions + + +case "$1" in + start) + echo -n "Starting soledad: twistd" + start-stop-daemon --start --quiet --exec $TWISTD_PATH -- \ + --pidfile=$PIDFILE \ + --logfile=$LOGFILE \ + web \ + --wsgi=$OBJ \ + --https=$HTTPS_PORT \ + --certificate=$CERT_PATH \ + --privkey=$PRIVKEY_PATH \ + --port=$PLAIN_PORT + echo "." + ;; + + stop) + echo -n "Stopping soledad: twistd" + start-stop-daemon --stop --quiet \ + --pidfile $PIDFILE + echo "." + ;; + + restart) + $0 stop + $0 start + ;; + + force-reload) + $0 restart + ;; + + status) + status_of_proc -p $PIDFILE $TWISTD_PATH soledad && exit 0 || exit $? + ;; + + *) + echo "Usage: /etc/init.d/soledad {start|stop|restart|force-reload|status}" >&2 + exit 1 + ;; +esac + +exit 0 diff --git a/soledad_server/setup.py b/soledad_server/setup.py new file mode 100644 index 00000000..522c86ff --- /dev/null +++ b/soledad_server/setup.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# setup.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +import os +from setuptools import ( + setup, + find_packages +) + + +install_requirements = [ + 'configparser', + 'couchdb', + 'simplejson', + 'twisted>=12.0.0', # TODO: maybe we just want twisted-web? + 'oauth', # this is not strictly needed by us, but we need it + # until u1db adds it to its release as a dep. + 'u1db', + 'six==1.1.0', + 'routes', + 'PyOpenSSL', +] + + +if os.environ.get('VIRTUAL_ENV', None): + data_files = None +else: + # XXX this should go only for linux/mac + data_files = [("/etc/init.d/", ["pkg/soledad"])] + +trove_classifiers = ( + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: " + "GNU General Public License v3 or later (GPLv3+)", + "Environment :: Console", + "Operating System :: OS Independent", + "Operating System :: POSIX", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Topic :: Database :: Front-Ends", + "Topic :: Software Development :: Libraries :: Python Modules" +) + +setup( + name='leap.soledad_server', + version='0.1.1', + url='https://leap.se/', + license='GPLv3+', + description='Synchronization of locally encrypted data among devices.', + author='The LEAP Encryption Access Project', + author_email='info@leap.se', + long_description=( + "Soledad is the part of LEAP that allows application data to be " + "securely shared among devices. It provides, to other parts of the " + "LEAP client, an API for data storage and sync." + ), + namespace_packages=["leap"], + packages=find_packages('src'), + package_dir={'': 'src'}, + install_requires=install_requirements, + data_files=data_files, + classifiers=trove_classifiers, +) diff --git a/soledad_server/src/leap/__init__.py b/soledad_server/src/leap/__init__.py new file mode 100644 index 00000000..f48ad105 --- /dev/null +++ b/soledad_server/src/leap/__init__.py @@ -0,0 +1,6 @@ +# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages +try: + __import__('pkg_resources').declare_namespace(__name__) +except ImportError: + from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/soledad_server/src/leap/soledad_server/__init__.py b/soledad_server/src/leap/soledad_server/__init__.py new file mode 100644 index 00000000..bea5d5fd --- /dev/null +++ b/soledad_server/src/leap/soledad_server/__init__.py @@ -0,0 +1,393 @@ +# -*- coding: utf-8 -*- +# server.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +A U1DB server that stores data using CouchDB as its persistence layer. + +This should be run with: + twistd -n web --wsgi=leap.soledad_server.application --port=2424 +""" + +import configparser +import httplib +import simplejson as json + + +from hashlib import sha256 +from routes.mapper import Mapper +from u1db import DBNAME_CONSTRAINTS +from u1db.remote import http_app + + +# Keep OpenSSL's tsafe before importing Twisted submodules so we can put +# it back if Twisted==12.0.0 messes with it. +from OpenSSL import tsafe +old_tsafe = tsafe + +from twisted.web.wsgi import WSGIResource +from twisted.internet import reactor +from twisted.python import log + +from twisted import version +if version.base() == "12.0.0": + # Put OpenSSL's tsafe back into place. This can probably be removed if we + # come to use Twisted>=12.3.0. + import sys + sys.modules['OpenSSL.tsafe'] = old_tsafe + +from couchdb.client import Server + +from leap.soledad import SECRETS_DOC_ID_HASH_PREFIX +from leap.soledad_server.couch import CouchServerState + + +#----------------------------------------------------------------------------- +# Authentication +#----------------------------------------------------------------------------- + +class Unauthorized(Exception): + """ + User authentication failed. + """ + + +class URLToAuth(object): + """ + Verify if actions can be performed by a user. + """ + + HTTP_METHOD_GET = 'GET' + HTTP_METHOD_PUT = 'PUT' + HTTP_METHOD_DELETE = 'DELETE' + HTTP_METHOD_POST = 'POST' + + def __init__(self, uuid): + """ + Initialize the mapper. + + The C{uuid} is used to create the rules that will either allow or + disallow the user to perform specific actions. + + @param uuid: The user uuid. + @type uuid: str + """ + self._map = Mapper(controller_scan=None) + self._register_auth_info(self._uuid_dbname(uuid)) + + def is_authorized(self, environ): + """ + Return whether an HTTP request that produced the CGI C{environ} + corresponds to an authorized action. + + @param environ: Dictionary containing CGI variables. + @type environ: dict + + @return: Whether the action is authorized or not. + @rtype: bool + """ + return self._map.match(environ=environ) is not None + + def _register(self, pattern, http_methods): + """ + Register a C{pattern} in the mapper as valid for C{http_methods}. + + @param pattern: The URL pattern that corresponds to the user action. + @type pattern: str + @param http_methods: A list of authorized HTTP methods. + @type http_methods: list of str + """ + self._map.connect( + None, pattern, http_methods=http_methods, + conditions=dict(method=http_methods), + requirements={'dbname': DBNAME_CONSTRAINTS}) + + def _uuid_dbname(self, uuid): + """ + Return the database name corresponding to C{uuid}. + + @param uuid: The user uid. + @type uuid: str + + @return: The database name corresponding to C{uuid}. + @rtype: str + """ + return '%s%s' % (SoledadApp.USER_DB_PREFIX, uuid) + + def _register_auth_info(self, dbname): + """ + Register the authorization info in the mapper using C{dbname} as the + user's database name. + + This method sets up the following authorization rules: + + URL path | Authorized actions + -------------------------------------------------- + / | GET + /shared-db | GET + /shared-db/docs | - + /shared-db/doc/{id} | GET, PUT, DELETE + /shared-db/sync-from/{source} | - + /user-db | GET, PUT, DELETE + /user-db/docs | - + /user-db/doc/{id} | - + /user-db/sync-from/{source} | GET, PUT, POST + + @param dbname: The name of the user's database. + @type dbname: str + """ + # auth info for global resource + self._register('/', [self.HTTP_METHOD_GET]) + # auth info for shared-db database resource + self._register( + '/%s' % SoledadApp.SHARED_DB_NAME, + [self.HTTP_METHOD_GET]) + # auth info for shared-db doc resource + self._register( + '/%s/doc/{id:.*}' % SoledadApp.SHARED_DB_NAME, + [self.HTTP_METHOD_GET, self.HTTP_METHOD_PUT, + self.HTTP_METHOD_DELETE]) + # auth info for user-db database resource + self._register( + '/%s' % dbname, + [self.HTTP_METHOD_GET, self.HTTP_METHOD_PUT, + self.HTTP_METHOD_DELETE]) + # auth info for user-db sync resource + self._register( + '/%s/sync-from/{source_replica_uid}' % dbname, + [self.HTTP_METHOD_GET, self.HTTP_METHOD_PUT, + self.HTTP_METHOD_POST]) + # generate the regular expressions + self._map.create_regs() + + +class SoledadAuthMiddleware(object): + """ + Soledad Authentication WSGI middleware. + + In general, databases are accessed using a token provided by the LEAP API. + Some special databases can be read without authentication. + """ + + TOKENS_DB = "tokens" + TOKENS_TYPE_KEY = "type" + TOKENS_TYPE_DEF = "Token" + TOKENS_USER_ID_KEY = "user_id" + + HTTP_AUTH_KEY = "HTTP_AUTHORIZATION" + PATH_INFO_KEY = "PATH_INFO" + + CONTENT_TYPE_JSON = ('content-type', 'application/json') + + def __init__(self, app): + """ + Initialize the Soledad Authentication Middleware. + + @param app: The application to run on successfull authentication. + @type app: u1db.remote.http_app.HTTPApp + @param prefix: Auth app path prefix. + @type prefix: str + """ + self._app = app + + def _error(self, start_response, status, description, message=None): + """ + Send a JSON serialized error to WSGI client. + + @param start_response: Callable of the form start_response(status, + response_headers, exc_info=None). + @type start_response: callable + @param status: Status string of the form "999 Message here" + @type status: str + @param response_headers: A list of (header_name, header_value) tuples + describing the HTTP response header. + @type response_headers: list + @param description: The error description. + @type description: str + @param message: The error message. + @type message: str + + @return: List with JSON serialized error message. + @rtype list + """ + start_response("%d %s" % (status, httplib.responses[status]), + [self.CONTENT_TYPE_JSON]) + err = {"error": description} + if message: + err['message'] = message + return [json.dumps(err)] + + def __call__(self, environ, start_response): + """ + Handle a WSGI call to the authentication application. + + @param environ: Dictionary containing CGI variables. + @type environ: dict + @param start_response: Callable of the form start_response(status, + response_headers, exc_info=None). + @type start_response: callable + + @return: Target application results if authentication succeeds, an + error message otherwise. + @rtype: list + """ + unauth_err = lambda msg: self._error(start_response, + 401, + "unauthorized", + msg) + + auth = environ.get(self.HTTP_AUTH_KEY) + if not auth: + return unauth_err("Missing Token Authentication.") + + scheme, encoded = auth.split(None, 1) + if scheme.lower() != 'token': + return unauth_err("Missing Token Authentication") + + uuid, token = encoded.decode('base64').split(':', 1) + if not self.verify_token(environ, uuid, token): + return unauth_err("Incorrect address or token.") + + if not self.verify_action(uuid, environ): + return unauth_err("Unauthorized action.") + + del environ[self.HTTP_AUTH_KEY] + + return self._app(environ, start_response) + + def verify_token(self, environ, uuid, token): + """ + Verify if token is valid for authenticating this request. + + @param environ: Dictionary containing CGI variables. + @type environ: dict + @param uuid: The user's uuid. + @type uuid: str + @param token: The authentication token. + @type token: str + + @return: Whether the token is valid for authenticating the request. + @rtype: bool + """ + + server = Server(url=self._app.state.couch_url) + try: + dbname = self.TOKENS_DB + db = server[dbname] + token = db.get(token) + if token is None: + return False + return token[self.TOKENS_TYPE_KEY] == self.TOKENS_TYPE_DEF and \ + token[self.TOKENS_USER_ID_KEY] == uuid + except Exception as e: + log.err(e) + return False + return True + + def verify_action(self, uuid, environ): + """ + Verify if the user is authorized to perform the requested action over + the requested database. + + @param uuid: The user's uuid. + @type uuid: str + @param environ: Dictionary containing CGI variables. + @type environ: dict + + @return: Whether the user is authorize to perform the requested action + over the requested db. + @rtype: bool + """ + return URLToAuth(uuid).is_authorized(environ) + + +#----------------------------------------------------------------------------- +# Soledad WSGI application +#----------------------------------------------------------------------------- + +class SoledadApp(http_app.HTTPApp): + """ + Soledad WSGI application + """ + + SHARED_DB_NAME = 'shared' + """ + The name of the shared database that holds user's encrypted secrets. + """ + + USER_DB_PREFIX = 'uuid-' + """ + The string prefix of users' databases. + """ + + def __call__(self, environ, start_response): + """ + Handle a WSGI call to the Soledad application. + + @param environ: Dictionary containing CGI variables. + @type environ: dict + @param start_response: Callable of the form start_response(status, + response_headers, exc_info=None). + @type start_response: callable + + @return: HTTP application results. + @rtype: list + """ + # ensure the shared database exists + self.state.ensure_database(self.SHARED_DB_NAME) + return http_app.HTTPApp.__call__(self, environ, start_response) + + +#----------------------------------------------------------------------------- +# Auxiliary functions +#----------------------------------------------------------------------------- + +def load_configuration(file_path): + """ + Load server configuration from file. + + @param file_path: The path to the configuration file. + @type file_path: str + + @return: A dictionary with the configuration. + @rtype: dict + """ + conf = { + 'couch_url': 'http://localhost:5984', + } + config = configparser.ConfigParser() + config.read(file_path) + if 'soledad-server' in config: + for key in conf: + if key in config['soledad-server']: + conf[key] = config['soledad-server'][key] + # TODO: implement basic parsing/sanitization of options comming from + # config file. + return conf + + +#----------------------------------------------------------------------------- +# Run as Twisted WSGI Resource +#----------------------------------------------------------------------------- + +conf = load_configuration('/etc/leap/soledad-server.conf') +state = CouchServerState(conf['couch_url']) + +# WSGI application that may be used by `twistd -web` +application = SoledadAuthMiddleware(SoledadApp(state)) + +resource = WSGIResource(reactor, reactor.getThreadPool(), application) diff --git a/soledad_server/src/leap/soledad_server/couch.py b/soledad_server/src/leap/soledad_server/couch.py new file mode 100644 index 00000000..ed5ad6b3 --- /dev/null +++ b/soledad_server/src/leap/soledad_server/couch.py @@ -0,0 +1,480 @@ +# -*- coding: utf-8 -*- +# couch.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +"""A U1DB backend that uses CouchDB as its persistence layer.""" + +# general imports +import uuid +import re +import simplejson as json + + +from base64 import b64encode, b64decode +from u1db import errors +from u1db.sync import Synchronizer +from u1db.backends.inmemory import InMemoryIndex +from u1db.remote.server_state import ServerState +from u1db.errors import DatabaseDoesNotExist +from couchdb.client import Server, Document as CouchDocument +from couchdb.http import ResourceNotFound + + +from leap.soledad_server.objectstore import ( + ObjectStoreDatabase, + ObjectStoreSyncTarget, +) + + +class InvalidURLError(Exception): + """ + Exception raised when Soledad encounters a malformed URL. + """ + + +class CouchDatabase(ObjectStoreDatabase): + """ + A U1DB backend that uses Couch as its persistence layer. + """ + + U1DB_TRANSACTION_LOG_KEY = 'transaction_log' + U1DB_CONFLICTS_KEY = 'conflicts' + U1DB_OTHER_GENERATIONS_KEY = 'other_generations' + U1DB_INDEXES_KEY = 'indexes' + U1DB_REPLICA_UID_KEY = 'replica_uid' + + COUCH_ID_KEY = '_id' + COUCH_REV_KEY = '_rev' + COUCH_U1DB_ATTACHMENT_KEY = 'u1db_json' + COUCH_U1DB_REV_KEY = 'u1db_rev' + + @classmethod + def open_database(cls, url, create): + """ + Open a U1DB database using CouchDB as backend. + + @param url: the url of the database replica + @type url: str + @param create: should the replica be created if it does not exist? + @type create: bool + + @return: the database instance + @rtype: CouchDatabase + """ + # get database from url + m = re.match('(^https?://[^/]+)/(.+)$', url) + if not m: + raise InvalidURLError + url = m.group(1) + dbname = m.group(2) + server = Server(url=url) + try: + server[dbname] + except ResourceNotFound: + if not create: + raise DatabaseDoesNotExist() + return cls(url, dbname) + + def __init__(self, url, dbname, replica_uid=None, full_commit=True, + session=None): + """ + Create a new Couch data container. + + @param url: the url of the couch database + @type url: str + @param dbname: the database name + @type dbname: str + @param replica_uid: an optional unique replica identifier + @type replica_uid: str + @param full_commit: turn on the X-Couch-Full-Commit header + @type full_commit: bool + @param session: an http.Session instance or None for a default session + @type session: http.Session + """ + self._url = url + self._full_commit = full_commit + self._session = session + self._server = Server(url=self._url, + full_commit=self._full_commit, + session=self._session) + self._dbname = dbname + # this will ensure that transaction and sync logs exist and are + # up-to-date. + try: + self._database = self._server[self._dbname] + except ResourceNotFound: + self._server.create(self._dbname) + self._database = self._server[self._dbname] + ObjectStoreDatabase.__init__(self, replica_uid=replica_uid) + + #------------------------------------------------------------------------- + # methods from Database + #------------------------------------------------------------------------- + + def _get_doc(self, doc_id, check_for_conflicts=False): + """ + Get just the document content, without fancy handling. + + @param doc_id: The unique document identifier + @type doc_id: str + @param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + @type include_deleted: bool + + @return: a Document object. + @type: u1db.Document + """ + cdoc = self._database.get(doc_id) + if cdoc is None: + return None + has_conflicts = False + if check_for_conflicts: + has_conflicts = self._has_conflicts(doc_id) + doc = self._factory( + doc_id=doc_id, + rev=cdoc[self.COUCH_U1DB_REV_KEY], + has_conflicts=has_conflicts) + contents = self._database.get_attachment( + cdoc, + self.COUCH_U1DB_ATTACHMENT_KEY) + if contents: + doc.content = json.loads(contents.read()) + else: + doc.make_tombstone() + return doc + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + @param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + @type include_deleted: bool + + @return: (generation, [Document]) + The current generation of the database, followed by a list of all + the documents in the database. + @rtype: tuple + """ + generation = self._get_generation() + results = [] + for doc_id in self._database: + if doc_id == self.U1DB_DATA_DOC_ID: + continue + doc = self._get_doc(doc_id, check_for_conflicts=True) + if doc.content is None and not include_deleted: + continue + results.append(doc) + return (generation, results) + + def _put_doc(self, doc): + """ + Update a document. + + This is called everytime we just want to do a raw put on the db (i.e. + without index updates, document constraint checks, and conflict + checks). + + @param doc: The document to update. + @type doc: u1db.Document + + @return: The new revision identifier for the document. + @rtype: str + """ + # prepare couch's Document + cdoc = CouchDocument() + cdoc[self.COUCH_ID_KEY] = doc.doc_id + # we have to guarantee that couch's _rev is consistent + old_cdoc = self._database.get(doc.doc_id) + if old_cdoc is not None: + cdoc[self.COUCH_REV_KEY] = old_cdoc[self.COUCH_REV_KEY] + # store u1db's rev + cdoc[self.COUCH_U1DB_REV_KEY] = doc.rev + # save doc in db + self._database.save(cdoc) + # store u1db's content as json string + if not doc.is_tombstone(): + self._database.put_attachment( + cdoc, doc.get_json(), + filename=self.COUCH_U1DB_ATTACHMENT_KEY) + else: + self._database.delete_attachment( + cdoc, + self.COUCH_U1DB_ATTACHMENT_KEY) + + def get_sync_target(self): + """ + Return a SyncTarget object, for another u1db to synchronize with. + + @return: The sync target. + @rtype: CouchSyncTarget + """ + return CouchSyncTarget(self) + + def create_index(self, index_name, *index_expressions): + """ + Create a named index, which can then be queried for future lookups. + + @param index_name: A unique name which can be used as a key prefix. + @param index_expressions: Index expressions defining the index + information. + """ + if index_name in self._indexes: + if self._indexes[index_name]._definition == list( + index_expressions): + return + raise errors.IndexNameTakenError + index = InMemoryIndex(index_name, list(index_expressions)) + for doc_id in self._database: + if doc_id == self.U1DB_DATA_DOC_ID: # skip special file + continue + doc = self._get_doc(doc_id) + if doc.content is not None: + index.add_json(doc_id, doc.get_json()) + self._indexes[index_name] = index + # save data in object store + self._store_u1db_data() + + def close(self): + """ + Release any resources associated with this database. + + @return: True if db was succesfully closed. + @rtype: bool + """ + # TODO: fix this method so the connection is properly closed and + # test_close (+tearDown, which deletes the db) works without problems. + self._url = None + self._full_commit = None + self._session = None + #self._server = None + self._database = None + return True + + def sync(self, url, creds=None, autocreate=True): + """ + Synchronize documents with remote replica exposed at url. + + @param url: The url of the target replica to sync with. + @type url: str + @param creds: optional dictionary giving credentials. + to authorize the operation with the server. + @type creds: dict + @param autocreate: Ask the target to create the db if non-existent. + @type autocreate: bool + + @return: The local generation before the synchronisation was performed. + @rtype: int + """ + return Synchronizer(self, CouchSyncTarget(url, creds=creds)).sync( + autocreate=autocreate) + + #------------------------------------------------------------------------- + # methods from ObjectStoreDatabase + #------------------------------------------------------------------------- + + def _init_u1db_data(self): + """ + Initialize U1DB info data structure in the couch db. + + A U1DB database needs to keep track of all database transactions, + document conflicts, the generation of other replicas it has seen, + indexes created by users and so on. + + In this implementation, all this information is stored in a special + document stored in the couch db with id equals to + CouchDatabse.U1DB_DATA_DOC_ID. + + This method initializes the document that will hold such information. + """ + if self._replica_uid is None: + self._replica_uid = uuid.uuid4().hex + # TODO: prevent user from overwriting a document with the same doc_id + # as this one. + doc = self._factory(doc_id=self.U1DB_DATA_DOC_ID) + doc.content = { + self.U1DB_TRANSACTION_LOG_KEY: b64encode(json.dumps([])), + self.U1DB_CONFLICTS_KEY: b64encode(json.dumps({})), + self.U1DB_OTHER_GENERATIONS_KEY: b64encode(json.dumps({})), + self.U1DB_INDEXES_KEY: b64encode(json.dumps({})), + self.U1DB_REPLICA_UID_KEY: b64encode(self._replica_uid), + } + self._put_doc(doc) + + def _fetch_u1db_data(self): + """ + Fetch U1DB info from the couch db. + + See C{_init_u1db_data} documentation. + """ + # retrieve u1db data from couch db + cdoc = self._database.get(self.U1DB_DATA_DOC_ID) + jsonstr = self._database.get_attachment( + cdoc, self.COUCH_U1DB_ATTACHMENT_KEY).read() + content = json.loads(jsonstr) + # set u1db database info + self._transaction_log = json.loads( + b64decode(content[self.U1DB_TRANSACTION_LOG_KEY])) + self._conflicts = json.loads( + b64decode(content[self.U1DB_CONFLICTS_KEY])) + self._other_generations = json.loads( + b64decode(content[self.U1DB_OTHER_GENERATIONS_KEY])) + self._indexes = self._load_indexes_from_json( + b64decode(content[self.U1DB_INDEXES_KEY])) + self._replica_uid = b64decode(content[self.U1DB_REPLICA_UID_KEY]) + # save couch _rev + self._couch_rev = cdoc[self.COUCH_REV_KEY] + + def _store_u1db_data(self): + """ + Store U1DB info in the couch db. + + See C{_init_u1db_data} documentation. + """ + doc = self._factory(doc_id=self.U1DB_DATA_DOC_ID) + doc.content = { + # Here, the b64 encode ensures that document content + # does not cause strange behaviour in couchdb because + # of encoding. + self.U1DB_TRANSACTION_LOG_KEY: + b64encode(json.dumps(self._transaction_log)), + self.U1DB_CONFLICTS_KEY: b64encode(json.dumps(self._conflicts)), + self.U1DB_OTHER_GENERATIONS_KEY: + b64encode(json.dumps(self._other_generations)), + self.U1DB_INDEXES_KEY: b64encode(self._dump_indexes_as_json()), + self.U1DB_REPLICA_UID_KEY: b64encode(self._replica_uid), + self.COUCH_REV_KEY: self._couch_rev} + self._put_doc(doc) + + #------------------------------------------------------------------------- + # Couch specific methods + #------------------------------------------------------------------------- + + INDEX_NAME_KEY = 'name' + INDEX_DEFINITION_KEY = 'definition' + INDEX_VALUES_KEY = 'values' + + def delete_database(self): + """ + Delete a U1DB CouchDB database. + """ + del(self._server[self._dbname]) + + def _dump_indexes_as_json(self): + """ + Dump index definitions as JSON string. + """ + indexes = {} + for name, idx in self._indexes.iteritems(): + indexes[name] = {} + for attr in [self.INDEX_NAME_KEY, self.INDEX_DEFINITION_KEY, + self.INDEX_VALUES_KEY]: + indexes[name][attr] = getattr(idx, '_' + attr) + return json.dumps(indexes) + + def _load_indexes_from_json(self, indexes): + """ + Load index definitions from JSON string. + + @param indexes: A JSON serialization of a list of [('index-name', + ['field', 'field2'])]. + @type indexes: str + + @return: A dictionary with the index definitions. + @rtype: dict + """ + dict = {} + for name, idx_dict in json.loads(indexes).iteritems(): + idx = InMemoryIndex(name, idx_dict[self.INDEX_DEFINITION_KEY]) + idx._values = idx_dict[self.INDEX_VALUES_KEY] + dict[name] = idx + return dict + + +class CouchSyncTarget(ObjectStoreSyncTarget): + """ + Functionality for using a CouchDatabase as a synchronization target. + """ + + +class CouchServerState(ServerState): + """ + Inteface of the WSGI server with the CouchDB backend. + """ + + def __init__(self, couch_url): + self._couch_url = couch_url + + def open_database(self, dbname): + """ + Open a couch database. + + @param dbname: The name of the database to open. + @type dbname: str + + @return: The CouchDatabase object. + @rtype: CouchDatabase + """ + # TODO: open couch + return CouchDatabase.open_database( + self._couch_url + '/' + dbname, + create=False) + + def ensure_database(self, dbname): + """ + Ensure couch database exists. + + @param dbname: The name of the database to ensure. + @type dbname: str + + @return: The CouchDatabase object and the replica uid. + @rtype: (CouchDatabase, str) + """ + db = CouchDatabase.open_database( + self._couch_url + '/' + dbname, + create=True) + return db, db._replica_uid + + def delete_database(self, dbname): + """ + Delete couch database. + + @param dbname: The name of the database to delete. + @type dbname: str + """ + CouchDatabase.delete_database(self._couch_url + '/' + dbname) + + def _set_couch_url(self, url): + """ + Set the couchdb URL + + @param url: CouchDB URL + @type url: str + """ + self._couch_url = url + + def _get_couch_url(self): + """ + Return CouchDB URL + + @rtype: str + """ + return self._couch_url + + couch_url = property(_get_couch_url, _set_couch_url, doc='CouchDB URL') diff --git a/soledad_server/src/leap/soledad_server/objectstore.py b/soledad_server/src/leap/soledad_server/objectstore.py new file mode 100644 index 00000000..8afac3ec --- /dev/null +++ b/soledad_server/src/leap/soledad_server/objectstore.py @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- +# objectstore.py +# Copyright (C) 2013 LEAP +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +""" +Abstract U1DB backend to handle storage using object stores (like CouchDB, for +example). + +Right now, this is only used by CouchDatabase backend, but can also be +extended to implement OpenStack or Amazon S3 storage, for example. + +See U1DB documentation for more information on how to use databases. +""" + +from u1db.backends.inmemory import ( + InMemoryDatabase, + InMemorySyncTarget, +) +from u1db import errors + + +class ObjectStoreDatabase(InMemoryDatabase): + """ + A backend for storing u1db data in an object store. + """ + + @classmethod + def open_database(cls, url, create, document_factory=None): + """ + Open a U1DB database using an object store as backend. + + @param url: the url of the database replica + @type url: str + @param create: should the replica be created if it does not exist? + @type create: bool + @param document_factory: A function that will be called with the same + parameters as Document.__init__. + @type document_factory: callable + + @return: the database instance + @rtype: CouchDatabase + """ + raise NotImplementedError(cls.open_database) + + def __init__(self, replica_uid=None, document_factory=None): + """ + Initialize the object store database. + + @param replica_uid: an optional unique replica identifier + @type replica_uid: str + @param document_factory: A function that will be called with the same + parameters as Document.__init__. + @type document_factory: callable + """ + InMemoryDatabase.__init__( + self, + replica_uid, + document_factory=document_factory) + # sync data in memory with data in object store + if not self._get_doc(self.U1DB_DATA_DOC_ID): + self._init_u1db_data() + self._fetch_u1db_data() + + #------------------------------------------------------------------------- + # methods from Database + #------------------------------------------------------------------------- + + def _set_replica_uid(self, replica_uid): + """ + Force the replica_uid to be set. + + @param replica_uid: The uid of the replica. + @type replica_uid: str + """ + InMemoryDatabase._set_replica_uid(self, replica_uid) + self._store_u1db_data() + + def _put_doc(self, doc): + """ + Update a document. + + This is called everytime we just want to do a raw put on the db (i.e. + without index updates, document constraint checks, and conflict + checks). + + @param doc: The document to update. + @type doc: u1db.Document + + @return: The new revision identifier for the document. + @rtype: str + """ + raise NotImplementedError(self._put_doc) + + def _get_doc(self, doc_id): + """ + Get just the document content, without fancy handling. + + @param doc_id: The unique document identifier + @type doc_id: str + @param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise asking for a deleted + document will return None. + @type include_deleted: bool + + @return: a Document object. + @type: u1db.Document + """ + raise NotImplementedError(self._get_doc) + + def get_all_docs(self, include_deleted=False): + """ + Get the JSON content for all documents in the database. + + @param include_deleted: If set to True, deleted documents will be + returned with empty content. Otherwise deleted documents will not + be included in the results. + @type include_deleted: bool + + @return: (generation, [Document]) + The current generation of the database, followed by a list of all + the documents in the database. + @rtype: tuple + """ + raise NotImplementedError(self.get_all_docs) + + def delete_doc(self, doc): + """ + Mark a document as deleted. + + @param doc: The document to mark as deleted. + @type doc: u1db.Document + + @return: The new revision id of the document. + @type: str + """ + old_doc = self._get_doc(doc.doc_id, check_for_conflicts=True) + if old_doc is None: + raise errors.DocumentDoesNotExist + if old_doc.rev != doc.rev: + raise errors.RevisionConflict() + if old_doc.is_tombstone(): + raise errors.DocumentAlreadyDeleted + if old_doc.has_conflicts: + raise errors.ConflictedDoc() + new_rev = self._allocate_doc_rev(doc.rev) + doc.rev = new_rev + doc.make_tombstone() + self._put_and_update_indexes(old_doc, doc) + return new_rev + + # index-related methods + + def create_index(self, index_name, *index_expressions): + """ + Create a named index, which can then be queried for future lookups. + + See U1DB documentation for more information. + + @param index_name: A unique name which can be used as a key prefix. + @param index_expressions: Index expressions defining the index + information. + """ + raise NotImplementedError(self.create_index) + + def delete_index(self, index_name): + """ + Remove a named index. + + Here we just guarantee that the new info will be stored in the backend + db after update. + + @param index_name: The name of the index we are removing. + @type index_name: str + """ + InMemoryDatabase.delete_index(self, index_name) + self._store_u1db_data() + + def _replace_conflicts(self, doc, conflicts): + """ + Set new conflicts for a document. + + Here we just guarantee that the new info will be stored in the backend + db after update. + + @param doc: The document with a new set of conflicts. + @param conflicts: The new set of conflicts. + @type conflicts: list + """ + InMemoryDatabase._replace_conflicts(self, doc, conflicts) + self._store_u1db_data() + + def _do_set_replica_gen_and_trans_id(self, other_replica_uid, + other_generation, + other_transaction_id): + """ + Set the last-known generation and transaction id for the other + database replica. + + Here we just guarantee that the new info will be stored in the backend + db after update. + + @param other_replica_uid: The U1DB identifier for the other replica. + @type other_replica_uid: str + @param other_generation: The generation number for the other replica. + @type other_generation: int + @param other_transaction_id: The transaction id associated with the + generation. + @type other_transaction_id: str + """ + InMemoryDatabase._do_set_replica_gen_and_trans_id( + self, + other_replica_uid, + other_generation, + other_transaction_id) + self._store_u1db_data() + + #------------------------------------------------------------------------- + # implemented methods from CommonBackend + #------------------------------------------------------------------------- + + def _put_and_update_indexes(self, old_doc, doc): + """ + Update a document and all indexes related to it. + + @param old_doc: The old version of the document. + @type old_doc: u1db.Document + @param doc: The new version of the document. + @type doc: u1db.Document + """ + for index in self._indexes.itervalues(): + if old_doc is not None and not old_doc.is_tombstone(): + index.remove_json(old_doc.doc_id, old_doc.get_json()) + if not doc.is_tombstone(): + index.add_json(doc.doc_id, doc.get_json()) + trans_id = self._allocate_transaction_id() + self._put_doc(doc) + self._transaction_log.append((doc.doc_id, trans_id)) + self._store_u1db_data() + + #------------------------------------------------------------------------- + # methods specific for object stores + #------------------------------------------------------------------------- + + U1DB_DATA_DOC_ID = 'u1db_data' + + def _fetch_u1db_data(self): + """ + Fetch u1db configuration data from backend storage. + + See C{_init_u1db_data} documentation. + """ + NotImplementedError(self._fetch_u1db_data) + + def _store_u1db_data(self): + """ + Store u1db configuration data on backend storage. + + See C{_init_u1db_data} documentation. + """ + NotImplementedError(self._store_u1db_data) + + def _init_u1db_data(self): + """ + Initialize u1db configuration data on backend storage. + + A U1DB database needs to keep track of all database transactions, + document conflicts, the generation of other replicas it has seen, + indexes created by users and so on. + + In this implementation, all this information is stored in a special + document stored in the couch db with id equals to + CouchDatabse.U1DB_DATA_DOC_ID. + + This method initializes the document that will hold such information. + """ + NotImplementedError(self._init_u1db_data) + + +class ObjectStoreSyncTarget(InMemorySyncTarget): + """ + Functionality for using an ObjectStore as a synchronization target. + """ -- cgit v1.2.3