2 files changed, 147 insertions, 288 deletions
diff --git a/scripts/migrate_dbs.py b/scripts/migrate_dbs.py
deleted file mode 100644
index f1c20d87..00000000
--- a/scripts/migrate_dbs.py
+++ /dev/null
@@ -1,288 +0,0 @@
-#!/usr/bin/python
-
-import sys
-import json
-import logging
-import argparse
-import re
-import threading
-from urlparse import urlparse
-from ConfigParser import ConfigParser
-from couchdb.client import Server
-from couchdb.http import ResourceNotFound, Resource, Session
-from datetime import datetime
-
-from leap.soledad.common.couch import CouchDatabase
-
-
-# parse command line for the log file name
-logger_fname = "/tmp/u1db-couch-db-migration_%s.log" % \
-               str(datetime.now()).replace(' ', '_')
-parser = argparse.ArgumentParser()
-parser.add_argument('--log', action='store', default=logger_fname, type=str,
-                    required=False, help='the name of the log file', nargs=1)
-args = parser.parse_args()
-
-
-# configure the logger
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-print "Logging to %s." % args.log
-logging.basicConfig(
-    filename=args.log,
-    format="%(asctime)-15s %(message)s")
-
-
-# configure threads
-max_threads = 20
-semaphore_pool = threading.BoundedSemaphore(value=max_threads)
-
-# get couch url
-cp = ConfigParser()
-cp.read('/etc/leap/soledad-server.conf')
-url = cp.get('soledad-server', 'couch_url')
-
-resource = Resource(url, Session(retry_delays=[1,2,4,8], timeout=10))
-server = Server(url=resource)
-
-hidden_url = re.sub(
-    'http://(.*):.*@',
-    'http://\\1:xxxxx@',
-    url)
-
-print """
-==========
-ATTENTION!
-==========
-
-This script will modify Soledad's shared and user databases in:
-
-  %s
-
-This script does not make a backup of the couch db data, so make sure youj
-have a copy or you may loose data.
-""" % hidden_url
-confirm = raw_input("Proceed (type uppercase YES)? ")
-
-if confirm != "YES":
-    exit(1)
-
-
-#
-# Thread
-#
-
-class DocWorkerThread(threading.Thread):
-
-    def __init__(self, dbname, doc_id, db_idx, db_len, doc_idx, doc_len,
-                 transaction_log, conflict_log, release_fun):
-        threading.Thread.__init__(self)
-        resource = Resource(url, Session(retry_delays=[1,2,4,8], timeout=10))
-        server = Server(url=resource)
-        self._dbname = dbname
-        self._cdb = server[self._dbname]
-        self._doc_id = doc_id
-        self._db_idx = db_idx
-        self._db_len = db_len
-        self._doc_idx = doc_idx
-        self._doc_len = doc_len
-        self._transaction_log = transaction_log
-        self._conflict_log = conflict_log
-        self._release_fun = release_fun
-
-    def run(self):
-
-        old_doc = self._cdb[self._doc_id]
-
-        # skip non u1db docs
-        if 'u1db_rev' not in old_doc:
-            logger.debug('(%d/%d) (%d/%d) Skipping %s/%s).' %
-                         (self._db_idx, self._db_len, self._doc_idx,
-                          self._doc_len, self._dbname, self._doc_id))
-            self._release_fun()
-            return
-        else:
-            logger.debug('(%d/%d) (%d/%d) Processing %s/%s ...' %
-                         (self._db_idx, self._db_len, self._doc_idx,
-                          self._doc_len, self._dbname, self._doc_id))
-
-        doc = {
-            '_id': self._doc_id,
-            '_rev': old_doc['_rev'],
-            'u1db_rev': old_doc['u1db_rev']
-        }
-        attachments = []
-
-        # add transactions
-        doc['u1db_transactions'] = map(
-            lambda (gen, doc_id, trans_id): (gen, trans_id),
-            filter(
-                lambda (gen, doc_id, trans_id): doc_id == doc['_id'],
-                self._transaction_log))
-        if len(doc['u1db_transactions']) == 0:
-            del doc['u1db_transactions']
-
-        # add conflicts
-        if doc['_id'] in self._conflict_log:
-            attachments.append([
-                conflict_log[doc['_id']],
-                'u1db_conflicts',
-                "application/octet-stream"])
-
-        # move document's content to 'u1db_content' attachment
-        content = self._cdb.get_attachment(doc, 'u1db_json')
-        if content is not None:
-            attachments.append([
-                content,
-                'u1db_content',
-                "application/octet-stream"])
-        #self._cdb.delete_attachment(doc, 'u1db_json')
-
-        # save modified doc
-        self._cdb.save(doc)
-
-        # save all doc attachments
-        for content, att_name, content_type in attachments:
-            self._cdb.put_attachment(
-                doc,
-                content,
-                filename=att_name,
-                content_type=content_type)
-
-        # release the semaphore
-        self._release_fun()
-
-
-db_idx = 0
-db_len = len(server)
-for dbname in server:
-
-    db_idx += 1
-
-    if not (dbname.startswith('user-') or dbname == 'shared') \
-            or dbname == 'user-test-db':
-        logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname))
-        continue
-
-    logger.info("(%d/%d) Migrating db %s." % (db_idx, db_len, dbname))
-
-    # get access to couch db
-    cdb = Server(url)[dbname]
-
-    # get access to soledad db
-    sdb = CouchDatabase(url, dbname)
-
-    # Migration table
-    # ---------------
-    #
-    # * Metadata that was previously stored in special documents migrate to
-    #   inside documents, to allow for atomic doc-and-metadata updates.
-    # * Doc content attachment name changes.
-    # * Indexes are removed, to be implemented in the future possibly as
-    #   design docs view functions.
-    #
-    # +-----------------+-------------------------+-------------------------+
-    # | Data            | old storage             | new storage             |
-    # |-----------------+-------------------------+-------------------------+
-    # | doc content     | <doc_id>/u1db_json      | <doc_id>/u1db_content   |
-    # | doc conflicts   | u1db/_conflicts         | <doc_id>/u1db_conflicts |
-    # | transaction log | u1db/_transaction_log   | doc.u1db_transactions   |
-    # | sync log        | u1db/_other_generations | u1db_sync_log           |
-    # | indexes         | u1db/_indexes           | not implemented         |
-    # | replica uid     | u1db/_replica_uid       | u1db_config             |
-    # +-----------------+-------------------------+-------------------------+
-
-    def get_att_content(db, doc_id, att_name):
-        try:
-            return json.loads(
-                db.get_attachment(
-                    doc_id, att_name).read())['content']
-        except:
-            import ipdb
-            ipdb.set_trace()
-
-    # only migrate databases that have the 'u1db/_replica_uid' document
-    try:
-        metadoc = cdb.get('u1db/_replica_uid')
-        replica_uid = get_att_content(cdb, 'u1db/_replica_uid', 'u1db_json')
-    except ResourceNotFound:
-        continue
-
-    #---------------------------------------------------------------------
-    # Step 1: Set replica uid.
-    #---------------------------------------------------------------------
-    sdb._set_replica_uid(replica_uid)
-
-    #---------------------------------------------------------------------
-    # Step 2: Obtain metadata.
-    #---------------------------------------------------------------------
-
-    # obtain the transaction log: [['<doc_id>', '<trans_id>'], ...]
-    transaction_log = get_att_content(
-        cdb, 'u1db/_transaction_log', 'u1db_json')
-    new_transaction_log = []
-    gen = 1
-    for (doc_id, trans_id) in transaction_log:
-        new_transaction_log.append((gen, doc_id, trans_id))
-        gen += 1
-    transaction_log = new_transaction_log
-
-    # obtain the conflict log: {'<doc_id>': ['<rev>', '<content>'], ...}
-    conflict_log = get_att_content(cdb, 'u1db/_conflicts', 'u1db_json')
-
-    # obtain the sync log:
-    # {'<replica_uid>': ['<gen>', '<transaction_id>'], ...}
-    other_generations = get_att_content(
-        cdb, 'u1db/_other_generations', 'u1db_json')
-
-    #---------------------------------------------------------------------
-    # Step 3: Iterate over all documents in database.
-    #---------------------------------------------------------------------
-    doc_len = len(cdb)
-    logger.info("(%d, %d) Found %d documents." % (db_idx, db_len, doc_len))
-    doc_idx = 0
-    threads = []
-    for doc_id in cdb:
-        doc_idx = doc_idx + 1
-
-        semaphore_pool.acquire()
-        thread = DocWorkerThread(dbname, doc_id, db_idx, db_len,
-                                 doc_idx, doc_len, transaction_log,
-                                 conflict_log, semaphore_pool.release)
-        thread.daemon = True
-        thread.start()
-        threads.append(thread)
-
-    map(lambda thread: thread.join(), threads)
-
-    #---------------------------------------------------------------------
-    # Step 4: Move sync log.
-    #---------------------------------------------------------------------
-
-    # move sync log
-    sync_doc = {
-        '_id': 'u1db_sync_log',
-        'syncs': []
-    }
-
-    for replica_uid in other_generations:
-        gen, transaction_id = other_generations[replica_uid]
-        sync_doc['syncs'].append([replica_uid, gen, transaction_id])
-    cdb.save(sync_doc)
-
-    #---------------------------------------------------------------------
-    # Step 5: Delete old meta documents.
-    #---------------------------------------------------------------------
-
-    # remove unused docs
-    for doc_id in ['_transaction_log', '_conflicts', '_other_generations',
-            '_indexes', '_replica_uid']:
-        for prefix in ['u1db/', 'u1db%2F']:
-            try:
-                doc = cdb['%s%s' % (prefix, doc_id)]
-                logger.info(
-                    "(%d/%d) Deleting %s/%s/%s." %
-                    (db_idx, db_len, dbname, 'u1db', doc_id))
-                cdb.delete(doc)
-            except ResourceNotFound:
-                pass
diff --git a/scripts/update_design_docs.py b/scripts/update_design_docs.py
new file mode 100644
index 00000000..e7b5a29c
--- /dev/null
+++ b/scripts/update_design_docs.py
@@ -0,0 +1,147 @@
+#!/usr/bin/python
+
+# This script updates Soledad's design documents in the session database and
+# all user databases with contents from the installed leap.soledad.common
+# package.
+
+import json
+import logging
+import argparse
+import re
+import threading
+import binascii
+
+
+from getpass import getpass
+from ConfigParser import ConfigParser
+from couchdb.client import Server
+from couchdb.http import Resource, Session
+from datetime import datetime
+from urlparse import urlparse
+
+
+from leap.soledad.common import ddocs
+
+
+# parse command line for the log file name
+logger_fname = "/tmp/update-design-docs_%s.log" % \
+               str(datetime.now()).replace(' ', '_')
+parser = argparse.ArgumentParser()
+parser.add_argument('--log', action='store', default=logger_fname, type=str,
+                    required=False, help='the name of the log file', nargs=1)
+args = parser.parse_args()
+
+
+# configure the logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+print "Logging to %s." % args.log
+logging.basicConfig(
+    filename=args.log,
+    format="%(asctime)-15s %(message)s")
+
+
+# configure threads
+max_threads = 20
+semaphore_pool = threading.BoundedSemaphore(value=max_threads)
+threads = []
+
+# get couch url
+cp = ConfigParser()
+cp.read('/etc/leap/soledad-server.conf')
+url = urlparse(cp.get('soledad-server', 'couch_url'))
+
+# get admin password
+netloc = re.sub('^.*@', '', url.netloc)
+url = url._replace(netloc=netloc)
+password = getpass("Admin password for %s: " % url.geturl())
+url = url._replace(netloc='admin:%s@%s' % (password, netloc))
+
+resource = Resource(url.geturl(), Session(retry_delays=[1,2,4,8], timeout=10))
+server = Server(url=resource)
+
+hidden_url = re.sub(
+    'http://(.*):.*@',
+    'http://\\1:xxxxx@',
+    url.geturl())
+
+print """
+==========
+ATTENTION!
+==========
+
+This script will modify Soledad's shared and user databases in:
+
+  %s
+
+This script does not make a backup of the couch db data, so make sure you
+have a copy or you may loose data.
+""" % hidden_url
+confirm = raw_input("Proceed (type uppercase YES)? ")
+
+if confirm != "YES":
+    exit(1)
+
+# convert design doc content
+
+design_docs = {
+    '_design/docs': json.loads(binascii.a2b_base64(ddocs.docs)),
+    '_design/syncs': json.loads(binascii.a2b_base64(ddocs.syncs)),
+    '_design/transactions': json.loads(binascii.a2b_base64(ddocs.transactions)),
+}
+
+#
+# Thread
+#
+
+class DBWorkerThread(threading.Thread):
+
+    def __init__(self, server, dbname, db_idx, db_len, release_fun):
+        threading.Thread.__init__(self)
+        self._dbname = dbname
+        self._cdb = server[self._dbname]
+        self._db_idx = db_idx
+        self._db_len = db_len
+        self._release_fun = release_fun
+
+    def run(self):
+
+        logger.info("(%d/%d) Updating db %s." % (self._db_idx, self._db_len,
+                    self._dbname))
+
+        for doc_id in design_docs:
+            doc = self._cdb[doc_id]
+            for key in ['lists', 'views', 'updates']:
+                if key in design_docs[doc_id]:
+                    doc[key] = design_docs[doc_id][key]
+            self._cdb.save(doc)
+
+        # release the semaphore
+        self._release_fun()
+
+
+db_idx = 0
+db_len = len(server)
+for dbname in server:
+
+    db_idx += 1
+
+    if not (dbname.startswith('user-') or dbname == 'shared') \
+            or dbname == 'user-test-db':
+        logger.info("(%d/%d) Skipping db %s." % (db_idx, db_len, dbname))
+        continue
+
+
+    # get access to couch db
+    cdb = Server(url.geturl())[dbname]
+
+    #---------------------------------------------------------------------
+    # Start DB worker thread
+    #---------------------------------------------------------------------
+    semaphore_pool.acquire()
+    thread = DBWorkerThread(server, dbname, db_idx, db_len, semaphore_pool.release)
+    thread.daemon = True
+    thread.start()
+    threads.append(thread)
+
+map(lambda thread: thread.join(), threads)