summaryrefslogtreecommitdiff
path: root/scripts/migration
diff options
context:
space:
mode:
authordrebs <drebs@leap.se>2016-07-29 10:01:46 -0300
committerdrebs <drebs@leap.se>2016-08-01 21:09:04 -0300
commitbc28ea6e652418791dcf63fadcc81db9c50e2d45 (patch)
tree8ee54c15dc636c4ab347b5387ea3da268ebf7259 /scripts/migration
parent72ee56e3863729b148cf8cc16e4004dc7b52acdd (diff)
[pkg] add couch schema migration script
Diffstat (limited to 'scripts/migration')
-rw-r--r--scripts/migration/0.8.2/README.md73
-rw-r--r--scripts/migration/0.8.2/log/.empty0
-rwxr-xr-xscripts/migration/0.8.2/migrate.py77
-rw-r--r--scripts/migration/0.8.2/migrate_couch_schema/__init__.py142
-rw-r--r--scripts/migration/0.8.2/setup.py8
-rw-r--r--scripts/migration/0.8.2/tests/conftest.py46
-rw-r--r--scripts/migration/0.8.2/tests/test_migrate.py67
-rw-r--r--scripts/migration/0.8.2/tox.ini13
8 files changed, 426 insertions, 0 deletions
diff --git a/scripts/migration/0.8.2/README.md b/scripts/migration/0.8.2/README.md
new file mode 100644
index 00000000..919a5235
--- /dev/null
+++ b/scripts/migration/0.8.2/README.md
@@ -0,0 +1,73 @@
+CouchDB schema migration to Soledad 0.8.2
+=========================================
+
+Migrate the couch database schema from version <= 0.8.1 to version 0.8.2.
+
+
+ATTENTION!
+----------
+
+ - This script does not back up your data for you. Make sure you have a backup
+ copy of your databases before running this script!
+
+ - Make sure you turn off any service that might be writing to the couch
+ database before running this script.
+
+
+Usage
+-----
+
+To see what the script would do, run:
+
+ ./migrate.py
+
+To actually run the migration, add the --do-migrate command line option:
+
+ ./migrate.py --do-migrate
+
+See command line options:
+
+ ./migrate.py --help
+
+
+Log
+---
+
+If you don't pass a --log-file command line option, a log will be written to
+the `log/` folder.
+
+
+Differences between old and new couch schema
+--------------------------------------------
+
+The differences between old and new schemas are:
+
+ - Transaction metadata was previously stored inside each document, and we
+ used design doc view/list functions to retrieve that information. Now,
+ transaction metadata is stored in documents with special ids
+ (gen-0000000001 to gen-9999999999).
+
+ - Database replica config metadata was stored in a document called
+ "u1db_config", and now we store it in the "_local/config" document.
+
+ - Sync metadata was previously stored in documents with id
+ "u1db_sync_<source-replica-id>", and is now stored in documents with id
+ "_local/sync_<source-replica-id>".
+
+ - The new schema doesn't make use of any design documents.
+
+
+What does this script do
+------------------------
+
+- List all databases starting with "user-".
+- For each one, do:
+ - Check if it contains the old "u1db_config" document.
+ - If it doesn't, skip this db.
+ - Get the transaction log using the usual design doc view/list functions.
+ - Write a new "gen-X" document for each line on the transaction log.
+ - Get the "u1db_config" document, create a new one in "_local/config",
+ and delete the old one.
+ - List all "u1db_sync_X" documents, create new ones in "_local/sync_X",
+ delete the old ones.
+ - Delete unused design documents.
diff --git a/scripts/migration/0.8.2/log/.empty b/scripts/migration/0.8.2/log/.empty
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/scripts/migration/0.8.2/log/.empty
diff --git a/scripts/migration/0.8.2/migrate.py b/scripts/migration/0.8.2/migrate.py
new file mode 100755
index 00000000..159905ef
--- /dev/null
+++ b/scripts/migration/0.8.2/migrate.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# migrate.py
+
+"""
+Migrate CouchDB schema to Soledad 0.8.2 schema.
+
+******************************************************************************
+ ATTENTION!
+
+ - This script does not back up your data for you. Make sure you have a backup
+ copy of your databases before running this script!
+
+ - Make sure you turn off any service that might be writing to the couch
+ database before running this script.
+
+******************************************************************************
+
+Run this script with the --help option to see command line options.
+
+See the README.md file for more information.
+"""
+
+import datetime
+import logging
+import os
+
+from argparse import ArgumentParser
+
+from migrate_couch_schema import migrate
+
+
+# schema version this script migrates to; passed on to migrate() and used in
+# the name of the default log file
+TARGET_VERSION = '0.8.2'
+# couch server url used when none is given on the command line
+DEFAULT_COUCH_URL = 'http://127.0.0.1:5984'
+
+
+#
+# command line args and execution
+#
+
+def _configure_logger(log_file):
+    """
+    Configure the root logger to append records of level DEBUG and above to
+    a log file.
+
+    :param log_file: Path of the log file to use. If empty or None, a
+                     timestamped file named after TARGET_VERSION and this
+                     script is used, inside the ``log`` directory located
+                     next to this script.
+    :type log_file: str or None
+    """
+    if not log_file:
+        # splitext copes with script names that contain no dot or several
+        # dots, where unpacking the result of split('.') raises ValueError
+        fname, _ = os.path.splitext(os.path.basename(__file__))
+        timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+        filename = 'soledad_%s_%s_%s.log' \
+            % (TARGET_VERSION, fname, timestr)
+        dirname = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), 'log')
+        # the file handler does not create missing directories by itself
+        if not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        log_file = os.path.join(dirname, filename)
+    logging.basicConfig(
+        filename=log_file,
+        filemode='a',
+        format='%(asctime)s,%(msecs)d %(levelname)s %(message)s',
+        datefmt='%H:%M:%S',
+        level=logging.DEBUG)
+
+
+def _parse_args():
+    """
+    Parse the command line arguments of the migration script.
+
+    :return: A namespace with the attributes ``couch_url``, ``do_migrate``
+             and ``log_file``.
+    :rtype: argparse.Namespace
+    """
+    parser = ArgumentParser()
+    # --couch-url matches the spelling of the other options; the original
+    # --couch_url spelling is kept as an alias for backwards compatibility
+    parser.add_argument(
+        '--couch-url', '--couch_url',
+        dest='couch_url',
+        help='the url for the couch database',
+        default=DEFAULT_COUCH_URL)
+    parser.add_argument(
+        '--do-migrate',
+        help='actually perform the migration (otherwise '
+             'just log what would be done)',
+        action='store_true')
+    parser.add_argument(
+        '--log-file',
+        help='the log file to use')
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    # parse options, set up logging, then hand everything to the migration
+    cli_args = _parse_args()
+    _configure_logger(cli_args.log_file)
+    migrate(cli_args, TARGET_VERSION)
diff --git a/scripts/migration/0.8.2/migrate_couch_schema/__init__.py b/scripts/migration/0.8.2/migrate_couch_schema/__init__.py
new file mode 100644
index 00000000..37e5a525
--- /dev/null
+++ b/scripts/migration/0.8.2/migrate_couch_schema/__init__.py
@@ -0,0 +1,142 @@
+# __init__.py
+"""
+Support functions for migration script.
+"""
+
+import logging
+
+from couchdb import Server
+
+from leap.soledad.common.couch import GENERATION_KEY
+from leap.soledad.common.couch import TRANSACTION_ID_KEY
+from leap.soledad.common.couch import REPLICA_UID_KEY
+from leap.soledad.common.couch import DOC_ID_KEY
+from leap.soledad.common.couch import SCHEMA_VERSION_KEY
+from leap.soledad.common.couch import CONFIG_DOC_ID
+from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
+from leap.soledad.common.couch import SCHEMA_VERSION
+
+
+logger = logging.getLogger(__name__)
+
+
+#
+# support functions
+#
+
+def _get_couch_server(couch_url):
+    """
+    Return a couchdb ``Server`` object for the given url.
+
+    :param couch_url: The url of the couch server.
+    :type couch_url: str
+    """
+    server = Server(couch_url)
+    return server
+
+
+def _is_migrateable(db):
+    """
+    Tell whether a database still uses the old schema.
+
+    :param db: The database to check.
+    :return: True if the database contains the old "u1db_config" document,
+             False otherwise.
+    :rtype: bool
+    """
+    return db.get('u1db_config') is not None
+
+
+def _get_transaction_log(db):
+    """
+    Read the transaction log from the old "transactions" design document.
+
+    :param db: The database to read from.
+    :return: A list of (generation, doc_id, transaction_id) tuples, where
+             generations are numbered from 1 following the order of the
+             rows returned by the "log" view.
+    :rtype: list
+    """
+    resource = db.resource('_design', 'transactions', '_view', 'log')
+    _, _, data = resource.get_json()
+    return [
+        (gen, row['id'], row['value'])
+        for gen, row in enumerate(data['rows'], 1)
+    ]
+
+
+def _get_user_dbs(server):
+    """
+    Return the names of all databases whose name starts with "user-".
+
+    :param server: An iterable that yields database names.
+    :return: The names of the user databases.
+    :rtype: list
+    """
+    return [dbname for dbname in server if dbname.startswith('user-')]
+
+
+#
+# migration main functions
+#
+
+def migrate(args, target_version):
+    """
+    Migrate every user database found in the couch server.
+
+    Databases that do not contain the old config document are skipped.
+
+    :param args: The parsed command line arguments; ``couch_url`` and
+                 ``do_migrate`` are read from it.
+    :param target_version: The schema version being migrated to, used only
+                           in log messages.
+    :type target_version: str
+    """
+    server = _get_couch_server(args.couch_url)
+    do_migrate = args.do_migrate
+    logger.info('starting couch schema migration to %s...', target_version)
+    if not do_migrate:
+        logger.warning('dry-run: no changes will be made to databases')
+    for dbname in _get_user_dbs(server):
+        db = server[dbname]
+        if _is_migrateable(db):
+            logger.info("starting migration of user db: %s", dbname)
+            _migrate_user_db(db, do_migrate)
+            logger.info("finished migration of user db: %s", dbname)
+        else:
+            logger.warning("skipping user db: %s", dbname)
+    logger.info('finished couch schema migration to %s', target_version)
+
+
+def _migrate_user_db(db, do_migrate):
+    """
+    Run all migration steps, in order, on one user database.
+
+    :param db: The database to migrate.
+    :param do_migrate: Whether to actually modify the database; passed on
+                       to every step.
+    :type do_migrate: bool
+    """
+    # design docs are deleted last because the transaction log is read
+    # through one of them
+    steps = (
+        _migrate_transaction_log,
+        _migrate_config_doc,
+        _migrate_sync_docs,
+        _delete_design_docs,
+    )
+    for step in steps:
+        step(db, do_migrate)
+
+
+def _migrate_transaction_log(db, do_migrate):
+    """
+    Create one "gen-XXXXXXXXXX" document for each transaction log entry.
+
+    :param db: The database to migrate.
+    :param do_migrate: If False, only log what would be done.
+    :type do_migrate: bool
+    """
+    for gen, doc_id, trans_id in _get_transaction_log(db):
+        # generation number zero-padded to 10 digits
+        gen_doc_id = 'gen-%010d' % gen
+        logger.info('creating gen doc: %s' % (gen_doc_id))
+        if not do_migrate:
+            continue
+        db.save({
+            '_id': gen_doc_id,
+            GENERATION_KEY: gen,
+            DOC_ID_KEY: doc_id,
+            TRANSACTION_ID_KEY: trans_id,
+        })
+
+
+def _migrate_config_doc(db, do_migrate):
+    """
+    Move the replica config from the "u1db_config" document to the new
+    config document, adding the schema version to it.
+
+    :param db: The database to migrate.
+    :param do_migrate: If False, only log what would be done.
+    :type do_migrate: bool
+    """
+    old_config = db['u1db_config']
+    new_config = {'_id': CONFIG_DOC_ID}
+    new_config[REPLICA_UID_KEY] = old_config[REPLICA_UID_KEY]
+    new_config[SCHEMA_VERSION_KEY] = SCHEMA_VERSION
+    logger.info("moving config doc: %s -> %s"
+                % (old_config['_id'], new_config['_id']))
+    if not do_migrate:
+        return
+    # save the new doc before deleting the old one
+    db.save(new_config)
+    db.delete(old_config)
+
+
+def _migrate_sync_docs(db, do_migrate):
+    """
+    Move each "u1db_sync_<replica-uid>" document to a new document whose id
+    is SYNC_DOC_ID_PREFIX followed by the replica uid.
+
+    :param db: The database to migrate.
+    :param do_migrate: If False, only log what would be done.
+    :type do_migrate: bool
+    """
+    old_prefix = 'u1db_sync_'
+    view = db.view(
+        '_all_docs',
+        startkey='u1db_sync',
+        endkey='u1db_synd',
+        include_docs='true')
+    for row in view.rows:
+        old_doc = row['doc']
+        old_id = old_doc['_id']
+        # the key range above is wider than the prefix, so ignore any
+        # document that is not actually a sync document
+        if not old_id.startswith(old_prefix):
+            continue
+        # strip only the leading prefix: str.replace would also remove any
+        # later occurrence of it inside the replica uid
+        replica_uid = old_id[len(old_prefix):]
+        new_id = "%s%s" % (SYNC_DOC_ID_PREFIX, replica_uid)
+        new_doc = {
+            '_id': new_id,
+            GENERATION_KEY: old_doc['generation'],
+            TRANSACTION_ID_KEY: old_doc['transaction_id'],
+            REPLICA_UID_KEY: replica_uid,
+        }
+        logger.info("moving sync doc: %s -> %s" % (old_id, new_id))
+        if do_migrate:
+            db.save(new_doc)
+            db.delete(old_doc)
+
+
+def _delete_design_docs(db, do_migrate):
+    """
+    Delete the "docs", "syncs" and "transactions" design documents, which
+    are not used by the new schema.
+
+    Design documents that do not exist in the database are skipped.
+
+    :param db: The database to migrate.
+    :param do_migrate: If False, only log what would be done.
+    :type do_migrate: bool
+    """
+    for ddoc in ['docs', 'syncs', 'transactions']:
+        doc_id = '_design/%s' % ddoc
+        doc = db.get(doc_id)
+        if doc is None:
+            # db.get() returns None for a missing document, and passing
+            # that to db.delete() would abort the whole migration
+            logger.warning("design doc not found, skipping: %s" % doc_id)
+            continue
+        logger.info("deleting design doc: %s" % doc_id)
+        if do_migrate:
+            db.delete(doc)
diff --git a/scripts/migration/0.8.2/setup.py b/scripts/migration/0.8.2/setup.py
new file mode 100644
index 00000000..0467e932
--- /dev/null
+++ b/scripts/migration/0.8.2/setup.py
@@ -0,0 +1,8 @@
+from setuptools import setup
+from setuptools import find_packages
+
+
+# Register every package found under the current directory as part of the
+# "migrate_couch_schema" distribution.
+setup(
+ name='migrate_couch_schema',
+ packages=find_packages('.'),
+)
diff --git a/scripts/migration/0.8.2/tests/conftest.py b/scripts/migration/0.8.2/tests/conftest.py
new file mode 100644
index 00000000..92d1e17e
--- /dev/null
+++ b/scripts/migration/0.8.2/tests/conftest.py
@@ -0,0 +1,46 @@
+# conftest.py
+
+"""
+Provide a couch database with content stored in old schema.
+"""
+
+import couchdb
+import pytest
+import uuid
+
+
+# url of the couch server used by the tests
+COUCH_URL = 'http://127.0.0.1:5984'
+
+# javascript map function stored below as the "log" view of the
+# "_design/transactions" design document
+transaction_map = """
+function(doc) {
+ if (doc.u1db_transactions)
+ doc.u1db_transactions.forEach(function(t) {
+ emit(t[0], // use timestamp as key so the results are ordered
+ t[1]); // value is the transaction_id
+ });
+}
+"""
+
+# old-schema content saved into every test database: the config document, two
+# sync documents, two normal documents carrying transaction metadata, and
+# the three design documents
+initial_docs = [
+ {'_id': 'u1db_config', 'replica_uid': 'an-uid'},
+ {'_id': 'u1db_sync_A', 'generation': 0, 'replica_uid': 'A',
+ 'transaction_id': ''},
+ {'_id': 'u1db_sync_B', 'generation': 2, 'replica_uid': 'B',
+ 'transaction_id': 'X'},
+ {'_id': 'doc1', 'u1db_transactions': [(1, 'trans-1'), (3, 'trans-3')]},
+ {'_id': 'doc2', 'u1db_transactions': [(2, 'trans-2'), (4, 'trans-4')]},
+ {'_id': '_design/docs'},
+ {'_id': '_design/syncs'},
+ {'_id': '_design/transactions', 'views': {'log': {'map': transaction_map}}}
+]
+
+
+@pytest.fixture(scope='function')
+def db(request):
+    """
+    Provide a freshly created user database populated with old-schema
+    documents. The database is deleted when the test finishes.
+
+    :param request: The pytest fixture request, used to register cleanup.
+    :return: The populated database.
+    """
+    server = couchdb.Server(COUCH_URL)
+    dbname = "user-" + uuid.uuid4().hex
+    database = server.create(dbname)
+    # register cleanup before populating, so the database is removed even
+    # if saving one of the documents fails
+    request.addfinalizer(lambda: server.delete(dbname))
+    for doc in initial_docs:
+        # save a copy: save() records the new revision on the dict it is
+        # given, which would leave stale revisions in the shared
+        # module-level documents for the next test
+        database.save(dict(doc))
+    return database
diff --git a/scripts/migration/0.8.2/tests/test_migrate.py b/scripts/migration/0.8.2/tests/test_migrate.py
new file mode 100644
index 00000000..10c8b906
--- /dev/null
+++ b/scripts/migration/0.8.2/tests/test_migrate.py
@@ -0,0 +1,67 @@
+# test_migrate.py
+
+"""
+Ensure that the migration script works!
+"""
+
+from migrate_couch_schema import _migrate_user_db
+
+from leap.soledad.common.couch import GENERATION_KEY
+from leap.soledad.common.couch import TRANSACTION_ID_KEY
+from leap.soledad.common.couch import REPLICA_UID_KEY
+from leap.soledad.common.couch import DOC_ID_KEY
+from leap.soledad.common.couch import SCHEMA_VERSION_KEY
+from leap.soledad.common.couch import CONFIG_DOC_ID
+from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
+from leap.soledad.common.couch import SCHEMA_VERSION
+
+
+def test__migrate_user_db(db):
+    """
+    Migrate a database populated with the old schema and check the ids and
+    contents of the resulting documents.
+    """
+    _migrate_user_db(db, True)
+
+    # only the 2 normal documents and the 4 generation documents should be
+    # listed after the migration
+    rows = db.view('_all_docs').rows
+    assert len(rows) == 6
+
+    # ensure that the ids of the listed documents are the expected ones
+    found_ids = [row.id for row in rows]
+    expected_ids = [
+        'doc1',
+        'doc2',
+        'gen-0000000001',
+        'gen-0000000002',
+        'gen-0000000003',
+        'gen-0000000004',
+    ]
+    for expected_id in expected_ids:
+        assert expected_id in found_ids
+
+    # config doc contents
+    config_doc = db.get(CONFIG_DOC_ID)
+    assert config_doc[REPLICA_UID_KEY] == 'an-uid'
+    assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION
+
+    # sync docs contents: (replica uid, generation, transaction id)
+    expected_syncs = [
+        ('A', 0, ''),
+        ('B', 2, 'X'),
+    ]
+    for replica_uid, generation, trans_id in expected_syncs:
+        sync_doc = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, replica_uid))
+        assert sync_doc[GENERATION_KEY] == generation
+        assert sync_doc[REPLICA_UID_KEY] == replica_uid
+        assert sync_doc[TRANSACTION_ID_KEY] == trans_id
+
+    # gen docs contents: (gen doc id, doc id, generation, transaction id)
+    expected_gens = [
+        ('gen-0000000001', 'doc1', 1, 'trans-1'),
+        ('gen-0000000002', 'doc2', 2, 'trans-2'),
+        ('gen-0000000003', 'doc1', 3, 'trans-3'),
+        ('gen-0000000004', 'doc2', 4, 'trans-4'),
+    ]
+    for gen_doc_id, doc_id, generation, trans_id in expected_gens:
+        gen_doc = db.get(gen_doc_id)
+        assert gen_doc[DOC_ID_KEY] == doc_id
+        assert gen_doc[GENERATION_KEY] == generation
+        assert gen_doc[TRANSACTION_ID_KEY] == trans_id
diff --git a/scripts/migration/0.8.2/tox.ini b/scripts/migration/0.8.2/tox.ini
new file mode 100644
index 00000000..2bb6be4c
--- /dev/null
+++ b/scripts/migration/0.8.2/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+envlist = py27
+
+[testenv]
+commands = py.test {posargs}
+changedir = tests
+deps =
+ pytest
+ couchdb
+ pdbpp
+ -e../../../common
+setenv =
+ TERM=xterm