summaryrefslogtreecommitdiff
path: root/scripts/migration/0.9.0
diff options
context:
space:
mode:
authordrebs <drebs@leap.se>2016-11-10 23:50:35 -0200
committerdrebs <drebs@leap.se>2016-11-10 23:50:35 -0200
commit564f55802455d08c9a38e892bb4b25ad6fbcb87d (patch)
treeb214482c46ecd09d531a3bc7bf254bf47d367fb5 /scripts/migration/0.9.0
parentc1950b41e0995b0213227bd0ce2c633f312037dc (diff)
parent0fd7e9f018b02161a844c11332ffced56b256010 (diff)
Merge tag '0.9.0'
Tag version 0.9.0
Diffstat (limited to 'scripts/migration/0.9.0')
-rw-r--r--scripts/migration/0.9.0/.gitignore1
-rw-r--r--scripts/migration/0.9.0/README.md73
-rw-r--r--scripts/migration/0.9.0/log/.empty0
-rwxr-xr-xscripts/migration/0.9.0/migrate.py117
-rw-r--r--scripts/migration/0.9.0/migrate_couch_schema/__init__.py192
-rw-r--r--scripts/migration/0.9.0/requirements.pip3
-rw-r--r--scripts/migration/0.9.0/setup.py8
-rw-r--r--scripts/migration/0.9.0/tests/conftest.py54
-rw-r--r--scripts/migration/0.9.0/tests/test_migrate.py67
-rw-r--r--scripts/migration/0.9.0/tox.ini13
10 files changed, 528 insertions, 0 deletions
diff --git a/scripts/migration/0.9.0/.gitignore b/scripts/migration/0.9.0/.gitignore
new file mode 100644
index 00000000..6115c109
--- /dev/null
+++ b/scripts/migration/0.9.0/.gitignore
@@ -0,0 +1 @@
+log/*
diff --git a/scripts/migration/0.9.0/README.md b/scripts/migration/0.9.0/README.md
new file mode 100644
index 00000000..919a5235
--- /dev/null
+++ b/scripts/migration/0.9.0/README.md
@@ -0,0 +1,73 @@
+CouchDB schema migration to Soledad 0.8.2
+=========================================
+
+Migrate couch database schema from <= 0.8.1 version to 0.8.2 version.
+
+
+ATTENTION!
+----------
+
+ - This script does not back up your data for you. Make sure you have a backup
+ copy of your databases before running this script!
+
+ - Make sure you turn off any service that might be writing to the couch
+ database before running this script.
+
+
+Usage
+-----
+
+To see what the script would do, run:
+
+ ./migrate.py
+
+To actually run the migration, add the --do-migrate command line option:
+
+ ./migrate.py --do-migrate
+
+See command line options:
+
+ ./migrate.py --help
+
+
+Log
+---
+
+If you don't pass a --log-file command line option, a log will be written to
+the `log/` folder.
+
+
+Differences between old and new couch schema
+--------------------------------------------
+
+The differences between old and new schemas are:
+
+ - Transaction metadata was previously stored inside each document, and we
+ used design doc view/list functions to retrieve that information. Now,
+ transaction metadata is stored in documents with special ids
+ (gen-0000000001 to gen-9999999999).
+
+ - Database replica config metadata was stored in a document called
+ "u1db_config", and now we store it in the "_local/config" document.
+
+ - Sync metadata was previously stored in documents with id
+ "u1db_sync_<source-replica-id>", and is now stored in documents with id
+ "_local/sync_<source-replica-id>".
+
+ - The new schema doesn't make use of any design documents.
+
+
+What does this script do
+------------------------
+
+- List all databases starting with "user-".
+- For each one, do:
+ - Check if it contains the old "u1db_config" document.
+ - If it doesn't, skip this db.
+ - Get the transaction log using the usual design doc view/list functions.
+ - Write a new "gen-X" document for each line on the transaction log.
+ - Get the "u1db_config" document, create a new one in "_local/config",
+ and delete the old one.
+ - List all "u1db_sync_X" documents, create new ones in "_local/sync_X",
+ delete the old ones.
+ - Delete unused design documents.
diff --git a/scripts/migration/0.9.0/log/.empty b/scripts/migration/0.9.0/log/.empty
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/scripts/migration/0.9.0/log/.empty
diff --git a/scripts/migration/0.9.0/migrate.py b/scripts/migration/0.9.0/migrate.py
new file mode 100755
index 00000000..6ad5bc2d
--- /dev/null
+++ b/scripts/migration/0.9.0/migrate.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+# migrate.py
+
+"""
+Migrate CouchDB schema to Soledad 0.8.2 schema.
+
+******************************************************************************
+ ATTENTION!
+
+ - This script does not backup your data for you. Make sure you have a backup
+ copy of your databases before running this script!
+
+ - Make sure you turn off any service that might be writing to the couch
+ database before running this script.
+
+******************************************************************************
+
+Run this script with the --help option to see command line options.
+
+See the README.md file for more information.
+"""
+
+import datetime
+import logging
+import netrc
+import os
+
+from argparse import ArgumentParser
+
+from leap.soledad.server import load_configuration
+
+from migrate_couch_schema import migrate
+
+
+# schema version this script migrates to; it is used in log messages and in
+# the name of the default log file.
+TARGET_VERSION = '0.8.2'
+# couch url used as default when the admin netrc file does not exist.
+DEFAULT_COUCH_URL = 'http://127.0.0.1:5984'
+# NOTE: load_configuration() is called at import time, so merely importing
+# this module already depends on the soledad server configuration.
+CONF = load_configuration('/etc/soledad/soledad-server.conf')
+# path of the netrc file from which the default couch credentials are read.
+NETRC_PATH = CONF['soledad-server']['admin_netrc']
+
+
+#
+# command line args and execution
+#
+
+def _configure_logger(log_file, level=logging.INFO):
+    """
+    Configure logging so that messages are appended to a log file.
+
+    :param log_file: path of the log file to use. If empty or None, a file
+                     named after the target version, the name of this script
+                     and the current time is used, inside the `log/` folder
+                     that lives next to this script.
+    :param level: the minimum level of the messages that get logged.
+    """
+    if not log_file:
+        # splitext() copes with script names that have no dot or more than
+        # one dot, which would break a 2-tuple unpacking of split('.').
+        fname, _ = os.path.splitext(os.path.basename(__file__))
+        timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+        filename = 'soledad_%s_%s_%s.log' \
+            % (TARGET_VERSION, fname, timestr)
+        dirname = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), 'log')
+        # do not assume that the log folder is there.
+        if not os.path.isdir(dirname):
+            os.makedirs(dirname)
+        log_file = os.path.join(dirname, filename)
+    logging.basicConfig(
+        filename=log_file,
+        filemode='a',
+        # zero-pad milliseconds so that 5 ms is logged as ",005", not ",5".
+        format='%(asctime)s,%(msecs)03d %(levelname)s %(message)s',
+        datefmt='%H:%M:%S',
+        level=level)
+
+
+def _default_couch_url():
+    """
+    Return the couch url to be used when none is given in the command line.
+
+    If the netrc file pointed to by NETRC_PATH does not exist, or if it has
+    no host entries, DEFAULT_COUCH_URL is returned. Otherwise the url is
+    built with the login, password and host of the first entry of that file,
+    and port 5984.
+
+    :return: the default couch url.
+    """
+    if not os.path.exists(NETRC_PATH):
+        return DEFAULT_COUCH_URL
+    hosts = netrc.netrc(NETRC_PATH).hosts
+    if not hosts:
+        # a netrc file without entries has no credentials to offer.
+        return DEFAULT_COUCH_URL
+    # pick the first entry without relying on items() returning a list.
+    host, (login, _, password) = next(iter(hosts.items()))
+    return 'http://%(login)s:%(password)s@%(host)s:5984' % {
+        'login': login,
+        'password': password,
+        'host': host}
+
+
+def _parse_args():
+    """
+    Parse the command line arguments of the migration script.
+
+    :return: a namespace with the attributes couch_url, do_migrate,
+             log_file, pdb and verbose.
+    """
+    parser = ArgumentParser()
+    parser.add_argument(
+        # --couch-url matches the dashed spelling of all the other options;
+        # the original --couch_url spelling keeps working.
+        '--couch-url', '--couch_url',
+        dest='couch_url',
+        help='the url for the couch database',
+        default=_default_couch_url())
+    parser.add_argument(
+        '--do-migrate',
+        help='actually perform the migration (otherwise '
+             'just print what would be done)',
+        action='store_true')
+    parser.add_argument(
+        '--log-file',
+        help='the log file to use')
+    parser.add_argument(
+        '--pdb', action='store_true',
+        help='escape to pdb shell in case of exception')
+    parser.add_argument(
+        '--verbose', action='store_true',
+        help='output detailed information about the migration '
+             '(i.e. include debug messages)')
+    return parser.parse_args()
+
+
+def _enable_pdb():
+    """
+    Replace the exception hook by IPython's verbose traceback formatter,
+    created with the option to call pdb.
+    """
+    import sys
+    from IPython.core.ultratb import FormattedTB
+    hook = FormattedTB(mode='Verbose', color_scheme='Linux', call_pdb=1)
+    sys.excepthook = hook
+
+
+if __name__ == '__main__':
+    # entry point: parse the command line, set logging up and run the
+    # migration.
+    args = _parse_args()
+    if args.pdb:
+        _enable_pdb()
+    log_level = logging.INFO
+    if args.verbose:
+        log_level = logging.DEBUG
+    _configure_logger(args.log_file, level=log_level)
+    logger = logging.getLogger(__name__)
+    try:
+        migrate(args, TARGET_VERSION)
+    except:
+        # whatever stopped the script is written to the log and then
+        # propagated unchanged.
+        logger.exception('Fatal error on migrate script!')
+        raise
diff --git a/scripts/migration/0.9.0/migrate_couch_schema/__init__.py b/scripts/migration/0.9.0/migrate_couch_schema/__init__.py
new file mode 100644
index 00000000..f0b456e4
--- /dev/null
+++ b/scripts/migration/0.9.0/migrate_couch_schema/__init__.py
@@ -0,0 +1,192 @@
+# __init__.py
+"""
+Support functions for migration script.
+"""
+
+import logging
+
+from couchdb import Server
+from couchdb import ResourceNotFound
+from couchdb import ResourceConflict
+
+from leap.soledad.common.couch import GENERATION_KEY
+from leap.soledad.common.couch import TRANSACTION_ID_KEY
+from leap.soledad.common.couch import REPLICA_UID_KEY
+from leap.soledad.common.couch import DOC_ID_KEY
+from leap.soledad.common.couch import SCHEMA_VERSION_KEY
+from leap.soledad.common.couch import CONFIG_DOC_ID
+from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
+from leap.soledad.common.couch import SCHEMA_VERSION
+
+
+logger = logging.getLogger(__name__)
+
+
+#
+# support functions
+#
+
+def _get_couch_server(couch_url):
+    """
+    Return a couchdb Server object created from the given url.
+    """
+    server = Server(couch_url)
+    return server
+
+
+def _is_migrateable(db):
+    """
+    Tell whether the given database should be migrated.
+
+    :param db: the database to inspect.
+    :return: True if db.get('u1db_config') yields a truthy value, False
+             otherwise.
+    """
+    return bool(db.get('u1db_config'))
+
+
+def _get_transaction_log(db):
+    """
+    Read the transaction log through the old "transactions" design doc.
+
+    :param db: the database to read the "log" view from.
+    :return: a list of (generation, doc_id, transaction_id) tuples, with
+             generations numbered from 1 in the order of the view rows, or
+             an empty list if the view can not be found.
+    """
+    view_path = ('_design', 'transactions', '_view', 'log')
+    view = db.resource(*view_path)
+    try:
+        _, _, data = view.get_json()
+    except ResourceNotFound:
+        logger.warning(
+            '[%s] missing transactions design document, '
+            'can\'t get transaction log.' % db.name)
+        return []
+    # the generation is the 1-based position of the row in the view.
+    return [
+        (gen, row['id'], row['value'])
+        for gen, row in enumerate(data['rows'], 1)]
+
+
+def _get_user_dbs(server):
+    """
+    Return the names, among those yielded by iterating the server, that
+    start with "user-".
+    """
+    return [dbname for dbname in server if dbname.startswith('user-')]
+
+
+#
+# migration main functions
+#
+
+def migrate(args, target_version):
+    """
+    Migrate the schema of all the user databases of a couch server.
+
+    Databases that are not migrateable are skipped. An error while migrating
+    one database is logged and the migration goes on with the next one.
+
+    :param args: the parsed command line arguments; couch_url and do_migrate
+                 are used.
+    :param target_version: the version being migrated to, only used in log
+                           messages.
+    """
+    server = _get_couch_server(args.couch_url)
+    logger.info('starting couch schema migration to %s' % target_version)
+    if not args.do_migrate:
+        logger.warning('dry-run: no changes will be made to databases')
+    user_dbs = _get_user_dbs(server)
+    for dbname in user_dbs:
+        db = server[dbname]
+        if not _is_migrateable(db):
+            logger.warning("[%s] skipping not migrateable user db" % dbname)
+            continue
+        logger.info("[%s] starting migration of user db" % dbname)
+        try:
+            _migrate_user_db(db, args.do_migrate)
+            logger.info("[%s] finished migration of user db" % dbname)
+        except Exception:
+            # only errors are swallowed here: KeyboardInterrupt and
+            # SystemExit must still be able to stop the whole run.
+            logger.exception('[%s] error migrating user db' % dbname)
+            logger.error('continuing with next database.')
+    logger.info('finished couch schema migration to %s' % target_version)
+
+
+def _migrate_user_db(db, do_migrate):
+ """
+ Run all the migration steps on one user database.
+
+ The order of the steps matters:
+
+ - the transaction log is read through the "transactions" design doc, so
+ it is migrated before the design docs are deleted.
+ - the old config doc is migrated last: _is_migrateable() looks for that
+ doc and _migrate_config_doc() deletes it, so a database whose migration
+ stopped in an earlier step is picked up again on the next run.
+
+ :param db: the database to migrate.
+ :param do_migrate: passed on to every step, which only write to the
+ database when it is true.
+ """
+ _migrate_transaction_log(db, do_migrate)
+ _migrate_sync_docs(db, do_migrate)
+ _delete_design_docs(db, do_migrate)
+ _migrate_config_doc(db, do_migrate)
+
+
+def _migrate_transaction_log(db, do_migrate):
+    """
+    Write one "gen-X" document for each entry of the transaction log.
+
+    :param db: the database being migrated.
+    :param do_migrate: if false, documents are only logged, not saved.
+    :raise ResourceConflict: if a gen document already exists with content
+                             that differs from the one to be written.
+    """
+    compared_keys = [GENERATION_KEY, DOC_ID_KEY, TRANSACTION_ID_KEY]
+    for gen, doc_id, trans_id in _get_transaction_log(db):
+        gen_doc_id = 'gen-%s' % str(gen).zfill(10)
+        gen_doc = {
+            '_id': gen_doc_id,
+            GENERATION_KEY: gen,
+            DOC_ID_KEY: doc_id,
+            TRANSACTION_ID_KEY: trans_id,
+        }
+        logger.debug('[%s] creating gen doc: %s' % (db.name, gen_doc_id))
+        if not do_migrate:
+            continue
+        try:
+            db.save(gen_doc)
+        except ResourceConflict:
+            # the gen document is already there; that is fine as long as it
+            # holds the same values, otherwise the conflict is propagated.
+            stored_doc = db.get(gen_doc_id)
+            for key in compared_keys:
+                if stored_doc[key] != gen_doc[key]:
+                    raise
+
+
+def _migrate_config_doc(db, do_migrate):
+    """
+    Move the replica config from "u1db_config" to the new config document.
+
+    The new document holds the replica uid of the old one and the current
+    schema version. The old document is deleted after the new one is saved.
+
+    :param db: the database being migrated.
+    :param do_migrate: if false, the move is only logged.
+    :raise ResourceConflict: if the new config document already exists with
+                             content that differs from the one to be written.
+    """
+    old_doc = db['u1db_config']
+    new_doc = {
+        '_id': CONFIG_DOC_ID,
+        REPLICA_UID_KEY: old_doc[REPLICA_UID_KEY],
+        SCHEMA_VERSION_KEY: SCHEMA_VERSION,
+    }
+    logger.info("[%s] moving config doc: %s -> %s"
+                % (db.name, old_doc['_id'], new_doc['_id']))
+    if do_migrate:
+        try:
+            db.save(new_doc)
+        except ResourceConflict:
+            # databases are selected by the presence of the *old* config doc,
+            # so the new one may already exist if a previous run was stopped
+            # before the old doc was deleted. if documents are the same,
+            # continue with migration.
+            existing_doc = db.get(CONFIG_DOC_ID)
+            for key in [REPLICA_UID_KEY, SCHEMA_VERSION_KEY]:
+                if existing_doc[key] != new_doc[key]:
+                    raise
+        db.delete(old_doc)
+
+
+def _migrate_sync_docs(db, do_migrate):
+    """
+    Move every "u1db_sync_<replica-uid>" document to its new document id.
+
+    Leftover "u1db_sync_log" and "u1db_sync_state" documents are deleted.
+    Documents in the queried key range whose id does not start with
+    "u1db_sync_" are left untouched.
+
+    :param db: the database being migrated.
+    :param do_migrate: if false, changes are only logged.
+    :raise ResourceConflict: if a new sync document already exists with
+                             content that differs from the one to be written.
+    """
+    old_prefix = 'u1db_sync_'
+    logger.info('[%s] moving sync docs' % db.name)
+    view = db.view(
+        '_all_docs',
+        startkey='u1db_sync',
+        endkey='u1db_synd',
+        include_docs='true')
+    for row in view.rows:
+        old_doc = row['doc']
+        old_id = old_doc['_id']
+
+        # older schemas used different documents with ids starting with
+        # "u1db_sync" to store sync-related data:
+        #
+        #   - u1db_sync_log: was used to store the whole sync log.
+        #   - u1db_sync_state: was used to store the sync state.
+        #
+        # if any of these documents exist in the current db, they are leftover
+        # from previous migrations, and should just be removed.
+        if old_id in ['u1db_sync_log', 'u1db_sync_state']:
+            logger.info('[%s] removing leftover document: %s'
+                        % (db.name, old_id))
+            if do_migrate:
+                db.delete(old_doc)
+            continue
+
+        # the key range above also matches ids like "u1db_syncX", which are
+        # not sync docs.
+        if not old_id.startswith(old_prefix):
+            logger.warning('[%s] skipping unexpected document: %s'
+                           % (db.name, old_id))
+            continue
+
+        # strip the prefix only: str.replace() would also remove any later
+        # occurrence of "u1db_sync_" from the replica uid itself.
+        replica_uid = old_id[len(old_prefix):]
+        new_id = "%s%s" % (SYNC_DOC_ID_PREFIX, replica_uid)
+        new_doc = {
+            '_id': new_id,
+            GENERATION_KEY: old_doc['generation'],
+            TRANSACTION_ID_KEY: old_doc['transaction_id'],
+            REPLICA_UID_KEY: replica_uid,
+        }
+        logger.debug("[%s] moving sync doc: %s -> %s"
+                     % (db.name, old_id, new_id))
+        if do_migrate:
+            try:
+                db.save(new_doc)
+            except ResourceConflict:
+                # this sync document already exists. if documents are the
+                # same, continue with migration.
+                existing_doc = db.get(new_id)
+                for key in [GENERATION_KEY, TRANSACTION_ID_KEY,
+                            REPLICA_UID_KEY]:
+                    if existing_doc[key] != new_doc[key]:
+                        raise
+            db.delete(old_doc)
+
+
+def _delete_design_docs(db, do_migrate):
+    """
+    Delete the "docs", "syncs" and "transactions" design documents.
+
+    :param db: the database being migrated.
+    :param do_migrate: if false, deletions are only logged.
+    """
+    for name in ('docs', 'syncs', 'transactions'):
+        doc_id = '_design/%s' % name
+        ddoc = db.get(doc_id)
+        if not ddoc:
+            logger.warning("[%s] design doc not found: %s" % (db.name, doc_id))
+            continue
+        logger.info("[%s] deleting design doc: %s" % (db.name, doc_id))
+        if do_migrate:
+            db.delete(ddoc)
diff --git a/scripts/migration/0.9.0/requirements.pip b/scripts/migration/0.9.0/requirements.pip
new file mode 100644
index 00000000..ea22a1a4
--- /dev/null
+++ b/scripts/migration/0.9.0/requirements.pip
@@ -0,0 +1,3 @@
+couchdb
+leap.soledad.common==0.9.0
+leap.soledad.server==0.9.0
diff --git a/scripts/migration/0.9.0/setup.py b/scripts/migration/0.9.0/setup.py
new file mode 100644
index 00000000..0467e932
--- /dev/null
+++ b/scripts/migration/0.9.0/setup.py
@@ -0,0 +1,8 @@
+from setuptools import setup
+from setuptools import find_packages
+
+
+# declare the "migrate_couch_schema" distribution, with its packages being
+# discovered from the directory that holds this file.
+setup(
+ name='migrate_couch_schema',
+ packages=find_packages('.'),
+)
diff --git a/scripts/migration/0.9.0/tests/conftest.py b/scripts/migration/0.9.0/tests/conftest.py
new file mode 100644
index 00000000..61f6c7ee
--- /dev/null
+++ b/scripts/migration/0.9.0/tests/conftest.py
@@ -0,0 +1,54 @@
+# conftest.py
+
+"""
+Provide a couch database with content stored in old schema.
+"""
+
+import couchdb
+import pytest
+import uuid
+
+
+# url of the couch server in which the test databases are created.
+COUCH_URL = 'http://127.0.0.1:5984'
+
+# map function stored as the "log" view of the "transactions" design doc
+# below: it emits one row for each entry of a document's "u1db_transactions".
+transaction_map = """
+function(doc) {
+ if (doc.u1db_transactions)
+ doc.u1db_transactions.forEach(function(t) {
+ emit(t[0], // use timestamp as key so the results are ordered
+ t[1]); // value is the transaction_id
+ });
+}
+"""
+
+# documents saved by the db fixture into every test database: content in the
+# old schema, plus a few documents that simulate earlier migration attempts.
+initial_docs = [
+ {'_id': 'u1db_config', 'replica_uid': 'an-uid'},
+ {'_id': 'u1db_sync_A', 'generation': 0, 'replica_uid': 'A',
+ 'transaction_id': ''},
+ {'_id': 'u1db_sync_B', 'generation': 2, 'replica_uid': 'B',
+ 'transaction_id': 'X'},
+ {'_id': 'doc1', 'u1db_transactions': [(1, 'trans-1'), (3, 'trans-3')]},
+ {'_id': 'doc2', 'u1db_transactions': [(2, 'trans-2'), (4, 'trans-4')]},
+ {'_id': '_design/docs'},
+ {'_id': '_design/syncs'},
+ {'_id': '_design/transactions',
+ 'views': {'log': {'map': transaction_map}}},
+ # add some data from previous interrupted migration
+ {'_id': '_local/sync_A', 'gen': 0, 'trans_id': '', 'replica_uid': 'A'},
+ {'_id': 'gen-0000000002',
+ 'gen': 2, 'trans_id': 'trans-2', 'doc_id': 'doc2'},
+ # the following should be removed if found in the dbs
+ {'_id': 'u1db_sync_log'},
+ {'_id': 'u1db_sync_state'},
+]
+
+
+@pytest.fixture(scope='function')
+def db(request):
+    """
+    Provide a fresh user database populated with the initial documents.
+
+    The database gets a random "user-" name and is deleted from the server
+    when the test finishes.
+    """
+    server = couchdb.Server(COUCH_URL)
+    dbname = "user-" + uuid.uuid4().hex
+    db = server.create(dbname)
+    # register the cleanup before populating, so the database does not stay
+    # behind in the server if saving one of the documents fails.
+    request.addfinalizer(lambda: server.delete(dbname))
+    for doc in initial_docs:
+        # save a copy: initial_docs is shared by every use of this fixture,
+        # and whatever save() records on the dict it is given (e.g. a
+        # revision) must not leak into the next database.
+        db.save(dict(doc))
+    return db
diff --git a/scripts/migration/0.9.0/tests/test_migrate.py b/scripts/migration/0.9.0/tests/test_migrate.py
new file mode 100644
index 00000000..10c8b906
--- /dev/null
+++ b/scripts/migration/0.9.0/tests/test_migrate.py
@@ -0,0 +1,67 @@
+# test_migrate.py
+
+"""
+Ensure that the migration script works!
+"""
+
+from migrate_couch_schema import _migrate_user_db
+
+from leap.soledad.common.couch import GENERATION_KEY
+from leap.soledad.common.couch import TRANSACTION_ID_KEY
+from leap.soledad.common.couch import REPLICA_UID_KEY
+from leap.soledad.common.couch import DOC_ID_KEY
+from leap.soledad.common.couch import SCHEMA_VERSION_KEY
+from leap.soledad.common.couch import CONFIG_DOC_ID
+from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
+from leap.soledad.common.couch import SCHEMA_VERSION
+
+
+def test__migrate_user_db(db):
+    """
+    Migrate the fixture database and check the documents it ends up with.
+    """
+    _migrate_user_db(db, True)
+
+    # we should find exactly 6 documents: 2 normal documents and 4 generation
+    # documents
+    rows = db.view('_all_docs').rows
+    assert len(rows) == 6
+
+    # ensure that the ids of the documents we found on the database are correct
+    doc_ids = [row.id for row in rows]
+    expected_ids = [
+        'doc1',
+        'doc2',
+        'gen-0000000001',
+        'gen-0000000002',
+        'gen-0000000003',
+        'gen-0000000004',
+    ]
+    for expected_id in expected_ids:
+        assert expected_id in doc_ids
+
+    # assert config doc contents
+    config_doc = db.get(CONFIG_DOC_ID)
+    assert config_doc[REPLICA_UID_KEY] == 'an-uid'
+    assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION
+
+    # assert sync docs contents: (replica uid, generation, transaction id)
+    expected_sync_docs = [
+        ('A', 0, ''),
+        ('B', 2, 'X'),
+    ]
+    for replica_uid, gen, trans_id in expected_sync_docs:
+        sync_doc = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, replica_uid))
+        assert sync_doc[GENERATION_KEY] == gen
+        assert sync_doc[REPLICA_UID_KEY] == replica_uid
+        assert sync_doc[TRANSACTION_ID_KEY] == trans_id
+
+    # assert gen docs contents: (gen doc id, doc id, generation, trans id)
+    expected_gen_docs = [
+        ('gen-0000000001', 'doc1', 1, 'trans-1'),
+        ('gen-0000000002', 'doc2', 2, 'trans-2'),
+        ('gen-0000000003', 'doc1', 3, 'trans-3'),
+        ('gen-0000000004', 'doc2', 4, 'trans-4'),
+    ]
+    for gen_doc_id, doc_id, gen, trans_id in expected_gen_docs:
+        gen_doc = db.get(gen_doc_id)
+        assert gen_doc[DOC_ID_KEY] == doc_id
+        assert gen_doc[GENERATION_KEY] == gen
+        assert gen_doc[TRANSACTION_ID_KEY] == trans_id
diff --git a/scripts/migration/0.9.0/tox.ini b/scripts/migration/0.9.0/tox.ini
new file mode 100644
index 00000000..2bb6be4c
--- /dev/null
+++ b/scripts/migration/0.9.0/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+envlist = py27
+
+[testenv]
+commands = py.test {posargs}
+changedir = tests
+deps =
+ pytest
+ couchdb
+ pdbpp
+ -e../../../common
+setenv =
+ TERM=xterm