From bc28ea6e652418791dcf63fadcc81db9c50e2d45 Mon Sep 17 00:00:00 2001 From: drebs Date: Fri, 29 Jul 2016 10:01:46 -0300 Subject: [pkg] add couch schema migration script --- scripts/migration/0.8.2/README.md | 73 +++++++++++ scripts/migration/0.8.2/log/.empty | 0 scripts/migration/0.8.2/migrate.py | 77 +++++++++++ .../0.8.2/migrate_couch_schema/__init__.py | 142 +++++++++++++++++++++ scripts/migration/0.8.2/setup.py | 8 ++ scripts/migration/0.8.2/tests/conftest.py | 46 +++++++ scripts/migration/0.8.2/tests/test_migrate.py | 67 ++++++++++ scripts/migration/0.8.2/tox.ini | 13 ++ 8 files changed, 426 insertions(+) create mode 100644 scripts/migration/0.8.2/README.md create mode 100644 scripts/migration/0.8.2/log/.empty create mode 100755 scripts/migration/0.8.2/migrate.py create mode 100644 scripts/migration/0.8.2/migrate_couch_schema/__init__.py create mode 100644 scripts/migration/0.8.2/setup.py create mode 100644 scripts/migration/0.8.2/tests/conftest.py create mode 100644 scripts/migration/0.8.2/tests/test_migrate.py create mode 100644 scripts/migration/0.8.2/tox.ini (limited to 'scripts') diff --git a/scripts/migration/0.8.2/README.md b/scripts/migration/0.8.2/README.md new file mode 100644 index 00000000..919a5235 --- /dev/null +++ b/scripts/migration/0.8.2/README.md @@ -0,0 +1,73 @@ +CouchDB schema migration to Soledad 0.8.2 +========================================= + +Migrate couch database schema from <= 0.8.1 version to 0.8.2 version. + + +ATTENTION! +---------- + + - This script does not backup your data for you. Make sure you have a backup + copy of your databases before running this script! + + - Make sure you turn off any service that might be writing to the couch + database before running this script. 
+ + +Usage +----- + +To see what the script would do, run: + + ./migrate.py + +To actually run the migration, add the --do-migrate command line option: + + ./migrate.py --do-migrate + +See command line options: + + ./migrate.py --help + + +Log +--- + +If you don't pass a --log-file command line option, a log will be written to +the `log/` folder. + + +Differences between old and new couch schema +-------------------------------------------- + +The differences between old and new schemas are: + + - Transaction metadata was previously stored inside each document, and we + used design doc view/list functions to retrieve that information. Now, + transaction metadata is stored in documents with special ids + (gen-0000000001 to gen-9999999999). + + - Database replica config metadata was stored in a document called + "u1db_config", and now we store it in the "_local/config" document. + + - Sync metadata was previously stored in documents with id + "u1db_sync_X" (X being the replica uid), and is now stored in + "_local/sync_X". + + - The new schema doesn't make use of any design documents. + + +What does this script do +------------------------ + +- List all databases starting with "user-". +- For each one, do: + - Check if it contains the old "u1db_config" document. + - If it doesn't, skip this db. + - Get the transaction log using the usual design doc view/list functions. + - Write a new "gen-X" document for each entry in the transaction log. + - Get the "u1db_config" document, create a new one in "_local/config", + delete the old one. + - List all "u1db_sync_X" documents, create new ones in "_local/sync_X", + delete the old ones. + - Delete unused design documents. 
diff --git a/scripts/migration/0.8.2/log/.empty b/scripts/migration/0.8.2/log/.empty
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/migration/0.8.2/migrate.py b/scripts/migration/0.8.2/migrate.py
new file mode 100755
index 00000000..159905ef
--- /dev/null
+++ b/scripts/migration/0.8.2/migrate.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# migrate.py
+
+"""
+Migrate CouchDB schema to Soledad 0.8.2 schema.
+
+******************************************************************************
+                               ATTENTION!
+
+  - This script does not backup your data for you. Make sure you have a backup
+    copy of your databases before running this script!
+
+  - Make sure you turn off any service that might be writing to the couch
+    database before running this script.
+
+******************************************************************************
+
+Run this script with the --help option to see command line options.
+
+See the README.md file for more information.
+"""
+
+import datetime
+import logging
+import os
+
+from argparse import ArgumentParser
+
+from migrate_couch_schema import migrate
+
+
+TARGET_VERSION = '0.8.2'
+DEFAULT_COUCH_URL = 'http://127.0.0.1:5984'
+
+
+#
+# command line args and execution
+#
+
+def _configure_logger(log_file):
+    """
+    Send all log output to a file.
+
+    :param log_file: Path of the log file. If empty or None, a timestamped
+                     file inside the `log/` folder next to this script is
+                     used instead.
+    """
+    if not log_file:
+        # splitext copes with any number of dots in the script file name,
+        # while unpacking the result of split('.') raises ValueError unless
+        # there is exactly one.
+        fname, _ = os.path.splitext(os.path.basename(__file__))
+        timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+        filename = 'soledad_%s_%s_%s.log' \
+            % (TARGET_VERSION, fname, timestr)
+        dirname = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), 'log')
+        log_file = os.path.join(dirname, filename)
+    logging.basicConfig(
+        filename=log_file,
+        filemode='a',
+        format='%(asctime)s,%(msecs)d %(levelname)s %(message)s',
+        datefmt='%H:%M:%S',
+        level=logging.DEBUG)
+
+
+def _parse_args():
+    """
+    Parse the command line.
+
+    :return: The parsed arguments, with attributes `couch_url`, `do_migrate`
+             and `log_file`.
+    """
+    parser = ArgumentParser()
+    # --couch-url matches the dash style of the other options; the original
+    # --couch_url spelling is kept as an alias for existing invocations.
+    parser.add_argument(
+        '--couch-url', '--couch_url',
+        dest='couch_url',
+        help='the url for the couch database',
+        default=DEFAULT_COUCH_URL)
+    parser.add_argument(
+        '--do-migrate',
+        help='actually perform the migration (otherwise '
+             'just print what would be done)',
+        action='store_true')
+    parser.add_argument(
+        '--log-file',
+        help='the log file to use')
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = _parse_args()
+    _configure_logger(args.log_file)
+    migrate(args, TARGET_VERSION)
diff --git a/scripts/migration/0.8.2/migrate_couch_schema/__init__.py b/scripts/migration/0.8.2/migrate_couch_schema/__init__.py
new file mode 100644
index 00000000..37e5a525
--- /dev/null
+++ b/scripts/migration/0.8.2/migrate_couch_schema/__init__.py
@@ -0,0 +1,196 @@
+# __init__.py
+"""
+Support functions for migration script.
+"""
+
+import logging
+
+from couchdb import Server
+
+from leap.soledad.common.couch import GENERATION_KEY
+from leap.soledad.common.couch import TRANSACTION_ID_KEY
+from leap.soledad.common.couch import REPLICA_UID_KEY
+from leap.soledad.common.couch import DOC_ID_KEY
+from leap.soledad.common.couch import SCHEMA_VERSION_KEY
+from leap.soledad.common.couch import CONFIG_DOC_ID
+from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
+from leap.soledad.common.couch import SCHEMA_VERSION
+
+
+logger = logging.getLogger(__name__)
+
+
+#
+# support functions
+#
+
+def _get_couch_server(couch_url):
+    """
+    Return a couchdb Server object for the given url.
+    """
+    return Server(couch_url)
+
+
+def _is_migrateable(db):
+    """
+    Return True if the database still holds the old "u1db_config" document.
+    """
+    return db.get('u1db_config') is not None
+
+
+def _get_transaction_log(db):
+    """
+    Read the transaction log from the old "transactions" design document.
+
+    :return: A list of (generation, doc_id, transaction_id) tuples, with
+             generations numbered from 1 in the order the view returns rows.
+    """
+    ddoc_path = ['_design', 'transactions', '_view', 'log']
+    resource = db.resource(*ddoc_path)
+    _, _, data = resource.get_json()
+    return [
+        (gen, row['id'], row['value'])
+        for gen, row in enumerate(data['rows'], 1)
+    ]
+
+
+def _get_user_dbs(server):
+    """
+    Return the names of all databases whose name starts with "user-".
+    """
+    return [dbname for dbname in server if dbname.startswith('user-')]
+
+
+#
+# migration main functions
+#
+
+def migrate(args, target_version):
+    """
+    Migrate every user database found in the couch server.
+
+    :param args: Parsed command line arguments; `couch_url` and `do_migrate`
+                 are read from it.
+    :param target_version: Version string, used in log messages only.
+    """
+    server = _get_couch_server(args.couch_url)
+    logger.info('starting couch schema migration to %s...', target_version)
+    if not args.do_migrate:
+        logger.warning('dry-run: no changes will be made to databases')
+    user_dbs = _get_user_dbs(server)
+    for dbname in user_dbs:
+        db = server[dbname]
+        if not _is_migrateable(db):
+            logger.warning("skipping user db: %s", dbname)
+            continue
+        logger.info("starting migration of user db: %s", dbname)
+        _migrate_user_db(db, args.do_migrate)
+        logger.info("finished migration of user db: %s", dbname)
+    logger.info('finished couch schema migration to %s', target_version)
+
+
+def _migrate_user_db(db, do_migrate):
+    """
+    Run all migration steps on one user database.
+
+    The transaction log is migrated first because reading it needs the
+    "transactions" design document, which the last step deletes.
+    """
+    _migrate_transaction_log(db, do_migrate)
+    _migrate_config_doc(db, do_migrate)
+    _migrate_sync_docs(db, do_migrate)
+    _delete_design_docs(db, do_migrate)
+
+
+def _migrate_transaction_log(db, do_migrate):
+    """
+    Write one "gen-XXXXXXXXXX" document per transaction log entry.
+
+    Nothing is written unless do_migrate is True.
+    """
+    transaction_log = _get_transaction_log(db)
+    for gen, doc_id, trans_id in transaction_log:
+        gen_doc_id = 'gen-%s' % str(gen).zfill(10)
+        doc = {
+            '_id': gen_doc_id,
+            GENERATION_KEY: gen,
+            DOC_ID_KEY: doc_id,
+            TRANSACTION_ID_KEY: trans_id,
+        }
+        logger.info('creating gen doc: %s', gen_doc_id)
+        if do_migrate:
+            db.save(doc)
+
+
+def _migrate_config_doc(db, do_migrate):
+    """
+    Copy the replica uid from "u1db_config" into the new config document.
+
+    The old document is deleted only after the new one has been saved.
+    Nothing is written unless do_migrate is True.
+    """
+    old_doc = db['u1db_config']
+    new_doc = {
+        '_id': CONFIG_DOC_ID,
+        REPLICA_UID_KEY: old_doc[REPLICA_UID_KEY],
+        SCHEMA_VERSION_KEY: SCHEMA_VERSION,
+    }
+    logger.info("moving config doc: %s -> %s",
+                old_doc['_id'], new_doc['_id'])
+    if do_migrate:
+        db.save(new_doc)
+        db.delete(old_doc)
+
+
+def _migrate_sync_docs(db, do_migrate):
+    """
+    Move every "u1db_sync_X" document to a new sync document for X.
+
+    Nothing is written unless do_migrate is True.
+    """
+    old_prefix = 'u1db_sync_'
+    view = db.view(
+        '_all_docs',
+        startkey='u1db_sync',
+        endkey='u1db_synd',
+        include_docs='true')
+    for row in view.rows:
+        old_doc = row['doc']
+        old_id = old_doc['_id']
+        # the key range above is wider than the prefix, so ids that merely
+        # start with "u1db_sync" are not sync docs and must be left alone
+        if not old_id.startswith(old_prefix):
+            continue
+        # strip the leading prefix only: str.replace would also remove any
+        # later occurrence of it inside the replica uid
+        replica_uid = old_id[len(old_prefix):]
+        new_id = "%s%s" % (SYNC_DOC_ID_PREFIX, replica_uid)
+        new_doc = {
+            '_id': new_id,
+            GENERATION_KEY: old_doc['generation'],
+            TRANSACTION_ID_KEY: old_doc['transaction_id'],
+            REPLICA_UID_KEY: replica_uid,
+        }
+        logger.info("moving sync doc: %s -> %s", old_id, new_id)
+        if do_migrate:
+            db.save(new_doc)
+            db.delete(old_doc)
+
+
+def _delete_design_docs(db, do_migrate):
+    """
+    Delete the "docs", "syncs" and "transactions" design documents.
+
+    A design document that does not exist is skipped, since db.get()
+    returns None for it and None cannot be passed on to db.delete().
+    Nothing is deleted unless do_migrate is True.
+    """
+    for ddoc in ['docs', 'syncs', 'transactions']:
+        doc_id = '_design/%s' % ddoc
+        doc = db.get(doc_id)
+        if doc is None:
+            logger.warning("design doc not found, skipping: %s", doc_id)
+            continue
+        logger.info("deleting design doc: %s", doc_id)
+        if do_migrate:
+            db.delete(doc)
diff --git a/scripts/migration/0.8.2/setup.py b/scripts/migration/0.8.2/setup.py
new file mode 100644
index 00000000..0467e932
--- /dev/null
+++ b/scripts/migration/0.8.2/setup.py
@@ -0,0 +1,8 @@
+from setuptools import setup
+from setuptools import find_packages
+
+
+setup(
+    name='migrate_couch_schema',
+    packages=find_packages('.'),
+)
diff --git a/scripts/migration/0.8.2/tests/conftest.py b/scripts/migration/0.8.2/tests/conftest.py
new file mode 100644
index 00000000..92d1e17e
--- /dev/null
+++ b/scripts/migration/0.8.2/tests/conftest.py
@@ -0,0 +1,54 @@
+# conftest.py
+
+"""
+Provide a couch database with content stored in old schema.
+"""
+
+import couchdb
+import pytest
+import uuid
+
+
+COUCH_URL = 'http://127.0.0.1:5984'
+
+transaction_map = """
+function(doc) {
+    if (doc.u1db_transactions)
+        doc.u1db_transactions.forEach(function(t) {
+            emit(t[0],  // use timestamp as key so the results are ordered
+                 t[1]); // value is the transaction_id
+        });
+}
+"""
+
+initial_docs = [
+    {'_id': 'u1db_config', 'replica_uid': 'an-uid'},
+    {'_id': 'u1db_sync_A', 'generation': 0, 'replica_uid': 'A',
+     'transaction_id': ''},
+    {'_id': 'u1db_sync_B', 'generation': 2, 'replica_uid': 'B',
+     'transaction_id': 'X'},
+    {'_id': 'doc1', 'u1db_transactions': [(1, 'trans-1'), (3, 'trans-3')]},
+    {'_id': 'doc2', 'u1db_transactions': [(2, 'trans-2'), (4, 'trans-4')]},
+    {'_id': '_design/docs'},
+    {'_id': '_design/syncs'},
+    {'_id': '_design/transactions', 'views': {'log': {'map': transaction_map}}}
+]
+
+
+@pytest.fixture(scope='function')
+def db(request):
+    """
+    Create a fresh "user-" database filled with old schema documents.
+
+    The database is deleted when the test finishes.
+    """
+    server = couchdb.Server(COUCH_URL)
+    dbname = "user-" + uuid.uuid4().hex
+    db = server.create(dbname)
+    # register cleanup before saving, so a failed save does not leak the db
+    request.addfinalizer(lambda: server.delete(dbname))
+    for doc in initial_docs:
+        # save a copy, since db.save() writes the new _rev into the dict it
+        # is given and initial_docs is shared by every test
+        db.save(dict(doc))
+    return db
diff --git a/scripts/migration/0.8.2/tests/test_migrate.py b/scripts/migration/0.8.2/tests/test_migrate.py
new file mode 100644
index 00000000..10c8b906
--- /dev/null
+++ b/scripts/migration/0.8.2/tests/test_migrate.py
@@ -0,0 +1,70 @@
+# test_migrate.py
+
+"""
+Ensure that the migration script works!
+"""
+
+from migrate_couch_schema import _migrate_user_db
+
+from leap.soledad.common.couch import GENERATION_KEY
+from leap.soledad.common.couch import TRANSACTION_ID_KEY
+from leap.soledad.common.couch import REPLICA_UID_KEY
+from leap.soledad.common.couch import DOC_ID_KEY
+from leap.soledad.common.couch import SCHEMA_VERSION_KEY
+from leap.soledad.common.couch import CONFIG_DOC_ID
+from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
+from leap.soledad.common.couch import SCHEMA_VERSION
+
+
+def test__migrate_user_db(db):
+    """
+    Migrate an old schema database and check every resulting document.
+    """
+    _migrate_user_db(db, True)
+
+    # we should find exactly 6 documents: 2 normal documents and 4 generation
+    # documents
+    view = db.view('_all_docs')
+    assert len(view.rows) == 6
+
+    # ensure that the ids of the documents we found on the database are correct
+    doc_ids = [row.id for row in view.rows]
+    assert 'doc1' in doc_ids
+    assert 'doc2' in doc_ids
+    assert 'gen-0000000001' in doc_ids
+    assert 'gen-0000000002' in doc_ids
+    assert 'gen-0000000003' in doc_ids
+    assert 'gen-0000000004' in doc_ids
+
+    # assert config doc contents
+    config_doc = db.get(CONFIG_DOC_ID)
+    assert config_doc[REPLICA_UID_KEY] == 'an-uid'
+    assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION
+
+    # assert sync docs contents
+    sync_doc_A = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, 'A'))
+    assert sync_doc_A[GENERATION_KEY] == 0
+    assert sync_doc_A[REPLICA_UID_KEY] == 'A'
+    assert sync_doc_A[TRANSACTION_ID_KEY] == ''
+    sync_doc_B = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, 'B'))
+    assert sync_doc_B[GENERATION_KEY] == 2
+    assert sync_doc_B[REPLICA_UID_KEY] == 'B'
+    assert sync_doc_B[TRANSACTION_ID_KEY] == 'X'
+
+    # assert gen docs contents
+    gen_1 = db.get('gen-0000000001')
+    assert gen_1[DOC_ID_KEY] == 'doc1'
+    assert gen_1[GENERATION_KEY] == 1
+    assert gen_1[TRANSACTION_ID_KEY] == 'trans-1'
+    gen_2 = db.get('gen-0000000002')
+    assert gen_2[DOC_ID_KEY] == 'doc2'
+    assert gen_2[GENERATION_KEY] == 2
+    assert gen_2[TRANSACTION_ID_KEY] == 'trans-2'
+    gen_3 = db.get('gen-0000000003')
+    assert gen_3[DOC_ID_KEY] == 'doc1'
+    assert gen_3[GENERATION_KEY] == 3
+    assert gen_3[TRANSACTION_ID_KEY] == 'trans-3'
+    gen_4 = db.get('gen-0000000004')
+    assert gen_4[DOC_ID_KEY] == 'doc2'
+    assert gen_4[GENERATION_KEY] == 4
+    assert gen_4[TRANSACTION_ID_KEY] == 'trans-4'
diff --git a/scripts/migration/0.8.2/tox.ini b/scripts/migration/0.8.2/tox.ini
new file mode 100644
index 00000000..2bb6be4c
--- /dev/null
+++ b/scripts/migration/0.8.2/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+envlist = py27
+
+[testenv]
+commands = py.test {posargs}
+changedir = tests
+deps =
+    pytest
+    couchdb
+    pdbpp
+    -e../../../common
+setenv =
+    TERM=xterm
--
cgit v1.2.3