From aaef111ae270cd1b377974b89d950f72099b3a50 Mon Sep 17 00:00:00 2001 From: drebs Date: Thu, 7 Sep 2017 18:16:36 -0300 Subject: fixup! [pkg] improve naming of directory of migration script --- scripts/migration/0.8-to-0.9/README.md | 87 -------- scripts/migration/0.8-to-0.9/log/.empty | 0 scripts/migration/0.8-to-0.9/migrate.py | 117 ----------- .../0.8-to-0.9/migrate_couch_schema/__init__.py | 221 --------------------- scripts/migration/0.8-to-0.9/requirements.pip | 3 - scripts/migration/0.8-to-0.9/setup.py | 8 - scripts/migration/0.8-to-0.9/tests/conftest.py | 54 ----- scripts/migration/0.8-to-0.9/tests/test_migrate.py | 67 ------- scripts/migration/0.8-to-0.9/tox.ini | 13 -- scripts/migration/0.9/README.md | 87 ++++++++ scripts/migration/0.9/log/.empty | 0 scripts/migration/0.9/migrate.py | 117 +++++++++++ .../migration/0.9/migrate_couch_schema/__init__.py | 221 +++++++++++++++++++++ scripts/migration/0.9/requirements.pip | 3 + scripts/migration/0.9/setup.py | 8 + scripts/migration/0.9/tests/conftest.py | 54 +++++ scripts/migration/0.9/tests/test_migrate.py | 67 +++++++ scripts/migration/0.9/tox.ini | 13 ++ 18 files changed, 570 insertions(+), 570 deletions(-) delete mode 100644 scripts/migration/0.8-to-0.9/README.md delete mode 100644 scripts/migration/0.8-to-0.9/log/.empty delete mode 100755 scripts/migration/0.8-to-0.9/migrate.py delete mode 100644 scripts/migration/0.8-to-0.9/migrate_couch_schema/__init__.py delete mode 100644 scripts/migration/0.8-to-0.9/requirements.pip delete mode 100644 scripts/migration/0.8-to-0.9/setup.py delete mode 100644 scripts/migration/0.8-to-0.9/tests/conftest.py delete mode 100644 scripts/migration/0.8-to-0.9/tests/test_migrate.py delete mode 100644 scripts/migration/0.8-to-0.9/tox.ini create mode 100644 scripts/migration/0.9/README.md create mode 100644 scripts/migration/0.9/log/.empty create mode 100755 scripts/migration/0.9/migrate.py create mode 100644 scripts/migration/0.9/migrate_couch_schema/__init__.py create mode 
100644 scripts/migration/0.9/requirements.pip create mode 100644 scripts/migration/0.9/setup.py create mode 100644 scripts/migration/0.9/tests/conftest.py create mode 100644 scripts/migration/0.9/tests/test_migrate.py create mode 100644 scripts/migration/0.9/tox.ini diff --git a/scripts/migration/0.8-to-0.9/README.md b/scripts/migration/0.8-to-0.9/README.md deleted file mode 100644 index ceb53bb0..00000000 --- a/scripts/migration/0.8-to-0.9/README.md +++ /dev/null @@ -1,87 +0,0 @@ -CouchDB schema migration script: from soledad-server < 0.9.0 to >= 0.9.0 -======================================================================== - -Starting with Soledad Server 0.9.0, the CouchDB database schema was changed to -improve speed of the server side storage backend. Because of that, this script -has to be run for all Leap providers that used to provide email using Soledad -Server < 0.9.0. - -If you never provided email with Leap, you don't need to run this script. - - -ATTENTION! ----------- - - - This script does not backup your data for you. Make sure you have a backup - copy of your databases before running this script! - - - Make sure you turn off any service that might be writing to the couch user - databases before running this script. From the Leap side, these would be - Leap MX in the "mx" node and Soledad Server in the "soledad" node. - - -Usage ------ - -When you run the script, you will see no output. All the output will be logged -to files, as explained in the Log section below. - -To see command line options, run: - - ./migrate.py --help - -To see what the script would do, run the following and check the logs -afterwards: - - ./migrate.py - -To actually run the migration, add the --do-migrate command line option: - - ./migrate.py --do-migrate - - -Log ---- - -The script will be installed in ``/usr/share/soledad-server/migration/0.9.0``, -and will log the results of any run by default to the ``logs/`` subdirectory of -that folder (i.e. 
``/usr/share/soledad-server/migration/0.9.0/logs``). - -If you don't pass a ``--log-file`` command line option, a log will be written -to the log folder as described above. - - -Differences between old and new couch schema --------------------------------------------- - -The differences between old and new schemas are: - - - Transaction metadata was previously stored inside each document, and we - used design doc view/list functions to retrieve that information. Now, - transaction metadata is stored in documents with special ids - (gen-0000000001 to gen-9999999999). - - - Database replica config metadata was stored in a document called - "u1db_config", and now we store it in the "_local/config" document. - - - Sync metadata was previously stored in documents with id - "u1db_sync_", and now are stored in - "_local/sync_". - - - The new schema doesn't make use of any design documents. - - -What does this script do ------------------------- - -- List all databases starting with "user-". -- For each one, do: - - Check if it contains the old "u1db_config" document. - - If it doesn't, skip this db. - - Get the transaction log using the usual design doc view/list functions. - - Write a new "gen-X" document for each line on the transaction log. - - Get the "u1db_config" document, create a new one in "_local/config", - Delete the old one. - - List all "u1db_sync_X" documents, create new ones in "_local/sync_X", - delete the old ones. - - Delete unused design documents. diff --git a/scripts/migration/0.8-to-0.9/log/.empty b/scripts/migration/0.8-to-0.9/log/.empty deleted file mode 100644 index e69de29b..00000000 diff --git a/scripts/migration/0.8-to-0.9/migrate.py b/scripts/migration/0.8-to-0.9/migrate.py deleted file mode 100755 index 5c49f5b4..00000000 --- a/scripts/migration/0.8-to-0.9/migrate.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python -# migrate.py - -""" -Migrate CouchDB schema to version 1 (soledad-server >= 0.9.0). 
- -****************************************************************************** - ATTENTION! - - - This script does not backup your data for you. Make sure you have a backup - copy of your databases before running this script! - - - Make sure you turn off any service that might be writing to the couch - database before running this script. - -****************************************************************************** - -Run this script with the --help option to see command line options. - -See the README.md file for more information. -""" - -import datetime -import logging -import netrc -import os - -from argparse import ArgumentParser - -from leap.soledad.server import get_config - -from migrate_couch_schema import migrate - - -TARGET_VERSION = '0.9' -DEFAULT_COUCH_URL = 'http://127.0.0.1:5984' -CONF = get_config() -NETRC_PATH = CONF['admin_netrc'] - - -# -# command line args and execution -# - -def _configure_logger(log_file, level=logging.INFO): - if not log_file: - fname, _ = os.path.basename(__file__).split('.') - timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S') - filename = 'soledad_%s_%s_%s.log' \ - % (TARGET_VERSION, fname, timestr) - dirname = os.path.join( - os.path.dirname(os.path.realpath(__file__)), 'log') - log_file = os.path.join(dirname, filename) - logging.basicConfig( - filename=log_file, - filemode='a', - format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', - datefmt='%H:%M:%S', - level=level) - - -def _default_couch_url(): - if not os.path.exists(NETRC_PATH): - return DEFAULT_COUCH_URL - parsed_netrc = netrc.netrc(NETRC_PATH) - host, (login, _, password) = parsed_netrc.hosts.items()[0] - url = ('http://%(login)s:%(password)s@%(host)s:5984' % { - 'login': login, - 'password': password, - 'host': host}) - return url - - -def _parse_args(): - parser = ArgumentParser() - parser.add_argument( - '--couch_url', - help='the url for the couch database', - default=_default_couch_url()) - parser.add_argument( - '--do-migrate', - 
help='actually perform the migration (otherwise ' - 'just print what would be done)', - action='store_true') - parser.add_argument( - '--log-file', - help='the log file to use') - parser.add_argument( - '--pdb', action='store_true', - help='escape to pdb shell in case of exception') - parser.add_argument( - '--verbose', action='store_true', - help='output detailed information about the migration ' - '(i.e. include debug messages)') - return parser.parse_args() - - -def _enable_pdb(): - import sys - from IPython.core import ultratb - sys.excepthook = ultratb.FormattedTB( - mode='Verbose', color_scheme='Linux', call_pdb=1) - - -if __name__ == '__main__': - args = _parse_args() - if args.pdb: - _enable_pdb() - _configure_logger( - args.log_file, - level=logging.DEBUG if args.verbose else logging.INFO) - logger = logging.getLogger(__name__) - try: - migrate(args, TARGET_VERSION) - except: - logger.exception('Fatal error on migrate script!') - raise diff --git a/scripts/migration/0.8-to-0.9/migrate_couch_schema/__init__.py b/scripts/migration/0.8-to-0.9/migrate_couch_schema/__init__.py deleted file mode 100644 index 1e51eccd..00000000 --- a/scripts/migration/0.8-to-0.9/migrate_couch_schema/__init__.py +++ /dev/null @@ -1,221 +0,0 @@ -# __init__.py -""" -Support functions for migration script. 
-""" - -import logging - -from couchdb import Server -from couchdb import ResourceNotFound -from couchdb import ResourceConflict - -from leap.soledad.common.couch import GENERATION_KEY -from leap.soledad.common.couch import TRANSACTION_ID_KEY -from leap.soledad.common.couch import REPLICA_UID_KEY -from leap.soledad.common.couch import DOC_ID_KEY -from leap.soledad.common.couch import SCHEMA_VERSION_KEY -from leap.soledad.common.couch import CONFIG_DOC_ID -from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX -from leap.soledad.common.couch import SCHEMA_VERSION - - -logger = logging.getLogger(__name__) - - -# -# support functions -# - -def _get_couch_server(couch_url): - return Server(couch_url) - - -def _has_u1db_config_doc(db): - config_doc = db.get('u1db_config') - return bool(config_doc) - - -def _get_transaction_log(db): - ddoc_path = ['_design', 'transactions', '_view', 'log'] - resource = db.resource(*ddoc_path) - try: - _, _, data = resource.get_json() - except ResourceNotFound: - logger.warning( - '[%s] missing transactions design document, ' - 'can\'t get transaction log.' % db.name) - return [] - rows = data['rows'] - transaction_log = [] - gen = 1 - for row in rows: - transaction_log.append((gen, row['id'], row['value'])) - gen += 1 - return transaction_log - - -def _get_user_dbs(server): - user_dbs = filter(lambda dbname: dbname.startswith('user-'), server) - return user_dbs - - -# -# migration main functions -# - -def _report_missing_u1db_config_doc(dbname, db): - config_doc = db.get(CONFIG_DOC_ID) - if not config_doc: - logger.warning( - "[%s] no '%s' or '%s' documents found, possibly an empty db? I " - "don't know what to do with this db, so I am skipping it." - % (dbname, 'u1db_config', CONFIG_DOC_ID)) - else: - if SCHEMA_VERSION_KEY in config_doc: - version = config_doc[SCHEMA_VERSION_KEY] - if version == SCHEMA_VERSION: - logger.info( - "[%s] '%s' document exists, and schema versions match " - "(expected %r and found %r). 
This database reports to be " - "using the new schema version, so I am skipping it." - % (dbname, CONFIG_DOC_ID, SCHEMA_VERSION, version)) - else: - logger.error( - "[%s] '%s' document exists, but schema versions don't " - "match (expected %r, found %r instead). I don't know " - "how to migrate such a db, so I am skipping it." - % (dbname, CONFIG_DOC_ID, SCHEMA_VERSION, version)) - else: - logger.error( - "[%s] '%s' document exists, but has no schema version " - "information in it. I don't know how to migrate such a db, " - "so I am skipping it." % (dbname, CONFIG_DOC_ID)) - - -def migrate(args, target_version): - server = _get_couch_server(args.couch_url) - logger.info('starting couch schema migration to %s' % target_version) - if not args.do_migrate: - logger.warning('dry-run: no changes will be made to databases') - user_dbs = _get_user_dbs(server) - for dbname in user_dbs: - db = server[dbname] - if not _has_u1db_config_doc(db): - _report_missing_u1db_config_doc(dbname, db) - continue - logger.info("[%s] starting migration of user db" % dbname) - try: - _migrate_user_db(db, args.do_migrate) - logger.info("[%s] finished migration of user db" % dbname) - except: - logger.exception('[%s] error migrating user db' % dbname) - logger.error('continuing with next database.') - logger.info('finished couch schema migration to %s' % target_version) - - -def _migrate_user_db(db, do_migrate): - _migrate_transaction_log(db, do_migrate) - _migrate_sync_docs(db, do_migrate) - _delete_design_docs(db, do_migrate) - _migrate_config_doc(db, do_migrate) - - -def _migrate_transaction_log(db, do_migrate): - transaction_log = _get_transaction_log(db) - for gen, doc_id, trans_id in transaction_log: - gen_doc_id = 'gen-%s' % str(gen).zfill(10) - doc = { - '_id': gen_doc_id, - GENERATION_KEY: gen, - DOC_ID_KEY: doc_id, - TRANSACTION_ID_KEY: trans_id, - } - logger.debug('[%s] creating gen doc: %s' % (db.name, gen_doc_id)) - if do_migrate: - try: - db.save(doc) - except ResourceConflict: - 
# this gen document already exists. if documents are the same, - # continue with migration. - existing_doc = db.get(gen_doc_id) - for key in [GENERATION_KEY, DOC_ID_KEY, TRANSACTION_ID_KEY]: - if existing_doc[key] != doc[key]: - raise - - -def _migrate_config_doc(db, do_migrate): - old_doc = db['u1db_config'] - new_doc = { - '_id': CONFIG_DOC_ID, - REPLICA_UID_KEY: old_doc[REPLICA_UID_KEY], - SCHEMA_VERSION_KEY: SCHEMA_VERSION, - } - logger.info("[%s] moving config doc: %s -> %s" - % (db.name, old_doc['_id'], new_doc['_id'])) - if do_migrate: - # the config doc must not exist, otherwise we would have skipped this - # database. - db.save(new_doc) - db.delete(old_doc) - - -def _migrate_sync_docs(db, do_migrate): - logger.info('[%s] moving sync docs' % db.name) - view = db.view( - '_all_docs', - startkey='u1db_sync', - endkey='u1db_synd', - include_docs='true') - for row in view.rows: - old_doc = row['doc'] - old_id = old_doc['_id'] - - # older schemas used different documents with ids starting with - # "u1db_sync" to store sync-related data: - # - # - u1db_sync_log: was used to store the whole sync log. - # - u1db_sync_state: was used to store the sync state. - # - # if any of these documents exist in the current db, they are leftover - # from previous migrations, and should just be removed. - if old_id in ['u1db_sync_log', 'u1db_sync_state']: - logger.info('[%s] removing leftover document: %s' - % (db.name, old_id)) - if do_migrate: - db.delete(old_doc) - continue - - replica_uid = old_id.replace('u1db_sync_', '') - new_id = "%s%s" % (SYNC_DOC_ID_PREFIX, replica_uid) - new_doc = { - '_id': new_id, - GENERATION_KEY: old_doc['generation'], - TRANSACTION_ID_KEY: old_doc['transaction_id'], - REPLICA_UID_KEY: replica_uid, - } - logger.debug("[%s] moving sync doc: %s -> %s" - % (db.name, old_id, new_id)) - if do_migrate: - try: - db.save(new_doc) - except ResourceConflict: - # this sync document already exists. if documents are the same, - # continue with migration. 
- existing_doc = db.get(new_id) - for key in [GENERATION_KEY, TRANSACTION_ID_KEY, - REPLICA_UID_KEY]: - if existing_doc[key] != new_doc[key]: - raise - db.delete(old_doc) - - -def _delete_design_docs(db, do_migrate): - for ddoc in ['docs', 'syncs', 'transactions']: - doc_id = '_design/%s' % ddoc - doc = db.get(doc_id) - if doc: - logger.info("[%s] deleting design doc: %s" % (db.name, doc_id)) - if do_migrate: - db.delete(doc) - else: - logger.warning("[%s] design doc not found: %s" % (db.name, doc_id)) diff --git a/scripts/migration/0.8-to-0.9/requirements.pip b/scripts/migration/0.8-to-0.9/requirements.pip deleted file mode 100644 index a1946833..00000000 --- a/scripts/migration/0.8-to-0.9/requirements.pip +++ /dev/null @@ -1,3 +0,0 @@ -couchdb -leap.soledad.common>=0.9.0 -leap.soledad.server>=0.9.0 diff --git a/scripts/migration/0.8-to-0.9/setup.py b/scripts/migration/0.8-to-0.9/setup.py deleted file mode 100644 index 0467e932..00000000 --- a/scripts/migration/0.8-to-0.9/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -from setuptools import setup -from setuptools import find_packages - - -setup( - name='migrate_couch_schema', - packages=find_packages('.'), -) diff --git a/scripts/migration/0.8-to-0.9/tests/conftest.py b/scripts/migration/0.8-to-0.9/tests/conftest.py deleted file mode 100644 index 61f6c7ee..00000000 --- a/scripts/migration/0.8-to-0.9/tests/conftest.py +++ /dev/null @@ -1,54 +0,0 @@ -# conftest.py - -""" -Provide a couch database with content stored in old schema. 
-""" - -import couchdb -import pytest -import uuid - - -COUCH_URL = 'http://127.0.0.1:5984' - -transaction_map = """ -function(doc) { - if (doc.u1db_transactions) - doc.u1db_transactions.forEach(function(t) { - emit(t[0], // use timestamp as key so the results are ordered - t[1]); // value is the transaction_id - }); -} -""" - -initial_docs = [ - {'_id': 'u1db_config', 'replica_uid': 'an-uid'}, - {'_id': 'u1db_sync_A', 'generation': 0, 'replica_uid': 'A', - 'transaction_id': ''}, - {'_id': 'u1db_sync_B', 'generation': 2, 'replica_uid': 'B', - 'transaction_id': 'X'}, - {'_id': 'doc1', 'u1db_transactions': [(1, 'trans-1'), (3, 'trans-3')]}, - {'_id': 'doc2', 'u1db_transactions': [(2, 'trans-2'), (4, 'trans-4')]}, - {'_id': '_design/docs'}, - {'_id': '_design/syncs'}, - {'_id': '_design/transactions', - 'views': {'log': {'map': transaction_map}}}, - # add some data from previous interrupted migration - {'_id': '_local/sync_A', 'gen': 0, 'trans_id': '', 'replica_uid': 'A'}, - {'_id': 'gen-0000000002', - 'gen': 2, 'trans_id': 'trans-2', 'doc_id': 'doc2'}, - # the following should be removed if found in the dbs - {'_id': 'u1db_sync_log'}, - {'_id': 'u1db_sync_state'}, -] - - -@pytest.fixture(scope='function') -def db(request): - server = couchdb.Server(COUCH_URL) - dbname = "user-" + uuid.uuid4().hex - db = server.create(dbname) - for doc in initial_docs: - db.save(doc) - request.addfinalizer(lambda: server.delete(dbname)) - return db diff --git a/scripts/migration/0.8-to-0.9/tests/test_migrate.py b/scripts/migration/0.8-to-0.9/tests/test_migrate.py deleted file mode 100644 index 10c8b906..00000000 --- a/scripts/migration/0.8-to-0.9/tests/test_migrate.py +++ /dev/null @@ -1,67 +0,0 @@ -# test_migrate.py - -""" -Ensure that the migration script works! 
-""" - -from migrate_couch_schema import _migrate_user_db - -from leap.soledad.common.couch import GENERATION_KEY -from leap.soledad.common.couch import TRANSACTION_ID_KEY -from leap.soledad.common.couch import REPLICA_UID_KEY -from leap.soledad.common.couch import DOC_ID_KEY -from leap.soledad.common.couch import SCHEMA_VERSION_KEY -from leap.soledad.common.couch import CONFIG_DOC_ID -from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX -from leap.soledad.common.couch import SCHEMA_VERSION - - -def test__migrate_user_db(db): - _migrate_user_db(db, True) - - # we should find exactly 6 documents: 2 normal documents and 4 generation - # documents - view = db.view('_all_docs') - assert len(view.rows) == 6 - - # ensure that the ids of the documents we found on the database are correct - doc_ids = map(lambda doc: doc.id, view.rows) - assert 'doc1' in doc_ids - assert 'doc2' in doc_ids - assert 'gen-0000000001' in doc_ids - assert 'gen-0000000002' in doc_ids - assert 'gen-0000000003' in doc_ids - assert 'gen-0000000004' in doc_ids - - # assert config doc contents - config_doc = db.get(CONFIG_DOC_ID) - assert config_doc[REPLICA_UID_KEY] == 'an-uid' - assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION - - # assert sync docs contents - sync_doc_A = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, 'A')) - assert sync_doc_A[GENERATION_KEY] == 0 - assert sync_doc_A[REPLICA_UID_KEY] == 'A' - assert sync_doc_A[TRANSACTION_ID_KEY] == '' - sync_doc_B = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, 'B')) - assert sync_doc_B[GENERATION_KEY] == 2 - assert sync_doc_B[REPLICA_UID_KEY] == 'B' - assert sync_doc_B[TRANSACTION_ID_KEY] == 'X' - - # assert gen docs contents - gen_1 = db.get('gen-0000000001') - assert gen_1[DOC_ID_KEY] == 'doc1' - assert gen_1[GENERATION_KEY] == 1 - assert gen_1[TRANSACTION_ID_KEY] == 'trans-1' - gen_2 = db.get('gen-0000000002') - assert gen_2[DOC_ID_KEY] == 'doc2' - assert gen_2[GENERATION_KEY] == 2 - assert gen_2[TRANSACTION_ID_KEY] == 'trans-2' - gen_3 = 
db.get('gen-0000000003') - assert gen_3[DOC_ID_KEY] == 'doc1' - assert gen_3[GENERATION_KEY] == 3 - assert gen_3[TRANSACTION_ID_KEY] == 'trans-3' - gen_4 = db.get('gen-0000000004') - assert gen_4[DOC_ID_KEY] == 'doc2' - assert gen_4[GENERATION_KEY] == 4 - assert gen_4[TRANSACTION_ID_KEY] == 'trans-4' diff --git a/scripts/migration/0.8-to-0.9/tox.ini b/scripts/migration/0.8-to-0.9/tox.ini deleted file mode 100644 index 2bb6be4c..00000000 --- a/scripts/migration/0.8-to-0.9/tox.ini +++ /dev/null @@ -1,13 +0,0 @@ -[tox] -envlist = py27 - -[testenv] -commands = py.test {posargs} -changedir = tests -deps = - pytest - couchdb - pdbpp - -e../../../common -setenv = - TERM=xterm diff --git a/scripts/migration/0.9/README.md b/scripts/migration/0.9/README.md new file mode 100644 index 00000000..c4556a59 --- /dev/null +++ b/scripts/migration/0.9/README.md @@ -0,0 +1,87 @@ +CouchDB schema migration script: from soledad-server < 0.9.0 to >= 0.9.0 +======================================================================== + +Starting with Soledad Server 0.9.0, the CouchDB database schema was changed to +improve speed of the server side storage backend. Because of that, this script +has to be run for all Leap providers that used to provide email using Soledad +Server < 0.9.0. + +If you never provided email with Leap, you don't need to run this script. + + +ATTENTION! +---------- + + - This script does not backup your data for you. Make sure you have a backup + copy of your databases before running this script! + + - Make sure you turn off any service that might be writing to the couch user + databases before running this script. From the Leap side, these would be + Leap MX in the "mx" node and Soledad Server in the "soledad" node. + + +Usage +----- + +When you run the script, you will see no output. All the output will be logged +to files, as explained in the Log section below. 
+ +To see command line options, run: + + ./migrate.py --help + +To see what the script would do, run the following and check the logs +afterwards: + + ./migrate.py + +To actually run the migration, add the --do-migrate command line option: + + ./migrate.py --do-migrate + + +Log +--- + +The script will be installed in ``/usr/share/soledad-server/migration/0.9``, +and will log the results of any run by default to the ``log/`` subdirectory of +that folder (i.e. ``/usr/share/soledad-server/migration/0.9/log``). + +If you don't pass a ``--log-file`` command line option, a log will be written +to the log folder as described above. + + +Differences between old and new couch schema +-------------------------------------------- + +The differences between old and new schemas are: + + - Transaction metadata was previously stored inside each document, and we + used design doc view/list functions to retrieve that information. Now, + transaction metadata is stored in documents with special ids + (gen-0000000001 to gen-9999999999). + + - Database replica config metadata was stored in a document called + "u1db_config", and now we store it in the "_local/config" document. + + - Sync metadata was previously stored in documents with id + "u1db_sync_X" (X being the replica uid), and is now stored in + "_local/sync_X". + + - The new schema doesn't make use of any design documents. + + +What does this script do +------------------------ + +- List all databases starting with "user-". +- For each one, do: + - Check if it contains the old "u1db_config" document. + - If it doesn't, skip this db. + - Get the transaction log using the usual design doc view/list functions. + - Write a new "gen-X" document for each line on the transaction log. + - Get the "u1db_config" document, create a new one in "_local/config", + and delete the old one. + - List all "u1db_sync_X" documents, create new ones in "_local/sync_X", + delete the old ones. + - Delete unused design documents. 
diff --git a/scripts/migration/0.9/log/.empty b/scripts/migration/0.9/log/.empty new file mode 100644 index 00000000..e69de29b diff --git a/scripts/migration/0.9/migrate.py b/scripts/migration/0.9/migrate.py new file mode 100755 index 00000000..5c49f5b4 --- /dev/null +++ b/scripts/migration/0.9/migrate.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# migrate.py + +""" +Migrate CouchDB schema to version 1 (soledad-server >= 0.9.0). + +****************************************************************************** + ATTENTION! + + - This script does not backup your data for you. Make sure you have a backup + copy of your databases before running this script! + + - Make sure you turn off any service that might be writing to the couch + database before running this script. + +****************************************************************************** + +Run this script with the --help option to see command line options. + +See the README.md file for more information. +""" + +import datetime +import logging +import netrc +import os + +from argparse import ArgumentParser + +from leap.soledad.server import get_config + +from migrate_couch_schema import migrate + + +TARGET_VERSION = '0.9' +DEFAULT_COUCH_URL = 'http://127.0.0.1:5984' +CONF = get_config() +NETRC_PATH = CONF['admin_netrc'] + + +# +# command line args and execution +# + +def _configure_logger(log_file, level=logging.INFO): + if not log_file: + fname, _ = os.path.basename(__file__).split('.') + timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S') + filename = 'soledad_%s_%s_%s.log' \ + % (TARGET_VERSION, fname, timestr) + dirname = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'log') + log_file = os.path.join(dirname, filename) + logging.basicConfig( + filename=log_file, + filemode='a', + format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', + datefmt='%H:%M:%S', + level=level) + + +def _default_couch_url(): + if not os.path.exists(NETRC_PATH): + return DEFAULT_COUCH_URL + 
parsed_netrc = netrc.netrc(NETRC_PATH) + host, (login, _, password) = parsed_netrc.hosts.items()[0] + url = ('http://%(login)s:%(password)s@%(host)s:5984' % { + 'login': login, + 'password': password, + 'host': host}) + return url + + +def _parse_args(): + parser = ArgumentParser() + parser.add_argument( + '--couch_url', + help='the url for the couch database', + default=_default_couch_url()) + parser.add_argument( + '--do-migrate', + help='actually perform the migration (otherwise ' + 'just print what would be done)', + action='store_true') + parser.add_argument( + '--log-file', + help='the log file to use') + parser.add_argument( + '--pdb', action='store_true', + help='escape to pdb shell in case of exception') + parser.add_argument( + '--verbose', action='store_true', + help='output detailed information about the migration ' + '(i.e. include debug messages)') + return parser.parse_args() + + +def _enable_pdb(): + import sys + from IPython.core import ultratb + sys.excepthook = ultratb.FormattedTB( + mode='Verbose', color_scheme='Linux', call_pdb=1) + + +if __name__ == '__main__': + args = _parse_args() + if args.pdb: + _enable_pdb() + _configure_logger( + args.log_file, + level=logging.DEBUG if args.verbose else logging.INFO) + logger = logging.getLogger(__name__) + try: + migrate(args, TARGET_VERSION) + except: + logger.exception('Fatal error on migrate script!') + raise diff --git a/scripts/migration/0.9/migrate_couch_schema/__init__.py b/scripts/migration/0.9/migrate_couch_schema/__init__.py new file mode 100644 index 00000000..1e51eccd --- /dev/null +++ b/scripts/migration/0.9/migrate_couch_schema/__init__.py @@ -0,0 +1,221 @@ +# __init__.py +""" +Support functions for migration script. 
+""" + +import logging + +from couchdb import Server +from couchdb import ResourceNotFound +from couchdb import ResourceConflict + +from leap.soledad.common.couch import GENERATION_KEY +from leap.soledad.common.couch import TRANSACTION_ID_KEY +from leap.soledad.common.couch import REPLICA_UID_KEY +from leap.soledad.common.couch import DOC_ID_KEY +from leap.soledad.common.couch import SCHEMA_VERSION_KEY +from leap.soledad.common.couch import CONFIG_DOC_ID +from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX +from leap.soledad.common.couch import SCHEMA_VERSION + + +logger = logging.getLogger(__name__) + + +# +# support functions +# + +def _get_couch_server(couch_url): + return Server(couch_url) + + +def _has_u1db_config_doc(db): + config_doc = db.get('u1db_config') + return bool(config_doc) + + +def _get_transaction_log(db): + ddoc_path = ['_design', 'transactions', '_view', 'log'] + resource = db.resource(*ddoc_path) + try: + _, _, data = resource.get_json() + except ResourceNotFound: + logger.warning( + '[%s] missing transactions design document, ' + 'can\'t get transaction log.' % db.name) + return [] + rows = data['rows'] + transaction_log = [] + gen = 1 + for row in rows: + transaction_log.append((gen, row['id'], row['value'])) + gen += 1 + return transaction_log + + +def _get_user_dbs(server): + user_dbs = filter(lambda dbname: dbname.startswith('user-'), server) + return user_dbs + + +# +# migration main functions +# + +def _report_missing_u1db_config_doc(dbname, db): + config_doc = db.get(CONFIG_DOC_ID) + if not config_doc: + logger.warning( + "[%s] no '%s' or '%s' documents found, possibly an empty db? I " + "don't know what to do with this db, so I am skipping it." + % (dbname, 'u1db_config', CONFIG_DOC_ID)) + else: + if SCHEMA_VERSION_KEY in config_doc: + version = config_doc[SCHEMA_VERSION_KEY] + if version == SCHEMA_VERSION: + logger.info( + "[%s] '%s' document exists, and schema versions match " + "(expected %r and found %r). 
This database reports to be " + "using the new schema version, so I am skipping it." + % (dbname, CONFIG_DOC_ID, SCHEMA_VERSION, version)) + else: + logger.error( + "[%s] '%s' document exists, but schema versions don't " + "match (expected %r, found %r instead). I don't know " + "how to migrate such a db, so I am skipping it." + % (dbname, CONFIG_DOC_ID, SCHEMA_VERSION, version)) + else: + logger.error( + "[%s] '%s' document exists, but has no schema version " + "information in it. I don't know how to migrate such a db, " + "so I am skipping it." % (dbname, CONFIG_DOC_ID)) + + +def migrate(args, target_version): + server = _get_couch_server(args.couch_url) + logger.info('starting couch schema migration to %s' % target_version) + if not args.do_migrate: + logger.warning('dry-run: no changes will be made to databases') + user_dbs = _get_user_dbs(server) + for dbname in user_dbs: + db = server[dbname] + if not _has_u1db_config_doc(db): + _report_missing_u1db_config_doc(dbname, db) + continue + logger.info("[%s] starting migration of user db" % dbname) + try: + _migrate_user_db(db, args.do_migrate) + logger.info("[%s] finished migration of user db" % dbname) + except: + logger.exception('[%s] error migrating user db' % dbname) + logger.error('continuing with next database.') + logger.info('finished couch schema migration to %s' % target_version) + + +def _migrate_user_db(db, do_migrate): + _migrate_transaction_log(db, do_migrate) + _migrate_sync_docs(db, do_migrate) + _delete_design_docs(db, do_migrate) + _migrate_config_doc(db, do_migrate) + + +def _migrate_transaction_log(db, do_migrate): + transaction_log = _get_transaction_log(db) + for gen, doc_id, trans_id in transaction_log: + gen_doc_id = 'gen-%s' % str(gen).zfill(10) + doc = { + '_id': gen_doc_id, + GENERATION_KEY: gen, + DOC_ID_KEY: doc_id, + TRANSACTION_ID_KEY: trans_id, + } + logger.debug('[%s] creating gen doc: %s' % (db.name, gen_doc_id)) + if do_migrate: + try: + db.save(doc) + except ResourceConflict: + 
def _migrate_config_doc(db, do_migrate):
    """
    Move the contents of the old ``u1db_config`` document to the new config
    document.

    The new document carries the replica uid copied from the old document
    plus the current schema version. The move is always logged; the database
    is only modified when ``do_migrate`` is true.

    :param db: the user database to operate on.
    :param do_migrate: whether to actually write to the database.
    """
    legacy_doc = db['u1db_config']

    replacement = {'_id': CONFIG_DOC_ID}
    replacement[REPLICA_UID_KEY] = legacy_doc[REPLICA_UID_KEY]
    replacement[SCHEMA_VERSION_KEY] = SCHEMA_VERSION

    message = "[%s] moving config doc: %s -> %s" % (
        db.name, legacy_doc['_id'], replacement['_id'])
    logger.info(message)

    if not do_migrate:
        return

    # no conflict handling here: the new config doc is expected not to exist
    # yet, since databases that already have one are presumably skipped
    # before reaching this point.
    db.save(replacement)
    db.delete(legacy_doc)
def _delete_design_docs(db, do_migrate):
    """
    Delete the ``docs``, ``syncs`` and ``transactions`` design documents.

    A design document that is not found only produces a warning. Found
    documents are logged and, when ``do_migrate`` is true, deleted.

    :param db: the user database to operate on.
    :param do_migrate: whether to actually delete the documents.
    """
    design_ids = ['_design/%s' % name
                  for name in ('docs', 'syncs', 'transactions')]
    for design_id in design_ids:
        found = db.get(design_id)
        if not found:
            logger.warning(
                "[%s] design doc not found: %s" % (db.name, design_id))
            continue
        logger.info("[%s] deleting design doc: %s" % (db.name, design_id))
        if do_migrate:
            db.delete(found)
@pytest.fixture(scope='function')
def db(request):
    """
    Provide a freshly created couch database with content stored in the old
    schema.

    A database with a random ``user-<hex>`` name is created on the server at
    ``COUCH_URL``, filled with ``initial_docs`` and deleted again when the
    test finishes.

    :param request: the pytest fixture request, used to register cleanup.
    :return: the populated database.
    """
    server = couchdb.Server(COUCH_URL)
    dbname = "user-" + uuid.uuid4().hex
    database = server.create(dbname)
    # register cleanup before populating, so the database is removed even if
    # saving one of the initial documents fails.
    request.addfinalizer(lambda: server.delete(dbname))
    for doc in initial_docs:
        # save a copy: Database.save() writes the resulting `_rev` back into
        # the dict it receives, which would otherwise leak stale revisions
        # from this database into the next use of the shared `initial_docs`.
        database.save(dict(doc))
    return database
def test__migrate_user_db(db):
    """
    Migrate the fixture database and check the documents it ends up with.
    """
    _migrate_user_db(db, True)

    # we should find exactly 6 documents: 2 normal documents and 4 generation
    # documents
    view = db.view('_all_docs')
    assert len(view.rows) == 6

    # ensure that the ids of the documents we found on the database are
    # correct. the ids are collected into a set because map() returns a
    # one-shot iterator on python 3, which repeated membership tests would
    # consume.
    doc_ids = {row.id for row in view.rows}
    expected_ids = [
        'doc1',
        'doc2',
        'gen-0000000001',
        'gen-0000000002',
        'gen-0000000003',
        'gen-0000000004',
    ]
    for expected_id in expected_ids:
        assert expected_id in doc_ids

    # assert config doc contents
    config_doc = db.get(CONFIG_DOC_ID)
    assert config_doc[REPLICA_UID_KEY] == 'an-uid'
    assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION

    # assert sync docs contents: (replica uid, generation, transaction id)
    expected_sync_docs = [
        ('A', 0, ''),
        ('B', 2, 'X'),
    ]
    for replica_uid, generation, trans_id in expected_sync_docs:
        sync_doc = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, replica_uid))
        assert sync_doc[GENERATION_KEY] == generation
        assert sync_doc[REPLICA_UID_KEY] == replica_uid
        assert sync_doc[TRANSACTION_ID_KEY] == trans_id

    # assert gen docs contents: (gen doc id, doc id, generation, trans id)
    expected_gen_docs = [
        ('gen-0000000001', 'doc1', 1, 'trans-1'),
        ('gen-0000000002', 'doc2', 2, 'trans-2'),
        ('gen-0000000003', 'doc1', 3, 'trans-3'),
        ('gen-0000000004', 'doc2', 4, 'trans-4'),
    ]
    for gen_doc_id, doc_id, generation, trans_id in expected_gen_docs:
        gen_doc = db.get(gen_doc_id)
        assert gen_doc[DOC_ID_KEY] == doc_id
        assert gen_doc[GENERATION_KEY] == generation
        assert gen_doc[TRANSACTION_ID_KEY] == trans_id