diff options
Diffstat (limited to 'scripts/migration')
| -rw-r--r-- | scripts/migration/0.9.0/.gitignore | 1 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/README.md | 73 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/log/.empty | 0 | ||||
| -rwxr-xr-x | scripts/migration/0.9.0/migrate.py | 117 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/migrate_couch_schema/__init__.py | 192 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/requirements.pip | 3 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/setup.py | 8 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/tests/conftest.py | 54 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/tests/test_migrate.py | 67 | ||||
| -rw-r--r-- | scripts/migration/0.9.0/tox.ini | 13 | 
10 files changed, 528 insertions, 0 deletions
diff --git a/scripts/migration/0.9.0/.gitignore b/scripts/migration/0.9.0/.gitignore new file mode 100644 index 00000000..6115c109 --- /dev/null +++ b/scripts/migration/0.9.0/.gitignore @@ -0,0 +1 @@ +log/* diff --git a/scripts/migration/0.9.0/README.md b/scripts/migration/0.9.0/README.md new file mode 100644 index 00000000..919a5235 --- /dev/null +++ b/scripts/migration/0.9.0/README.md @@ -0,0 +1,73 @@ +CouchDB schema migration to Soledad 0.8.2 +========================================= + +Migrate couch database schema from <= 0.8.1 version to 0.8.2 version. + + +ATTENTION! +---------- + +  - This script does not backup your data for you. Make sure you have a backup +    copy of your databases before running this script! + +  - Make sure you turn off any service that might be writing to the couch +    database before running this script. + + +Usage +----- + +To see what the script would do, run: + +    ./migrate.py + +To actually run the migration, add the --do-migrate command line option: + +    ./migrate.py --do-migrate + +See command line options: + +    ./migrate.py --help + + +Log +--- + +If you don't pass a --log-file command line option, a log will be written to +the `log/` folder. + + +Differences between old and new couch schema +-------------------------------------------- + +The differences between old and new schemas are: + +    - Transaction metadata was previously stored inside each document, and we +      used design doc view/list functions to retrieve that information. Now, +      transaction metadata is stored in documents with special ids +      (gen-0000000001 to gen-9999999999). + +    - Database replica config metadata was stored in a document called +      "u1db_config", and now we store it in the "_local/config" document. + +    - Sync metadata was previously stored in documents with id +      "u1db_sync_<source-replica-id>", and now are stored in +      "_local/sync_<source-replica-id>". + +    - The new schema doesn't make use of any design documents. + + +What does this script do +------------------------ + +- List all databases starting with "user-". +- For each one, do: +  - Check if it contains the old "u1db_config" document. +  - If it doesn't, skip this db. +  - Get the transaction log using the usual design doc view/list functions. +  - Write a new "gen-X" document for each line on the transaction log. +  - Get the "u1db_config" document, create a new one in "_local/config", +    Delete the old one. +  - List all "u1db_sync_X" documents, create new ones in "_local/sync_X", +    delete the old ones. +  - Delete unused design documents. diff --git a/scripts/migration/0.9.0/log/.empty b/scripts/migration/0.9.0/log/.empty new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/scripts/migration/0.9.0/log/.empty diff --git a/scripts/migration/0.9.0/migrate.py b/scripts/migration/0.9.0/migrate.py new file mode 100755 index 00000000..6ad5bc2d --- /dev/null +++ b/scripts/migration/0.9.0/migrate.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# migrate.py + +""" +Migrate CouchDB schema to Soledad 0.8.2 schema. + +****************************************************************************** +                               ATTENTION! + +  - This script does not backup your data for you. Make sure you have a backup +    copy of your databases before running this script! + +  - Make sure you turn off any service that might be writing to the couch +    database before running this script. + +****************************************************************************** + +Run this script with the --help option to see command line options. + +See the README.md file for more information. +""" + +import datetime +import logging +import netrc +import os + +from argparse import ArgumentParser + +from leap.soledad.server import load_configuration + +from migrate_couch_schema import migrate + + +TARGET_VERSION = '0.8.2' +DEFAULT_COUCH_URL = 'http://127.0.0.1:5984' +CONF = load_configuration('/etc/soledad/soledad-server.conf') +NETRC_PATH = CONF['soledad-server']['admin_netrc'] + + +# +# command line args and execution +# + +def _configure_logger(log_file, level=logging.INFO): +    if not log_file: +        fname, _ = os.path.basename(__file__).split('.') +        timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S') +        filename = 'soledad_%s_%s_%s.log' \ +                   % (TARGET_VERSION, fname, timestr) +        dirname = os.path.join( +            os.path.dirname(os.path.realpath(__file__)), 'log') +        log_file = os.path.join(dirname, filename) +    logging.basicConfig( +        filename=log_file, +        filemode='a', +        format='%(asctime)s,%(msecs)d %(levelname)s %(message)s', +        datefmt='%H:%M:%S', +        level=level) + + +def _default_couch_url(): +    if not os.path.exists(NETRC_PATH): +        return DEFAULT_COUCH_URL +    parsed_netrc = netrc.netrc(NETRC_PATH) +    host, (login, _, password) = parsed_netrc.hosts.items()[0] +    url = ('http://%(login)s:%(password)s@%(host)s:5984' % { +           'login': login, +           'password': password, +           'host': host}) +    return url + + +def _parse_args(): +    parser = ArgumentParser() +    parser.add_argument( +        '--couch_url', +        help='the url for the couch database', +        default=_default_couch_url()) +    parser.add_argument( +        '--do-migrate', +        help='actually perform the migration (otherwise ' +             'just print what would be done)', +        action='store_true') +    parser.add_argument( +        '--log-file', +        help='the log file to use') +    parser.add_argument( +        '--pdb', action='store_true', +        help='escape to pdb shell in case of exception') +    parser.add_argument( +        '--verbose', action='store_true', +        help='output detailed information about the migration ' +             '(i.e. include debug messages)') +    return parser.parse_args() + + +def _enable_pdb(): +    import sys +    from IPython.core import ultratb +    sys.excepthook = ultratb.FormattedTB( +        mode='Verbose', color_scheme='Linux', call_pdb=1) + + +if __name__ == '__main__': +    args = _parse_args() +    if args.pdb: +        _enable_pdb() +    _configure_logger( +        args.log_file, +        level=logging.DEBUG if args.verbose else logging.INFO) +    logger = logging.getLogger(__name__) +    try: +        migrate(args, TARGET_VERSION) +    except: +        logger.exception('Fatal error on migrate script!') +        raise diff --git a/scripts/migration/0.9.0/migrate_couch_schema/__init__.py b/scripts/migration/0.9.0/migrate_couch_schema/__init__.py new file mode 100644 index 00000000..f0b456e4 --- /dev/null +++ b/scripts/migration/0.9.0/migrate_couch_schema/__init__.py @@ -0,0 +1,192 @@ +# __init__.py +""" +Support functions for migration script. +""" + +import logging + +from couchdb import Server +from couchdb import ResourceNotFound +from couchdb import ResourceConflict + +from leap.soledad.common.couch import GENERATION_KEY +from leap.soledad.common.couch import TRANSACTION_ID_KEY +from leap.soledad.common.couch import REPLICA_UID_KEY +from leap.soledad.common.couch import DOC_ID_KEY +from leap.soledad.common.couch import SCHEMA_VERSION_KEY +from leap.soledad.common.couch import CONFIG_DOC_ID +from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX +from leap.soledad.common.couch import SCHEMA_VERSION + + +logger = logging.getLogger(__name__) + + +# +# support functions +# + +def _get_couch_server(couch_url): +    return Server(couch_url) + + +def _is_migrateable(db): +    config_doc = db.get('u1db_config') +    return bool(config_doc) + + +def _get_transaction_log(db): +    ddoc_path = ['_design', 'transactions', '_view', 'log'] +    resource = db.resource(*ddoc_path) +    try: +        _, _, data = resource.get_json() +    except ResourceNotFound: +        logger.warning( +            '[%s] missing transactions design document, ' +            'can\'t get transaction log.' % db.name) +        return [] +    rows = data['rows'] +    transaction_log = [] +    gen = 1 +    for row in rows: +        transaction_log.append((gen, row['id'], row['value'])) +        gen += 1 +    return transaction_log + + +def _get_user_dbs(server): +    user_dbs = filter(lambda dbname: dbname.startswith('user-'), server) +    return user_dbs + + +# +# migration main functions +# + +def migrate(args, target_version): +    server = _get_couch_server(args.couch_url) +    logger.info('starting couch schema migration to %s' % target_version) +    if not args.do_migrate: +        logger.warning('dry-run: no changes will be made to databases') +    user_dbs = _get_user_dbs(server) +    for dbname in user_dbs: +        db = server[dbname] +        if not _is_migrateable(db): +            logger.warning("[%s] skipping not migrateable user db" % dbname) +            continue +        logger.info("[%s] starting migration of user db" % dbname) +        try: +            _migrate_user_db(db, args.do_migrate) +            logger.info("[%s] finished migration of user db" % dbname) +        except: +            logger.exception('[%s] error migrating user db' % dbname) +            logger.error('continuing with next database.') +    logger.info('finished couch schema migration to %s' % target_version) + + +def _migrate_user_db(db, do_migrate): +    _migrate_transaction_log(db, do_migrate) +    _migrate_sync_docs(db, do_migrate) +    _delete_design_docs(db, do_migrate) +    _migrate_config_doc(db, do_migrate) + + +def _migrate_transaction_log(db, do_migrate): +    transaction_log = _get_transaction_log(db) +    for gen, doc_id, trans_id in transaction_log: +        gen_doc_id = 'gen-%s' % str(gen).zfill(10) +        doc = { +            '_id': gen_doc_id, +            GENERATION_KEY: gen, +            DOC_ID_KEY: doc_id, +            TRANSACTION_ID_KEY: trans_id, +        } +        logger.debug('[%s] creating gen doc: %s' % (db.name, gen_doc_id)) +        if do_migrate: +            try: +                db.save(doc) +            except ResourceConflict: +                # this gen document already exists. if documents are the same, +                # continue with migration. +                existing_doc = db.get(gen_doc_id) +                for key in [GENERATION_KEY, DOC_ID_KEY, TRANSACTION_ID_KEY]: +                    if existing_doc[key] != doc[key]: +                        raise + + +def _migrate_config_doc(db, do_migrate): +    old_doc = db['u1db_config'] +    new_doc = { +        '_id': CONFIG_DOC_ID, +        REPLICA_UID_KEY: old_doc[REPLICA_UID_KEY], +        SCHEMA_VERSION_KEY: SCHEMA_VERSION, +    } +    logger.info("[%s] moving config doc: %s -> %s" +                % (db.name, old_doc['_id'], new_doc['_id'])) +    if do_migrate: +        # the config doc must not exist, otherwise we would have skipped this +        # database. +        db.save(new_doc) +        db.delete(old_doc) + + +def _migrate_sync_docs(db, do_migrate): +    logger.info('[%s] moving sync docs' % db.name) +    view = db.view( +        '_all_docs', +        startkey='u1db_sync', +        endkey='u1db_synd', +        include_docs='true') +    for row in view.rows: +        old_doc = row['doc'] +        old_id = old_doc['_id'] + +        # older schemas used different documents with ids starting with +        # "u1db_sync" to store sync-related data: +        # +        #   - u1db_sync_log: was used to store the whole sync log. +        #   - u1db_sync_state: was used to store the sync state. +        # +        # if any of these documents exist in the current db, they are leftover +        # from previous migrations, and should just be removed. +        if old_id in ['u1db_sync_log', 'u1db_sync_state']: +            logger.info('[%s] removing leftover document: %s' +                        % (db.name, old_id)) +            if do_migrate: +                db.delete(old_doc) +            continue + +        replica_uid = old_id.replace('u1db_sync_', '') +        new_id = "%s%s" % (SYNC_DOC_ID_PREFIX, replica_uid) +        new_doc = { +            '_id': new_id, +            GENERATION_KEY: old_doc['generation'], +            TRANSACTION_ID_KEY: old_doc['transaction_id'], +            REPLICA_UID_KEY: replica_uid, +        } +        logger.debug("[%s] moving sync doc: %s -> %s" +                     % (db.name, old_id, new_id)) +        if do_migrate: +            try: +                db.save(new_doc) +            except ResourceConflict: +                # this sync document already exists. if documents are the same, +                # continue with migration. +                existing_doc = db.get(new_id) +                for key in [GENERATION_KEY, TRANSACTION_ID_KEY, +                            REPLICA_UID_KEY]: +                    if existing_doc[key] != new_doc[key]: +                        raise +            db.delete(old_doc) + + +def _delete_design_docs(db, do_migrate): +    for ddoc in ['docs', 'syncs', 'transactions']: +        doc_id = '_design/%s' % ddoc +        doc = db.get(doc_id) +        if doc: +            logger.info("[%s] deleting design doc: %s" % (db.name, doc_id)) +            if do_migrate: +                db.delete(doc) +        else: +            logger.warning("[%s] design doc not found: %s" % (db.name, doc_id)) diff --git a/scripts/migration/0.9.0/requirements.pip b/scripts/migration/0.9.0/requirements.pip new file mode 100644 index 00000000..ea22a1a4 --- /dev/null +++ b/scripts/migration/0.9.0/requirements.pip @@ -0,0 +1,3 @@ +couchdb +leap.soledad.common==0.9.0 +leap.soledad.server==0.9.0 diff --git a/scripts/migration/0.9.0/setup.py b/scripts/migration/0.9.0/setup.py new file mode 100644 index 00000000..0467e932 --- /dev/null +++ b/scripts/migration/0.9.0/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup +from setuptools import find_packages + + +setup( +    name='migrate_couch_schema', +    packages=find_packages('.'), +) diff --git a/scripts/migration/0.9.0/tests/conftest.py b/scripts/migration/0.9.0/tests/conftest.py new file mode 100644 index 00000000..61f6c7ee --- /dev/null +++ b/scripts/migration/0.9.0/tests/conftest.py @@ -0,0 +1,54 @@ +# conftest.py + +""" +Provide a couch database with content stored in old schema. +""" + +import couchdb +import pytest +import uuid + + +COUCH_URL = 'http://127.0.0.1:5984' + +transaction_map = """ +function(doc) { +    if (doc.u1db_transactions) +        doc.u1db_transactions.forEach(function(t) { +            emit(t[0],  // use timestamp as key so the results are ordered +                 t[1]); // value is the transaction_id +        }); +} +""" + +initial_docs = [ +    {'_id': 'u1db_config', 'replica_uid': 'an-uid'}, +    {'_id': 'u1db_sync_A', 'generation': 0, 'replica_uid': 'A', +     'transaction_id': ''}, +    {'_id': 'u1db_sync_B', 'generation': 2, 'replica_uid': 'B', +     'transaction_id': 'X'}, +    {'_id': 'doc1', 'u1db_transactions': [(1, 'trans-1'), (3, 'trans-3')]}, +    {'_id': 'doc2', 'u1db_transactions': [(2, 'trans-2'), (4, 'trans-4')]}, +    {'_id': '_design/docs'}, +    {'_id': '_design/syncs'}, +    {'_id': '_design/transactions', +     'views': {'log': {'map': transaction_map}}}, +    # add some data from previous interrupted migration +    {'_id': '_local/sync_A', 'gen': 0, 'trans_id': '', 'replica_uid': 'A'}, +    {'_id': 'gen-0000000002', +     'gen': 2, 'trans_id': 'trans-2', 'doc_id': 'doc2'}, +    # the following should be removed if found in the dbs +    {'_id': 'u1db_sync_log'}, +    {'_id': 'u1db_sync_state'}, +] + + +@pytest.fixture(scope='function') +def db(request): +    server = couchdb.Server(COUCH_URL) +    dbname = "user-" + uuid.uuid4().hex +    db = server.create(dbname) +    for doc in initial_docs: +        db.save(doc) +    request.addfinalizer(lambda: server.delete(dbname)) +    return db diff --git a/scripts/migration/0.9.0/tests/test_migrate.py b/scripts/migration/0.9.0/tests/test_migrate.py new file mode 100644 index 00000000..10c8b906 --- /dev/null +++ b/scripts/migration/0.9.0/tests/test_migrate.py @@ -0,0 +1,67 @@ +# test_migrate.py + +""" +Ensure that the migration script works! +""" + +from migrate_couch_schema import _migrate_user_db + +from leap.soledad.common.couch import GENERATION_KEY +from leap.soledad.common.couch import TRANSACTION_ID_KEY +from leap.soledad.common.couch import REPLICA_UID_KEY +from leap.soledad.common.couch import DOC_ID_KEY +from leap.soledad.common.couch import SCHEMA_VERSION_KEY +from leap.soledad.common.couch import CONFIG_DOC_ID +from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX +from leap.soledad.common.couch import SCHEMA_VERSION + + +def test__migrate_user_db(db): +    _migrate_user_db(db, True) + +    # we should find exactly 6 documents: 2 normal documents and 4 generation +    # documents +    view = db.view('_all_docs') +    assert len(view.rows) == 6 + +    # ensure that the ids of the documents we found on the database are correct +    doc_ids = map(lambda doc: doc.id, view.rows) +    assert 'doc1' in doc_ids +    assert 'doc2' in doc_ids +    assert 'gen-0000000001' in doc_ids +    assert 'gen-0000000002' in doc_ids +    assert 'gen-0000000003' in doc_ids +    assert 'gen-0000000004' in doc_ids + +    # assert config doc contents +    config_doc = db.get(CONFIG_DOC_ID) +    assert config_doc[REPLICA_UID_KEY] == 'an-uid' +    assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION + +    # assert sync docs contents +    sync_doc_A = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, 'A')) +    assert sync_doc_A[GENERATION_KEY] == 0 +    assert sync_doc_A[REPLICA_UID_KEY] == 'A' +    assert sync_doc_A[TRANSACTION_ID_KEY] == '' +    sync_doc_B = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, 'B')) +    assert sync_doc_B[GENERATION_KEY] == 2 +    assert sync_doc_B[REPLICA_UID_KEY] == 'B' +    assert sync_doc_B[TRANSACTION_ID_KEY] == 'X' + +    # assert gen docs contents +    gen_1 = db.get('gen-0000000001') +    assert gen_1[DOC_ID_KEY] == 'doc1' +    assert gen_1[GENERATION_KEY] == 1 +    assert gen_1[TRANSACTION_ID_KEY] == 'trans-1' +    gen_2 = db.get('gen-0000000002') +    assert gen_2[DOC_ID_KEY] == 'doc2' +    assert gen_2[GENERATION_KEY] == 2 +    assert gen_2[TRANSACTION_ID_KEY] == 'trans-2' +    gen_3 = db.get('gen-0000000003') +    assert gen_3[DOC_ID_KEY] == 'doc1' +    assert gen_3[GENERATION_KEY] == 3 +    assert gen_3[TRANSACTION_ID_KEY] == 'trans-3' +    gen_4 = db.get('gen-0000000004') +    assert gen_4[DOC_ID_KEY] == 'doc2' +    assert gen_4[GENERATION_KEY] == 4 +    assert gen_4[TRANSACTION_ID_KEY] == 'trans-4' diff --git a/scripts/migration/0.9.0/tox.ini b/scripts/migration/0.9.0/tox.ini new file mode 100644 index 00000000..2bb6be4c --- /dev/null +++ b/scripts/migration/0.9.0/tox.ini @@ -0,0 +1,13 @@ +[tox] +envlist = py27 + +[testenv] +commands = py.test {posargs} +changedir = tests +deps = +    pytest +    couchdb +    pdbpp +    -e../../../common +setenv = +    TERM=xterm  | 
