summaryrefslogtreecommitdiff
path: root/scripts/migration/0.9.0
diff options
context:
space:
mode:
authordrebs <drebs@riseup.net>2017-09-07 18:12:51 -0300
committerdrebs <drebs@riseup.net>2017-09-07 18:12:51 -0300
commitea9bed110bdc3c8044cce77f52869aac4f89a8d6 (patch)
treef5d17a5f936624ef4c0592d87ae6507177751b73 /scripts/migration/0.9.0
parent1b994c717e2e970ad665c49642459c1e34459b28 (diff)
[pkg] improve naming of directory of migration script
Diffstat (limited to 'scripts/migration/0.9.0')
-rw-r--r--scripts/migration/0.9.0/README.md87
-rw-r--r--scripts/migration/0.9.0/log/.empty0
-rwxr-xr-xscripts/migration/0.9.0/migrate.py117
-rw-r--r--scripts/migration/0.9.0/migrate_couch_schema/__init__.py221
-rw-r--r--scripts/migration/0.9.0/requirements.pip3
-rw-r--r--scripts/migration/0.9.0/setup.py8
-rw-r--r--scripts/migration/0.9.0/tests/conftest.py54
-rw-r--r--scripts/migration/0.9.0/tests/test_migrate.py67
-rw-r--r--scripts/migration/0.9.0/tox.ini13
9 files changed, 0 insertions, 570 deletions
diff --git a/scripts/migration/0.9.0/README.md b/scripts/migration/0.9.0/README.md
deleted file mode 100644
index ceb53bb0..00000000
--- a/scripts/migration/0.9.0/README.md
+++ /dev/null
@@ -1,87 +0,0 @@
-CouchDB schema migration script: from soledad-server < 0.9.0 to >= 0.9.0
-========================================================================
-
-Starting with Soledad Server 0.9.0, the CouchDB database schema was changed to
-improve speed of the server side storage backend. Because of that, this script
-has to be run for all Leap providers that used to provide email using Soledad
-Server < 0.9.0.
-
-If you never provided email with Leap, you don't need to run this script.
-
-
-ATTENTION!
-----------
-
- - This script does not back up your data for you. Make sure you have a backup
- copy of your databases before running this script!
-
- - Make sure you turn off any service that might be writing to the couch user
- databases before running this script. From the Leap side, these would be
- Leap MX in the "mx" node and Soledad Server in the "soledad" node.
-
-
-Usage
------
-
-When you run the script, you will see no output. All the output will be logged
-to files, as explained in the Log section below.
-
-To see command line options, run:
-
- ./migrate.py --help
-
-To see what the script would do, run the following and check the logs
-afterwards:
-
- ./migrate.py
-
-To actually run the migration, add the --do-migrate command line option:
-
- ./migrate.py --do-migrate
-
-
-Log
----
-
-The script will be installed in ``/usr/share/soledad-server/migration/0.9.0``,
-and will log the results of any run by default to the ``log/`` subdirectory of
-that folder (i.e. ``/usr/share/soledad-server/migration/0.9.0/log``).
-
-If you don't pass a ``--log-file`` command line option, a log will be written
-to the log folder as described above.
-
-
-Differences between old and new couch schema
---------------------------------------------
-
-The differences between old and new schemas are:
-
- - Transaction metadata was previously stored inside each document, and we
- used design doc view/list functions to retrieve that information. Now,
- transaction metadata is stored in documents with special ids
- (gen-0000000001 to gen-9999999999).
-
- - Database replica config metadata was stored in a document called
- "u1db_config", and now we store it in the "_local/config" document.
-
- - Sync metadata was previously stored in documents with id
- "u1db_sync_<source-replica-id>", and is now stored in
- "_local/sync_<source-replica-id>".
-
- - The new schema doesn't make use of any design documents.
-
-
-What does this script do
-------------------------
-
-- List all databases starting with "user-".
-- For each one, do:
- - Check if it contains the old "u1db_config" document.
- - If it doesn't, skip this db.
- - Get the transaction log using the usual design doc view/list functions.
- - Write a new "gen-X" document for each line on the transaction log.
- - Get the "u1db_config" document, create a new one in "_local/config",
- and delete the old one.
- - List all "u1db_sync_X" documents, create new ones in "_local/sync_X",
- delete the old ones.
- - Delete unused design documents.
diff --git a/scripts/migration/0.9.0/log/.empty b/scripts/migration/0.9.0/log/.empty
deleted file mode 100644
index e69de29b..00000000
--- a/scripts/migration/0.9.0/log/.empty
+++ /dev/null
diff --git a/scripts/migration/0.9.0/migrate.py b/scripts/migration/0.9.0/migrate.py
deleted file mode 100755
index 5c49f5b4..00000000
--- a/scripts/migration/0.9.0/migrate.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-# migrate.py
-
-"""
-Migrate CouchDB schema to version 1 (soledad-server >= 0.9.0).
-
-******************************************************************************
- ATTENTION!
-
- - This script does not backup your data for you. Make sure you have a backup
- copy of your databases before running this script!
-
- - Make sure you turn off any service that might be writing to the couch
- database before running this script.
-
-******************************************************************************
-
-Run this script with the --help option to see command line options.
-
-See the README.md file for more information.
-"""
-
-import datetime
-import logging
-import netrc
-import os
-
-from argparse import ArgumentParser
-
-from leap.soledad.server import get_config
-
-from migrate_couch_schema import migrate
-
-
# Schema release this script migrates to; it ends up in the log messages and
# in the name of the default log file.
TARGET_VERSION = '0.9'

# CouchDB endpoint used when the admin netrc file is not present.
DEFAULT_COUCH_URL = 'http://127.0.0.1:5984'

# Server configuration, and the admin netrc path it points at.
CONF = get_config()
NETRC_PATH = CONF['admin_netrc']
-
-
-#
-# command line args and execution
-#
-
def _configure_logger(log_file, level=logging.INFO):
    """
    Send the log records of this run to a file.

    :param log_file: Path of the file to log to. When empty or None, a
                     timestamped file named after this script is used,
                     inside the ``log`` directory that sits next to the
                     script.
    :param level: Minimum level of the records written to the file.
    """
    if not log_file:
        # splitext() copes with script names holding more than one dot, on
        # which a two-way unpack of split('.') raises ValueError.
        fname, _ = os.path.splitext(os.path.basename(__file__))
        timestr = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
        filename = 'soledad_%s_%s_%s.log' % (TARGET_VERSION, fname, timestr)
        dirname = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'log')
        log_file = os.path.join(dirname, filename)
    logging.basicConfig(
        filename=log_file,
        filemode='a',
        # zero-pad the milliseconds, otherwise 5 ms past the second is
        # rendered as the ambiguous "12:00:01,5".
        format='%(asctime)s,%(msecs)03d %(levelname)s %(message)s',
        datefmt='%H:%M:%S',
        level=level)
-
-
def _default_couch_url(netrc_path=None):
    """
    Build the default couch url out of the admin netrc file.

    :param netrc_path: Path of the netrc file to read. Defaults to the
                       module-level NETRC_PATH.
    :return: A url carrying the login and password of one host entry of the
             netrc file (the first one yielded by the parsed hosts mapping),
             or DEFAULT_COUCH_URL when the file does not exist or lists no
             host at all.
    """
    if netrc_path is None:
        netrc_path = NETRC_PATH
    if not os.path.exists(netrc_path):
        return DEFAULT_COUCH_URL
    hosts = netrc.netrc(netrc_path).hosts
    if not hosts:
        # an empty netrc file carries no credentials to build a url from
        return DEFAULT_COUCH_URL
    # dict views cannot be indexed on python 3, so the first item is taken
    # through an iterator instead of items()[0].
    host, (login, _, password) = next(iter(hosts.items()))
    return 'http://%(login)s:%(password)s@%(host)s:5984' % {
        'login': login,
        'password': password,
        'host': host}
-
-
def _parse_args(argv=None):
    """
    Parse the command line of the migration script.

    :param argv: List of arguments to parse. When None, argparse falls back
                 to the arguments of the running process.
    :return: A namespace with the ``couch_url``, ``do_migrate``,
             ``log_file``, ``pdb`` and ``verbose`` attributes.
    """
    parser = ArgumentParser()
    parser.add_argument(
        # the hyphenated spelling matches every other option; the historical
        # underscore spelling is kept so existing invocations keep working.
        '--couch-url', '--couch_url',
        dest='couch_url',
        help='the url for the couch database',
        default=_default_couch_url())
    parser.add_argument(
        '--do-migrate',
        # nothing is printed by this script: dry-run results go to the log
        help='actually perform the migration (otherwise '
             'just log what would be done)',
        action='store_true')
    parser.add_argument(
        '--log-file',
        help='the log file to use')
    parser.add_argument(
        '--pdb', action='store_true',
        help='escape to pdb shell in case of exception')
    parser.add_argument(
        '--verbose', action='store_true',
        help='output detailed information about the migration '
             '(i.e. include debug messages)')
    return parser.parse_args(argv)
-
-
def _enable_pdb():
    """
    Make uncaught exceptions drop into a debugger shell.

    IPython's verbose traceback hook is used when IPython can be imported.
    Otherwise a hook built on the standard library's pdb is installed, so
    that the --pdb option does not die with an ImportError where IPython is
    not installed.
    """
    import sys
    try:
        from IPython.core import ultratb
    except ImportError:
        import pdb
        import traceback

        def _post_mortem(exc_type, exc_value, exc_tb):
            # show the traceback first, then open the debugger on its frame
            traceback.print_exception(exc_type, exc_value, exc_tb)
            pdb.post_mortem(exc_tb)

        sys.excepthook = _post_mortem
        return
    sys.excepthook = ultratb.FormattedTB(
        mode='Verbose', color_scheme='Linux', call_pdb=1)
-
-
def _main():
    """
    Entry point of the script: parse options, set up logging and migrate.

    Any error raised by the migration is written to the log file together
    with its traceback and then propagated.
    """
    args = _parse_args()
    if args.pdb:
        _enable_pdb()
    log_level = logging.DEBUG if args.verbose else logging.INFO
    _configure_logger(args.log_file, level=log_level)
    logger = logging.getLogger(__name__)
    try:
        migrate(args, TARGET_VERSION)
    except Exception:
        # only real errors are reported as fatal; an operator interrupt
        # (KeyboardInterrupt) or SystemExit passes through untouched.
        logger.exception('Fatal error on migrate script!')
        raise


if __name__ == '__main__':
    _main()
diff --git a/scripts/migration/0.9.0/migrate_couch_schema/__init__.py b/scripts/migration/0.9.0/migrate_couch_schema/__init__.py
deleted file mode 100644
index 1e51eccd..00000000
--- a/scripts/migration/0.9.0/migrate_couch_schema/__init__.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# __init__.py
-"""
-Support functions for migration script.
-"""
-
-import logging
-
-from couchdb import Server
-from couchdb import ResourceNotFound
-from couchdb import ResourceConflict
-
-from leap.soledad.common.couch import GENERATION_KEY
-from leap.soledad.common.couch import TRANSACTION_ID_KEY
-from leap.soledad.common.couch import REPLICA_UID_KEY
-from leap.soledad.common.couch import DOC_ID_KEY
-from leap.soledad.common.couch import SCHEMA_VERSION_KEY
-from leap.soledad.common.couch import CONFIG_DOC_ID
-from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
-from leap.soledad.common.couch import SCHEMA_VERSION
-
-
-logger = logging.getLogger(__name__)
-
-
-#
-# support functions
-#
-
def _get_couch_server(couch_url):
    """
    Return a couchdb ``Server`` object built from the given url.
    """
    server = Server(couch_url)
    return server
-
-
def _has_u1db_config_doc(db):
    """
    Tell whether the database still holds the old "u1db_config" document.

    The answer is the truthiness of whatever ``db.get`` returns for that id.
    """
    return True if db.get('u1db_config') else False
-
-
def _get_transaction_log(db):
    """
    Read the transaction log of a database that uses the old schema.

    The log is fetched from the "log" view of the "transactions" design
    document. Generations are assigned sequentially, starting at 1, in the
    order in which the view returns its rows.

    :param db: The database to read from.
    :return: A list of (generation, doc_id, transaction_id) tuples, or an
             empty list when the view cannot be found.
    """
    ddoc_path = ['_design', 'transactions', '_view', 'log']
    resource = db.resource(*ddoc_path)
    try:
        _, _, data = resource.get_json()
    except ResourceNotFound:
        logger.warning(
            "[%s] missing transactions design document, "
            "can't get transaction log.", db.name)
        return []
    # each row carries the document id in 'id' and the transaction id in
    # 'value'; enumerate() supplies the generation number.
    return [
        (gen, row['id'], row['value'])
        for gen, row in enumerate(data['rows'], 1)
    ]
-
-
def _get_user_dbs(server):
    """
    List the names of the user databases found in the server.

    :param server: An iterable yielding database names.
    :return: A list with the names that start with "user-", in iteration
             order. A real list is returned (rather than the lazy object
             ``filter`` gives on python 3) so it can be traversed more than
             once.
    """
    return [dbname for dbname in server if dbname.startswith('user-')]
-
-
-#
-# migration main functions
-#
-
def _report_missing_u1db_config_doc(dbname, db):
    """
    Log the reason why a database is being skipped.

    The new-style config document is looked up and one message is logged
    depending on what is found: no config document at all (warning), a
    config document without schema version (error), a schema version that
    differs from the expected one (error), or a matching schema version
    (info).

    :param dbname: Name of the database, used to prefix the message.
    :param db: The database to inspect.
    """
    config_doc = db.get(CONFIG_DOC_ID)

    if not config_doc:
        logger.warning(
            "[%s] no '%s' or '%s' documents found, possibly an empty db? I "
            "don't know what to do with this db, so I am skipping it."
            % (dbname, 'u1db_config', CONFIG_DOC_ID))
        return

    if SCHEMA_VERSION_KEY not in config_doc:
        logger.error(
            "[%s] '%s' document exists, but has no schema version "
            "information in it. I don't know how to migrate such a db, "
            "so I am skipping it." % (dbname, CONFIG_DOC_ID))
        return

    found = config_doc[SCHEMA_VERSION_KEY]
    if found != SCHEMA_VERSION:
        logger.error(
            "[%s] '%s' document exists, but schema versions don't "
            "match (expected %r, found %r instead). I don't know "
            "how to migrate such a db, so I am skipping it."
            % (dbname, CONFIG_DOC_ID, SCHEMA_VERSION, found))
        return

    logger.info(
        "[%s] '%s' document exists, and schema versions match "
        "(expected %r and found %r). This database reports to be "
        "using the new schema version, so I am skipping it."
        % (dbname, CONFIG_DOC_ID, SCHEMA_VERSION, found))
-
-
def migrate(args, target_version):
    """
    Migrate every user database of a couch server to the new schema.

    Databases without the old "u1db_config" document are reported and
    skipped. An error while migrating one database is logged and the run
    continues with the next database.

    :param args: Parsed command line options; ``couch_url`` and
                 ``do_migrate`` are read from it.
    :param target_version: Version string used in the log messages.
    """
    server = _get_couch_server(args.couch_url)
    logger.info('starting couch schema migration to %s' % target_version)
    if not args.do_migrate:
        logger.warning('dry-run: no changes will be made to databases')
    for dbname in _get_user_dbs(server):
        db = server[dbname]
        if not _has_u1db_config_doc(db):
            _report_missing_u1db_config_doc(dbname, db)
            continue
        logger.info("[%s] starting migration of user db" % dbname)
        try:
            _migrate_user_db(db, args.do_migrate)
        except Exception:
            # a failure in one database must not stop the others. catching
            # Exception (and not everything) leaves KeyboardInterrupt and
            # SystemExit alone, so the operator can still abort the run.
            logger.exception('[%s] error migrating user db' % dbname)
            logger.error('continuing with next database.')
        else:
            logger.info("[%s] finished migration of user db" % dbname)
    logger.info('finished couch schema migration to %s' % target_version)
-
-
def _migrate_user_db(db, do_migrate):
    """
    Run every migration step on one user database, in a fixed order.

    NOTE(review): the config document is moved last. ``migrate`` skips
    databases that lack "u1db_config" and ``_migrate_config_doc`` deletes
    that document, so this ordering presumably is what lets an interrupted
    run be resumed -- keep it unless that is confirmed not to matter.

    :param db: The database to migrate.
    :param do_migrate: When false, steps only log what they would do.
    """
    steps = (
        _migrate_transaction_log,
        _migrate_sync_docs,
        _delete_design_docs,
        _migrate_config_doc,
    )
    for step in steps:
        step(db, do_migrate)
-
-
def _migrate_transaction_log(db, do_migrate):
    """
    Write one "gen-XXXXXXXXXX" document per entry of the transaction log.

    A generation document that already exists is accepted as long as its
    generation, document id and transaction id equal the ones about to be
    written; otherwise the conflict is propagated.

    :param db: The database to migrate.
    :param do_migrate: When false, only log the documents that would be
                       created.
    """
    compared_keys = [GENERATION_KEY, DOC_ID_KEY, TRANSACTION_ID_KEY]
    for gen, doc_id, trans_id in _get_transaction_log(db):
        gen_doc_id = 'gen-%s' % str(gen).zfill(10)
        logger.debug('[%s] creating gen doc: %s' % (db.name, gen_doc_id))
        if not do_migrate:
            continue
        new_doc = {
            '_id': gen_doc_id,
            GENERATION_KEY: gen,
            DOC_ID_KEY: doc_id,
            TRANSACTION_ID_KEY: trans_id,
        }
        try:
            db.save(new_doc)
        except ResourceConflict:
            # the gen document is already there: carry on only if it holds
            # the very same data.
            stored = db.get(gen_doc_id)
            if any(stored[key] != new_doc[key] for key in compared_keys):
                raise
-
-
def _migrate_config_doc(db, do_migrate):
    """
    Move the replica config from "u1db_config" to the new config document.

    The new document gets the replica uid of the old one plus the current
    schema version; the old document is then deleted.

    :param db: The database to migrate.
    :param do_migrate: When false, only log what would be done.
    :raise ResourceConflict: When the new config document already exists
                             with different contents.
    """
    old_doc = db['u1db_config']
    new_doc = {
        '_id': CONFIG_DOC_ID,
        REPLICA_UID_KEY: old_doc[REPLICA_UID_KEY],
        SCHEMA_VERSION_KEY: SCHEMA_VERSION,
    }
    logger.info("[%s] moving config doc: %s -> %s"
                % (db.name, old_doc['_id'], new_doc['_id']))
    if do_migrate:
        try:
            db.save(new_doc)
        except ResourceConflict:
            # the new config doc may be left over from a run interrupted
            # after writing it but before deleting the old one. as done for
            # gen and sync docs, carry on only if the documents are the
            # same, so that such a database can finish its migration.
            existing_doc = db.get(CONFIG_DOC_ID)
            for key in [REPLICA_UID_KEY, SCHEMA_VERSION_KEY]:
                if existing_doc[key] != new_doc[key]:
                    raise
        db.delete(old_doc)
-
-
def _migrate_sync_docs(db, do_migrate):
    """
    Move every "u1db_sync_<replica-uid>" document to its new location.

    For each old sync document a new one is written under
    SYNC_DOC_ID_PREFIX + replica uid and the old one is deleted. The
    leftover "u1db_sync_log" and "u1db_sync_state" documents are just
    removed. A new sync document that already exists is accepted when its
    contents equal the ones about to be written.

    :param db: The database to migrate.
    :param do_migrate: When false, only log what would be done.
    :raise ResourceConflict: When a new sync document already exists with
                             different contents.
    """
    old_prefix = 'u1db_sync_'
    logger.info('[%s] moving sync docs' % db.name)
    view = db.view(
        '_all_docs',
        startkey='u1db_sync',
        endkey='u1db_synd',
        include_docs='true')
    for row in view.rows:
        old_doc = row['doc']
        old_id = old_doc['_id']

        # older schemas used different documents with ids starting with
        # "u1db_sync" to store sync-related data:
        #
        #   - u1db_sync_log: was used to store the whole sync log.
        #   - u1db_sync_state: was used to store the sync state.
        #
        # if any of these documents exist in the current db, they are
        # leftover from previous migrations, and should just be removed.
        if old_id in ['u1db_sync_log', 'u1db_sync_state']:
            logger.info('[%s] removing leftover document: %s'
                        % (db.name, old_id))
            if do_migrate:
                db.delete(old_doc)
            continue

        # the key range queried above is wider than the prefix: leave alone
        # anything that is not really a "u1db_sync_<replica-uid>" document.
        if not old_id.startswith(old_prefix):
            logger.warning('[%s] skipping unexpected document: %s'
                           % (db.name, old_id))
            continue

        # strip the leading prefix only: str.replace() would also remove
        # any later occurrence of it inside the replica uid.
        replica_uid = old_id[len(old_prefix):]
        new_id = "%s%s" % (SYNC_DOC_ID_PREFIX, replica_uid)
        new_doc = {
            '_id': new_id,
            GENERATION_KEY: old_doc['generation'],
            TRANSACTION_ID_KEY: old_doc['transaction_id'],
            REPLICA_UID_KEY: replica_uid,
        }
        logger.debug("[%s] moving sync doc: %s -> %s"
                     % (db.name, old_id, new_id))
        if do_migrate:
            try:
                db.save(new_doc)
            except ResourceConflict:
                # this sync document already exists. if documents are the
                # same, continue with migration.
                existing_doc = db.get(new_id)
                for key in [GENERATION_KEY, TRANSACTION_ID_KEY,
                            REPLICA_UID_KEY]:
                    if existing_doc[key] != new_doc[key]:
                        raise
            db.delete(old_doc)
-
-
def _delete_design_docs(db, do_migrate):
    """
    Remove the "docs", "syncs" and "transactions" design documents.

    A design document that is not found is reported with a warning.

    :param db: The database to clean up.
    :param do_migrate: When false, only log the documents that would be
                       deleted.
    """
    for name in ('docs', 'syncs', 'transactions'):
        doc_id = '_design/%s' % name
        doc = db.get(doc_id)
        if not doc:
            logger.warning("[%s] design doc not found: %s" % (db.name, doc_id))
            continue
        logger.info("[%s] deleting design doc: %s" % (db.name, doc_id))
        if do_migrate:
            db.delete(doc)
diff --git a/scripts/migration/0.9.0/requirements.pip b/scripts/migration/0.9.0/requirements.pip
deleted file mode 100644
index ea22a1a4..00000000
--- a/scripts/migration/0.9.0/requirements.pip
+++ /dev/null
@@ -1,3 +0,0 @@
-couchdb
-leap.soledad.common==0.9.0
-leap.soledad.server==0.9.0
diff --git a/scripts/migration/0.9.0/setup.py b/scripts/migration/0.9.0/setup.py
deleted file mode 100644
index 0467e932..00000000
--- a/scripts/migration/0.9.0/setup.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from setuptools import setup
-from setuptools import find_packages
-
-
# Packaging metadata for the ``migrate_couch_schema`` support package.
setup(name='migrate_couch_schema', packages=find_packages('.'))
diff --git a/scripts/migration/0.9.0/tests/conftest.py b/scripts/migration/0.9.0/tests/conftest.py
deleted file mode 100644
index 61f6c7ee..00000000
--- a/scripts/migration/0.9.0/tests/conftest.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# conftest.py
-
-"""
-Provide a couch database with content stored in old schema.
-"""
-
-import couchdb
-import pytest
-import uuid
-
-
# couch server the test databases are created in
COUCH_URL = 'http://127.0.0.1:5984'

# map function installed as the "log" view of the "transactions" design doc
transaction_map = """
function(doc) {
    if (doc.u1db_transactions)
        doc.u1db_transactions.forEach(function(t) {
            emit(t[0], // use timestamp as key so the results are ordered
                 t[1]); // value is the transaction_id
        });
}
"""

# documents saved, in this order, into every test database
initial_docs = [
    # replica config and sync state, old schema
    {'_id': 'u1db_config', 'replica_uid': 'an-uid'},
    {'_id': 'u1db_sync_A', 'generation': 0, 'replica_uid': 'A',
     'transaction_id': ''},
    {'_id': 'u1db_sync_B', 'generation': 2, 'replica_uid': 'B',
     'transaction_id': 'X'},
    # regular documents carrying their own transaction metadata
    {'_id': 'doc1', 'u1db_transactions': [(1, 'trans-1'), (3, 'trans-3')]},
    {'_id': 'doc2', 'u1db_transactions': [(2, 'trans-2'), (4, 'trans-4')]},
    # design documents of the old schema
    {'_id': '_design/docs'},
    {'_id': '_design/syncs'},
    {'_id': '_design/transactions',
     'views': {'log': {'map': transaction_map}}},
    # add some data from previous interrupted migration
    {'_id': '_local/sync_A', 'gen': 0, 'trans_id': '', 'replica_uid': 'A'},
    {'_id': 'gen-0000000002',
     'gen': 2, 'trans_id': 'trans-2', 'doc_id': 'doc2'},
    # the following should be removed if found in the dbs
    {'_id': 'u1db_sync_log'},
    {'_id': 'u1db_sync_state'},
]
-
-
@pytest.fixture(scope='function')
def db(request):
    """
    Provide a freshly created user database filled with ``initial_docs``.

    The database gets a random "user-<hex>" name and is deleted from the
    server when the test is over.
    """
    server = couchdb.Server(COUCH_URL)
    dbname = "user-" + uuid.uuid4().hex
    db = server.create(dbname)
    # register the cleanup before populating, so that the database is
    # removed even when saving one of the initial documents fails.
    request.addfinalizer(lambda: server.delete(dbname))
    for doc in initial_docs:
        # save a copy: saving writes the revision back into the dict it is
        # given, and the module-level documents are shared by every test.
        db.save(dict(doc))
    return db
diff --git a/scripts/migration/0.9.0/tests/test_migrate.py b/scripts/migration/0.9.0/tests/test_migrate.py
deleted file mode 100644
index 10c8b906..00000000
--- a/scripts/migration/0.9.0/tests/test_migrate.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# test_migrate.py
-
-"""
-Ensure that the migration script works!
-"""
-
-from migrate_couch_schema import _migrate_user_db
-
-from leap.soledad.common.couch import GENERATION_KEY
-from leap.soledad.common.couch import TRANSACTION_ID_KEY
-from leap.soledad.common.couch import REPLICA_UID_KEY
-from leap.soledad.common.couch import DOC_ID_KEY
-from leap.soledad.common.couch import SCHEMA_VERSION_KEY
-from leap.soledad.common.couch import CONFIG_DOC_ID
-from leap.soledad.common.couch import SYNC_DOC_ID_PREFIX
-from leap.soledad.common.couch import SCHEMA_VERSION
-
-
def test__migrate_user_db(db):
    """
    Migrate the fixture database and check every document left in it.
    """
    _migrate_user_db(db, True)

    # we should find exactly 6 documents: 2 normal documents and 4 generation
    # documents
    view = db.view('_all_docs')
    assert len(view.rows) == 6

    # ensure that the ids of the documents we found on the database are
    # correct. a set is built because map() gives a one-shot iterator on
    # python 3, on which repeated "in" checks depend on row order.
    doc_ids = set(row.id for row in view.rows)
    assert doc_ids == set([
        'doc1',
        'doc2',
        'gen-0000000001',
        'gen-0000000002',
        'gen-0000000003',
        'gen-0000000004',
    ])

    # assert config doc contents
    config_doc = db.get(CONFIG_DOC_ID)
    assert config_doc[REPLICA_UID_KEY] == 'an-uid'
    assert config_doc[SCHEMA_VERSION_KEY] == SCHEMA_VERSION

    # assert sync docs contents: (replica uid, generation, transaction id)
    expected_sync_docs = [
        ('A', 0, ''),
        ('B', 2, 'X'),
    ]
    for replica_uid, gen, trans_id in expected_sync_docs:
        sync_doc = db.get('%s%s' % (SYNC_DOC_ID_PREFIX, replica_uid))
        assert sync_doc[GENERATION_KEY] == gen
        assert sync_doc[REPLICA_UID_KEY] == replica_uid
        assert sync_doc[TRANSACTION_ID_KEY] == trans_id

    # assert gen docs contents: (gen doc id, doc id, generation, trans id)
    expected_gen_docs = [
        ('gen-0000000001', 'doc1', 1, 'trans-1'),
        ('gen-0000000002', 'doc2', 2, 'trans-2'),
        ('gen-0000000003', 'doc1', 3, 'trans-3'),
        ('gen-0000000004', 'doc2', 4, 'trans-4'),
    ]
    for gen_doc_id, doc_id, gen, trans_id in expected_gen_docs:
        gen_doc = db.get(gen_doc_id)
        assert gen_doc[DOC_ID_KEY] == doc_id
        assert gen_doc[GENERATION_KEY] == gen
        assert gen_doc[TRANSACTION_ID_KEY] == trans_id
diff --git a/scripts/migration/0.9.0/tox.ini b/scripts/migration/0.9.0/tox.ini
deleted file mode 100644
index 2bb6be4c..00000000
--- a/scripts/migration/0.9.0/tox.ini
+++ /dev/null
@@ -1,13 +0,0 @@
-[tox]
-envlist = py27
-
-[testenv]
-commands = py.test {posargs}
-changedir = tests
-deps =
- pytest
- couchdb
- pdbpp
- -e../../../common
-setenv =
- TERM=xterm