From 78621bb742cd0a816dc507010743a7d765d84538 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Thu, 10 Aug 2017 00:15:01 -0300 Subject: [feature] add path partitioning to namespaces All blobs were being stored in a single folder when using namespaces; this commit adds path partitioning as discussed in #8882, which should help with a large number of files (each folder will hold a smaller subset, allowing the use of better filesystem walk strategies). Also, the default empty namespace is now called 'default' to prevent it from listing other namespaces' contents. So everything will always use namespaces, with the option to use one explicitly or just fall back to the default one. -- Related: #8882 --- src/leap/soledad/server/_blobs.py | 20 ++++++++++++-------- testing/tests/blobs/test_fs_backend.py | 6 ++++-- testing/tests/server/test_blobs_server.py | 3 ++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/leap/soledad/server/_blobs.py b/src/leap/soledad/server/_blobs.py index 7d51e1b5..dc2c204c 100644 --- a/src/leap/soledad/server/_blobs.py +++ b/src/leap/soledad/server/_blobs.py @@ -135,7 +135,7 @@ class FilesystemBlobsBackend(object): raise NotImplementedError def count(self, user, request, namespace=''): - base_path = self._get_path(user, custom_preffix=namespace) + base_path = self._get_path(user, namespace=namespace) count = 0 for _, _, filenames in os.walk(base_path): count += len(filter(lambda i: not i.endswith('.flags'), filenames)) @@ -144,7 +144,7 @@ class FilesystemBlobsBackend(object): def list_blobs(self, user, request, namespace='', order_by=None, filter_flag=False): blob_ids = [] - base_path = self._get_path(user, custom_preffix=namespace) + base_path = self._get_path(user, namespace=namespace) for root, dirs, filenames in os.walk(base_path): blob_ids += [os.path.join(root, name) for name in filenames if not name.endswith('.flags')] @@ -200,18 +200,22 @@ class FilesystemBlobsBackend(object): raise Exception(err) return desired_path - def 
_get_path(self, user, blob_id='', custom_preffix=''): + def _get_path(self, user, blob_id='', namespace=''): parts = [user] - parts += self._get_preffix(blob_id, custom_preffix) if blob_id: - parts += [blob_id] + namespace = namespace or 'default' + parts += self._get_path_parts(blob_id, namespace) + elif namespace and not blob_id: + parts += [namespace] # namespace path + else: + pass # root path path = os.path.join(self.path, *parts) return self._validate_path(path, user, blob_id) - def _get_preffix(self, blob_id, custom=''): - if custom or not blob_id: + def _get_path_parts(self, blob_id, custom): + if custom and not blob_id: return [custom] - return [blob_id[0], blob_id[0:3], blob_id[0:6]] + return [custom] + [blob_id[0], blob_id[0:3], blob_id[0:6]] + [blob_id] class ImproperlyConfiguredException(Exception): diff --git a/testing/tests/blobs/test_fs_backend.py b/testing/tests/blobs/test_fs_backend.py index f742f702..53f3127d 100644 --- a/testing/tests/blobs/test_fs_backend.py +++ b/testing/tests/blobs/test_fs_backend.py @@ -88,13 +88,15 @@ class FilesystemBackendTestCase(unittest.TestCase): backend = _blobs.FilesystemBlobsBackend() backend.path = '/somewhere/' path = backend._get_path('user', 'blob_id', '') - self.assertEquals(path, '/somewhere/user/b/blo/blob_i/blob_id') + expected = '/somewhere/user/default/b/blo/blob_i/blob_id' + self.assertEquals(path, expected) def test_get_path_custom(self): backend = _blobs.FilesystemBlobsBackend() backend.path = '/somewhere/' path = backend._get_path('user', 'blob_id', 'wonderland') - self.assertEquals(path, '/somewhere/user/wonderland/blob_id') + expected = '/somewhere/user/wonderland/b/blo/blob_i/blob_id' + self.assertEquals(expected, path) def test_get_path_namespace_traversal_raises(self): backend = _blobs.FilesystemBlobsBackend() diff --git a/testing/tests/server/test_blobs_server.py b/testing/tests/server/test_blobs_server.py index cf370147..f57a1d2d 100644 --- a/testing/tests/server/test_blobs_server.py +++ 
b/testing/tests/server/test_blobs_server.py @@ -161,7 +161,8 @@ class BlobServerTestCase(unittest.TestCase): yield manager._encrypt_and_upload('blob_id2', BytesIO("2")) blobs_list = yield manager.remote_list(order_by='date') self.assertEquals(['blob_id1', 'blob_id2'], blobs_list) - self.__touch(self.tempdir, 'user', 'b', 'blo', 'blob_i', 'blob_id1') + parts = ['user', 'default', 'b', 'blo', 'blob_i', 'blob_id1'] + self.__touch(self.tempdir, *parts) blobs_list = yield manager.remote_list(order_by='+date') self.assertEquals(['blob_id2', 'blob_id1'], blobs_list) blobs_list = yield manager.remote_list(order_by='-date') -- cgit v1.2.3