client/src/leap/soledad/client/http_target/fetch_protocol.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150

# -*- coding: utf-8 -*-
# fetch_protocol.py
# Copyright (C) 2016 LEAP
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
from functools import partial
from cStringIO import StringIO
from twisted.web._newclient import ResponseDone
from leap.soledad.common.l2db import errors
from leap.soledad.common.l2db.remote import utils
from .support import ReadBodyProtocol
from .support import readBody


class DocStreamReceiver(ReadBodyProtocol):
    r"""
    A protocol implementation that can parse incoming data from server based
    on a line format specified on u1db implementation. Except that we split doc
    attributes from content to ease parsing and increment throughput for larger
    documents.

    Expected stream layout (one item per line, lines separated by the
    delimiter, every item but the brackets followed by a comma):

    [\r\n
    {metadata},\r\n
    {doc_info},\r\n
    {content},\r\n
    ...
    {doc_info},\r\n
    {content},\r\n
    ]
    """

    def __init__(self, response, deferred, doc_reader):
        """
        Set up the receiver state.

        @param response: The HTTP response whose body is being read; may be
            falsy, in which case status and message are None and headers is
            an empty dict.
        @param deferred: Deferred that receives a BrokenSyncStream failure
            from connectionLost when the stream is found to be broken.
        @param doc_reader: Callable invoked once per received document as
            doc_reader(doc_info, content_or_None, total).
        """
        self.deferred = deferred
        self.status = response.code if response else None
        self.message = response.phrase if response else None
        self.headers = response.headers if response else {}
        self.delimiter = '\r\n'
        self._doc_reader = doc_reader
        self.reset()

    def reset(self):
        """
        Reset the parsing state: line counter, pending-data buffer and the
        flag recording whether the closing ']' has been seen.
        """
        self._line = 0
        self._buffer = StringIO()
        self._properly_finished = False

    def connectionLost(self, reason):
        """
        Deliver the accumulated response bytes to the waiting L{Deferred}, if
        the response body has been completely received without error.

        When reason is a ResponseDone failure the raw metadata line is used as
        the response body; otherwise finish() is consulted, and a
        BrokenSyncStream raised by it is sent to the deferred's errback.
        """
        # NOTE(review): the ']' terminator is only enforced through finish()
        # on the non-ResponseDone path -- confirm this is intended.
        try:
            if reason.check(ResponseDone):
                self.dataBuffer = self.metadata
            else:
                self.dataBuffer = self.finish()
        except errors.BrokenSyncStream as e:
            return self.deferred.errback(e)
        return ReadBodyProtocol.connectionLost(self, reason)

    def consumeBufferLines(self):
        """
        Consumes lines from buffer and rewind it, writing remaining data
        that didn't form a line back into buffer.

        @return: The list of complete lines found in the buffer, without
            their delimiter.
        """
        # Only the bytes up to the current write position are live data; the
        # buffer is rewound and reused instead of being reallocated.
        content = self._buffer.getvalue()[0:self._buffer.tell()]
        self._buffer.seek(0)
        lines = content.split(self.delimiter)
        # The last piece is whatever followed the final delimiter (possibly
        # empty); keep it buffered until the rest of that line arrives.
        self._buffer.write(lines.pop(-1))
        return lines

    def dataReceived(self, data):
        """
        Buffer incoming data until a line breaks comes in. We check only
        the incoming data for efficiency.
        """
        self._buffer.write(data)
        if '\n' not in data:
            return
        for raw_line in self.consumeBufferLines():
            # Only the first element of the returned pair (the line) is used.
            line, _ = utils.check_and_strip_comma(raw_line)
            self.lineReceived(line)
            self._line += 1

    def lineReceived(self, line):
        r"""
        Protocol implementation.
        0:      [\r\n
        1:      {metadata},\r\n
        (even): {doc_info},\r\n
        (odd):  {data},\r\n
        (last): ]

        @param line: One line of the stream, already stripped of its
            delimiter and trailing comma.

        @raise errors.BrokenSyncStream: If data arrives after the closing
            ']', if the first line is not '[', or if the metadata or a
            doc_info line carries an error.
        """
        if self._properly_finished:
            raise errors.BrokenSyncStream("Reading a finished stream")
        if ']' == line:
            self._properly_finished = True
        elif self._line == 0:
            # Compare by value: identity comparison against a string literal
            # only works by accident of the interpreter's string caching.
            if line != '[':
                raise errors.BrokenSyncStream("Invalid start")
        elif self._line == 1:
            self.metadata = line
            # Substring check on the raw (still unparsed) metadata line.
            if 'error' in self.metadata:
                raise errors.BrokenSyncStream("Error from server: %s" % line)
            self.total = json.loads(line).get('number_of_changes', -1)
        elif (self._line % 2) == 0:
            self.current_doc = json.loads(line)
            if 'error' in self.current_doc:
                raise errors.BrokenSyncStream("Error from server: %s" % line)
        else:
            # Odd line: content for the doc_info parsed on the previous line;
            # blank content is handed over as None.
            self._doc_reader(
                self.current_doc, line.strip() or None, self.total)

    def finish(self):
        """
        Checks that ']' came and stream was properly closed.

        @return: Whatever data is still pending in the buffer.

        @raise errors.BrokenSyncStream: If the closing ']' was never received.
        """
        if not self._properly_finished:
            raise errors.BrokenSyncStream()
        content = self._buffer.getvalue()[0:self._buffer.tell()]
        self._buffer.close()
        return content

def build_body_reader(doc_reader):
    """
    Build a body reader that feeds every document of a sync stream to
    doc_reader.

    @param doc_reader: Function to be called for processing an incoming doc.
        It is invoked by DocStreamReceiver with the parsed doc info (dict),
        the doc content (string, or None when blank) and the total number of
        changes announced by the stream metadata.
    @type doc_reader: function

    @return: A function that can be called by the http Agent to create and
    configure the proper protocol.
    """
    # Pre-bind the document callback so the receiver can be instantiated
    # with just the remaining (response, deferred) arguments.
    receiver_factory = partial(DocStreamReceiver, doc_reader=doc_reader)
    body_reader = partial(readBody, protocolClass=receiver_factory)
    return body_reader