~bzr-pqm/bzr/bzr.dev

0.9.2 by Aaron Bentley
Get single-parent comparison working
1
from difflib import SequenceMatcher
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
2
from bzrlib import patiencediff
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
3
0.9.1 by Aaron Bentley
Get trivial case passing
4
class MultiParent(object):
5
0.9.2 by Aaron Bentley
Get single-parent comparison working
6
    def __init__(self, hunks=None):
7
        if hunks is not None:
8
            self.hunks = hunks
9
        else:
10
            self.hunks = []
11
12
    def __repr__(self):
13
        return "MultiParent(%r)" % self.hunks
14
15
    def __eq__(self, other):
16
        if self.__class__ is not other.__class__:
17
            return False
18
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
19
20
    @staticmethod
21
    def from_lines(text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
22
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
23
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
24
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
25
                                                           text)
26
            return matcher.get_matching_blocks()
0.9.2 by Aaron Bentley
Get single-parent comparison working
27
        parent_comparisons = [compare(p) for p in parents]
28
        cur_line = 0
29
        new_text = NewText([])
30
        parent_text = []
31
        block_iter = [iter(i) for i in parent_comparisons]
32
        diff = MultiParent([])
33
        def next_block(p):
34
            try:
35
                return block_iter[p].next()
36
            except StopIteration:
37
                return None
38
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
39
        while cur_line < len(text):
40
            best_match = None
41
            for p, block in enumerate(cur_block):
42
                if block is None:
43
                    continue
44
                i, j, n = block
45
                while j + n < cur_line:
46
                    block = cur_block[p] = next_block(p)
47
                    if block is None:
48
                        break
49
                    i, j, n = block
50
                if block is None:
51
                    continue
52
                if j > cur_line:
53
                    continue
54
                offset = cur_line - j
55
                i += offset
56
                j = cur_line
57
                n -= offset
58
                if n == 0:
59
                    continue
60
                if best_match is None or n > best_match.num_lines:
61
                    best_match = ParentText(p, i, j, n)
62
            if best_match is None:
63
                new_text.lines.append(text[cur_line])
64
                cur_line += 1
65
            else:
66
                if len(new_text.lines) > 0:
67
                    diff.hunks.append(new_text)
68
                    new_text = NewText([])
69
                diff.hunks.append(best_match)
70
                cur_line += best_match.num_lines
71
        if len(new_text.lines) > 0:
72
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
73
        return diff
74
75
    @classmethod
76
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
77
        """Produce a MultiParent from a text and list of parent text"""
0.9.1 by Aaron Bentley
Get trivial case passing
78
        return cls.from_lines(text.splitlines(True),
79
                              [p.splitlines(True) for p in parents])
80
0.9.4 by Aaron Bentley
Start supporting serialization
81
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
82
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
83
        for hunk in self.hunks:
84
            for line in hunk.to_patch():
85
                yield line
86
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
87
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
88
        """Iterate through the hunks, with range indicated
89
90
        kind is "new" or "parent".
91
        for "new", data is a list of lines.
92
        for "parent", data is (parent, parent_start, parent_end)
93
        :return: a generator of (start, end, kind, data)
94
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
95
        start = 0
96
        for hunk in self.hunks:
97
            if isinstance(hunk, NewText):
98
                kind = 'new'
99
                end = start + len(hunk.lines)
100
                data = hunk.lines
101
            else:
102
                kind = 'parent'
103
                start = hunk.child_pos
104
                end = start + hunk.num_lines
105
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
106
                        hunk.num_lines)
107
            yield start, end, kind, data
108
            start = end
109
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
110
    def num_lines(self):
111
        extra_n = 0
112
        for hunk in reversed(self.hunks):
113
            if isinstance(hunk, ParentText):
114
               return hunk.child_pos + hunk.num_lines + extra_n
115
            extra_n += len(hunk.lines)
116
        return extra_n
117
0.9.1 by Aaron Bentley
Get trivial case passing
118
119
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
120
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
121
122
    def __init__(self, lines):
123
        self.lines = lines
124
125
    def __eq__(self, other):
126
        if self.__class__ is not other.__class__:
127
            return False
128
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
129
130
    def __repr__(self):
131
        return 'NewText(%r)' % self.lines
132
0.9.4 by Aaron Bentley
Start supporting serialization
133
    def to_patch(self):
134
        yield 'i %d\n' % len(self.lines)
135
        for line in self.lines:
136
            yield line
137
        yield '\n'
138
0.9.2 by Aaron Bentley
Get single-parent comparison working
139
140
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
141
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
142
143
    def __init__(self, parent, parent_pos, child_pos, num_lines):
144
        self.parent = parent
145
        self.parent_pos = parent_pos
146
        self.child_pos = child_pos
147
        self.num_lines = num_lines
148
149
    def __repr__(self):
150
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
151
            ' %(num_lines)r)' % self.__dict__
152
153
    def __eq__(self, other):
154
        if self.__class__ != other.__class__:
155
            return False
156
        return (self.__dict__ == other.__dict__)
0.9.4 by Aaron Bentley
Start supporting serialization
157
158
    def to_patch(self):
159
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
160
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
161
162
163
class MultiVersionedFile(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
164
    """VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
165
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
166
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
167
        self._diffs = {}
168
        self._lines = {}
169
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
170
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
171
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
172
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
173
174
    def do_snapshot(self, version_id, parent_ids):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
175
        if self.snapshot_interval is None:
176
            return False
177
        if self.max_snapshots is not None and\
178
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
179
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
180
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
181
            return True
182
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
183
            if len(parent_ids) == 0:
184
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
185
            version_ids = parent_ids
186
            parent_ids = []
187
            for version_id in version_ids:
188
                if version_id not in self._snapshots:
189
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
190
        else:
191
            return True
0.9.8 by Aaron Bentley
get add_version working
192
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
193
    def add_version(self, lines, version_id, parent_ids,
194
                    force_snapshot=None):
195
        if force_snapshot is None:
196
            do_snapshot = self.do_snapshot(version_id, parent_ids)
197
        else:
198
            do_snapshot = force_snapshot
199
        if do_snapshot:
200
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
201
            diff = MultiParent([NewText(lines)])
202
        else:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
203
            parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
204
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.8 by Aaron Bentley
get add_version working
205
        self.add_diff(diff, version_id, parent_ids)
206
        self._lines[version_id] = lines
207
208
    def add_diff(self, diff, version_id, parent_ids):
209
        self._diffs[version_id] = diff
210
        self._parents[version_id] = parent_ids
211
212
    def clear_cache(self):
213
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
214
215
    def get_line_list(self, version_ids):
216
        return [self.cache_version(v) for v in version_ids]
217
218
    def cache_version(self, version_id):
219
        try:
220
            return self._lines[version_id]
221
        except KeyError:
222
            pass
223
        diff = self._diffs[version_id]
224
        lines = []
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
225
        reconstructor = _Reconstructor(self._diffs, self._lines,
226
                                       self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
227
        reconstructor.reconstruct_version(lines, version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
228
        self._lines[version_id] = lines
229
        return lines
230
231
232
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
233
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
234
235
    def __init__(self, diffs, lines, parents):
236
        self.diffs = diffs
237
        self.lines = lines
238
        self.parents = parents
239
        self.cursor = {}
240
241
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
242
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
243
        parent_id = self.parents[version_id][parent_text.parent]
244
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
245
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
246
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
247
248
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
249
        """Append lines for the requested version_id range"""
250
        # stack of pending range requests
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
251
        pending_reqs = [(req_version_id, req_start, req_end)]
252
        while len(pending_reqs) > 0:
253
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
254
            # lazily allocate cursors for versions
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
255
            try:
256
                start, end, kind, data, iterator = self.cursor[req_version_id]
257
            except KeyError:
258
                iterator = self.diffs[req_version_id].range_iterator()
259
                start, end, kind, data = iterator.next()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
260
            # find the first hunk relevant to the request
261
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
262
                start, end, kind, data = iterator.next()
263
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
264
            # if the hunk can't satisfy the whole request, split it in two,
265
            # and leave the second half for later.
266
            if req_end > end:
267
                pending_reqs.append((req_version_id, end, req_end))
268
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
269
            if kind == 'new':
270
                lines.extend(data[req_start - start: (req_end - start)])
271
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
272
                # If the hunk is a ParentText, rewrite it as a range request
273
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
274
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
275
                new_version_id = self.parents[req_version_id][parent]
276
                new_start = parent_start + req_start - start
277
                new_end = parent_end + req_end - end
278
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
279
280
    def reconstruct_version(self, lines, version_id):
281
        length = self.diffs[version_id].num_lines()
282
        return self._reconstruct(lines, version_id, 0, length)