~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
17
from bzrlib.lazy_import import lazy_import
18
19
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
20
import errno
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
21
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
22
import os
0.9.25 by Aaron Bentley
More messy hacking
23
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
24
0.9.25 by Aaron Bentley
More messy hacking
25
from bzrlib import (
26
    patiencediff,
27
    trace,
28
    ui,
29
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
30
from bzrlib.util import bencode
31
""")
0.9.25 by Aaron Bentley
More messy hacking
32
from bzrlib.tuned_gzip import GzipFile
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
33
0.9.33 by Aaron Bentley
Enable caching commandline param
34
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
35
def topo_iter(vf, versions=None):
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
36
    seen = set()
37
    descendants = {}
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
38
    if versions is None:
39
        versions = vf.versions()
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
40
    def pending_parents(version):
41
        return [v for v in vf.get_parents(version) if v in versions and
42
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
43
    for version_id in versions:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
44
        for parent_id in vf.get_parents(version_id):
45
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
46
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
47
    while len(cur) > 0:
48
        next = []
49
        for version_id in cur:
50
            if version_id in seen:
51
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
52
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
53
                continue
54
            next.extend(descendants.get(version_id, []))
55
            yield version_id
56
            seen.add(version_id)
57
        cur = next
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
58
    assert len(seen) == len(versions)
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
59
60
0.9.1 by Aaron Bentley
Get trivial case passing
61
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
62
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
63
0.9.2 by Aaron Bentley
Get single-parent comparison working
64
    def __init__(self, hunks=None):
65
        if hunks is not None:
66
            self.hunks = hunks
67
        else:
68
            self.hunks = []
69
70
    def __repr__(self):
71
        return "MultiParent(%r)" % self.hunks
72
73
    def __eq__(self, other):
74
        if self.__class__ is not other.__class__:
75
            return False
76
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
77
78
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
79
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
80
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
81
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
82
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
83
                                                           text)
84
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
85
        if len(parents) > 0:
86
            if left_blocks is None:
87
                left_blocks = compare(parents[0])
88
            parent_comparisons = [left_blocks] + [compare(p) for p in
89
                                                  parents[1:]]
90
        else:
91
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
92
        cur_line = 0
93
        new_text = NewText([])
94
        parent_text = []
95
        block_iter = [iter(i) for i in parent_comparisons]
96
        diff = MultiParent([])
97
        def next_block(p):
98
            try:
99
                return block_iter[p].next()
100
            except StopIteration:
101
                return None
102
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
103
        while cur_line < len(text):
104
            best_match = None
105
            for p, block in enumerate(cur_block):
106
                if block is None:
107
                    continue
108
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
109
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
110
                    block = cur_block[p] = next_block(p)
111
                    if block is None:
112
                        break
113
                    i, j, n = block
114
                if block is None:
115
                    continue
116
                if j > cur_line:
117
                    continue
118
                offset = cur_line - j
119
                i += offset
120
                j = cur_line
121
                n -= offset
122
                if n == 0:
123
                    continue
124
                if best_match is None or n > best_match.num_lines:
125
                    best_match = ParentText(p, i, j, n)
126
            if best_match is None:
127
                new_text.lines.append(text[cur_line])
128
                cur_line += 1
129
            else:
130
                if len(new_text.lines) > 0:
131
                    diff.hunks.append(new_text)
132
                    new_text = NewText([])
133
                diff.hunks.append(best_match)
134
                cur_line += best_match.num_lines
135
        if len(new_text.lines) > 0:
136
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
137
        return diff
138
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
139
    def get_matching_blocks(self, parent, parent_len):
140
        for hunk in self.hunks:
141
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
142
                continue
143
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
144
        yield parent_len, self.num_lines(), 0
145
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
146
    def to_lines(self, parents=()):
147
        """Contruct a fulltext from this diff and its parents"""
148
        mpvf = MultiMemoryVersionedFile()
149
        for num, parent in enumerate(parents):
150
            mpvf.add_version(StringIO(parent).readlines(), num, [])
151
        mpvf.add_diff(self, 'a', range(len(parents)))
152
        return mpvf.get_line_list(['a'])[0]
153
0.9.1 by Aaron Bentley
Get trivial case passing
154
    @classmethod
155
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
156
        """Produce a MultiParent from a text and list of parent text"""
2520.4.92 by Aaron Bentley
fix line-splitting in MultiParent.from_texts
157
        return cls.from_lines(StringIO(text).readlines(),
158
                              [StringIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
159
0.9.4 by Aaron Bentley
Start supporting serialization
160
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
161
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
162
        for hunk in self.hunks:
163
            for line in hunk.to_patch():
164
                yield line
165
0.9.25 by Aaron Bentley
More messy hacking
166
    def patch_len(self):
167
        return len(''.join(self.to_patch()))
168
169
    def zipped_patch_len(self):
170
        return len(gzip_string(self.to_patch()))
171
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
172
    @classmethod
173
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
174
        """Create a MultiParent from its string form"""
2520.4.31 by Aaron Bentley
Using StringIO for splitting patch lines
175
        return cls._from_patch(StringIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
176
0.9.18 by Aaron Bentley
Implement from_patch
177
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
178
    def _from_patch(lines):
179
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
180
        line_iter = iter(lines)
181
        hunks = []
182
        cur_line = None
183
        while(True):
184
            try:
185
                cur_line = line_iter.next()
186
            except StopIteration:
187
                break
188
            if cur_line[0] == 'i':
189
                num_lines = int(cur_line.split(' ')[1])
190
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
191
                hunk_lines[-1] = hunk_lines[-1][:-1]
192
                hunks.append(NewText(hunk_lines))
193
            elif cur_line[0] == '\n':
194
                hunks[-1].lines[-1] += '\n'
195
            else:
196
                assert cur_line[0] == 'c', cur_line[0]
197
                parent, parent_pos, child_pos, num_lines =\
198
                    [int(v) for v in cur_line.split(' ')[1:]]
199
                hunks.append(ParentText(parent, parent_pos, child_pos,
200
                                        num_lines))
201
        return MultiParent(hunks)
202
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
203
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
204
        """Iterate through the hunks, with range indicated
205
206
        kind is "new" or "parent".
207
        for "new", data is a list of lines.
208
        for "parent", data is (parent, parent_start, parent_end)
209
        :return: a generator of (start, end, kind, data)
210
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
211
        start = 0
212
        for hunk in self.hunks:
213
            if isinstance(hunk, NewText):
214
                kind = 'new'
215
                end = start + len(hunk.lines)
216
                data = hunk.lines
217
            else:
218
                kind = 'parent'
219
                start = hunk.child_pos
220
                end = start + hunk.num_lines
221
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
222
                        hunk.num_lines)
223
            yield start, end, kind, data
224
            start = end
225
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
226
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
227
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
228
        extra_n = 0
229
        for hunk in reversed(self.hunks):
230
            if isinstance(hunk, ParentText):
231
               return hunk.child_pos + hunk.num_lines + extra_n
232
            extra_n += len(hunk.lines)
233
        return extra_n
234
0.9.25 by Aaron Bentley
More messy hacking
235
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
236
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
237
        if len(self.hunks) != 1:
238
            return False
239
        return (isinstance(self.hunks[0], NewText))
240
0.9.1 by Aaron Bentley
Get trivial case passing
241
242
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
243
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
244
245
    def __init__(self, lines):
246
        self.lines = lines
247
248
    def __eq__(self, other):
249
        if self.__class__ is not other.__class__:
250
            return False
251
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
252
253
    def __repr__(self):
254
        return 'NewText(%r)' % self.lines
255
0.9.4 by Aaron Bentley
Start supporting serialization
256
    def to_patch(self):
257
        yield 'i %d\n' % len(self.lines)
258
        for line in self.lines:
259
            yield line
260
        yield '\n'
261
0.9.2 by Aaron Bentley
Get single-parent comparison working
262
263
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
264
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
265
266
    def __init__(self, parent, parent_pos, child_pos, num_lines):
267
        self.parent = parent
268
        self.parent_pos = parent_pos
269
        self.child_pos = child_pos
270
        self.num_lines = num_lines
271
272
    def __repr__(self):
273
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
274
            ' %(num_lines)r)' % self.__dict__
275
276
    def __eq__(self, other):
277
        if self.__class__ != other.__class__:
278
            return False
279
        return (self.__dict__ == other.__dict__)
0.9.4 by Aaron Bentley
Start supporting serialization
280
281
    def to_patch(self):
282
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
283
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
284
285
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
286
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
287
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
288
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
289
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
290
        self._lines = {}
291
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
292
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
293
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
294
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
295
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
296
    def versions(self):
297
        return iter(self._parents)
298
2520.4.61 by Aaron Bentley
Do bulk insertion of records
299
    def has_version(self, version):
300
        return version in self._parents
301
0.9.12 by Aaron Bentley
Make benchmarks for mp
302
    def do_snapshot(self, version_id, parent_ids):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
303
        """Determine whether to perform a a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
304
        if self.snapshot_interval is None:
305
            return False
306
        if self.max_snapshots is not None and\
307
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
308
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
309
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
310
            return True
311
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
312
            if len(parent_ids) == 0:
313
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
314
            version_ids = parent_ids
315
            parent_ids = []
316
            for version_id in version_ids:
317
                if version_id not in self._snapshots:
318
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
319
        else:
320
            return True
0.9.8 by Aaron Bentley
get add_version working
321
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
322
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
323
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
324
        """Add a version to the versionedfile
325
326
        :param lines: The list of lines to add.  Must be split on '\n'.
327
        :param version_id: The version_id of the version to add
328
        :param force_snapshot: If true, force this version to be added as a
329
            snapshot version.  If false, force this version to be added as a
330
            diff.  If none, determine this automatically.
331
        :param single_parent: If true, use a single parent, rather than
332
            multiple parents.
333
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
334
        if force_snapshot is None:
335
            do_snapshot = self.do_snapshot(version_id, parent_ids)
336
        else:
337
            do_snapshot = force_snapshot
338
        if do_snapshot:
339
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
340
            diff = MultiParent([NewText(lines)])
341
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
342
            if single_parent:
343
                parent_lines = self.get_line_list(parent_ids[:1])
344
            else:
345
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
346
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
347
            if diff.is_snapshot():
348
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
349
        self.add_diff(diff, version_id, parent_ids)
350
        self._lines[version_id] = lines
351
0.9.35 by Aaron Bentley
Add build ranking
352
    def get_parents(self, version_id):
353
        return self._parents[version_id]
354
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
355
    def make_snapshot(self, version_id):
356
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
357
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
358
        self._snapshots.add(version_id)
359
0.9.20 by Aaron Bentley
Convert to a plugin
360
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
361
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
362
        """Import all revisions of a versionedfile
363
364
        :param vf: The versionedfile to import
365
        :param snapshots: If provided, the revisions to make snapshots of.
366
            Otherwise, this will be auto-determined
367
        :param no_cache: If true, clear the cache after every add.
368
        :param single_parent: If true, omit all but one parent text, (but
369
            retain parent metadata).
370
        """
0.9.22 by Aaron Bentley
Fix restoration bug
371
        assert no_cache or not verify
0.9.19 by Aaron Bentley
More tweakage
372
        revisions = set(vf.versions())
373
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
374
        pb = ui.ui_factory.nested_progress_bar()
375
        try:
376
            while len(revisions) > 0:
377
                added = set()
378
                for revision in revisions:
379
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
380
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
381
                        continue
382
                    lines = [a + ' ' + l for a, l in
383
                             vf.annotate_iter(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
384
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
385
                        force_snapshot = None
386
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
387
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
388
                    self.add_version(lines, revision, parents, force_snapshot,
389
                                     single_parent)
390
                    added.add(revision)
391
                    if no_cache:
392
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
393
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
394
                        if verify:
395
                            assert lines == self.get_line_list([revision])[0]
396
                            self.clear_cache()
0.9.20 by Aaron Bentley
Convert to a plugin
397
                    pb.update('Importing revisions',
398
                              (total - len(revisions)) + len(added), total)
399
                revisions = [r for r in revisions if r not in added]
400
        finally:
401
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
402
0.9.23 by Aaron Bentley
handle snapshots all at once
403
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
404
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
405
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
406
        descendants = {}
407
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
408
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
409
            potential_build_ancestors = set(vf.get_parents(version_id))
410
            parents = vf.get_parents(version_id)
411
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
412
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
413
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
414
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
415
                for parent in vf.get_parents(version_id):
416
                    potential_build_ancestors.update(build_ancestors[parent])
417
                if len(potential_build_ancestors) > self.snapshot_interval:
418
                    snapshots.add(version_id)
419
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
420
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
421
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
422
        return snapshots
423
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
424
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
425
        """Select snapshots for minimum output size"""
426
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
427
        new_snapshots = self.get_size_ranking()[-num:]
428
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
429
430
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
431
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
432
        versions = []
433
        new_snapshots = set()
434
        for version_id in self.versions():
435
            if version_id in self._snapshots:
436
                continue
437
            diff_len = self.get_diff(version_id).patch_len()
438
            snapshot_len = MultiParent([NewText(
439
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
440
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
441
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
442
        return versions
443
444
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
445
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
446
        for version_id in vf.versions():
447
            self.add_diff(vf.get_diff(version_id), version_id,
448
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
449
0.9.35 by Aaron Bentley
Add build ranking
450
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
451
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
452
        could_avoid = {}
453
        referenced_by = {}
454
        for version_id in topo_iter(self):
455
            could_avoid[version_id] = set()
456
            if version_id not in self._snapshots:
457
                for parent_id in self._parents[version_id]:
458
                    could_avoid[version_id].update(could_avoid[parent_id])
459
                could_avoid[version_id].update(self._parents)
460
                could_avoid[version_id].discard(version_id)
461
            for avoid_id in could_avoid[version_id]:
462
                referenced_by.setdefault(avoid_id, set()).add(version_id)
463
        available_versions = list(self.versions())
464
        ranking = []
465
        while len(available_versions) > 0:
466
            available_versions.sort(key=lambda x:
467
                len(could_avoid[x]) *
468
                len(referenced_by.get(x, [])))
469
            selected = available_versions.pop()
470
            ranking.append(selected)
471
            for version_id in referenced_by[selected]:
472
                could_avoid[version_id].difference_update(
473
                    could_avoid[selected])
474
            for version_id in could_avoid[selected]:
475
                referenced_by[version_id].difference_update(
476
                    referenced_by[selected]
477
                )
478
        return ranking
479
0.9.8 by Aaron Bentley
get add_version working
480
    def clear_cache(self):
481
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
482
483
    def get_line_list(self, version_ids):
484
        return [self.cache_version(v) for v in version_ids]
485
486
    def cache_version(self, version_id):
487
        try:
488
            return self._lines[version_id]
489
        except KeyError:
490
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
491
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
492
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
493
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
494
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
495
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
496
        return lines
497
0.9.33 by Aaron Bentley
Enable caching commandline param
498
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
499
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
500
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
501
502
    def __init__(self, snapshot_interval=25, max_snapshots=None):
503
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
504
        self._diffs = {}
505
506
    def add_diff(self, diff, version_id, parent_ids):
507
        self._diffs[version_id] = diff
508
        self._parents[version_id] = parent_ids
509
510
    def get_diff(self, version_id):
511
        return self._diffs[version_id]
512
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
513
    def destroy(self):
514
        self._diffs = {}
515
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
516
517
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
518
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
519
520
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
521
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
522
        self._filename = filename
523
        self._diff_offset = {}
524
525
    def get_diff(self, version_id):
526
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
527
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
528
        try:
529
            infile.seek(start)
530
            sio = StringIO(infile.read(count))
531
        finally:
532
            infile.close()
533
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
534
        try:
535
            file_version_id = zip_file.readline()
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
536
            return MultiParent.from_patch(zip_file.read())
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
537
        finally:
538
            zip_file.close()
539
540
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
541
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
542
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
543
            outfile.seek(0, 2)      # workaround for windows bug:
544
                                    # .tell() for files opened in 'ab' mode
545
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
546
            start = outfile.tell()
547
            try:
548
                zipfile = GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
549
                zipfile.writelines(itertools.chain(
550
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
551
            finally:
552
                zipfile.close()
553
            end = outfile.tell()
554
        finally:
555
            outfile.close()
556
        self._diff_offset[version_id] = (start, end-start)
557
        self._parents[version_id] = parent_ids
558
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
559
    def destroy(self):
560
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
561
            os.unlink(self._filename + '.mpknit')
562
        except OSError, e:
563
            if e.errno != errno.ENOENT:
564
                raise
565
        try:
566
            os.unlink(self._filename + '.mpidx')
567
        except OSError, e:
568
            if e.errno != errno.ENOENT:
569
                raise
570
571
    def save(self):
572
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
573
            (self._parents, list(self._snapshots), self._diff_offset)))
574
575
    def load(self):
576
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
577
            open(self._filename + '.mpidx', 'rb').read())
578
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
579
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
580
581
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
582
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
583
584
    def __init__(self, diffs, lines, parents):
585
        self.diffs = diffs
586
        self.lines = lines
587
        self.parents = parents
588
        self.cursor = {}
589
590
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
591
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
592
        parent_id = self.parents[version_id][parent_text.parent]
593
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
594
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
595
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
596
597
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
598
        """Append lines for the requested version_id range"""
599
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
600
        if req_start == req_end:
601
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
602
        pending_reqs = [(req_version_id, req_start, req_end)]
603
        while len(pending_reqs) > 0:
604
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
605
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
606
            if req_version_id in self.lines:
607
                lines.extend(self.lines[req_version_id][req_start:req_end])
608
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
609
            try:
610
                start, end, kind, data, iterator = self.cursor[req_version_id]
611
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
612
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
613
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
614
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
615
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
616
                start, end, kind, data = iterator.next()
617
0.9.10 by Aaron Bentley
Text reconstruction seems to work
618
            # find the first hunk relevant to the request
619
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
620
                start, end, kind, data = iterator.next()
621
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
622
            # if the hunk can't satisfy the whole request, split it in two,
623
            # and leave the second half for later.
624
            if req_end > end:
625
                pending_reqs.append((req_version_id, end, req_end))
626
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
627
            if kind == 'new':
628
                lines.extend(data[req_start - start: (req_end - start)])
629
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
630
                # If the hunk is a ParentText, rewrite it as a range request
631
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
632
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
633
                new_version_id = self.parents[req_version_id][parent]
634
                new_start = parent_start + req_start - start
635
                new_end = parent_end + req_end - end
636
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
637
638
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
639
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
640
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
641
2520.4.6 by Aaron Bentley
Get installation started
642
0.9.25 by Aaron Bentley
More messy hacking
643
def gzip_string(lines):
644
    sio = StringIO()
645
    data_file = GzipFile(None, mode='wb', fileobj=sio)
646
    data_file.writelines(lines)
647
    data_file.close()
648
    return sio.getvalue()