# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """
import errno
import itertools
import os
from StringIO import StringIO

from bzrlib import (
    patiencediff,
    trace,
    ui,
    )
from bzrlib.util import bencode
""")
from bzrlib.tuned_gzip import GzipFile


def topo_iter(vf, versions=None):
    """Yield version ids so that parents are yielded before their children"""
    seen = set()
    descendants = {}
    if versions is None:
        versions = vf.versions()
    # Materialize versions: it is iterated and tested for membership more
    # than once below, so an iterator would be exhausted too early.
    versions = list(versions)
    def pending_parents(version):
        return [v for v in vf.get_parents(version) if v in versions and
                v not in seen]
    for version_id in versions:
        for parent_id in vf.get_parents(version_id):
            descendants.setdefault(parent_id, []).append(version_id)
    cur = [v for v in versions if len(pending_parents(v)) == 0]
    while len(cur) > 0:
        next = []
        for version_id in cur:
            if version_id in seen:
                continue
            if len(pending_parents(version_id)) != 0:
                continue
            next.extend(descendants.get(version_id, []))
            yield version_id
            seen.add(version_id)
        cur = next
    assert len(seen) == len(versions)
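
# A hedged usage sketch (ids and texts are made up): parents are always
# yielded before their children, so a simple fork comes out root-first.
#
#   vf = MultiMemoryVersionedFile()
#   vf.add_version(['a\n'], 'root', [])
#   vf.add_version(['a\n', 'b\n'], 'left', ['root'])
#   vf.add_version(['a\n', 'c\n'], 'right', ['root'])
#   list(topo_iter(vf, ['left', 'right', 'root']))
#   # yields 'root' before 'left' and 'right'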


class MultiParent(object):
    """A multi-parent diff"""

    def __init__(self, hunks=None):
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        return "MultiParent(%r)" % self.hunks

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    @staticmethod
    def from_lines(text, parents=(), left_blocks=None):
        """Produce a MultiParent from a list of lines and parents"""
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()
        if len(parents) > 0:
            if left_blocks is None:
                left_blocks = compare(parents[0])
            parent_comparisons = [left_blocks] + [compare(p) for p in
                                                  parents[1:]]
        else:
            parent_comparisons = []
        cur_line = 0
        new_text = NewText([])
        parent_text = []
        block_iter = [iter(i) for i in parent_comparisons]
        diff = MultiParent([])
        def next_block(p):
            try:
                return block_iter[p].next()
            except StopIteration:
                return None
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
        while cur_line < len(text):
            # Find the longest match against any parent at the current line.
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                while j + n < cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    continue
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if n == 0:
                    continue
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                # No parent matches here: the line is new text.
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                # Flush any accumulated new text, then record the parent range.
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    def to_lines(self, parents=()):
        """Construct a fulltext from this diff and its parents"""
        mpvf = MultiMemoryVersionedFile()
        for num, parent in enumerate(parents):
            mpvf.add_version(StringIO(parent).readlines(), num, [])
        mpvf.add_diff(self, 'a', range(len(parents)))
        return mpvf.get_line_list(['a'])[0]

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and a list of parent texts"""
        return cls.from_lines(StringIO(text).readlines(),
                              [StringIO(p).readlines() for p in parents])
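
    # Hedged round-trip sketch (texts are made up): a diff built by
    # from_texts should rebuild the original text when replayed against the
    # same parent with to_lines.
    #
    #   parent = 'one\ntwo\nthree\n'
    #   child = 'one\ntwo and a half\nthree\n'
    #   diff = MultiParent.from_texts(child, [parent])
    #   ''.join(diff.to_lines([parent])) == child   # expected to hold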

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        return len(''.join(self.to_patch()))

    def zipped_patch_len(self):
        return len(gzip_string(self.to_patch()))

    @classmethod
    def from_patch(cls, text):
        """Create a MultiParent from its string form"""
        return cls._from_patch(StringIO(text))

    @staticmethod
    def _from_patch(lines):
        """This is private because it is essential to split lines on \n only"""
        line_iter = iter(lines)
        hunks = []
        cur_line = None
        while True:
            try:
                cur_line = line_iter.next()
            except StopIteration:
                break
            if cur_line[0] == 'i':
                # 'i <n>': the next n lines are new text; a following '\n'
                # record restores the trailing newline stripped below.
                num_lines = int(cur_line.split(' ')[1])
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
                hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif cur_line[0] == '\n':
                hunks[-1].lines[-1] += '\n'
            else:
                # 'c <parent> <parent_pos> <child_pos> <num_lines>': a range
                # of lines copied from a parent text.
                assert cur_line[0] == 'c', cur_line[0]
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)
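
    # Illustrative sketch of the patch text handled here (made-up content):
    # a ParentText serialises as a 'c' line, and a NewText as an 'i' line
    # followed by its lines and a terminating blank line, e.g.
    #
    #   c 0 0 0 2
    #   i 1
    #   a new line
    #   (blank line)
    #
    # MultiParent.from_patch() on that text yields
    # MultiParent([ParentText(0, 0, 0, 2), NewText(['a new line\n'])]).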

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """The number of lines in the output text"""
        extra_n = 0
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return true if this hunk is effectively a fulltext"""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))
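
    # A hedged worked example (made-up lines): diffing ['a\n', 'x\n', 'c\n']
    # against the single parent ['a\n', 'b\n', 'c\n'] gives hunks like
    #   [ParentText(0, 0, 0, 1), NewText(['x\n']), ParentText(0, 2, 2, 1)]
    # and range_iterator() then yields
    #   (0, 1, 'parent', (0, 0, 1)), (1, 2, 'new', ['x\n']),
    #   (2, 3, 'parent', (0, 2, 3))
    # so num_lines() reports 3 and is_snapshot() is False.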


class NewText(object):
    """The lines of text introduced by this text (present in no parent)"""

    def __init__(self, lines):
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __repr__(self):
        return 'NewText(%r)' % self.lines

    def to_patch(self):
        yield 'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield '\n'


class ParentText(object):
    """A reference to text present in a parent text"""

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def __repr__(self):
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
            ' %(num_lines)r)' % self.__dict__

    def __eq__(self, other):
        if self.__class__ != other.__class__:
            return False
        return (self.__dict__ == other.__dict__)

    def to_patch(self):
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
            % self.__dict__


class BaseVersionedFile(object):
    """Pseudo-VersionedFile skeleton for MultiParent"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        self._lines = {}
        self._parents = {}
        self._snapshots = set()
        self.snapshot_interval = snapshot_interval
        self.max_snapshots = max_snapshots

    def versions(self):
        return iter(self._parents)

    def has_version(self, version):
        return version in self._parents

    def do_snapshot(self, version_id, parent_ids):
        """Determine whether to perform a snapshot for this version"""
        if self.snapshot_interval is None:
            return False
        if self.max_snapshots is not None and\
            len(self._snapshots) == self.max_snapshots:
            return False
        if len(parent_ids) == 0:
            return True
        for ignored in xrange(self.snapshot_interval):
            if len(parent_ids) == 0:
                return False
            version_ids = parent_ids
            parent_ids = []
            for version_id in version_ids:
                if version_id not in self._snapshots:
                    parent_ids.extend(self._parents[version_id])
        else:
            return True
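
    # Hedged sketch of the generation walk (hypothetical ids): with
    # snapshot_interval=3 and a linear history a <- b <- c <- d in which only
    # 'a' is a snapshot, do_snapshot('c', ['b']) reaches the snapshot 'a'
    # within three generations and returns False, while do_snapshot('d', ['c'])
    # exhausts the three generations without emptying the pending-parent list
    # and returns True (the for/else above fires).
    #
    #   vf = MultiMemoryVersionedFile(snapshot_interval=3)
    #   vf.add_version(['x\n'], 'a', [])            # no parents: snapshot
    #   vf.add_version(['x\n', 'b\n'], 'b', ['a'])  # stored as a diff
    #   vf.add_version(['x\n', 'b\n', 'c\n'], 'c', ['b'])
    #   vf.do_snapshot('d', ['c'])                  # => True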

    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version to the versionedfile

        :param lines: The list of lines to add.  Must be split on '\n'.
        :param version_id: The version_id of the version to add
        :param parent_ids: The version_ids of this version's parents
        :param force_snapshot: If true, force this version to be added as a
            snapshot version.  If false, force this version to be added as a
            diff.  If none, determine this automatically.
        :param single_parent: If true, use a single parent, rather than
            multiple parents.
        """
        if force_snapshot is None:
            do_snapshot = self.do_snapshot(version_id, parent_ids)
        else:
            do_snapshot = force_snapshot
        if do_snapshot:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            if single_parent:
                parent_lines = self.get_line_list(parent_ids[:1])
            else:
                parent_lines = self.get_line_list(parent_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            if diff.is_snapshot():
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines

    def get_parents(self, version_id):
        return self._parents[version_id]

    def make_snapshot(self, version_id):
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
        self.add_diff(snapdiff, version_id, self._parents[version_id])
        self._snapshots.add(version_id)

    def import_versionedfile(self, vf, snapshots, no_cache=True,
                             single_parent=False, verify=False):
        """Import all revisions of a versionedfile

        :param vf: The versionedfile to import
        :param snapshots: If provided, the revisions to make snapshots of.
            Otherwise, this will be auto-determined
        :param no_cache: If true, clear the cache after every add.
        :param single_parent: If true, omit all but one parent text (but
            retain parent metadata).
        :param verify: If true, check that each added text can be rebuilt
            from its stored diff.  Requires no_cache.
        """
        assert no_cache or not verify
        revisions = set(vf.versions())
        total = len(revisions)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            while len(revisions) > 0:
                added = set()
                for revision in revisions:
                    parents = vf.get_parents(revision)
                    if [p for p in parents if p not in self._parents] != []:
                        continue
                    lines = [a + ' ' + l for a, l in
                             vf.annotate_iter(revision)]
                    if snapshots is None:
                        force_snapshot = None
                    else:
                        force_snapshot = (revision in snapshots)
                    self.add_version(lines, revision, parents, force_snapshot,
                                     single_parent)
                    added.add(revision)
                    if no_cache:
                        self.clear_cache()
                        vf.clear_cache()
                        if verify:
                            assert lines == self.get_line_list([revision])[0]
                            self.clear_cache()
                    pb.update('Importing revisions',
                              (total - len(revisions)) + len(added), total)
                revisions = [r for r in revisions if r not in added]
        finally:
            pb.finished()

    def select_snapshots(self, vf):
        """Determine which versions to add as snapshots"""
        build_ancestors = {}
        descendants = {}
        snapshots = set()
        for version_id in topo_iter(vf):
            potential_build_ancestors = set(vf.get_parents(version_id))
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
            else:
                for parent in vf.get_parents(version_id):
                    potential_build_ancestors.update(build_ancestors[parent])
                if len(potential_build_ancestors) > self.snapshot_interval:
                    snapshots.add(version_id)
                    build_ancestors[version_id] = set()
                else:
                    build_ancestors[version_id] = potential_build_ancestors
        return snapshots
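
    # A hedged trace (hypothetical ids): with snapshot_interval=2 and a
    # linear chain a <- b <- c <- d, 'a' is selected because it has no
    # parents, 'b' and 'c' accumulate build ancestries {a} and {a, b}, and
    # 'd' is selected because its potential build ancestry {a, b, c} exceeds
    # the interval.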

    def select_by_size(self, num):
        """Select snapshots for minimum output size"""
        num -= len(self._snapshots)
        new_snapshots = self.get_size_ranking()[-num:]
        return [v for n, v in new_snapshots]

    def get_size_ranking(self):
        """Get versions ranked by size"""
        versions = []
        new_snapshots = set()
        for version_id in self.versions():
            if version_id in self._snapshots:
                continue
            diff_len = self.get_diff(version_id).patch_len()
            snapshot_len = MultiParent([NewText(
                self.cache_version(version_id))]).patch_len()
            versions.append((snapshot_len - diff_len, version_id))
        versions.sort()
        return versions

    def import_diffs(self, vf):
        """Import the diffs from another pseudo-versionedfile"""
        for version_id in vf.versions():
            self.add_diff(vf.get_diff(version_id), version_id,
                          vf._parents[version_id])

    def get_build_ranking(self):
        """Return revisions sorted by how much they reduce build complexity"""
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            could_avoid[version_id] = set()
            if version_id not in self._snapshots:
                for parent_id in self._parents[version_id]:
                    could_avoid[version_id].update(could_avoid[parent_id])
                could_avoid[version_id].update(self._parents)
                could_avoid[version_id].discard(version_id)
            for avoid_id in could_avoid[version_id]:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while len(available_versions) > 0:
            available_versions.sort(key=lambda x:
                len(could_avoid[x]) *
                len(referenced_by.get(x, [])))
            selected = available_versions.pop()
            ranking.append(selected)
            for version_id in referenced_by[selected]:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(
                    referenced_by[selected]
                )
        return ranking

    def clear_cache(self):
        self._lines.clear()

    def get_line_list(self, version_ids):
        return [self.cache_version(v) for v in version_ids]

    def cache_version(self, version_id):
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        diff = self.get_diff(version_id)
        lines = []
        reconstructor = _Reconstructor(self, self._lines,
                                       self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines


class MultiMemoryVersionedFile(BaseVersionedFile):
    """Memory-backed pseudo-versionedfile"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._diffs = {}

    def add_diff(self, diff, version_id, parent_ids):
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def get_diff(self, version_id):
        return self._diffs[version_id]

    def destroy(self):
        self._diffs = {}
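
    # Hedged usage sketch (made-up content): texts can be rebuilt from the
    # stored diffs alone after the fulltext cache is dropped.
    #
    #   vf = MultiMemoryVersionedFile()
    #   vf.add_version(['a\n', 'b\n'], 'base', [])
    #   vf.add_version(['a\n', 'c\n'], 'mod', ['base'])
    #   vf.clear_cache()
    #   vf.get_line_list(['mod'])   # => [['a\n', 'c\n']]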


class MultiVersionedFile(BaseVersionedFile):
    """Disk-backed pseudo-versionedfile"""

    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._filename = filename
        self._diff_offset = {}

    def get_diff(self, version_id):
        start, count = self._diff_offset[version_id]
        infile = open(self._filename + '.mpknit', 'rb')
        try:
            infile.seek(start)
            sio = StringIO(infile.read(count))
        finally:
            infile.close()
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
        try:
            file_version_id = zip_file.readline()
            return MultiParent.from_patch(zip_file.read())
        finally:
            zip_file.close()

    def add_diff(self, diff, version_id, parent_ids):
        # Each diff is gzipped separately and appended to the .mpknit file;
        # the byte offset and length are recorded so it can be read back.
        outfile = open(self._filename + '.mpknit', 'ab')
        try:
            start = outfile.tell()
            try:
                zipfile = GzipFile(None, mode='ab', fileobj=outfile)
                zipfile.writelines(itertools.chain(
                    ['version %s\n' % version_id], diff.to_patch()))
            finally:
                zipfile.close()
            end = outfile.tell()
        finally:
            outfile.close()
        self._diff_offset[version_id] = (start, end-start)
        self._parents[version_id] = parent_ids

    def destroy(self):
        try:
            os.unlink(self._filename + '.mpknit')
        except OSError, e:
            if e.errno != errno.ENOENT:
                raise
        try:
            os.unlink(self._filename + '.mpidx')
        except OSError, e:
            if e.errno != errno.ENOENT:
                raise

    def save(self):
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
            (self._parents, list(self._snapshots), self._diff_offset)))

    def load(self):
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
            open(self._filename + '.mpidx', 'rb').read())
        self._snapshots = set(snapshots)
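
    # Hedged usage sketch (the path is hypothetical): diffs are appended to
    # <filename>.mpknit as individually gzipped records, while save()/load()
    # round-trip the parent map, snapshot set and record offsets through
    # <filename>.mpidx.
    #
    #   vf = MultiVersionedFile('/tmp/example')
    #   vf.add_version(['a\n'], 'base', [])
    #   vf.save()
    #   vf2 = MultiVersionedFile('/tmp/example')
    #   vf2.load()
    #   vf2.get_line_list(['base'])   # => [['a\n']]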


class _Reconstructor(object):
    """Build a text from the diffs, ancestry graph and cached lines"""

    def __init__(self, diffs, lines, parents):
        self.diffs = diffs
        self.lines = lines
        self.parents = parents
        self.cursor = {}

    def reconstruct(self, lines, parent_text, version_id):
        """Append the lines referred to by a ParentText to lines"""
        parent_id = self.parents[version_id][parent_text.parent]
        end = parent_text.parent_pos + parent_text.num_lines
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
                                 end)

    def _reconstruct(self, lines, req_version_id, req_start, req_end):
        """Append lines for the requested version_id range"""
        # stack of pending range requests
        if req_start == req_end:
            return
        pending_reqs = [(req_version_id, req_start, req_end)]
        while len(pending_reqs) > 0:
            req_version_id, req_start, req_end = pending_reqs.pop()
            # lazily allocate cursors for versions
            try:
                start, end, kind, data, iterator = self.cursor[req_version_id]
            except KeyError:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = iterator.next()
            if start > req_start:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = iterator.next()

            # find the first hunk relevant to the request
            while end <= req_start:
                start, end, kind, data = iterator.next()
            self.cursor[req_version_id] = start, end, kind, data, iterator
            # if the hunk can't satisfy the whole request, split it in two,
            # and leave the second half for later.
            if req_end > end:
                pending_reqs.append((req_version_id, end, req_end))
                req_end = end
            if kind == 'new':
                lines.extend(data[req_start - start: (req_end - start)])
            else:
                # If the hunk is a ParentText, rewrite it as a range request
                # for the parent, and make it the next pending request.
                parent, parent_start, parent_end = data
                new_version_id = self.parents[req_version_id][parent]
                new_start = parent_start + req_start - start
                new_end = parent_end + req_end - end
                pending_reqs.append((new_version_id, new_start, new_end))

    def reconstruct_version(self, lines, version_id):
        length = self.diffs.get_diff(version_id).num_lines()
        return self._reconstruct(lines, version_id, 0, length)


def gzip_string(lines):
    sio = StringIO()
    data_file = GzipFile(None, mode='wb', fileobj=sio)
    data_file.writelines(lines)
    data_file.close()
    return sio.getvalue()