~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
17
from bzrlib.lazy_import import lazy_import
18
19
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
20
import errno
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
21
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
22
import os
0.9.25 by Aaron Bentley
More messy hacking
23
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
24
0.9.25 by Aaron Bentley
More messy hacking
25
from bzrlib import (
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
26
    errors,
0.9.25 by Aaron Bentley
More messy hacking
27
    patiencediff,
28
    trace,
29
    ui,
30
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
31
from bzrlib.util import bencode
32
""")
0.9.25 by Aaron Bentley
More messy hacking
33
from bzrlib.tuned_gzip import GzipFile
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
34
0.9.33 by Aaron Bentley
Enable caching commandline param
35
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
36
def topo_iter_keys(vf, keys=None):
37
    if keys is None:
38
        keys = vf.keys()
39
    parents = vf.get_parent_map(keys)
40
    return _topo_iter(parents, keys)
41
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
42
def topo_iter(vf, versions=None):
43
    if versions is None:
44
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
45
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
46
    return _topo_iter(parents, versions)
47
48
def _topo_iter(parents, versions):
49
    seen = set()
50
    descendants = {}
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
51
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
52
        if parents[version] is None:
53
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
54
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
55
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
56
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
57
        if parents[version_id] is None:
58
            # parentless
59
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
60
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
61
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
62
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
63
    while len(cur) > 0:
64
        next = []
65
        for version_id in cur:
66
            if version_id in seen:
67
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
68
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
69
                continue
70
            next.extend(descendants.get(version_id, []))
71
            yield version_id
72
            seen.add(version_id)
73
        cur = next
74
75
0.9.1 by Aaron Bentley
Get trivial case passing
76
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
77
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
78
0.9.2 by Aaron Bentley
Get single-parent comparison working
79
    def __init__(self, hunks=None):
80
        if hunks is not None:
81
            self.hunks = hunks
82
        else:
83
            self.hunks = []
84
85
    def __repr__(self):
86
        return "MultiParent(%r)" % self.hunks
87
88
    def __eq__(self, other):
89
        if self.__class__ is not other.__class__:
90
            return False
91
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
92
93
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
94
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
95
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
96
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
97
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
98
                                                           text)
99
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
100
        if len(parents) > 0:
101
            if left_blocks is None:
102
                left_blocks = compare(parents[0])
103
            parent_comparisons = [left_blocks] + [compare(p) for p in
104
                                                  parents[1:]]
105
        else:
106
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
107
        cur_line = 0
108
        new_text = NewText([])
109
        parent_text = []
110
        block_iter = [iter(i) for i in parent_comparisons]
111
        diff = MultiParent([])
112
        def next_block(p):
113
            try:
114
                return block_iter[p].next()
115
            except StopIteration:
116
                return None
117
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
118
        while cur_line < len(text):
119
            best_match = None
120
            for p, block in enumerate(cur_block):
121
                if block is None:
122
                    continue
123
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
124
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
125
                    block = cur_block[p] = next_block(p)
126
                    if block is None:
127
                        break
128
                    i, j, n = block
129
                if block is None:
130
                    continue
131
                if j > cur_line:
132
                    continue
133
                offset = cur_line - j
134
                i += offset
135
                j = cur_line
136
                n -= offset
137
                if n == 0:
138
                    continue
139
                if best_match is None or n > best_match.num_lines:
140
                    best_match = ParentText(p, i, j, n)
141
            if best_match is None:
142
                new_text.lines.append(text[cur_line])
143
                cur_line += 1
144
            else:
145
                if len(new_text.lines) > 0:
146
                    diff.hunks.append(new_text)
147
                    new_text = NewText([])
148
                diff.hunks.append(best_match)
149
                cur_line += best_match.num_lines
150
        if len(new_text.lines) > 0:
151
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
152
        return diff
153
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
154
    def get_matching_blocks(self, parent, parent_len):
155
        for hunk in self.hunks:
156
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
157
                continue
158
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
159
        yield parent_len, self.num_lines(), 0
160
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
161
    def to_lines(self, parents=()):
162
        """Contruct a fulltext from this diff and its parents"""
163
        mpvf = MultiMemoryVersionedFile()
164
        for num, parent in enumerate(parents):
165
            mpvf.add_version(StringIO(parent).readlines(), num, [])
166
        mpvf.add_diff(self, 'a', range(len(parents)))
167
        return mpvf.get_line_list(['a'])[0]
168
0.9.1 by Aaron Bentley
Get trivial case passing
169
    @classmethod
170
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
171
        """Produce a MultiParent from a text and list of parent text"""
2520.4.92 by Aaron Bentley
fix line-splitting in MultiParent.from_texts
172
        return cls.from_lines(StringIO(text).readlines(),
173
                              [StringIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
174
0.9.4 by Aaron Bentley
Start supporting serialization
175
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
176
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
177
        for hunk in self.hunks:
178
            for line in hunk.to_patch():
179
                yield line
180
0.9.25 by Aaron Bentley
More messy hacking
181
    def patch_len(self):
182
        return len(''.join(self.to_patch()))
183
184
    def zipped_patch_len(self):
185
        return len(gzip_string(self.to_patch()))
186
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
187
    @classmethod
188
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
189
        """Create a MultiParent from its string form"""
2520.4.31 by Aaron Bentley
Using StringIO for splitting patch lines
190
        return cls._from_patch(StringIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
191
0.9.18 by Aaron Bentley
Implement from_patch
192
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
193
    def _from_patch(lines):
194
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
195
        line_iter = iter(lines)
196
        hunks = []
197
        cur_line = None
198
        while(True):
199
            try:
200
                cur_line = line_iter.next()
201
            except StopIteration:
202
                break
203
            if cur_line[0] == 'i':
204
                num_lines = int(cur_line.split(' ')[1])
205
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
206
                hunk_lines[-1] = hunk_lines[-1][:-1]
207
                hunks.append(NewText(hunk_lines))
208
            elif cur_line[0] == '\n':
209
                hunks[-1].lines[-1] += '\n'
210
            else:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
211
                if not (cur_line[0] == 'c'):
212
                    raise AssertionError(cur_line[0])
0.9.18 by Aaron Bentley
Implement from_patch
213
                parent, parent_pos, child_pos, num_lines =\
214
                    [int(v) for v in cur_line.split(' ')[1:]]
215
                hunks.append(ParentText(parent, parent_pos, child_pos,
216
                                        num_lines))
217
        return MultiParent(hunks)
218
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
219
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
220
        """Iterate through the hunks, with range indicated
221
222
        kind is "new" or "parent".
223
        for "new", data is a list of lines.
224
        for "parent", data is (parent, parent_start, parent_end)
225
        :return: a generator of (start, end, kind, data)
226
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
227
        start = 0
228
        for hunk in self.hunks:
229
            if isinstance(hunk, NewText):
230
                kind = 'new'
231
                end = start + len(hunk.lines)
232
                data = hunk.lines
233
            else:
234
                kind = 'parent'
235
                start = hunk.child_pos
236
                end = start + hunk.num_lines
237
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
238
                        hunk.num_lines)
239
            yield start, end, kind, data
240
            start = end
241
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
242
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
243
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
244
        extra_n = 0
245
        for hunk in reversed(self.hunks):
246
            if isinstance(hunk, ParentText):
247
               return hunk.child_pos + hunk.num_lines + extra_n
248
            extra_n += len(hunk.lines)
249
        return extra_n
250
0.9.25 by Aaron Bentley
More messy hacking
251
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
252
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
253
        if len(self.hunks) != 1:
254
            return False
255
        return (isinstance(self.hunks[0], NewText))
256
0.9.1 by Aaron Bentley
Get trivial case passing
257
258
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
259
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
260
261
    def __init__(self, lines):
262
        self.lines = lines
263
264
    def __eq__(self, other):
265
        if self.__class__ is not other.__class__:
266
            return False
267
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
268
269
    def __repr__(self):
270
        return 'NewText(%r)' % self.lines
271
0.9.4 by Aaron Bentley
Start supporting serialization
272
    def to_patch(self):
273
        yield 'i %d\n' % len(self.lines)
274
        for line in self.lines:
275
            yield line
276
        yield '\n'
277
0.9.2 by Aaron Bentley
Get single-parent comparison working
278
279
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
280
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
281
282
    def __init__(self, parent, parent_pos, child_pos, num_lines):
283
        self.parent = parent
284
        self.parent_pos = parent_pos
285
        self.child_pos = child_pos
286
        self.num_lines = num_lines
287
288
    def __repr__(self):
289
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
290
            ' %(num_lines)r)' % self.__dict__
291
292
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
293
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
294
            return False
295
        return (self.__dict__ == other.__dict__)
0.9.4 by Aaron Bentley
Start supporting serialization
296
297
    def to_patch(self):
298
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
299
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
300
301
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
302
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
303
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
304
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
305
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
306
        self._lines = {}
307
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
308
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
309
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
310
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
311
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
312
    def versions(self):
313
        return iter(self._parents)
314
2520.4.61 by Aaron Bentley
Do bulk insertion of records
315
    def has_version(self, version):
316
        return version in self._parents
317
0.9.12 by Aaron Bentley
Make benchmarks for mp
318
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
319
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
320
        if self.snapshot_interval is None:
321
            return False
322
        if self.max_snapshots is not None and\
323
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
324
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
325
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
326
            return True
327
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
328
            if len(parent_ids) == 0:
329
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
330
            version_ids = parent_ids
331
            parent_ids = []
332
            for version_id in version_ids:
333
                if version_id not in self._snapshots:
334
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
335
        else:
336
            return True
0.9.8 by Aaron Bentley
get add_version working
337
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
338
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
339
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
340
        """Add a version to the versionedfile
341
342
        :param lines: The list of lines to add.  Must be split on '\n'.
343
        :param version_id: The version_id of the version to add
344
        :param force_snapshot: If true, force this version to be added as a
345
            snapshot version.  If false, force this version to be added as a
346
            diff.  If none, determine this automatically.
347
        :param single_parent: If true, use a single parent, rather than
348
            multiple parents.
349
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
350
        if force_snapshot is None:
351
            do_snapshot = self.do_snapshot(version_id, parent_ids)
352
        else:
353
            do_snapshot = force_snapshot
354
        if do_snapshot:
355
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
356
            diff = MultiParent([NewText(lines)])
357
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
358
            if single_parent:
359
                parent_lines = self.get_line_list(parent_ids[:1])
360
            else:
361
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
362
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
363
            if diff.is_snapshot():
364
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
365
        self.add_diff(diff, version_id, parent_ids)
366
        self._lines[version_id] = lines
367
0.9.35 by Aaron Bentley
Add build ranking
368
    def get_parents(self, version_id):
369
        return self._parents[version_id]
370
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
371
    def make_snapshot(self, version_id):
372
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
373
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
374
        self._snapshots.add(version_id)
375
0.9.20 by Aaron Bentley
Convert to a plugin
376
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
377
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
378
        """Import all revisions of a versionedfile
379
380
        :param vf: The versionedfile to import
381
        :param snapshots: If provided, the revisions to make snapshots of.
382
            Otherwise, this will be auto-determined
383
        :param no_cache: If true, clear the cache after every add.
384
        :param single_parent: If true, omit all but one parent text, (but
385
            retain parent metadata).
386
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
387
        if not (no_cache or not verify):
388
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
389
        revisions = set(vf.versions())
390
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
391
        pb = ui.ui_factory.nested_progress_bar()
392
        try:
393
            while len(revisions) > 0:
394
                added = set()
395
                for revision in revisions:
396
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
397
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
398
                        continue
399
                    lines = [a + ' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
400
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
401
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
402
                        force_snapshot = None
403
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
404
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
405
                    self.add_version(lines, revision, parents, force_snapshot,
406
                                     single_parent)
407
                    added.add(revision)
408
                    if no_cache:
409
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
410
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
411
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
412
                            if not (lines == self.get_line_list([revision])[0]):
413
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
414
                            self.clear_cache()
0.9.20 by Aaron Bentley
Convert to a plugin
415
                    pb.update('Importing revisions',
416
                              (total - len(revisions)) + len(added), total)
417
                revisions = [r for r in revisions if r not in added]
418
        finally:
419
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
420
0.9.23 by Aaron Bentley
handle snapshots all at once
421
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
422
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
423
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
424
        descendants = {}
425
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
426
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
427
            potential_build_ancestors = set(vf.get_parents(version_id))
428
            parents = vf.get_parents(version_id)
429
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
430
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
431
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
432
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
433
                for parent in vf.get_parents(version_id):
434
                    potential_build_ancestors.update(build_ancestors[parent])
435
                if len(potential_build_ancestors) > self.snapshot_interval:
436
                    snapshots.add(version_id)
437
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
438
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
439
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
440
        return snapshots
441
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
442
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
443
        """Select snapshots for minimum output size"""
444
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
445
        new_snapshots = self.get_size_ranking()[-num:]
446
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
447
448
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
449
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
450
        versions = []
451
        new_snapshots = set()
452
        for version_id in self.versions():
453
            if version_id in self._snapshots:
454
                continue
455
            diff_len = self.get_diff(version_id).patch_len()
456
            snapshot_len = MultiParent([NewText(
457
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
458
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
459
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
460
        return versions
461
462
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
463
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
464
        for version_id in vf.versions():
465
            self.add_diff(vf.get_diff(version_id), version_id,
466
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
467
0.9.35 by Aaron Bentley
Add build ranking
468
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
469
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
470
        could_avoid = {}
471
        referenced_by = {}
472
        for version_id in topo_iter(self):
473
            could_avoid[version_id] = set()
474
            if version_id not in self._snapshots:
475
                for parent_id in self._parents[version_id]:
476
                    could_avoid[version_id].update(could_avoid[parent_id])
477
                could_avoid[version_id].update(self._parents)
478
                could_avoid[version_id].discard(version_id)
479
            for avoid_id in could_avoid[version_id]:
480
                referenced_by.setdefault(avoid_id, set()).add(version_id)
481
        available_versions = list(self.versions())
482
        ranking = []
483
        while len(available_versions) > 0:
484
            available_versions.sort(key=lambda x:
485
                len(could_avoid[x]) *
486
                len(referenced_by.get(x, [])))
487
            selected = available_versions.pop()
488
            ranking.append(selected)
489
            for version_id in referenced_by[selected]:
490
                could_avoid[version_id].difference_update(
491
                    could_avoid[selected])
492
            for version_id in could_avoid[selected]:
493
                referenced_by[version_id].difference_update(
494
                    referenced_by[selected]
495
                )
496
        return ranking
497
0.9.8 by Aaron Bentley
get add_version working
498
    def clear_cache(self):
499
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
500
501
    def get_line_list(self, version_ids):
502
        return [self.cache_version(v) for v in version_ids]
503
504
    def cache_version(self, version_id):
505
        try:
506
            return self._lines[version_id]
507
        except KeyError:
508
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
509
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
510
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
511
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
512
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
513
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
514
        return lines
515
0.9.33 by Aaron Bentley
Enable caching commandline param
516
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
517
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
518
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
519
520
    def __init__(self, snapshot_interval=25, max_snapshots=None):
521
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
522
        self._diffs = {}
523
524
    def add_diff(self, diff, version_id, parent_ids):
525
        self._diffs[version_id] = diff
526
        self._parents[version_id] = parent_ids
527
528
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
529
        try:
530
            return self._diffs[version_id]
531
        except KeyError:
532
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
533
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
534
    def destroy(self):
535
        self._diffs = {}
536
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
537
538
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
539
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
540
541
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
542
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
543
        self._filename = filename
544
        self._diff_offset = {}
545
546
    def get_diff(self, version_id):
547
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
548
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
549
        try:
550
            infile.seek(start)
551
            sio = StringIO(infile.read(count))
552
        finally:
553
            infile.close()
554
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
555
        try:
556
            file_version_id = zip_file.readline()
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
557
            return MultiParent.from_patch(zip_file.read())
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
558
        finally:
559
            zip_file.close()
560
561
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
562
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
563
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
564
            outfile.seek(0, 2)      # workaround for windows bug:
565
                                    # .tell() for files opened in 'ab' mode
566
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
567
            start = outfile.tell()
568
            try:
569
                zipfile = GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
570
                zipfile.writelines(itertools.chain(
571
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
572
            finally:
573
                zipfile.close()
574
            end = outfile.tell()
575
        finally:
576
            outfile.close()
577
        self._diff_offset[version_id] = (start, end-start)
578
        self._parents[version_id] = parent_ids
579
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
580
    def destroy(self):
581
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
582
            os.unlink(self._filename + '.mpknit')
583
        except OSError, e:
584
            if e.errno != errno.ENOENT:
585
                raise
586
        try:
587
            os.unlink(self._filename + '.mpidx')
588
        except OSError, e:
589
            if e.errno != errno.ENOENT:
590
                raise
591
592
    def save(self):
593
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
594
            (self._parents, list(self._snapshots), self._diff_offset)))
595
596
    def load(self):
597
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
598
            open(self._filename + '.mpidx', 'rb').read())
599
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
600
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
601
602
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
603
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
604
605
    def __init__(self, diffs, lines, parents):
606
        self.diffs = diffs
607
        self.lines = lines
608
        self.parents = parents
609
        self.cursor = {}
610
611
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
612
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
613
        parent_id = self.parents[version_id][parent_text.parent]
614
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
615
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
616
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
617
618
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
619
        """Append lines for the requested version_id range"""
620
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
621
        if req_start == req_end:
622
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
623
        pending_reqs = [(req_version_id, req_start, req_end)]
624
        while len(pending_reqs) > 0:
625
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
626
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
627
            if req_version_id in self.lines:
628
                lines.extend(self.lines[req_version_id][req_start:req_end])
629
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
630
            try:
631
                start, end, kind, data, iterator = self.cursor[req_version_id]
632
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
633
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
634
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
635
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
636
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
637
                start, end, kind, data = iterator.next()
638
0.9.10 by Aaron Bentley
Text reconstruction seems to work
639
            # find the first hunk relevant to the request
640
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
641
                start, end, kind, data = iterator.next()
642
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
643
            # if the hunk can't satisfy the whole request, split it in two,
644
            # and leave the second half for later.
645
            if req_end > end:
646
                pending_reqs.append((req_version_id, end, req_end))
647
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
648
            if kind == 'new':
649
                lines.extend(data[req_start - start: (req_end - start)])
650
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
651
                # If the hunk is a ParentText, rewrite it as a range request
652
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
653
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
654
                new_version_id = self.parents[req_version_id][parent]
655
                new_start = parent_start + req_start - start
656
                new_end = parent_end + req_end - end
657
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
658
659
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
660
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
661
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
662
2520.4.6 by Aaron Bentley
Get installation started
663
0.9.25 by Aaron Bentley
More messy hacking
664
def gzip_string(lines):
665
    sio = StringIO()
666
    data_file = GzipFile(None, mode='wb', fileobj=sio)
667
    data_file.writelines(lines)
668
    data_file.close()
669
    return sio.getvalue()