~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
17
from bzrlib.lazy_import import lazy_import
18
19
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
20
import errno
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
21
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
22
import os
0.9.25 by Aaron Bentley
More messy hacking
23
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
24
0.9.25 by Aaron Bentley
More messy hacking
25
from bzrlib import (
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
26
    errors,
0.9.25 by Aaron Bentley
More messy hacking
27
    patiencediff,
28
    trace,
29
    ui,
30
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
31
from bzrlib.util import bencode
32
""")
0.9.25 by Aaron Bentley
More messy hacking
33
from bzrlib.tuned_gzip import GzipFile
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
34
0.9.33 by Aaron Bentley
Enable caching commandline param
35
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
36
def topo_iter(vf, versions=None):
    """Yield the version ids of vf in topological order.

    Every version is yielded only after all of its parents (restricted to
    the selected versions) have been yielded.

    :param vf: object providing ``versions()`` and ``get_parent_map()``
    :param versions: optional subset of versions to iterate; defaults to
        everything ``vf.versions()`` reports
    """
    emitted = set()
    children = {}
    if versions is None:
        versions = vf.versions()
    parent_map = vf.get_parent_map(versions)

    def unemitted_parents(version):
        # Parents that are part of this iteration but not yet yielded.
        return [p for p in parent_map[version]
                if p in versions and p not in emitted]

    # Invert the parent relation so we can walk downwards.
    for child_id in versions:
        for parent_id in parent_map[child_id]:
            children.setdefault(parent_id, []).append(child_id)
    # Seed with the roots: versions with no pending parents.
    current = [v for v in versions if not unemitted_parents(v)]
    while current:
        successors = []
        for version_id in current:
            if version_id in emitted:
                continue
            if unemitted_parents(version_id):
                # A parent is still pending; this id will come around again
                # via that parent's children list.
                continue
            successors.extend(children.get(version_id, []))
            yield version_id
            emitted.add(version_id)
        current = successors
    assert len(emitted) == len(versions)
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
61
62
0.9.1 by Aaron Bentley
Get trivial case passing
63
class MultiParent(object):
    """A multi-parent diff.

    The diff is an ordered list of hunks: NewText hunks carry lines that
    are new in this text, ParentText hunks reference runs of lines present
    in one of the parent texts.
    """

    def __init__(self, hunks=None):
        # Avoid a shared mutable default: each instance gets its own list.
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        return "MultiParent(%r)" % self.hunks

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    @staticmethod
    def from_lines(text, parents=(), left_blocks=None):
        """Produce a MultiParent from a list of lines and parents

        :param text: list of lines of the new text
        :param parents: sequence of parent texts, each a list of lines
        :param left_blocks: optional precomputed matching blocks against
            parents[0], to avoid re-running the matcher
        """
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()
        if len(parents) > 0:
            if left_blocks is None:
                left_blocks = compare(parents[0])
            parent_comparisons = [left_blocks] + [compare(p) for p in
                                                  parents[1:]]
        else:
            parent_comparisons = []
        cur_line = 0
        new_text = NewText([])
        parent_text = []  # NOTE(review): unused local — candidate for removal
        block_iter = [iter(i) for i in parent_comparisons]
        diff = MultiParent([])
        def next_block(p):
            try:
                return block_iter[p].next()
            except StopIteration:
                return None
        # The current matching block for each parent, advanced in step
        # with cur_line.
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                # Skip blocks that end at or before the current line.
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    continue
                # Trim the block so it starts exactly at cur_line.
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if n == 0:
                    continue
                # Keep the longest match across all parents.
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                # No parent covers this line: record it as new text.
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                # Flush any pending new lines before the parent reference.
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    def get_matching_blocks(self, parent, parent_len):
        """Yield (parent_pos, child_pos, num_lines) for hunks drawn from
        the given parent, followed by the conventional zero-length
        terminator block.
        """
        for hunk in self.hunks:
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
                continue
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
        yield parent_len, self.num_lines(), 0

    def to_lines(self, parents=()):
        """Construct a fulltext from this diff and its parents"""
        mpvf = MultiMemoryVersionedFile()
        for num, parent in enumerate(parents):
            mpvf.add_version(StringIO(parent).readlines(), num, [])
        mpvf.add_diff(self, 'a', range(len(parents)))
        return mpvf.get_line_list(['a'])[0]

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent text"""
        # StringIO.readlines splits on '\n' only, which the patch format
        # requires.
        return cls.from_lines(StringIO(text).readlines(),
                              [StringIO(p).readlines() for p in parents])

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        # Size in bytes of the uncompressed patch form.
        return len(''.join(self.to_patch()))

    def zipped_patch_len(self):
        # Size in bytes of the compressed patch form (gzip_string is
        # defined elsewhere in this file; presumably gzip — confirm).
        return len(gzip_string(self.to_patch()))

    @classmethod
    def from_patch(cls, text):
        """Create a MultiParent from its string form"""
        return cls._from_patch(StringIO(text))

    @staticmethod
    def _from_patch(lines):
        """Parse patch lines into a MultiParent.

        This is private because it is essential that the input is split on
        '\n' only (as StringIO.readlines does), never on other line breaks.
        """
        line_iter = iter(lines)
        hunks = []
        cur_line = None
        while(True):
            try:
                cur_line = line_iter.next()
            except StopIteration:
                break
            if cur_line[0] == 'i':
                # 'i <n>': the next n lines are inserted (new) text.
                num_lines = int(cur_line.split(' ')[1])
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
                # Drop the last line's trailing character; a following bare
                # '\n' record (below) restores it when the text really
                # ended with a newline.
                hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif cur_line[0] == '\n':
                hunks[-1].lines[-1] += '\n'
            else:
                assert cur_line[0] == 'c', cur_line[0]
                # 'c <parent> <parent_pos> <child_pos> <num_lines>'
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """The number of lines in the output text"""
        # Walk backwards: the last ParentText pins an absolute position;
        # NewText hunks after it only add their own line counts.
        extra_n = 0
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
               return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return True if this diff is effectively a fulltext"""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))
242
0.9.1 by Aaron Bentley
Get trivial case passing
243
244
class NewText(object):
    """The contents of text that is introduced by this text"""

    def __init__(self, lines):
        # Raw list of new lines; each entry normally ends with '\n'.
        self.lines = lines

    def __eq__(self, other):
        return (self.__class__ is other.__class__
                and self.lines == other.lines)

    def __repr__(self):
        return 'NewText(%r)' % (self.lines,)

    def to_patch(self):
        """Serialize as patch lines: an 'i <count>' header, the inserted
        lines themselves, then a terminating bare newline.
        """
        yield 'i %d\n' % len(self.lines)
        for inserted_line in self.lines:
            yield inserted_line
        yield '\n'
263
0.9.2 by Aaron Bentley
Get single-parent comparison working
264
265
class ParentText(object):
    """A reference to a run of lines present in a parent text."""

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        self.parent = parent          # index of the parent text
        self.parent_pos = parent_pos  # starting line within the parent
        self.child_pos = child_pos    # starting line within the child
        self.num_lines = num_lines    # length of the referenced run

    def __repr__(self):
        return ('ParentText(%r, %r, %r, %r)'
                % (self.parent, self.parent_pos, self.child_pos,
                   self.num_lines))

    def __eq__(self, other):
        if self.__class__ != other.__class__:
            return False
        return self.__dict__ == other.__dict__

    def to_patch(self):
        """Serialize as a single 'c' patch line."""
        yield ('c %d %d %d %d\n'
               % (self.parent, self.parent_pos, self.child_pos,
                  self.num_lines))
0.9.8 by Aaron Bentley
get add_version working
286
287
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
288
class BaseVersionedFile(object):
    """Pseudo-VersionedFile skeleton for MultiParent

    Texts are stored as MultiParent diffs, with occasional full-text
    snapshots.  Subclasses supply the diff storage (add_diff/get_diff).
    """

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        # version_id -> cached list of lines
        self._lines = {}
        # version_id -> list of parent version_ids
        self._parents = {}
        # version_ids stored as full-text snapshots rather than diffs
        self._snapshots = set()
        self.snapshot_interval = snapshot_interval
        self.max_snapshots = max_snapshots

    def versions(self):
        return iter(self._parents)

    def has_version(self, version):
        return version in self._parents

    def do_snapshot(self, version_id, parent_ids):
        """Determine whether to perform a snapshot for this version"""
        if self.snapshot_interval is None:
            return False
        if self.max_snapshots is not None and\
            len(self._snapshots) == self.max_snapshots:
            return False
        if len(parent_ids) == 0:
            return True
        # Breadth-first walk of the ancestry, up to snapshot_interval
        # generations, stopping at ancestors that are already snapshots.
        for ignored in xrange(self.snapshot_interval):
            if len(parent_ids) == 0:
                return False
            version_ids = parent_ids
            parent_ids = []
            for version_id in version_ids:
                if version_id not in self._snapshots:
                    parent_ids.extend(self._parents[version_id])
        else:
            # Loop completed with un-snapshotted ancestors remaining: some
            # build chain exceeds snapshot_interval, so snapshot now.
            return True

    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version to the versionedfile

        :param lines: The list of lines to add.  Must be split on '\n'.
        :param version_id: The version_id of the version to add
        :param parent_ids: The version_ids of this version's parents
        :param force_snapshot: If true, force this version to be added as a
            snapshot version.  If false, force this version to be added as a
            diff.  If none, determine this automatically.
        :param single_parent: If true, use a single parent, rather than
            multiple parents.
        """
        if force_snapshot is None:
            do_snapshot = self.do_snapshot(version_id, parent_ids)
        else:
            do_snapshot = force_snapshot
        if do_snapshot:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            if single_parent:
                parent_lines = self.get_line_list(parent_ids[:1])
            else:
                parent_lines = self.get_line_list(parent_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            # A diff that came out as pure new text is a snapshot in
            # disguise; record it as such.
            if diff.is_snapshot():
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines

    def get_parents(self, version_id):
        return self._parents[version_id]

    def make_snapshot(self, version_id):
        """Re-store version_id as a full-text snapshot diff."""
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
        self.add_diff(snapdiff, version_id, self._parents[version_id])
        self._snapshots.add(version_id)

    def import_versionedfile(self, vf, snapshots, no_cache=True,
                             single_parent=False, verify=False):
        """Import all revisions of a versionedfile

        :param vf: The versionedfile to import
        :param snapshots: If provided, the revisions to make snapshots of.
            Otherwise, this will be auto-determined
        :param no_cache: If true, clear the cache after every add.
        :param single_parent: If true, omit all but one parent text, (but
            retain parent metadata).
        :param verify: If true, re-read each added text and check it round-
            trips (requires no_cache).
        """
        assert no_cache or not verify
        revisions = set(vf.versions())
        total = len(revisions)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            # Repeated passes: a revision is only added once all of its
            # parents have been added in an earlier pass (or this one).
            while len(revisions) > 0:
                added = set()
                for revision in revisions:
                    parents = vf.get_parents(revision)
                    if [p for p in parents if p not in self._parents] != []:
                        continue
                    # Lines prefixed with their annotation origin.
                    lines = [a + ' ' + l for a, l in
                             vf.annotate(revision)]
                    if snapshots is None:
                        force_snapshot = None
                    else:
                        force_snapshot = (revision in snapshots)
                    self.add_version(lines, revision, parents, force_snapshot,
                                     single_parent)
                    added.add(revision)
                    if no_cache:
                        self.clear_cache()
                        vf.clear_cache()
                        if verify:
                            assert lines == self.get_line_list([revision])[0]
                            self.clear_cache()
                    pb.update('Importing revisions',
                              (total - len(revisions)) + len(added), total)
                revisions = [r for r in revisions if r not in added]
        finally:
            pb.finished()

    def select_snapshots(self, vf):
        """Determine which versions to add as snapshots"""
        build_ancestors = {}
        descendants = {}  # NOTE(review): unused local — candidate for removal
        snapshots = set()
        for version_id in topo_iter(vf):
            potential_build_ancestors = set(vf.get_parents(version_id))
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                # Roots are always snapshots.
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
            else:
                for parent in vf.get_parents(version_id):
                    potential_build_ancestors.update(build_ancestors[parent])
                # Too many texts would be needed to build this one: snapshot.
                if len(potential_build_ancestors) > self.snapshot_interval:
                    snapshots.add(version_id)
                    build_ancestors[version_id] = set()
                else:
                    build_ancestors[version_id] = potential_build_ancestors
        return snapshots

    def select_by_size(self, num):
        """Select snapshots for minimum output size"""
        num -= len(self._snapshots)
        # NOTE(review): if num <= 0 here, the negative slice below selects
        # from the wrong end (or everything for num == 0) — confirm callers
        # always pass num greater than the current snapshot count.
        new_snapshots = self.get_size_ranking()[-num:]
        return [v for n, v in new_snapshots]

    def get_size_ranking(self):
        """Get versions ranked by size"""
        versions = []
        new_snapshots = set()  # NOTE(review): unused local
        for version_id in self.versions():
            if version_id in self._snapshots:
                continue
            diff_len = self.get_diff(version_id).patch_len()
            snapshot_len = MultiParent([NewText(
                self.cache_version(version_id))]).patch_len()
            # Rank by how much larger a snapshot would be than the diff.
            versions.append((snapshot_len - diff_len, version_id))
        versions.sort()
        return versions

    def import_diffs(self, vf):
        """Import the diffs from another pseudo-versionedfile"""
        for version_id in vf.versions():
            self.add_diff(vf.get_diff(version_id), version_id,
                          vf._parents[version_id])

    def get_build_ranking(self):
        """Return revisions sorted by how much they reduce build complexity"""
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            could_avoid[version_id] = set()
            if version_id not in self._snapshots:
                for parent_id in self._parents[version_id]:
                    could_avoid[version_id].update(could_avoid[parent_id])
                # NOTE(review): this adds *every* version id (dict keys) to
                # the set; it looks like self._parents[version_id] (just the
                # parents) was intended — confirm against upstream history.
                could_avoid[version_id].update(self._parents)
                could_avoid[version_id].discard(version_id)
            for avoid_id in could_avoid[version_id]:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while len(available_versions) > 0:
            # Greedily pick the version with the largest avoidance impact.
            available_versions.sort(key=lambda x:
                len(could_avoid[x]) *
                len(referenced_by.get(x, [])))
            selected = available_versions.pop()
            ranking.append(selected)
            for version_id in referenced_by[selected]:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(
                    referenced_by[selected]
                )
        return ranking

    def clear_cache(self):
        # Drop all cached fulltexts; diffs and parent metadata are kept.
        self._lines.clear()

    def get_line_list(self, version_ids):
        return [self.cache_version(v) for v in version_ids]

    def cache_version(self, version_id):
        # Return the fulltext lines for version_id, reconstructing (and
        # caching) from diffs on a cache miss.
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        diff = self.get_diff(version_id)
        lines = []
        # _Reconstructor is defined elsewhere in this file.
        reconstructor = _Reconstructor(self, self._lines, self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines
499
0.9.33 by Aaron Bentley
Enable caching commandline param
500
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
501
class MultiMemoryVersionedFile(BaseVersionedFile):
    """Pseudo-versionedfile that keeps every diff in memory."""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        # version_id -> MultiParent diff
        self._diffs = {}

    def add_diff(self, diff, version_id, parent_ids):
        """Record a diff and its parent ids for version_id."""
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def get_diff(self, version_id):
        """Return the stored diff for version_id.

        :raises errors.RevisionNotPresent: if no diff is stored.
        """
        if version_id not in self._diffs:
            raise errors.RevisionNotPresent(version_id, self)
        return self._diffs[version_id]

    def destroy(self):
        """Discard all stored diffs."""
        self._diffs = {}
520
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
521
522
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
523
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
524
525
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
526
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
527
        self._filename = filename
528
        self._diff_offset = {}
529
530
    def get_diff(self, version_id):
531
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
532
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
533
        try:
534
            infile.seek(start)
535
            sio = StringIO(infile.read(count))
536
        finally:
537
            infile.close()
538
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
539
        try:
540
            file_version_id = zip_file.readline()
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
541
            return MultiParent.from_patch(zip_file.read())
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
542
        finally:
543
            zip_file.close()
544
545
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
546
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
547
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
548
            outfile.seek(0, 2)      # workaround for windows bug:
549
                                    # .tell() for files opened in 'ab' mode
550
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
551
            start = outfile.tell()
552
            try:
553
                zipfile = GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
554
                zipfile.writelines(itertools.chain(
555
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
556
            finally:
557
                zipfile.close()
558
            end = outfile.tell()
559
        finally:
560
            outfile.close()
561
        self._diff_offset[version_id] = (start, end-start)
562
        self._parents[version_id] = parent_ids
563
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
564
    def destroy(self):
565
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
566
            os.unlink(self._filename + '.mpknit')
567
        except OSError, e:
568
            if e.errno != errno.ENOENT:
569
                raise
570
        try:
571
            os.unlink(self._filename + '.mpidx')
572
        except OSError, e:
573
            if e.errno != errno.ENOENT:
574
                raise
575
576
    def save(self):
577
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
578
            (self._parents, list(self._snapshots), self._diff_offset)))
579
580
    def load(self):
581
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
582
            open(self._filename + '.mpidx', 'rb').read())
583
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
584
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
585
586
class _Reconstructor(object):
    """Build a text from the diffs, ancestry graph and cached lines

    Works iteratively with a worklist of (version_id, start, end) range
    requests, resolving ParentText hunks by translating them into further
    range requests against the parent versions.
    """

    def __init__(self, diffs, lines, parents):
        # Object with a get_diff(version_id) method (a BaseVersionedFile)
        self.diffs = diffs
        # version_id -> list of lines, for versions with cached full texts
        self.lines = lines
        # version_id -> sequence of parent version_ids
        self.parents = parents
        # version_id -> (start, end, kind, data, iterator): the last hunk
        # consumed from that version's diff, so repeated forward-moving
        # requests don't restart iteration from the beginning.
        self.cursor = {}

    def reconstruct(self, lines, parent_text, version_id):
        """Append the lines referred to by a ParentText to lines"""
        parent_id = self.parents[version_id][parent_text.parent]
        end = parent_text.parent_pos + parent_text.num_lines
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
                                 end)

    def _reconstruct(self, lines, req_version_id, req_start, req_end):
        """Append lines for the requested version_id range"""
        # stack of pending range requests
        if req_start == req_end:
            return
        pending_reqs = [(req_version_id, req_start, req_end)]
        while len(pending_reqs) > 0:
            req_version_id, req_start, req_end = pending_reqs.pop()
            # lazily allocate cursors for versions
            if req_version_id in self.lines:
                lines.extend(self.lines[req_version_id][req_start:req_end])
                continue
            try:
                start, end, kind, data, iterator = self.cursor[req_version_id]
            except KeyError:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = iterator.next()
            if start > req_start:
                # Cursors only move forward; if the request begins before
                # the cached hunk, restart iteration from the first hunk.
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = iterator.next()

            # find the first hunk relevant to the request
            while end <= req_start:
                start, end, kind, data = iterator.next()
            self.cursor[req_version_id] = start, end, kind, data, iterator
            # if the hunk can't satisfy the whole request, split it in two,
            # and leave the second half for later.
            if req_end > end:
                pending_reqs.append((req_version_id, end, req_end))
                req_end = end
            if kind == 'new':
                # NewText hunk: its lines are stored in the diff itself.
                lines.extend(data[req_start - start: (req_end - start)])
            else:
                # If the hunk is a ParentText, rewrite it as a range request
                # for the parent, and make it the next pending request.
                parent, parent_start, parent_end = data
                new_version_id = self.parents[req_version_id][parent]
                # Map the requested sub-range into the parent's coordinates.
                new_start = parent_start + req_start - start
                new_end = parent_end + req_end - end
                pending_reqs.append((new_version_id, new_start, new_end))

    def reconstruct_version(self, lines, version_id):
        """Append the full text of version_id to lines."""
        length = self.diffs.get_diff(version_id).num_lines()
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
646
2520.4.6 by Aaron Bentley
Get installation started
647
0.9.25 by Aaron Bentley
More messy hacking
648
def gzip_string(lines):
    """Return the given lines joined and gzip-compressed as one string."""
    buffer = StringIO()
    zip_file = GzipFile(None, mode='wb', fileobj=buffer)
    zip_file.writelines(lines)
    zip_file.close()
    return buffer.getvalue()