~bzr-pqm/bzr/bzr.dev

5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
19
from bzrlib.lazy_import import lazy_import
20
21
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
22
import errno
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
23
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
24
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
25
import os
0.9.25 by Aaron Bentley
More messy hacking
26
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
27
0.9.25 by Aaron Bentley
More messy hacking
28
from bzrlib import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
29
    bencode,
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
30
    errors,
0.9.25 by Aaron Bentley
More messy hacking
31
    patiencediff,
32
    ui,
33
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
34
""")
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
35
0.9.33 by Aaron Bentley
Enable caching commandline param
36
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
37
def topo_iter_keys(vf, keys=None):
38
    if keys is None:
39
        keys = vf.keys()
40
    parents = vf.get_parent_map(keys)
41
    return _topo_iter(parents, keys)
42
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
43
def topo_iter(vf, versions=None):
44
    if versions is None:
45
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
46
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
47
    return _topo_iter(parents, versions)
48
49
def _topo_iter(parents, versions):
50
    seen = set()
51
    descendants = {}
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
52
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
53
        if parents[version] is None:
54
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
55
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
56
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
57
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
58
        if parents[version_id] is None:
59
            # parentless
60
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
61
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
62
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
63
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
64
    while len(cur) > 0:
65
        next = []
66
        for version_id in cur:
67
            if version_id in seen:
68
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
69
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
70
                continue
71
            next.extend(descendants.get(version_id, []))
72
            yield version_id
73
            seen.add(version_id)
74
        cur = next
75
76
0.9.1 by Aaron Bentley
Get trivial case passing
77
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
78
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
79
5374.2.9 by John Arbash Meinel
change slots back to a list.
80
    __slots__ = ['hunks']
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
81
0.9.2 by Aaron Bentley
Get single-parent comparison working
82
    def __init__(self, hunks=None):
83
        if hunks is not None:
84
            self.hunks = hunks
85
        else:
86
            self.hunks = []
87
88
    def __repr__(self):
89
        return "MultiParent(%r)" % self.hunks
90
91
    def __eq__(self, other):
92
        if self.__class__ is not other.__class__:
93
            return False
94
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
95
96
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
97
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
98
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
99
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
100
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
101
                                                           text)
102
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
103
        if len(parents) > 0:
104
            if left_blocks is None:
105
                left_blocks = compare(parents[0])
106
            parent_comparisons = [left_blocks] + [compare(p) for p in
107
                                                  parents[1:]]
108
        else:
109
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
110
        cur_line = 0
111
        new_text = NewText([])
112
        parent_text = []
113
        block_iter = [iter(i) for i in parent_comparisons]
114
        diff = MultiParent([])
115
        def next_block(p):
116
            try:
117
                return block_iter[p].next()
118
            except StopIteration:
119
                return None
120
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
121
        while cur_line < len(text):
122
            best_match = None
123
            for p, block in enumerate(cur_block):
124
                if block is None:
125
                    continue
126
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
127
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
128
                    block = cur_block[p] = next_block(p)
129
                    if block is None:
130
                        break
131
                    i, j, n = block
132
                if block is None:
133
                    continue
134
                if j > cur_line:
135
                    continue
136
                offset = cur_line - j
137
                i += offset
138
                j = cur_line
139
                n -= offset
140
                if n == 0:
141
                    continue
142
                if best_match is None or n > best_match.num_lines:
143
                    best_match = ParentText(p, i, j, n)
144
            if best_match is None:
145
                new_text.lines.append(text[cur_line])
146
                cur_line += 1
147
            else:
148
                if len(new_text.lines) > 0:
149
                    diff.hunks.append(new_text)
150
                    new_text = NewText([])
151
                diff.hunks.append(best_match)
152
                cur_line += best_match.num_lines
153
        if len(new_text.lines) > 0:
154
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
155
        return diff
156
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
157
    def get_matching_blocks(self, parent, parent_len):
158
        for hunk in self.hunks:
159
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
160
                continue
161
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
162
        yield parent_len, self.num_lines(), 0
163
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
164
    def to_lines(self, parents=()):
165
        """Contruct a fulltext from this diff and its parents"""
166
        mpvf = MultiMemoryVersionedFile()
167
        for num, parent in enumerate(parents):
168
            mpvf.add_version(StringIO(parent).readlines(), num, [])
169
        mpvf.add_diff(self, 'a', range(len(parents)))
170
        return mpvf.get_line_list(['a'])[0]
171
0.9.1 by Aaron Bentley
Get trivial case passing
172
    @classmethod
173
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
174
        """Produce a MultiParent from a text and list of parent text"""
2520.4.92 by Aaron Bentley
fix line-splitting in MultiParent.from_texts
175
        return cls.from_lines(StringIO(text).readlines(),
176
                              [StringIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
177
0.9.4 by Aaron Bentley
Start supporting serialization
178
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
179
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
180
        for hunk in self.hunks:
181
            for line in hunk.to_patch():
182
                yield line
183
0.9.25 by Aaron Bentley
More messy hacking
184
    def patch_len(self):
185
        return len(''.join(self.to_patch()))
186
187
    def zipped_patch_len(self):
188
        return len(gzip_string(self.to_patch()))
189
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
190
    @classmethod
191
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
192
        """Create a MultiParent from its string form"""
2520.4.31 by Aaron Bentley
Using StringIO for splitting patch lines
193
        return cls._from_patch(StringIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
194
0.9.18 by Aaron Bentley
Implement from_patch
195
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
196
    def _from_patch(lines):
197
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
198
        line_iter = iter(lines)
199
        hunks = []
200
        cur_line = None
201
        while(True):
202
            try:
203
                cur_line = line_iter.next()
204
            except StopIteration:
205
                break
206
            if cur_line[0] == 'i':
207
                num_lines = int(cur_line.split(' ')[1])
208
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
209
                hunk_lines[-1] = hunk_lines[-1][:-1]
210
                hunks.append(NewText(hunk_lines))
211
            elif cur_line[0] == '\n':
212
                hunks[-1].lines[-1] += '\n'
213
            else:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
214
                if not (cur_line[0] == 'c'):
215
                    raise AssertionError(cur_line[0])
0.9.18 by Aaron Bentley
Implement from_patch
216
                parent, parent_pos, child_pos, num_lines =\
217
                    [int(v) for v in cur_line.split(' ')[1:]]
218
                hunks.append(ParentText(parent, parent_pos, child_pos,
219
                                        num_lines))
220
        return MultiParent(hunks)
221
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
222
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
223
        """Iterate through the hunks, with range indicated
224
225
        kind is "new" or "parent".
226
        for "new", data is a list of lines.
227
        for "parent", data is (parent, parent_start, parent_end)
228
        :return: a generator of (start, end, kind, data)
229
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
230
        start = 0
231
        for hunk in self.hunks:
232
            if isinstance(hunk, NewText):
233
                kind = 'new'
234
                end = start + len(hunk.lines)
235
                data = hunk.lines
236
            else:
237
                kind = 'parent'
238
                start = hunk.child_pos
239
                end = start + hunk.num_lines
240
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
241
                        hunk.num_lines)
242
            yield start, end, kind, data
243
            start = end
244
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
245
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
246
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
247
        extra_n = 0
248
        for hunk in reversed(self.hunks):
249
            if isinstance(hunk, ParentText):
250
               return hunk.child_pos + hunk.num_lines + extra_n
251
            extra_n += len(hunk.lines)
252
        return extra_n
253
0.9.25 by Aaron Bentley
More messy hacking
254
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
255
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
256
        if len(self.hunks) != 1:
257
            return False
258
        return (isinstance(self.hunks[0], NewText))
259
0.9.1 by Aaron Bentley
Get trivial case passing
260
261
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
262
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
263
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
264
    __slots__ = ['lines']
265
0.9.1 by Aaron Bentley
Get trivial case passing
266
    def __init__(self, lines):
267
        self.lines = lines
268
269
    def __eq__(self, other):
270
        if self.__class__ is not other.__class__:
271
            return False
272
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
273
274
    def __repr__(self):
275
        return 'NewText(%r)' % self.lines
276
0.9.4 by Aaron Bentley
Start supporting serialization
277
    def to_patch(self):
278
        yield 'i %d\n' % len(self.lines)
279
        for line in self.lines:
280
            yield line
281
        yield '\n'
282
0.9.2 by Aaron Bentley
Get single-parent comparison working
283
284
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
285
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
286
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
287
    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']
288
0.9.2 by Aaron Bentley
Get single-parent comparison working
289
    def __init__(self, parent, parent_pos, child_pos, num_lines):
290
        self.parent = parent
291
        self.parent_pos = parent_pos
292
        self.child_pos = child_pos
293
        self.num_lines = num_lines
294
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
295
    def _as_dict(self):
296
        return dict(parent=self.parent, parent_pos=self.parent_pos,
297
                    child_pos=self.child_pos, num_lines=self.num_lines)
298
0.9.2 by Aaron Bentley
Get single-parent comparison working
299
    def __repr__(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
300
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
301
                ' %(num_lines)r)' % self._as_dict())
0.9.2 by Aaron Bentley
Get single-parent comparison working
302
303
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
304
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
305
            return False
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
306
        return self._as_dict() == other._as_dict()
0.9.4 by Aaron Bentley
Start supporting serialization
307
308
    def to_patch(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
309
        yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
310
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
311
312
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
313
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
314
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
315
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
316
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
317
        self._lines = {}
318
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
319
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
320
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
321
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
322
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
323
    def versions(self):
324
        return iter(self._parents)
325
2520.4.61 by Aaron Bentley
Do bulk insertion of records
326
    def has_version(self, version):
327
        return version in self._parents
328
0.9.12 by Aaron Bentley
Make benchmarks for mp
329
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
330
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
331
        if self.snapshot_interval is None:
332
            return False
333
        if self.max_snapshots is not None and\
334
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
335
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
336
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
337
            return True
338
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
339
            if len(parent_ids) == 0:
340
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
341
            version_ids = parent_ids
342
            parent_ids = []
343
            for version_id in version_ids:
344
                if version_id not in self._snapshots:
345
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
346
        else:
347
            return True
0.9.8 by Aaron Bentley
get add_version working
348
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
349
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
350
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
351
        """Add a version to the versionedfile
352
353
        :param lines: The list of lines to add.  Must be split on '\n'.
354
        :param version_id: The version_id of the version to add
355
        :param force_snapshot: If true, force this version to be added as a
356
            snapshot version.  If false, force this version to be added as a
357
            diff.  If none, determine this automatically.
358
        :param single_parent: If true, use a single parent, rather than
359
            multiple parents.
360
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
361
        if force_snapshot is None:
362
            do_snapshot = self.do_snapshot(version_id, parent_ids)
363
        else:
364
            do_snapshot = force_snapshot
365
        if do_snapshot:
366
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
367
            diff = MultiParent([NewText(lines)])
368
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
369
            if single_parent:
370
                parent_lines = self.get_line_list(parent_ids[:1])
371
            else:
372
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
373
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
374
            if diff.is_snapshot():
375
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
376
        self.add_diff(diff, version_id, parent_ids)
377
        self._lines[version_id] = lines
378
0.9.35 by Aaron Bentley
Add build ranking
379
    def get_parents(self, version_id):
380
        return self._parents[version_id]
381
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
382
    def make_snapshot(self, version_id):
383
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
384
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
385
        self._snapshots.add(version_id)
386
0.9.20 by Aaron Bentley
Convert to a plugin
387
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
388
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
389
        """Import all revisions of a versionedfile
390
391
        :param vf: The versionedfile to import
392
        :param snapshots: If provided, the revisions to make snapshots of.
393
            Otherwise, this will be auto-determined
394
        :param no_cache: If true, clear the cache after every add.
395
        :param single_parent: If true, omit all but one parent text, (but
396
            retain parent metadata).
397
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
398
        if not (no_cache or not verify):
399
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
400
        revisions = set(vf.versions())
401
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
402
        pb = ui.ui_factory.nested_progress_bar()
403
        try:
404
            while len(revisions) > 0:
405
                added = set()
406
                for revision in revisions:
407
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
408
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
409
                        continue
410
                    lines = [a + ' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
411
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
412
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
413
                        force_snapshot = None
414
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
415
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
416
                    self.add_version(lines, revision, parents, force_snapshot,
417
                                     single_parent)
418
                    added.add(revision)
419
                    if no_cache:
420
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
421
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
422
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
423
                            if not (lines == self.get_line_list([revision])[0]):
424
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
425
                            self.clear_cache()
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
426
                    pb.update(gettext('Importing revisions'),
0.9.20 by Aaron Bentley
Convert to a plugin
427
                              (total - len(revisions)) + len(added), total)
428
                revisions = [r for r in revisions if r not in added]
429
        finally:
430
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
431
0.9.23 by Aaron Bentley
handle snapshots all at once
432
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
433
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
434
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
435
        descendants = {}
436
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
437
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
438
            potential_build_ancestors = set(vf.get_parents(version_id))
439
            parents = vf.get_parents(version_id)
440
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
441
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
442
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
443
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
444
                for parent in vf.get_parents(version_id):
445
                    potential_build_ancestors.update(build_ancestors[parent])
446
                if len(potential_build_ancestors) > self.snapshot_interval:
447
                    snapshots.add(version_id)
448
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
449
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
450
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
451
        return snapshots
452
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
453
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
454
        """Select snapshots for minimum output size"""
455
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
456
        new_snapshots = self.get_size_ranking()[-num:]
457
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
458
459
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
460
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
461
        versions = []
462
        new_snapshots = set()
463
        for version_id in self.versions():
464
            if version_id in self._snapshots:
465
                continue
466
            diff_len = self.get_diff(version_id).patch_len()
467
            snapshot_len = MultiParent([NewText(
468
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
469
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
470
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
471
        return versions
472
473
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
474
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
475
        for version_id in vf.versions():
476
            self.add_diff(vf.get_diff(version_id), version_id,
477
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
478
0.9.35 by Aaron Bentley
Add build ranking
479
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
480
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
481
        could_avoid = {}
482
        referenced_by = {}
483
        for version_id in topo_iter(self):
484
            could_avoid[version_id] = set()
485
            if version_id not in self._snapshots:
486
                for parent_id in self._parents[version_id]:
487
                    could_avoid[version_id].update(could_avoid[parent_id])
488
                could_avoid[version_id].update(self._parents)
489
                could_avoid[version_id].discard(version_id)
490
            for avoid_id in could_avoid[version_id]:
491
                referenced_by.setdefault(avoid_id, set()).add(version_id)
492
        available_versions = list(self.versions())
493
        ranking = []
494
        while len(available_versions) > 0:
495
            available_versions.sort(key=lambda x:
496
                len(could_avoid[x]) *
497
                len(referenced_by.get(x, [])))
498
            selected = available_versions.pop()
499
            ranking.append(selected)
500
            for version_id in referenced_by[selected]:
501
                could_avoid[version_id].difference_update(
502
                    could_avoid[selected])
503
            for version_id in could_avoid[selected]:
504
                referenced_by[version_id].difference_update(
505
                    referenced_by[selected]
506
                )
507
        return ranking
508
0.9.8 by Aaron Bentley
get add_version working
509
    def clear_cache(self):
510
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
511
512
    def get_line_list(self, version_ids):
513
        return [self.cache_version(v) for v in version_ids]
514
515
    def cache_version(self, version_id):
516
        try:
517
            return self._lines[version_id]
518
        except KeyError:
519
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
520
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
521
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
522
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
523
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
524
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
525
        return lines
526
0.9.33 by Aaron Bentley
Enable caching commandline param
527
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
528
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
529
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
530
531
    def __init__(self, snapshot_interval=25, max_snapshots=None):
532
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
533
        self._diffs = {}
534
535
    def add_diff(self, diff, version_id, parent_ids):
536
        self._diffs[version_id] = diff
537
        self._parents[version_id] = parent_ids
538
539
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
540
        try:
541
            return self._diffs[version_id]
542
        except KeyError:
543
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
544
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
545
    def destroy(self):
546
        self._diffs = {}
547
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
548
549
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
550
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
551
552
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
553
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
554
        self._filename = filename
555
        self._diff_offset = {}
556
557
    def get_diff(self, version_id):
558
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
559
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
560
        try:
561
            infile.seek(start)
562
            sio = StringIO(infile.read(count))
563
        finally:
564
            infile.close()
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
565
        zip_file = gzip.GzipFile(None, mode='rb', fileobj=sio)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
566
        try:
567
            file_version_id = zip_file.readline()
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
568
            content = zip_file.read()
569
            return MultiParent.from_patch(content)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
570
        finally:
571
            zip_file.close()
572
573
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
574
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
575
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
576
            outfile.seek(0, 2)      # workaround for windows bug:
577
                                    # .tell() for files opened in 'ab' mode
578
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
579
            start = outfile.tell()
580
            try:
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
581
                zipfile = gzip.GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
582
                zipfile.writelines(itertools.chain(
583
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
584
            finally:
585
                zipfile.close()
586
            end = outfile.tell()
587
        finally:
588
            outfile.close()
589
        self._diff_offset[version_id] = (start, end-start)
590
        self._parents[version_id] = parent_ids
591
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
592
    def destroy(self):
593
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
594
            os.unlink(self._filename + '.mpknit')
595
        except OSError, e:
596
            if e.errno != errno.ENOENT:
597
                raise
598
        try:
599
            os.unlink(self._filename + '.mpidx')
600
        except OSError, e:
601
            if e.errno != errno.ENOENT:
602
                raise
603
604
    def save(self):
605
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
606
            (self._parents, list(self._snapshots), self._diff_offset)))
607
608
    def load(self):
609
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
610
            open(self._filename + '.mpidx', 'rb').read())
611
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
612
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
613
614
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
615
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
616
617
    def __init__(self, diffs, lines, parents):
618
        self.diffs = diffs
619
        self.lines = lines
620
        self.parents = parents
621
        self.cursor = {}
622
623
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
624
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
625
        parent_id = self.parents[version_id][parent_text.parent]
626
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
627
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
628
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
629
630
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
631
        """Append lines for the requested version_id range"""
632
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
633
        if req_start == req_end:
634
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
635
        pending_reqs = [(req_version_id, req_start, req_end)]
636
        while len(pending_reqs) > 0:
637
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
638
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
639
            if req_version_id in self.lines:
640
                lines.extend(self.lines[req_version_id][req_start:req_end])
641
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
642
            try:
643
                start, end, kind, data, iterator = self.cursor[req_version_id]
644
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
645
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
646
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
647
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
648
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
649
                start, end, kind, data = iterator.next()
650
0.9.10 by Aaron Bentley
Text reconstruction seems to work
651
            # find the first hunk relevant to the request
652
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
653
                start, end, kind, data = iterator.next()
654
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
655
            # if the hunk can't satisfy the whole request, split it in two,
656
            # and leave the second half for later.
657
            if req_end > end:
658
                pending_reqs.append((req_version_id, end, req_end))
659
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
660
            if kind == 'new':
661
                lines.extend(data[req_start - start: (req_end - start)])
662
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
663
                # If the hunk is a ParentText, rewrite it as a range request
664
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
665
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
666
                new_version_id = self.parents[req_version_id][parent]
667
                new_start = parent_start + req_start - start
668
                new_end = parent_end + req_end - end
669
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
670
671
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
672
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
673
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
674
2520.4.6 by Aaron Bentley
Get installation started
675
0.9.25 by Aaron Bentley
More messy hacking
676
def gzip_string(lines):
677
    sio = StringIO()
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
678
    data_file = gzip.GzipFile(None, mode='wb', fileobj=sio)
0.9.25 by Aaron Bentley
More messy hacking
679
    data_file.writelines(lines)
680
    data_file.close()
681
    return sio.getvalue()