~bzr-pqm/bzr/bzr.dev

5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
1
# Copyright (C) 2007-2010 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
17
from bzrlib.lazy_import import lazy_import
18
19
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
20
import errno
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
21
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
22
import os
0.9.25 by Aaron Bentley
More messy hacking
23
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
24
0.9.25 by Aaron Bentley
More messy hacking
25
from bzrlib import (
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
26
    errors,
0.9.25 by Aaron Bentley
More messy hacking
27
    patiencediff,
28
    trace,
29
    ui,
30
    )
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
31
from bzrlib import bencode
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
32
""")
0.9.25 by Aaron Bentley
More messy hacking
33
from bzrlib.tuned_gzip import GzipFile
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
34
0.9.33 by Aaron Bentley
Enable caching commandline param
35
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
36
def topo_iter_keys(vf, keys=None):
37
    if keys is None:
38
        keys = vf.keys()
39
    parents = vf.get_parent_map(keys)
40
    return _topo_iter(parents, keys)
41
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
42
def topo_iter(vf, versions=None):
43
    if versions is None:
44
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
45
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
46
    return _topo_iter(parents, versions)
47
48
def _topo_iter(parents, versions):
49
    seen = set()
50
    descendants = {}
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
51
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
52
        if parents[version] is None:
53
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
54
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
55
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
56
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
57
        if parents[version_id] is None:
58
            # parentless
59
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
60
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
61
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
62
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
63
    while len(cur) > 0:
64
        next = []
65
        for version_id in cur:
66
            if version_id in seen:
67
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
68
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
69
                continue
70
            next.extend(descendants.get(version_id, []))
71
            yield version_id
72
            seen.add(version_id)
73
        cur = next
74
75
0.9.1 by Aaron Bentley
Get trivial case passing
76
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
77
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
78
5374.2.9 by John Arbash Meinel
change slots back to a list.
79
    __slots__ = ['hunks']
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
80
0.9.2 by Aaron Bentley
Get single-parent comparison working
81
    def __init__(self, hunks=None):
82
        if hunks is not None:
83
            self.hunks = hunks
84
        else:
85
            self.hunks = []
86
87
    def __repr__(self):
88
        return "MultiParent(%r)" % self.hunks
89
90
    def __eq__(self, other):
91
        if self.__class__ is not other.__class__:
92
            return False
93
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
94
95
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
96
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
97
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
98
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
99
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
100
                                                           text)
101
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
102
        if len(parents) > 0:
103
            if left_blocks is None:
104
                left_blocks = compare(parents[0])
105
            parent_comparisons = [left_blocks] + [compare(p) for p in
106
                                                  parents[1:]]
107
        else:
108
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
109
        cur_line = 0
110
        new_text = NewText([])
111
        parent_text = []
112
        block_iter = [iter(i) for i in parent_comparisons]
113
        diff = MultiParent([])
114
        def next_block(p):
115
            try:
116
                return block_iter[p].next()
117
            except StopIteration:
118
                return None
119
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
120
        while cur_line < len(text):
121
            best_match = None
122
            for p, block in enumerate(cur_block):
123
                if block is None:
124
                    continue
125
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
126
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
127
                    block = cur_block[p] = next_block(p)
128
                    if block is None:
129
                        break
130
                    i, j, n = block
131
                if block is None:
132
                    continue
133
                if j > cur_line:
134
                    continue
135
                offset = cur_line - j
136
                i += offset
137
                j = cur_line
138
                n -= offset
139
                if n == 0:
140
                    continue
141
                if best_match is None or n > best_match.num_lines:
142
                    best_match = ParentText(p, i, j, n)
143
            if best_match is None:
144
                new_text.lines.append(text[cur_line])
145
                cur_line += 1
146
            else:
147
                if len(new_text.lines) > 0:
148
                    diff.hunks.append(new_text)
149
                    new_text = NewText([])
150
                diff.hunks.append(best_match)
151
                cur_line += best_match.num_lines
152
        if len(new_text.lines) > 0:
153
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
154
        return diff
155
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
156
    def get_matching_blocks(self, parent, parent_len):
157
        for hunk in self.hunks:
158
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
159
                continue
160
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
161
        yield parent_len, self.num_lines(), 0
162
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
163
    def to_lines(self, parents=()):
164
        """Contruct a fulltext from this diff and its parents"""
165
        mpvf = MultiMemoryVersionedFile()
166
        for num, parent in enumerate(parents):
167
            mpvf.add_version(StringIO(parent).readlines(), num, [])
168
        mpvf.add_diff(self, 'a', range(len(parents)))
169
        return mpvf.get_line_list(['a'])[0]
170
0.9.1 by Aaron Bentley
Get trivial case passing
171
    @classmethod
172
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
173
        """Produce a MultiParent from a text and list of parent text"""
2520.4.92 by Aaron Bentley
fix line-splitting in MultiParent.from_texts
174
        return cls.from_lines(StringIO(text).readlines(),
175
                              [StringIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
176
0.9.4 by Aaron Bentley
Start supporting serialization
177
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
178
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
179
        for hunk in self.hunks:
180
            for line in hunk.to_patch():
181
                yield line
182
0.9.25 by Aaron Bentley
More messy hacking
183
    def patch_len(self):
184
        return len(''.join(self.to_patch()))
185
186
    def zipped_patch_len(self):
187
        return len(gzip_string(self.to_patch()))
188
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
189
    @classmethod
190
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
191
        """Create a MultiParent from its string form"""
2520.4.31 by Aaron Bentley
Using StringIO for splitting patch lines
192
        return cls._from_patch(StringIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
193
0.9.18 by Aaron Bentley
Implement from_patch
194
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
195
    def _from_patch(lines):
196
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
197
        line_iter = iter(lines)
198
        hunks = []
199
        cur_line = None
200
        while(True):
201
            try:
202
                cur_line = line_iter.next()
203
            except StopIteration:
204
                break
205
            if cur_line[0] == 'i':
206
                num_lines = int(cur_line.split(' ')[1])
207
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
208
                hunk_lines[-1] = hunk_lines[-1][:-1]
209
                hunks.append(NewText(hunk_lines))
210
            elif cur_line[0] == '\n':
211
                hunks[-1].lines[-1] += '\n'
212
            else:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
213
                if not (cur_line[0] == 'c'):
214
                    raise AssertionError(cur_line[0])
0.9.18 by Aaron Bentley
Implement from_patch
215
                parent, parent_pos, child_pos, num_lines =\
216
                    [int(v) for v in cur_line.split(' ')[1:]]
217
                hunks.append(ParentText(parent, parent_pos, child_pos,
218
                                        num_lines))
219
        return MultiParent(hunks)
220
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
221
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
222
        """Iterate through the hunks, with range indicated
223
224
        kind is "new" or "parent".
225
        for "new", data is a list of lines.
226
        for "parent", data is (parent, parent_start, parent_end)
227
        :return: a generator of (start, end, kind, data)
228
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
229
        start = 0
230
        for hunk in self.hunks:
231
            if isinstance(hunk, NewText):
232
                kind = 'new'
233
                end = start + len(hunk.lines)
234
                data = hunk.lines
235
            else:
236
                kind = 'parent'
237
                start = hunk.child_pos
238
                end = start + hunk.num_lines
239
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
240
                        hunk.num_lines)
241
            yield start, end, kind, data
242
            start = end
243
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
244
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
245
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
246
        extra_n = 0
247
        for hunk in reversed(self.hunks):
248
            if isinstance(hunk, ParentText):
249
               return hunk.child_pos + hunk.num_lines + extra_n
250
            extra_n += len(hunk.lines)
251
        return extra_n
252
0.9.25 by Aaron Bentley
More messy hacking
253
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
254
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
255
        if len(self.hunks) != 1:
256
            return False
257
        return (isinstance(self.hunks[0], NewText))
258
0.9.1 by Aaron Bentley
Get trivial case passing
259
260
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
261
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
262
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
263
    __slots__ = ['lines']
264
0.9.1 by Aaron Bentley
Get trivial case passing
265
    def __init__(self, lines):
266
        self.lines = lines
267
268
    def __eq__(self, other):
269
        if self.__class__ is not other.__class__:
270
            return False
271
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
272
273
    def __repr__(self):
274
        return 'NewText(%r)' % self.lines
275
0.9.4 by Aaron Bentley
Start supporting serialization
276
    def to_patch(self):
277
        yield 'i %d\n' % len(self.lines)
278
        for line in self.lines:
279
            yield line
280
        yield '\n'
281
0.9.2 by Aaron Bentley
Get single-parent comparison working
282
283
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
284
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
285
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
286
    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']
287
0.9.2 by Aaron Bentley
Get single-parent comparison working
288
    def __init__(self, parent, parent_pos, child_pos, num_lines):
289
        self.parent = parent
290
        self.parent_pos = parent_pos
291
        self.child_pos = child_pos
292
        self.num_lines = num_lines
293
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
294
    def _as_dict(self):
295
        return dict(parent=self.parent, parent_pos=self.parent_pos,
296
                    child_pos=self.child_pos, num_lines=self.num_lines)
297
0.9.2 by Aaron Bentley
Get single-parent comparison working
298
    def __repr__(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
299
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
300
                ' %(num_lines)r)' % self._as_dict())
0.9.2 by Aaron Bentley
Get single-parent comparison working
301
302
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
303
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
304
            return False
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
305
        return self._as_dict() == other._as_dict()
0.9.4 by Aaron Bentley
Start supporting serialization
306
307
    def to_patch(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
308
        yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
309
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
310
311
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
312
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
313
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
314
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
315
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
316
        self._lines = {}
317
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
318
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
319
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
320
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
321
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
322
    def versions(self):
323
        return iter(self._parents)
324
2520.4.61 by Aaron Bentley
Do bulk insertion of records
325
    def has_version(self, version):
326
        return version in self._parents
327
0.9.12 by Aaron Bentley
Make benchmarks for mp
328
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
329
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
330
        if self.snapshot_interval is None:
331
            return False
332
        if self.max_snapshots is not None and\
333
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
334
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
335
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
336
            return True
337
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
338
            if len(parent_ids) == 0:
339
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
340
            version_ids = parent_ids
341
            parent_ids = []
342
            for version_id in version_ids:
343
                if version_id not in self._snapshots:
344
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
345
        else:
346
            return True
0.9.8 by Aaron Bentley
get add_version working
347
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
348
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
349
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
350
        """Add a version to the versionedfile
351
352
        :param lines: The list of lines to add.  Must be split on '\n'.
353
        :param version_id: The version_id of the version to add
354
        :param force_snapshot: If true, force this version to be added as a
355
            snapshot version.  If false, force this version to be added as a
356
            diff.  If none, determine this automatically.
357
        :param single_parent: If true, use a single parent, rather than
358
            multiple parents.
359
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
360
        if force_snapshot is None:
361
            do_snapshot = self.do_snapshot(version_id, parent_ids)
362
        else:
363
            do_snapshot = force_snapshot
364
        if do_snapshot:
365
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
366
            diff = MultiParent([NewText(lines)])
367
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
368
            if single_parent:
369
                parent_lines = self.get_line_list(parent_ids[:1])
370
            else:
371
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
372
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
373
            if diff.is_snapshot():
374
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
375
        self.add_diff(diff, version_id, parent_ids)
376
        self._lines[version_id] = lines
377
0.9.35 by Aaron Bentley
Add build ranking
378
    def get_parents(self, version_id):
379
        return self._parents[version_id]
380
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
381
    def make_snapshot(self, version_id):
382
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
383
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
384
        self._snapshots.add(version_id)
385
0.9.20 by Aaron Bentley
Convert to a plugin
386
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
387
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
388
        """Import all revisions of a versionedfile
389
390
        :param vf: The versionedfile to import
391
        :param snapshots: If provided, the revisions to make snapshots of.
392
            Otherwise, this will be auto-determined
393
        :param no_cache: If true, clear the cache after every add.
394
        :param single_parent: If true, omit all but one parent text, (but
395
            retain parent metadata).
396
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
397
        if not (no_cache or not verify):
398
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
399
        revisions = set(vf.versions())
400
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
401
        pb = ui.ui_factory.nested_progress_bar()
402
        try:
403
            while len(revisions) > 0:
404
                added = set()
405
                for revision in revisions:
406
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
407
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
408
                        continue
409
                    lines = [a + ' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
410
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
411
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
412
                        force_snapshot = None
413
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
414
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
415
                    self.add_version(lines, revision, parents, force_snapshot,
416
                                     single_parent)
417
                    added.add(revision)
418
                    if no_cache:
419
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
420
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
421
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
422
                            if not (lines == self.get_line_list([revision])[0]):
423
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
424
                            self.clear_cache()
0.9.20 by Aaron Bentley
Convert to a plugin
425
                    pb.update('Importing revisions',
426
                              (total - len(revisions)) + len(added), total)
427
                revisions = [r for r in revisions if r not in added]
428
        finally:
429
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
430
0.9.23 by Aaron Bentley
handle snapshots all at once
431
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
432
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
433
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
434
        descendants = {}
435
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
436
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
437
            potential_build_ancestors = set(vf.get_parents(version_id))
438
            parents = vf.get_parents(version_id)
439
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
440
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
441
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
442
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
443
                for parent in vf.get_parents(version_id):
444
                    potential_build_ancestors.update(build_ancestors[parent])
445
                if len(potential_build_ancestors) > self.snapshot_interval:
446
                    snapshots.add(version_id)
447
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
448
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
449
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
450
        return snapshots
451
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
452
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
453
        """Select snapshots for minimum output size"""
454
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
455
        new_snapshots = self.get_size_ranking()[-num:]
456
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
457
458
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
459
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
460
        versions = []
461
        new_snapshots = set()
462
        for version_id in self.versions():
463
            if version_id in self._snapshots:
464
                continue
465
            diff_len = self.get_diff(version_id).patch_len()
466
            snapshot_len = MultiParent([NewText(
467
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
468
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
469
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
470
        return versions
471
472
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
473
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
474
        for version_id in vf.versions():
475
            self.add_diff(vf.get_diff(version_id), version_id,
476
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
477
0.9.35 by Aaron Bentley
Add build ranking
478
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
479
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
480
        could_avoid = {}
481
        referenced_by = {}
482
        for version_id in topo_iter(self):
483
            could_avoid[version_id] = set()
484
            if version_id not in self._snapshots:
485
                for parent_id in self._parents[version_id]:
486
                    could_avoid[version_id].update(could_avoid[parent_id])
487
                could_avoid[version_id].update(self._parents)
488
                could_avoid[version_id].discard(version_id)
489
            for avoid_id in could_avoid[version_id]:
490
                referenced_by.setdefault(avoid_id, set()).add(version_id)
491
        available_versions = list(self.versions())
492
        ranking = []
493
        while len(available_versions) > 0:
494
            available_versions.sort(key=lambda x:
495
                len(could_avoid[x]) *
496
                len(referenced_by.get(x, [])))
497
            selected = available_versions.pop()
498
            ranking.append(selected)
499
            for version_id in referenced_by[selected]:
500
                could_avoid[version_id].difference_update(
501
                    could_avoid[selected])
502
            for version_id in could_avoid[selected]:
503
                referenced_by[version_id].difference_update(
504
                    referenced_by[selected]
505
                )
506
        return ranking
507
0.9.8 by Aaron Bentley
get add_version working
508
    def clear_cache(self):
509
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
510
511
    def get_line_list(self, version_ids):
512
        return [self.cache_version(v) for v in version_ids]
513
514
    def cache_version(self, version_id):
515
        try:
516
            return self._lines[version_id]
517
        except KeyError:
518
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
519
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
520
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
521
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
522
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
523
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
524
        return lines
525
0.9.33 by Aaron Bentley
Enable caching commandline param
526
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
527
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
528
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
529
530
    def __init__(self, snapshot_interval=25, max_snapshots=None):
531
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
532
        self._diffs = {}
533
534
    def add_diff(self, diff, version_id, parent_ids):
535
        self._diffs[version_id] = diff
536
        self._parents[version_id] = parent_ids
537
538
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
539
        try:
540
            return self._diffs[version_id]
541
        except KeyError:
542
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
543
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
544
    def destroy(self):
545
        self._diffs = {}
546
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
547
548
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
549
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
550
551
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
552
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
553
        self._filename = filename
554
        self._diff_offset = {}
555
556
    def get_diff(self, version_id):
557
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
558
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
559
        try:
560
            infile.seek(start)
561
            sio = StringIO(infile.read(count))
562
        finally:
563
            infile.close()
564
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
565
        try:
566
            file_version_id = zip_file.readline()
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
567
            return MultiParent.from_patch(zip_file.read())
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
568
        finally:
569
            zip_file.close()
570
571
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
572
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
573
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
574
            outfile.seek(0, 2)      # workaround for windows bug:
575
                                    # .tell() for files opened in 'ab' mode
576
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
577
            start = outfile.tell()
578
            try:
579
                zipfile = GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
580
                zipfile.writelines(itertools.chain(
581
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
582
            finally:
583
                zipfile.close()
584
            end = outfile.tell()
585
        finally:
586
            outfile.close()
587
        self._diff_offset[version_id] = (start, end-start)
588
        self._parents[version_id] = parent_ids
589
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
590
    def destroy(self):
591
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
592
            os.unlink(self._filename + '.mpknit')
593
        except OSError, e:
594
            if e.errno != errno.ENOENT:
595
                raise
596
        try:
597
            os.unlink(self._filename + '.mpidx')
598
        except OSError, e:
599
            if e.errno != errno.ENOENT:
600
                raise
601
602
    def save(self):
603
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
604
            (self._parents, list(self._snapshots), self._diff_offset)))
605
606
    def load(self):
607
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
608
            open(self._filename + '.mpidx', 'rb').read())
609
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
610
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
611
612
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
613
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
614
615
    def __init__(self, diffs, lines, parents):
616
        self.diffs = diffs
617
        self.lines = lines
618
        self.parents = parents
619
        self.cursor = {}
620
621
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
622
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
623
        parent_id = self.parents[version_id][parent_text.parent]
624
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
625
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
626
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
627
628
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
629
        """Append lines for the requested version_id range"""
630
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
631
        if req_start == req_end:
632
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
633
        pending_reqs = [(req_version_id, req_start, req_end)]
634
        while len(pending_reqs) > 0:
635
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
636
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
637
            if req_version_id in self.lines:
638
                lines.extend(self.lines[req_version_id][req_start:req_end])
639
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
640
            try:
641
                start, end, kind, data, iterator = self.cursor[req_version_id]
642
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
643
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
644
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
645
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
646
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
647
                start, end, kind, data = iterator.next()
648
0.9.10 by Aaron Bentley
Text reconstruction seems to work
649
            # find the first hunk relevant to the request
650
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
651
                start, end, kind, data = iterator.next()
652
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
653
            # if the hunk can't satisfy the whole request, split it in two,
654
            # and leave the second half for later.
655
            if req_end > end:
656
                pending_reqs.append((req_version_id, end, req_end))
657
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
658
            if kind == 'new':
659
                lines.extend(data[req_start - start: (req_end - start)])
660
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
661
                # If the hunk is a ParentText, rewrite it as a range request
662
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
663
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
664
                new_version_id = self.parents[req_version_id][parent]
665
                new_start = parent_start + req_start - start
666
                new_end = parent_end + req_end - end
667
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
668
669
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
670
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
671
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
672
2520.4.6 by Aaron Bentley
Get installation started
673
0.9.25 by Aaron Bentley
More messy hacking
674
def gzip_string(lines):
675
    sio = StringIO()
676
    data_file = GzipFile(None, mode='wb', fileobj=sio)
677
    data_file.writelines(lines)
678
    data_file.close()
679
    return sio.getvalue()