~bzr-pqm/bzr/bzr.dev

5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
17
from bzrlib.lazy_import import lazy_import
18
19
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
20
import errno
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
21
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
22
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
23
import os
0.9.25 by Aaron Bentley
More messy hacking
24
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
25
0.9.25 by Aaron Bentley
More messy hacking
26
from bzrlib import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
27
    bencode,
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
28
    errors,
0.9.25 by Aaron Bentley
More messy hacking
29
    patiencediff,
30
    ui,
31
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
32
""")
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
33
0.9.33 by Aaron Bentley
Enable caching commandline param
34
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
35
def topo_iter_keys(vf, keys=None):
36
    if keys is None:
37
        keys = vf.keys()
38
    parents = vf.get_parent_map(keys)
39
    return _topo_iter(parents, keys)
40
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
41
def topo_iter(vf, versions=None):
42
    if versions is None:
43
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
44
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
45
    return _topo_iter(parents, versions)
46
47
def _topo_iter(parents, versions):
48
    seen = set()
49
    descendants = {}
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
50
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
51
        if parents[version] is None:
52
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
53
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
54
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
55
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
56
        if parents[version_id] is None:
57
            # parentless
58
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
59
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
60
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
61
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
62
    while len(cur) > 0:
63
        next = []
64
        for version_id in cur:
65
            if version_id in seen:
66
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
67
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
68
                continue
69
            next.extend(descendants.get(version_id, []))
70
            yield version_id
71
            seen.add(version_id)
72
        cur = next
73
74
0.9.1 by Aaron Bentley
Get trivial case passing
75
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
76
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
77
5374.2.9 by John Arbash Meinel
change slots back to a list.
78
    __slots__ = ['hunks']
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
79
0.9.2 by Aaron Bentley
Get single-parent comparison working
80
    def __init__(self, hunks=None):
81
        if hunks is not None:
82
            self.hunks = hunks
83
        else:
84
            self.hunks = []
85
86
    def __repr__(self):
87
        return "MultiParent(%r)" % self.hunks
88
89
    def __eq__(self, other):
90
        if self.__class__ is not other.__class__:
91
            return False
92
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
93
94
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
95
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
96
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
97
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
98
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
99
                                                           text)
100
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
101
        if len(parents) > 0:
102
            if left_blocks is None:
103
                left_blocks = compare(parents[0])
104
            parent_comparisons = [left_blocks] + [compare(p) for p in
105
                                                  parents[1:]]
106
        else:
107
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
108
        cur_line = 0
109
        new_text = NewText([])
110
        parent_text = []
111
        block_iter = [iter(i) for i in parent_comparisons]
112
        diff = MultiParent([])
113
        def next_block(p):
114
            try:
115
                return block_iter[p].next()
116
            except StopIteration:
117
                return None
118
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
119
        while cur_line < len(text):
120
            best_match = None
121
            for p, block in enumerate(cur_block):
122
                if block is None:
123
                    continue
124
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
125
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
126
                    block = cur_block[p] = next_block(p)
127
                    if block is None:
128
                        break
129
                    i, j, n = block
130
                if block is None:
131
                    continue
132
                if j > cur_line:
133
                    continue
134
                offset = cur_line - j
135
                i += offset
136
                j = cur_line
137
                n -= offset
138
                if n == 0:
139
                    continue
140
                if best_match is None or n > best_match.num_lines:
141
                    best_match = ParentText(p, i, j, n)
142
            if best_match is None:
143
                new_text.lines.append(text[cur_line])
144
                cur_line += 1
145
            else:
146
                if len(new_text.lines) > 0:
147
                    diff.hunks.append(new_text)
148
                    new_text = NewText([])
149
                diff.hunks.append(best_match)
150
                cur_line += best_match.num_lines
151
        if len(new_text.lines) > 0:
152
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
153
        return diff
154
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
155
    def get_matching_blocks(self, parent, parent_len):
156
        for hunk in self.hunks:
157
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
158
                continue
159
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
160
        yield parent_len, self.num_lines(), 0
161
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
162
    def to_lines(self, parents=()):
163
        """Contruct a fulltext from this diff and its parents"""
164
        mpvf = MultiMemoryVersionedFile()
165
        for num, parent in enumerate(parents):
166
            mpvf.add_version(StringIO(parent).readlines(), num, [])
167
        mpvf.add_diff(self, 'a', range(len(parents)))
168
        return mpvf.get_line_list(['a'])[0]
169
0.9.1 by Aaron Bentley
Get trivial case passing
170
    @classmethod
171
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
172
        """Produce a MultiParent from a text and list of parent text"""
2520.4.92 by Aaron Bentley
fix line-splitting in MultiParent.from_texts
173
        return cls.from_lines(StringIO(text).readlines(),
174
                              [StringIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
175
0.9.4 by Aaron Bentley
Start supporting serialization
176
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
177
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
178
        for hunk in self.hunks:
179
            for line in hunk.to_patch():
180
                yield line
181
0.9.25 by Aaron Bentley
More messy hacking
182
    def patch_len(self):
183
        return len(''.join(self.to_patch()))
184
185
    def zipped_patch_len(self):
186
        return len(gzip_string(self.to_patch()))
187
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
188
    @classmethod
189
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
190
        """Create a MultiParent from its string form"""
2520.4.31 by Aaron Bentley
Using StringIO for splitting patch lines
191
        return cls._from_patch(StringIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
192
0.9.18 by Aaron Bentley
Implement from_patch
193
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
194
    def _from_patch(lines):
195
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
196
        line_iter = iter(lines)
197
        hunks = []
198
        cur_line = None
199
        while(True):
200
            try:
201
                cur_line = line_iter.next()
202
            except StopIteration:
203
                break
204
            if cur_line[0] == 'i':
205
                num_lines = int(cur_line.split(' ')[1])
206
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
207
                hunk_lines[-1] = hunk_lines[-1][:-1]
208
                hunks.append(NewText(hunk_lines))
209
            elif cur_line[0] == '\n':
210
                hunks[-1].lines[-1] += '\n'
211
            else:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
212
                if not (cur_line[0] == 'c'):
213
                    raise AssertionError(cur_line[0])
0.9.18 by Aaron Bentley
Implement from_patch
214
                parent, parent_pos, child_pos, num_lines =\
215
                    [int(v) for v in cur_line.split(' ')[1:]]
216
                hunks.append(ParentText(parent, parent_pos, child_pos,
217
                                        num_lines))
218
        return MultiParent(hunks)
219
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
220
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
221
        """Iterate through the hunks, with range indicated
222
223
        kind is "new" or "parent".
224
        for "new", data is a list of lines.
225
        for "parent", data is (parent, parent_start, parent_end)
226
        :return: a generator of (start, end, kind, data)
227
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
228
        start = 0
229
        for hunk in self.hunks:
230
            if isinstance(hunk, NewText):
231
                kind = 'new'
232
                end = start + len(hunk.lines)
233
                data = hunk.lines
234
            else:
235
                kind = 'parent'
236
                start = hunk.child_pos
237
                end = start + hunk.num_lines
238
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
239
                        hunk.num_lines)
240
            yield start, end, kind, data
241
            start = end
242
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
243
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
244
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
245
        extra_n = 0
246
        for hunk in reversed(self.hunks):
247
            if isinstance(hunk, ParentText):
248
               return hunk.child_pos + hunk.num_lines + extra_n
249
            extra_n += len(hunk.lines)
250
        return extra_n
251
0.9.25 by Aaron Bentley
More messy hacking
252
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
253
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
254
        if len(self.hunks) != 1:
255
            return False
256
        return (isinstance(self.hunks[0], NewText))
257
0.9.1 by Aaron Bentley
Get trivial case passing
258
259
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
260
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
261
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
262
    __slots__ = ['lines']
263
0.9.1 by Aaron Bentley
Get trivial case passing
264
    def __init__(self, lines):
265
        self.lines = lines
266
267
    def __eq__(self, other):
268
        if self.__class__ is not other.__class__:
269
            return False
270
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
271
272
    def __repr__(self):
273
        return 'NewText(%r)' % self.lines
274
0.9.4 by Aaron Bentley
Start supporting serialization
275
    def to_patch(self):
276
        yield 'i %d\n' % len(self.lines)
277
        for line in self.lines:
278
            yield line
279
        yield '\n'
280
0.9.2 by Aaron Bentley
Get single-parent comparison working
281
282
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
283
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
284
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
285
    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']
286
0.9.2 by Aaron Bentley
Get single-parent comparison working
287
    def __init__(self, parent, parent_pos, child_pos, num_lines):
288
        self.parent = parent
289
        self.parent_pos = parent_pos
290
        self.child_pos = child_pos
291
        self.num_lines = num_lines
292
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
293
    def _as_dict(self):
294
        return dict(parent=self.parent, parent_pos=self.parent_pos,
295
                    child_pos=self.child_pos, num_lines=self.num_lines)
296
0.9.2 by Aaron Bentley
Get single-parent comparison working
297
    def __repr__(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
298
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
299
                ' %(num_lines)r)' % self._as_dict())
0.9.2 by Aaron Bentley
Get single-parent comparison working
300
301
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
302
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
303
            return False
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
304
        return self._as_dict() == other._as_dict()
0.9.4 by Aaron Bentley
Start supporting serialization
305
306
    def to_patch(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
307
        yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
308
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
309
310
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
311
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
312
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
313
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
314
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
315
        self._lines = {}
316
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
317
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
318
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
319
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
320
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
321
    def versions(self):
322
        return iter(self._parents)
323
2520.4.61 by Aaron Bentley
Do bulk insertion of records
324
    def has_version(self, version):
325
        return version in self._parents
326
0.9.12 by Aaron Bentley
Make benchmarks for mp
327
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
328
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
329
        if self.snapshot_interval is None:
330
            return False
331
        if self.max_snapshots is not None and\
332
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
333
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
334
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
335
            return True
336
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
337
            if len(parent_ids) == 0:
338
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
339
            version_ids = parent_ids
340
            parent_ids = []
341
            for version_id in version_ids:
342
                if version_id not in self._snapshots:
343
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
344
        else:
345
            return True
0.9.8 by Aaron Bentley
get add_version working
346
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
347
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
348
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
349
        """Add a version to the versionedfile
350
351
        :param lines: The list of lines to add.  Must be split on '\n'.
352
        :param version_id: The version_id of the version to add
353
        :param force_snapshot: If true, force this version to be added as a
354
            snapshot version.  If false, force this version to be added as a
355
            diff.  If none, determine this automatically.
356
        :param single_parent: If true, use a single parent, rather than
357
            multiple parents.
358
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
359
        if force_snapshot is None:
360
            do_snapshot = self.do_snapshot(version_id, parent_ids)
361
        else:
362
            do_snapshot = force_snapshot
363
        if do_snapshot:
364
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
365
            diff = MultiParent([NewText(lines)])
366
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
367
            if single_parent:
368
                parent_lines = self.get_line_list(parent_ids[:1])
369
            else:
370
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
371
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
372
            if diff.is_snapshot():
373
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
374
        self.add_diff(diff, version_id, parent_ids)
375
        self._lines[version_id] = lines
376
0.9.35 by Aaron Bentley
Add build ranking
377
    def get_parents(self, version_id):
378
        return self._parents[version_id]
379
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
380
    def make_snapshot(self, version_id):
381
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
382
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
383
        self._snapshots.add(version_id)
384
0.9.20 by Aaron Bentley
Convert to a plugin
385
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
386
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
387
        """Import all revisions of a versionedfile
388
389
        :param vf: The versionedfile to import
390
        :param snapshots: If provided, the revisions to make snapshots of.
391
            Otherwise, this will be auto-determined
392
        :param no_cache: If true, clear the cache after every add.
393
        :param single_parent: If true, omit all but one parent text, (but
394
            retain parent metadata).
395
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
396
        if not (no_cache or not verify):
397
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
398
        revisions = set(vf.versions())
399
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
400
        pb = ui.ui_factory.nested_progress_bar()
401
        try:
402
            while len(revisions) > 0:
403
                added = set()
404
                for revision in revisions:
405
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
406
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
407
                        continue
408
                    lines = [a + ' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
409
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
410
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
411
                        force_snapshot = None
412
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
413
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
414
                    self.add_version(lines, revision, parents, force_snapshot,
415
                                     single_parent)
416
                    added.add(revision)
417
                    if no_cache:
418
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
419
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
420
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
421
                            if not (lines == self.get_line_list([revision])[0]):
422
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
423
                            self.clear_cache()
0.9.20 by Aaron Bentley
Convert to a plugin
424
                    pb.update('Importing revisions',
425
                              (total - len(revisions)) + len(added), total)
426
                revisions = [r for r in revisions if r not in added]
427
        finally:
428
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
429
0.9.23 by Aaron Bentley
handle snapshots all at once
430
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
431
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
432
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
433
        descendants = {}
434
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
435
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
436
            potential_build_ancestors = set(vf.get_parents(version_id))
437
            parents = vf.get_parents(version_id)
438
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
439
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
440
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
441
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
442
                for parent in vf.get_parents(version_id):
443
                    potential_build_ancestors.update(build_ancestors[parent])
444
                if len(potential_build_ancestors) > self.snapshot_interval:
445
                    snapshots.add(version_id)
446
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
447
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
448
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
449
        return snapshots
450
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
451
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
452
        """Select snapshots for minimum output size"""
453
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
454
        new_snapshots = self.get_size_ranking()[-num:]
455
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
456
457
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
458
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
459
        versions = []
460
        new_snapshots = set()
461
        for version_id in self.versions():
462
            if version_id in self._snapshots:
463
                continue
464
            diff_len = self.get_diff(version_id).patch_len()
465
            snapshot_len = MultiParent([NewText(
466
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
467
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
468
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
469
        return versions
470
471
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
472
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
473
        for version_id in vf.versions():
474
            self.add_diff(vf.get_diff(version_id), version_id,
475
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
476
0.9.35 by Aaron Bentley
Add build ranking
477
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
478
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
479
        could_avoid = {}
480
        referenced_by = {}
481
        for version_id in topo_iter(self):
482
            could_avoid[version_id] = set()
483
            if version_id not in self._snapshots:
484
                for parent_id in self._parents[version_id]:
485
                    could_avoid[version_id].update(could_avoid[parent_id])
486
                could_avoid[version_id].update(self._parents)
487
                could_avoid[version_id].discard(version_id)
488
            for avoid_id in could_avoid[version_id]:
489
                referenced_by.setdefault(avoid_id, set()).add(version_id)
490
        available_versions = list(self.versions())
491
        ranking = []
492
        while len(available_versions) > 0:
493
            available_versions.sort(key=lambda x:
494
                len(could_avoid[x]) *
495
                len(referenced_by.get(x, [])))
496
            selected = available_versions.pop()
497
            ranking.append(selected)
498
            for version_id in referenced_by[selected]:
499
                could_avoid[version_id].difference_update(
500
                    could_avoid[selected])
501
            for version_id in could_avoid[selected]:
502
                referenced_by[version_id].difference_update(
503
                    referenced_by[selected]
504
                )
505
        return ranking
506
0.9.8 by Aaron Bentley
get add_version working
507
    def clear_cache(self):
508
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
509
510
    def get_line_list(self, version_ids):
511
        return [self.cache_version(v) for v in version_ids]
512
513
    def cache_version(self, version_id):
514
        try:
515
            return self._lines[version_id]
516
        except KeyError:
517
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
518
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
519
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
520
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
521
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
522
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
523
        return lines
524
0.9.33 by Aaron Bentley
Enable caching commandline param
525
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
526
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
527
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
528
529
    def __init__(self, snapshot_interval=25, max_snapshots=None):
530
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
531
        self._diffs = {}
532
533
    def add_diff(self, diff, version_id, parent_ids):
534
        self._diffs[version_id] = diff
535
        self._parents[version_id] = parent_ids
536
537
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
538
        try:
539
            return self._diffs[version_id]
540
        except KeyError:
541
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
542
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
543
    def destroy(self):
544
        self._diffs = {}
545
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
546
547
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
548
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
549
550
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
551
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
552
        self._filename = filename
553
        self._diff_offset = {}
554
555
    def get_diff(self, version_id):
556
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
557
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
558
        try:
559
            infile.seek(start)
560
            sio = StringIO(infile.read(count))
561
        finally:
562
            infile.close()
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
563
        zip_file = gzip.GzipFile(None, mode='rb', fileobj=sio)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
564
        try:
565
            file_version_id = zip_file.readline()
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
566
            content = zip_file.read()
567
            return MultiParent.from_patch(content)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
568
        finally:
569
            zip_file.close()
570
571
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
572
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
573
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
574
            outfile.seek(0, 2)      # workaround for windows bug:
575
                                    # .tell() for files opened in 'ab' mode
576
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
577
            start = outfile.tell()
578
            try:
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
579
                zipfile = gzip.GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
580
                zipfile.writelines(itertools.chain(
581
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
582
            finally:
583
                zipfile.close()
584
            end = outfile.tell()
585
        finally:
586
            outfile.close()
587
        self._diff_offset[version_id] = (start, end-start)
588
        self._parents[version_id] = parent_ids
589
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
590
    def destroy(self):
591
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
592
            os.unlink(self._filename + '.mpknit')
593
        except OSError, e:
594
            if e.errno != errno.ENOENT:
595
                raise
596
        try:
597
            os.unlink(self._filename + '.mpidx')
598
        except OSError, e:
599
            if e.errno != errno.ENOENT:
600
                raise
601
602
    def save(self):
603
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
604
            (self._parents, list(self._snapshots), self._diff_offset)))
605
606
    def load(self):
607
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
608
            open(self._filename + '.mpidx', 'rb').read())
609
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
610
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
611
612
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
613
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
614
615
    def __init__(self, diffs, lines, parents):
616
        self.diffs = diffs
617
        self.lines = lines
618
        self.parents = parents
619
        self.cursor = {}
620
621
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
622
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
623
        parent_id = self.parents[version_id][parent_text.parent]
624
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
625
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
626
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
627
628
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
629
        """Append lines for the requested version_id range"""
630
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
631
        if req_start == req_end:
632
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
633
        pending_reqs = [(req_version_id, req_start, req_end)]
634
        while len(pending_reqs) > 0:
635
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
636
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
637
            if req_version_id in self.lines:
638
                lines.extend(self.lines[req_version_id][req_start:req_end])
639
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
640
            try:
641
                start, end, kind, data, iterator = self.cursor[req_version_id]
642
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
643
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
644
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
645
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
646
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
647
                start, end, kind, data = iterator.next()
648
0.9.10 by Aaron Bentley
Text reconstruction seems to work
649
            # find the first hunk relevant to the request
650
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
651
                start, end, kind, data = iterator.next()
652
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
653
            # if the hunk can't satisfy the whole request, split it in two,
654
            # and leave the second half for later.
655
            if req_end > end:
656
                pending_reqs.append((req_version_id, end, req_end))
657
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
658
            if kind == 'new':
659
                lines.extend(data[req_start - start: (req_end - start)])
660
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
661
                # If the hunk is a ParentText, rewrite it as a range request
662
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
663
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
664
                new_version_id = self.parents[req_version_id][parent]
665
                new_start = parent_start + req_start - start
666
                new_end = parent_end + req_end - end
667
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
668
669
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
670
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
671
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
672
2520.4.6 by Aaron Bentley
Get installation started
673
0.9.25 by Aaron Bentley
More messy hacking
674
def gzip_string(lines):
675
    sio = StringIO()
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
676
    data_file = gzip.GzipFile(None, mode='wb', fileobj=sio)
0.9.25 by Aaron Bentley
More messy hacking
677
    data_file.writelines(lines)
678
    data_file.close()
679
    return sio.getvalue()