~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
17
from bzrlib.lazy_import import lazy_import
18
19
lazy_import(globals(), """
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
20
import errno
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
21
import itertools
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
22
import os
0.9.25 by Aaron Bentley
More messy hacking
23
from StringIO import StringIO
0.9.19 by Aaron Bentley
More tweakage
24
0.9.25 by Aaron Bentley
More messy hacking
25
from bzrlib import (
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
26
    errors,
0.9.25 by Aaron Bentley
More messy hacking
27
    patiencediff,
28
    trace,
29
    ui,
30
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
31
from bzrlib.util import bencode
32
""")
0.9.25 by Aaron Bentley
More messy hacking
33
from bzrlib.tuned_gzip import GzipFile
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
34
0.9.33 by Aaron Bentley
Enable caching commandline param
35
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
36
def topo_iter(vf, versions=None):
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
37
    seen = set()
38
    descendants = {}
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
39
    if versions is None:
40
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
41
    parents = vf.get_parent_map(versions)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
42
    def pending_parents(version):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
43
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
44
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
45
    for version_id in versions:
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
46
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
47
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
48
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
49
    while len(cur) > 0:
50
        next = []
51
        for version_id in cur:
52
            if version_id in seen:
53
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
54
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
55
                continue
56
            next.extend(descendants.get(version_id, []))
57
            yield version_id
58
            seen.add(version_id)
59
        cur = next
60
61
0.9.1 by Aaron Bentley
Get trivial case passing
62
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
63
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
64
0.9.2 by Aaron Bentley
Get single-parent comparison working
65
    def __init__(self, hunks=None):
66
        if hunks is not None:
67
            self.hunks = hunks
68
        else:
69
            self.hunks = []
70
71
    def __repr__(self):
72
        return "MultiParent(%r)" % self.hunks
73
74
    def __eq__(self, other):
75
        if self.__class__ is not other.__class__:
76
            return False
77
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
78
79
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
80
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
81
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
82
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
83
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
84
                                                           text)
85
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
86
        if len(parents) > 0:
87
            if left_blocks is None:
88
                left_blocks = compare(parents[0])
89
            parent_comparisons = [left_blocks] + [compare(p) for p in
90
                                                  parents[1:]]
91
        else:
92
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
93
        cur_line = 0
94
        new_text = NewText([])
95
        parent_text = []
96
        block_iter = [iter(i) for i in parent_comparisons]
97
        diff = MultiParent([])
98
        def next_block(p):
99
            try:
100
                return block_iter[p].next()
101
            except StopIteration:
102
                return None
103
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
104
        while cur_line < len(text):
105
            best_match = None
106
            for p, block in enumerate(cur_block):
107
                if block is None:
108
                    continue
109
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
110
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
111
                    block = cur_block[p] = next_block(p)
112
                    if block is None:
113
                        break
114
                    i, j, n = block
115
                if block is None:
116
                    continue
117
                if j > cur_line:
118
                    continue
119
                offset = cur_line - j
120
                i += offset
121
                j = cur_line
122
                n -= offset
123
                if n == 0:
124
                    continue
125
                if best_match is None or n > best_match.num_lines:
126
                    best_match = ParentText(p, i, j, n)
127
            if best_match is None:
128
                new_text.lines.append(text[cur_line])
129
                cur_line += 1
130
            else:
131
                if len(new_text.lines) > 0:
132
                    diff.hunks.append(new_text)
133
                    new_text = NewText([])
134
                diff.hunks.append(best_match)
135
                cur_line += best_match.num_lines
136
        if len(new_text.lines) > 0:
137
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
138
        return diff
139
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
140
    def get_matching_blocks(self, parent, parent_len):
141
        for hunk in self.hunks:
142
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
143
                continue
144
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
145
        yield parent_len, self.num_lines(), 0
146
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
147
    def to_lines(self, parents=()):
148
        """Contruct a fulltext from this diff and its parents"""
149
        mpvf = MultiMemoryVersionedFile()
150
        for num, parent in enumerate(parents):
151
            mpvf.add_version(StringIO(parent).readlines(), num, [])
152
        mpvf.add_diff(self, 'a', range(len(parents)))
153
        return mpvf.get_line_list(['a'])[0]
154
0.9.1 by Aaron Bentley
Get trivial case passing
155
    @classmethod
156
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
157
        """Produce a MultiParent from a text and list of parent text"""
2520.4.92 by Aaron Bentley
fix line-splitting in MultiParent.from_texts
158
        return cls.from_lines(StringIO(text).readlines(),
159
                              [StringIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
160
0.9.4 by Aaron Bentley
Start supporting serialization
161
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
162
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
163
        for hunk in self.hunks:
164
            for line in hunk.to_patch():
165
                yield line
166
0.9.25 by Aaron Bentley
More messy hacking
167
    def patch_len(self):
168
        return len(''.join(self.to_patch()))
169
170
    def zipped_patch_len(self):
171
        return len(gzip_string(self.to_patch()))
172
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
173
    @classmethod
174
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
175
        """Create a MultiParent from its string form"""
2520.4.31 by Aaron Bentley
Using StringIO for splitting patch lines
176
        return cls._from_patch(StringIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
177
0.9.18 by Aaron Bentley
Implement from_patch
178
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
179
    def _from_patch(lines):
180
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
181
        line_iter = iter(lines)
182
        hunks = []
183
        cur_line = None
184
        while(True):
185
            try:
186
                cur_line = line_iter.next()
187
            except StopIteration:
188
                break
189
            if cur_line[0] == 'i':
190
                num_lines = int(cur_line.split(' ')[1])
191
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
192
                hunk_lines[-1] = hunk_lines[-1][:-1]
193
                hunks.append(NewText(hunk_lines))
194
            elif cur_line[0] == '\n':
195
                hunks[-1].lines[-1] += '\n'
196
            else:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
197
                if not (cur_line[0] == 'c'):
198
                    raise AssertionError(cur_line[0])
0.9.18 by Aaron Bentley
Implement from_patch
199
                parent, parent_pos, child_pos, num_lines =\
200
                    [int(v) for v in cur_line.split(' ')[1:]]
201
                hunks.append(ParentText(parent, parent_pos, child_pos,
202
                                        num_lines))
203
        return MultiParent(hunks)
204
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
205
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
206
        """Iterate through the hunks, with range indicated
207
208
        kind is "new" or "parent".
209
        for "new", data is a list of lines.
210
        for "parent", data is (parent, parent_start, parent_end)
211
        :return: a generator of (start, end, kind, data)
212
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
213
        start = 0
214
        for hunk in self.hunks:
215
            if isinstance(hunk, NewText):
216
                kind = 'new'
217
                end = start + len(hunk.lines)
218
                data = hunk.lines
219
            else:
220
                kind = 'parent'
221
                start = hunk.child_pos
222
                end = start + hunk.num_lines
223
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
224
                        hunk.num_lines)
225
            yield start, end, kind, data
226
            start = end
227
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
228
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
229
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
230
        extra_n = 0
231
        for hunk in reversed(self.hunks):
232
            if isinstance(hunk, ParentText):
233
               return hunk.child_pos + hunk.num_lines + extra_n
234
            extra_n += len(hunk.lines)
235
        return extra_n
236
0.9.25 by Aaron Bentley
More messy hacking
237
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
238
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
239
        if len(self.hunks) != 1:
240
            return False
241
        return (isinstance(self.hunks[0], NewText))
242
0.9.1 by Aaron Bentley
Get trivial case passing
243
244
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
245
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
246
247
    def __init__(self, lines):
248
        self.lines = lines
249
250
    def __eq__(self, other):
251
        if self.__class__ is not other.__class__:
252
            return False
253
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
254
255
    def __repr__(self):
256
        return 'NewText(%r)' % self.lines
257
0.9.4 by Aaron Bentley
Start supporting serialization
258
    def to_patch(self):
259
        yield 'i %d\n' % len(self.lines)
260
        for line in self.lines:
261
            yield line
262
        yield '\n'
263
0.9.2 by Aaron Bentley
Get single-parent comparison working
264
265
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
266
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
267
268
    def __init__(self, parent, parent_pos, child_pos, num_lines):
269
        self.parent = parent
270
        self.parent_pos = parent_pos
271
        self.child_pos = child_pos
272
        self.num_lines = num_lines
273
274
    def __repr__(self):
275
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
276
            ' %(num_lines)r)' % self.__dict__
277
278
    def __eq__(self, other):
279
        if self.__class__ != other.__class__:
280
            return False
281
        return (self.__dict__ == other.__dict__)
0.9.4 by Aaron Bentley
Start supporting serialization
282
283
    def to_patch(self):
284
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
285
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
286
287
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
288
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
289
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
290
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
291
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
292
        self._lines = {}
293
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
294
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
295
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
296
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
297
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
298
    def versions(self):
299
        return iter(self._parents)
300
2520.4.61 by Aaron Bentley
Do bulk insertion of records
301
    def has_version(self, version):
302
        return version in self._parents
303
0.9.12 by Aaron Bentley
Make benchmarks for mp
304
    def do_snapshot(self, version_id, parent_ids):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
305
        """Determine whether to perform a a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
306
        if self.snapshot_interval is None:
307
            return False
308
        if self.max_snapshots is not None and\
309
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
310
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
311
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
312
            return True
313
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
314
            if len(parent_ids) == 0:
315
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
316
            version_ids = parent_ids
317
            parent_ids = []
318
            for version_id in version_ids:
319
                if version_id not in self._snapshots:
320
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
321
        else:
322
            return True
0.9.8 by Aaron Bentley
get add_version working
323
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
324
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
325
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
326
        """Add a version to the versionedfile
327
328
        :param lines: The list of lines to add.  Must be split on '\n'.
329
        :param version_id: The version_id of the version to add
330
        :param force_snapshot: If true, force this version to be added as a
331
            snapshot version.  If false, force this version to be added as a
332
            diff.  If none, determine this automatically.
333
        :param single_parent: If true, use a single parent, rather than
334
            multiple parents.
335
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
336
        if force_snapshot is None:
337
            do_snapshot = self.do_snapshot(version_id, parent_ids)
338
        else:
339
            do_snapshot = force_snapshot
340
        if do_snapshot:
341
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
342
            diff = MultiParent([NewText(lines)])
343
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
344
            if single_parent:
345
                parent_lines = self.get_line_list(parent_ids[:1])
346
            else:
347
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
348
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
349
            if diff.is_snapshot():
350
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
351
        self.add_diff(diff, version_id, parent_ids)
352
        self._lines[version_id] = lines
353
0.9.35 by Aaron Bentley
Add build ranking
354
    def get_parents(self, version_id):
355
        return self._parents[version_id]
356
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
357
    def make_snapshot(self, version_id):
358
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
359
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
360
        self._snapshots.add(version_id)
361
0.9.20 by Aaron Bentley
Convert to a plugin
362
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
363
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
364
        """Import all revisions of a versionedfile
365
366
        :param vf: The versionedfile to import
367
        :param snapshots: If provided, the revisions to make snapshots of.
368
            Otherwise, this will be auto-determined
369
        :param no_cache: If true, clear the cache after every add.
370
        :param single_parent: If true, omit all but one parent text, (but
371
            retain parent metadata).
372
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
373
        if not (no_cache or not verify):
374
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
375
        revisions = set(vf.versions())
376
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
377
        pb = ui.ui_factory.nested_progress_bar()
378
        try:
379
            while len(revisions) > 0:
380
                added = set()
381
                for revision in revisions:
382
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
383
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
384
                        continue
385
                    lines = [a + ' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
386
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
387
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
388
                        force_snapshot = None
389
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
390
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
391
                    self.add_version(lines, revision, parents, force_snapshot,
392
                                     single_parent)
393
                    added.add(revision)
394
                    if no_cache:
395
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
396
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
397
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
398
                            if not (lines == self.get_line_list([revision])[0]):
399
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
400
                            self.clear_cache()
0.9.20 by Aaron Bentley
Convert to a plugin
401
                    pb.update('Importing revisions',
402
                              (total - len(revisions)) + len(added), total)
403
                revisions = [r for r in revisions if r not in added]
404
        finally:
405
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
406
0.9.23 by Aaron Bentley
handle snapshots all at once
407
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
408
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
409
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
410
        descendants = {}
411
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
412
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
413
            potential_build_ancestors = set(vf.get_parents(version_id))
414
            parents = vf.get_parents(version_id)
415
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
416
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
417
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
418
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
419
                for parent in vf.get_parents(version_id):
420
                    potential_build_ancestors.update(build_ancestors[parent])
421
                if len(potential_build_ancestors) > self.snapshot_interval:
422
                    snapshots.add(version_id)
423
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
424
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
425
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
426
        return snapshots
427
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
428
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
429
        """Select snapshots for minimum output size"""
430
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
431
        new_snapshots = self.get_size_ranking()[-num:]
432
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
433
434
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
435
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
436
        versions = []
437
        new_snapshots = set()
438
        for version_id in self.versions():
439
            if version_id in self._snapshots:
440
                continue
441
            diff_len = self.get_diff(version_id).patch_len()
442
            snapshot_len = MultiParent([NewText(
443
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
444
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
445
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
446
        return versions
447
448
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
449
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
450
        for version_id in vf.versions():
451
            self.add_diff(vf.get_diff(version_id), version_id,
452
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
453
0.9.35 by Aaron Bentley
Add build ranking
454
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
455
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
456
        could_avoid = {}
457
        referenced_by = {}
458
        for version_id in topo_iter(self):
459
            could_avoid[version_id] = set()
460
            if version_id not in self._snapshots:
461
                for parent_id in self._parents[version_id]:
462
                    could_avoid[version_id].update(could_avoid[parent_id])
463
                could_avoid[version_id].update(self._parents)
464
                could_avoid[version_id].discard(version_id)
465
            for avoid_id in could_avoid[version_id]:
466
                referenced_by.setdefault(avoid_id, set()).add(version_id)
467
        available_versions = list(self.versions())
468
        ranking = []
469
        while len(available_versions) > 0:
470
            available_versions.sort(key=lambda x:
471
                len(could_avoid[x]) *
472
                len(referenced_by.get(x, [])))
473
            selected = available_versions.pop()
474
            ranking.append(selected)
475
            for version_id in referenced_by[selected]:
476
                could_avoid[version_id].difference_update(
477
                    could_avoid[selected])
478
            for version_id in could_avoid[selected]:
479
                referenced_by[version_id].difference_update(
480
                    referenced_by[selected]
481
                )
482
        return ranking
483
0.9.8 by Aaron Bentley
get add_version working
484
    def clear_cache(self):
485
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
486
487
    def get_line_list(self, version_ids):
488
        return [self.cache_version(v) for v in version_ids]
489
490
    def cache_version(self, version_id):
491
        try:
492
            return self._lines[version_id]
493
        except KeyError:
494
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
495
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
496
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
497
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
498
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
499
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
500
        return lines
501
0.9.33 by Aaron Bentley
Enable caching commandline param
502
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
503
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
504
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
505
506
    def __init__(self, snapshot_interval=25, max_snapshots=None):
507
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
508
        self._diffs = {}
509
510
    def add_diff(self, diff, version_id, parent_ids):
511
        self._diffs[version_id] = diff
512
        self._parents[version_id] = parent_ids
513
514
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
515
        try:
516
            return self._diffs[version_id]
517
        except KeyError:
518
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
519
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
520
    def destroy(self):
521
        self._diffs = {}
522
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
523
524
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
525
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
526
527
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
528
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
529
        self._filename = filename
530
        self._diff_offset = {}
531
532
    def get_diff(self, version_id):
533
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
534
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
535
        try:
536
            infile.seek(start)
537
            sio = StringIO(infile.read(count))
538
        finally:
539
            infile.close()
540
        zip_file = GzipFile(None, mode='rb', fileobj=sio)
541
        try:
542
            file_version_id = zip_file.readline()
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
543
            return MultiParent.from_patch(zip_file.read())
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
544
        finally:
545
            zip_file.close()
546
547
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
548
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
549
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
550
            outfile.seek(0, 2)      # workaround for windows bug:
551
                                    # .tell() for files opened in 'ab' mode
552
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
553
            start = outfile.tell()
554
            try:
555
                zipfile = GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
556
                zipfile.writelines(itertools.chain(
557
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
558
            finally:
559
                zipfile.close()
560
            end = outfile.tell()
561
        finally:
562
            outfile.close()
563
        self._diff_offset[version_id] = (start, end-start)
564
        self._parents[version_id] = parent_ids
565
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
566
    def destroy(self):
567
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
568
            os.unlink(self._filename + '.mpknit')
569
        except OSError, e:
570
            if e.errno != errno.ENOENT:
571
                raise
572
        try:
573
            os.unlink(self._filename + '.mpidx')
574
        except OSError, e:
575
            if e.errno != errno.ENOENT:
576
                raise
577
578
    def save(self):
579
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
580
            (self._parents, list(self._snapshots), self._diff_offset)))
581
582
    def load(self):
583
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
584
            open(self._filename + '.mpidx', 'rb').read())
585
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
586
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
587
588
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
589
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
590
591
    def __init__(self, diffs, lines, parents):
592
        self.diffs = diffs
593
        self.lines = lines
594
        self.parents = parents
595
        self.cursor = {}
596
597
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
598
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
599
        parent_id = self.parents[version_id][parent_text.parent]
600
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
601
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
602
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
603
604
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
605
        """Append lines for the requested version_id range"""
606
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
607
        if req_start == req_end:
608
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
609
        pending_reqs = [(req_version_id, req_start, req_end)]
610
        while len(pending_reqs) > 0:
611
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
612
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
613
            if req_version_id in self.lines:
614
                lines.extend(self.lines[req_version_id][req_start:req_end])
615
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
616
            try:
617
                start, end, kind, data, iterator = self.cursor[req_version_id]
618
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
619
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
620
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
621
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
622
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
623
                start, end, kind, data = iterator.next()
624
0.9.10 by Aaron Bentley
Text reconstruction seems to work
625
            # find the first hunk relevant to the request
626
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
627
                start, end, kind, data = iterator.next()
628
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
629
            # if the hunk can't satisfy the whole request, split it in two,
630
            # and leave the second half for later.
631
            if req_end > end:
632
                pending_reqs.append((req_version_id, end, req_end))
633
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
634
            if kind == 'new':
635
                lines.extend(data[req_start - start: (req_end - start)])
636
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
637
                # If the hunk is a ParentText, rewrite it as a range request
638
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
639
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
640
                new_version_id = self.parents[req_version_id][parent]
641
                new_start = parent_start + req_start - start
642
                new_end = parent_end + req_end - end
643
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
644
645
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
646
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
647
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
648
2520.4.6 by Aaron Bentley
Get installation started
649
0.9.25 by Aaron Bentley
More messy hacking
650
def gzip_string(lines):
651
    sio = StringIO()
652
    data_file = GzipFile(None, mode='wb', fileobj=sio)
653
    data_file.writelines(lines)
654
    data_file.close()
655
    return sio.getvalue()