5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
1 |
# Copyright (C) 2007-2010 Canonical Ltd
|
2520.4.85
by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!) |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
2520.4.85
by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!) |
16 |
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
17 |
from bzrlib.lazy_import import lazy_import |
18 |
||
19 |
lazy_import(globals(), """ |
|
0.9.31
by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline |
20 |
import errno
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
21 |
import itertools
|
0.9.31
by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline |
22 |
import os
|
0.9.25
by Aaron Bentley
More messy hacking |
23 |
from StringIO import StringIO
|
0.9.19
by Aaron Bentley
More tweakage |
24 |
|
0.9.25
by Aaron Bentley
More messy hacking |
25 |
from bzrlib import (
|
3287.5.2
by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code. |
26 |
errors,
|
0.9.25
by Aaron Bentley
More messy hacking |
27 |
patiencediff,
|
28 |
trace,
|
|
29 |
ui,
|
|
30 |
)
|
|
2694.5.1
by Alexander Belchenko
pyrex bencode (without benchmarks) |
31 |
from bzrlib import bencode
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
32 |
""") |
0.9.25
by Aaron Bentley
More messy hacking |
33 |
from bzrlib.tuned_gzip import GzipFile |
0.9.3
by Aaron Bentley
Get three-parent comparisions under test |
34 |
|
0.9.33
by Aaron Bentley
Enable caching commandline param |
35 |
|
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
36 |
def topo_iter_keys(vf, keys=None): |
37 |
if keys is None: |
|
38 |
keys = vf.keys() |
|
39 |
parents = vf.get_parent_map(keys) |
|
40 |
return _topo_iter(parents, keys) |
|
41 |
||
2520.4.28
by Aaron Bentley
Force revisions to be topologically sorted |
42 |
def topo_iter(vf, versions=None): |
43 |
if versions is None: |
|
44 |
versions = vf.versions() |
|
3287.5.2
by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code. |
45 |
parents = vf.get_parent_map(versions) |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
46 |
return _topo_iter(parents, versions) |
47 |
||
48 |
def _topo_iter(parents, versions): |
|
49 |
seen = set() |
|
50 |
descendants = {} |
|
2520.4.29
by Aaron Bentley
Reactivate some testing, fix topo_iter |
51 |
def pending_parents(version): |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
52 |
if parents[version] is None: |
53 |
return [] |
|
3287.5.2
by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code. |
54 |
return [v for v in parents[version] if v in versions and |
2520.4.29
by Aaron Bentley
Reactivate some testing, fix topo_iter |
55 |
v not in seen] |
2520.4.28
by Aaron Bentley
Force revisions to be topologically sorted |
56 |
for version_id in versions: |
3350.6.4
by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores. |
57 |
if parents[version_id] is None: |
58 |
# parentless
|
|
59 |
continue
|
|
3287.5.2
by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code. |
60 |
for parent_id in parents[version_id]: |
0.9.26
by Aaron Bentley
Move topological iteration into an iterator |
61 |
descendants.setdefault(parent_id, []).append(version_id) |
2520.4.29
by Aaron Bentley
Reactivate some testing, fix topo_iter |
62 |
cur = [v for v in versions if len(pending_parents(v)) == 0] |
0.9.26
by Aaron Bentley
Move topological iteration into an iterator |
63 |
while len(cur) > 0: |
64 |
next = [] |
|
65 |
for version_id in cur: |
|
66 |
if version_id in seen: |
|
67 |
continue
|
|
2520.4.29
by Aaron Bentley
Reactivate some testing, fix topo_iter |
68 |
if len(pending_parents(version_id)) != 0: |
0.9.26
by Aaron Bentley
Move topological iteration into an iterator |
69 |
continue
|
70 |
next.extend(descendants.get(version_id, [])) |
|
71 |
yield version_id |
|
72 |
seen.add(version_id) |
|
73 |
cur = next |
|
74 |
||
75 |
||
0.9.1
by Aaron Bentley
Get trivial case passing |
76 |
class MultiParent(object): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
77 |
"""A multi-parent diff"""
|
0.9.1
by Aaron Bentley
Get trivial case passing |
78 |
|
5374.2.9
by John Arbash Meinel
change slots back to a list. |
79 |
__slots__ = ['hunks'] |
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
80 |
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
81 |
def __init__(self, hunks=None): |
82 |
if hunks is not None: |
|
83 |
self.hunks = hunks |
|
84 |
else: |
|
85 |
self.hunks = [] |
|
86 |
||
87 |
def __repr__(self): |
|
88 |
return "MultiParent(%r)" % self.hunks |
|
89 |
||
90 |
def __eq__(self, other): |
|
91 |
if self.__class__ is not other.__class__: |
|
92 |
return False |
|
93 |
return (self.hunks == other.hunks) |
|
0.9.1
by Aaron Bentley
Get trivial case passing |
94 |
|
95 |
@staticmethod
|
|
2520.4.41
by Aaron Bentley
Accelerate mpdiff generation |
96 |
def from_lines(text, parents=(), left_blocks=None): |
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
97 |
"""Produce a MultiParent from a list of lines and parents"""
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
98 |
def compare(parent): |
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
99 |
matcher = patiencediff.PatienceSequenceMatcher(None, parent, |
100 |
text) |
|
101 |
return matcher.get_matching_blocks() |
|
2520.4.41
by Aaron Bentley
Accelerate mpdiff generation |
102 |
if len(parents) > 0: |
103 |
if left_blocks is None: |
|
104 |
left_blocks = compare(parents[0]) |
|
105 |
parent_comparisons = [left_blocks] + [compare(p) for p in |
|
106 |
parents[1:]] |
|
107 |
else: |
|
108 |
parent_comparisons = [] |
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
109 |
cur_line = 0 |
110 |
new_text = NewText([]) |
|
111 |
parent_text = [] |
|
112 |
block_iter = [iter(i) for i in parent_comparisons] |
|
113 |
diff = MultiParent([]) |
|
114 |
def next_block(p): |
|
115 |
try: |
|
116 |
return block_iter[p].next() |
|
117 |
except StopIteration: |
|
118 |
return None |
|
119 |
cur_block = [next_block(p) for p, i in enumerate(block_iter)] |
|
120 |
while cur_line < len(text): |
|
121 |
best_match = None |
|
122 |
for p, block in enumerate(cur_block): |
|
123 |
if block is None: |
|
124 |
continue
|
|
125 |
i, j, n = block |
|
2520.4.138
by Aaron Bentley
Fix benign off-by-one error generating mpdiffs |
126 |
while j + n <= cur_line: |
0.9.2
by Aaron Bentley
Get single-parent comparison working |
127 |
block = cur_block[p] = next_block(p) |
128 |
if block is None: |
|
129 |
break
|
|
130 |
i, j, n = block |
|
131 |
if block is None: |
|
132 |
continue
|
|
133 |
if j > cur_line: |
|
134 |
continue
|
|
135 |
offset = cur_line - j |
|
136 |
i += offset |
|
137 |
j = cur_line |
|
138 |
n -= offset |
|
139 |
if n == 0: |
|
140 |
continue
|
|
141 |
if best_match is None or n > best_match.num_lines: |
|
142 |
best_match = ParentText(p, i, j, n) |
|
143 |
if best_match is None: |
|
144 |
new_text.lines.append(text[cur_line]) |
|
145 |
cur_line += 1 |
|
146 |
else: |
|
147 |
if len(new_text.lines) > 0: |
|
148 |
diff.hunks.append(new_text) |
|
149 |
new_text = NewText([]) |
|
150 |
diff.hunks.append(best_match) |
|
151 |
cur_line += best_match.num_lines |
|
152 |
if len(new_text.lines) > 0: |
|
153 |
diff.hunks.append(new_text) |
|
0.9.1
by Aaron Bentley
Get trivial case passing |
154 |
return diff |
155 |
||
2520.4.139
by Aaron Bentley
Support Multiparent.get_matching_blocks |
156 |
def get_matching_blocks(self, parent, parent_len): |
157 |
for hunk in self.hunks: |
|
158 |
if not isinstance(hunk, ParentText) or hunk.parent != parent: |
|
159 |
continue
|
|
160 |
yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines) |
|
161 |
yield parent_len, self.num_lines(), 0 |
|
162 |
||
2520.4.103
by Aaron Bentley
Add MultiParent.to_lines |
163 |
def to_lines(self, parents=()): |
164 |
"""Contruct a fulltext from this diff and its parents"""
|
|
165 |
mpvf = MultiMemoryVersionedFile() |
|
166 |
for num, parent in enumerate(parents): |
|
167 |
mpvf.add_version(StringIO(parent).readlines(), num, []) |
|
168 |
mpvf.add_diff(self, 'a', range(len(parents))) |
|
169 |
return mpvf.get_line_list(['a'])[0] |
|
170 |
||
0.9.1
by Aaron Bentley
Get trivial case passing |
171 |
@classmethod
|
172 |
def from_texts(cls, text, parents=()): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
173 |
"""Produce a MultiParent from a text and list of parent text"""
|
2520.4.92
by Aaron Bentley
fix line-splitting in MultiParent.from_texts |
174 |
return cls.from_lines(StringIO(text).readlines(), |
175 |
[StringIO(p).readlines() for p in parents]) |
|
0.9.1
by Aaron Bentley
Get trivial case passing |
176 |
|
0.9.4
by Aaron Bentley
Start supporting serialization |
177 |
def to_patch(self): |
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
178 |
"""Yield text lines for a patch"""
|
0.9.4
by Aaron Bentley
Start supporting serialization |
179 |
for hunk in self.hunks: |
180 |
for line in hunk.to_patch(): |
|
181 |
yield line |
|
182 |
||
0.9.25
by Aaron Bentley
More messy hacking |
183 |
def patch_len(self): |
184 |
return len(''.join(self.to_patch())) |
|
185 |
||
186 |
def zipped_patch_len(self): |
|
187 |
return len(gzip_string(self.to_patch())) |
|
188 |
||
2520.4.30
by Aaron Bentley
Do our own line splitting for mp-diffs |
189 |
@classmethod
|
190 |
def from_patch(cls, text): |
|
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
191 |
"""Create a MultiParent from its string form"""
|
2520.4.31
by Aaron Bentley
Using StringIO for splitting patch lines |
192 |
return cls._from_patch(StringIO(text)) |
2520.4.30
by Aaron Bentley
Do our own line splitting for mp-diffs |
193 |
|
0.9.18
by Aaron Bentley
Implement from_patch |
194 |
@staticmethod
|
2520.4.30
by Aaron Bentley
Do our own line splitting for mp-diffs |
195 |
def _from_patch(lines): |
196 |
"""This is private because it is essential to split lines on \n only"""
|
|
0.9.18
by Aaron Bentley
Implement from_patch |
197 |
line_iter = iter(lines) |
198 |
hunks = [] |
|
199 |
cur_line = None |
|
200 |
while(True): |
|
201 |
try: |
|
202 |
cur_line = line_iter.next() |
|
203 |
except StopIteration: |
|
204 |
break
|
|
205 |
if cur_line[0] == 'i': |
|
206 |
num_lines = int(cur_line.split(' ')[1]) |
|
207 |
hunk_lines = [line_iter.next() for x in xrange(num_lines)] |
|
208 |
hunk_lines[-1] = hunk_lines[-1][:-1] |
|
209 |
hunks.append(NewText(hunk_lines)) |
|
210 |
elif cur_line[0] == '\n': |
|
211 |
hunks[-1].lines[-1] += '\n' |
|
212 |
else: |
|
3376.2.4
by Martin Pool
Remove every assert statement from bzrlib! |
213 |
if not (cur_line[0] == 'c'): |
214 |
raise AssertionError(cur_line[0]) |
|
0.9.18
by Aaron Bentley
Implement from_patch |
215 |
parent, parent_pos, child_pos, num_lines =\ |
216 |
[int(v) for v in cur_line.split(' ')[1:]] |
|
217 |
hunks.append(ParentText(parent, parent_pos, child_pos, |
|
218 |
num_lines)) |
|
219 |
return MultiParent(hunks) |
|
220 |
||
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
221 |
def range_iterator(self): |
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
222 |
"""Iterate through the hunks, with range indicated
|
223 |
||
224 |
kind is "new" or "parent".
|
|
225 |
for "new", data is a list of lines.
|
|
226 |
for "parent", data is (parent, parent_start, parent_end)
|
|
227 |
:return: a generator of (start, end, kind, data)
|
|
228 |
"""
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
229 |
start = 0 |
230 |
for hunk in self.hunks: |
|
231 |
if isinstance(hunk, NewText): |
|
232 |
kind = 'new' |
|
233 |
end = start + len(hunk.lines) |
|
234 |
data = hunk.lines |
|
235 |
else: |
|
236 |
kind = 'parent' |
|
237 |
start = hunk.child_pos |
|
238 |
end = start + hunk.num_lines |
|
239 |
data = (hunk.parent, hunk.parent_pos, hunk.parent_pos + |
|
240 |
hunk.num_lines) |
|
241 |
yield start, end, kind, data |
|
242 |
start = end |
|
243 |
||
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
244 |
def num_lines(self): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
245 |
"""The number of lines in the output text"""
|
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
246 |
extra_n = 0 |
247 |
for hunk in reversed(self.hunks): |
|
248 |
if isinstance(hunk, ParentText): |
|
249 |
return hunk.child_pos + hunk.num_lines + extra_n |
|
250 |
extra_n += len(hunk.lines) |
|
251 |
return extra_n |
|
252 |
||
0.9.25
by Aaron Bentley
More messy hacking |
253 |
def is_snapshot(self): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
254 |
"""Return true of this hunk is effectively a fulltext"""
|
0.9.25
by Aaron Bentley
More messy hacking |
255 |
if len(self.hunks) != 1: |
256 |
return False |
|
257 |
return (isinstance(self.hunks[0], NewText)) |
|
258 |
||
0.9.1
by Aaron Bentley
Get trivial case passing |
259 |
|
260 |
class NewText(object): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
261 |
"""The contents of text that is introduced by this text"""
|
0.9.1
by Aaron Bentley
Get trivial case passing |
262 |
|
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
263 |
__slots__ = ['lines'] |
264 |
||
0.9.1
by Aaron Bentley
Get trivial case passing |
265 |
def __init__(self, lines): |
266 |
self.lines = lines |
|
267 |
||
268 |
def __eq__(self, other): |
|
269 |
if self.__class__ is not other.__class__: |
|
270 |
return False |
|
271 |
return (other.lines == self.lines) |
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
272 |
|
273 |
def __repr__(self): |
|
274 |
return 'NewText(%r)' % self.lines |
|
275 |
||
0.9.4
by Aaron Bentley
Start supporting serialization |
276 |
def to_patch(self): |
277 |
yield 'i %d\n' % len(self.lines) |
|
278 |
for line in self.lines: |
|
279 |
yield line |
|
280 |
yield '\n' |
|
281 |
||
0.9.2
by Aaron Bentley
Get single-parent comparison working |
282 |
|
283 |
class ParentText(object): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
284 |
"""A reference to text present in a parent text"""
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
285 |
|
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
286 |
__slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines'] |
287 |
||
0.9.2
by Aaron Bentley
Get single-parent comparison working |
288 |
def __init__(self, parent, parent_pos, child_pos, num_lines): |
289 |
self.parent = parent |
|
290 |
self.parent_pos = parent_pos |
|
291 |
self.child_pos = child_pos |
|
292 |
self.num_lines = num_lines |
|
293 |
||
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
294 |
def _as_dict(self): |
295 |
return dict(parent=self.parent, parent_pos=self.parent_pos, |
|
296 |
child_pos=self.child_pos, num_lines=self.num_lines) |
|
297 |
||
0.9.2
by Aaron Bentley
Get single-parent comparison working |
298 |
def __repr__(self): |
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
299 |
return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,' |
300 |
' %(num_lines)r)' % self._as_dict()) |
|
0.9.2
by Aaron Bentley
Get single-parent comparison working |
301 |
|
302 |
def __eq__(self, other): |
|
4088.3.1
by Benjamin Peterson
compare types with 'is' not == |
303 |
if self.__class__ is not other.__class__: |
0.9.2
by Aaron Bentley
Get single-parent comparison working |
304 |
return False |
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
305 |
return self._as_dict() == other._as_dict() |
0.9.4
by Aaron Bentley
Start supporting serialization |
306 |
|
307 |
def to_patch(self): |
|
5374.2.1
by John Arbash Meinel
Do some memory shrinking for multiparent. |
308 |
yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n' |
309 |
% self._as_dict()) |
|
0.9.8
by Aaron Bentley
get add_version working |
310 |
|
311 |
||
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
312 |
class BaseVersionedFile(object): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
313 |
"""Pseudo-VersionedFile skeleton for MultiParent"""
|
0.9.8
by Aaron Bentley
get add_version working |
314 |
|
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
315 |
def __init__(self, snapshot_interval=25, max_snapshots=None): |
0.9.8
by Aaron Bentley
get add_version working |
316 |
self._lines = {} |
317 |
self._parents = {} |
|
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
318 |
self._snapshots = set() |
0.9.12
by Aaron Bentley
Make benchmarks for mp |
319 |
self.snapshot_interval = snapshot_interval |
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
320 |
self.max_snapshots = max_snapshots |
0.9.12
by Aaron Bentley
Make benchmarks for mp |
321 |
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
322 |
def versions(self): |
323 |
return iter(self._parents) |
|
324 |
||
2520.4.61
by Aaron Bentley
Do bulk insertion of records |
325 |
def has_version(self, version): |
326 |
return version in self._parents |
|
327 |
||
0.9.12
by Aaron Bentley
Make benchmarks for mp |
328 |
def do_snapshot(self, version_id, parent_ids): |
4031.3.1
by Frank Aspell
Fixing various typos |
329 |
"""Determine whether to perform a snapshot for this version"""
|
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
330 |
if self.snapshot_interval is None: |
331 |
return False |
|
332 |
if self.max_snapshots is not None and\ |
|
333 |
len(self._snapshots) == self.max_snapshots: |
|
0.9.14
by Aaron Bentley
Temporarily force snapshots to 44 |
334 |
return False |
0.9.12
by Aaron Bentley
Make benchmarks for mp |
335 |
if len(parent_ids) == 0: |
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
336 |
return True |
337 |
for ignored in xrange(self.snapshot_interval): |
|
0.9.12
by Aaron Bentley
Make benchmarks for mp |
338 |
if len(parent_ids) == 0: |
339 |
return False |
|
0.9.17
by Aaron Bentley
Dynamically select snapshots based on all parents |
340 |
version_ids = parent_ids |
341 |
parent_ids = [] |
|
342 |
for version_id in version_ids: |
|
343 |
if version_id not in self._snapshots: |
|
344 |
parent_ids.extend(self._parents[version_id]) |
|
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
345 |
else: |
346 |
return True |
|
0.9.8
by Aaron Bentley
get add_version working |
347 |
|
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
348 |
def add_version(self, lines, version_id, parent_ids, |
0.9.20
by Aaron Bentley
Convert to a plugin |
349 |
force_snapshot=None, single_parent=False): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
350 |
"""Add a version to the versionedfile
|
351 |
||
352 |
:param lines: The list of lines to add. Must be split on '\n'.
|
|
353 |
:param version_id: The version_id of the version to add
|
|
354 |
:param force_snapshot: If true, force this version to be added as a
|
|
355 |
snapshot version. If false, force this version to be added as a
|
|
356 |
diff. If none, determine this automatically.
|
|
357 |
:param single_parent: If true, use a single parent, rather than
|
|
358 |
multiple parents.
|
|
359 |
"""
|
|
0.9.16
by Aaron Bentley
More control over snapshotting, disable caching for inventory |
360 |
if force_snapshot is None: |
361 |
do_snapshot = self.do_snapshot(version_id, parent_ids) |
|
362 |
else: |
|
363 |
do_snapshot = force_snapshot |
|
364 |
if do_snapshot: |
|
365 |
self._snapshots.add(version_id) |
|
0.9.12
by Aaron Bentley
Make benchmarks for mp |
366 |
diff = MultiParent([NewText(lines)]) |
367 |
else: |
|
0.9.20
by Aaron Bentley
Convert to a plugin |
368 |
if single_parent: |
369 |
parent_lines = self.get_line_list(parent_ids[:1]) |
|
370 |
else: |
|
371 |
parent_lines = self.get_line_list(parent_ids) |
|
0.9.12
by Aaron Bentley
Make benchmarks for mp |
372 |
diff = MultiParent.from_lines(lines, parent_lines) |
0.9.25
by Aaron Bentley
More messy hacking |
373 |
if diff.is_snapshot(): |
374 |
self._snapshots.add(version_id) |
|
0.9.8
by Aaron Bentley
get add_version working |
375 |
self.add_diff(diff, version_id, parent_ids) |
376 |
self._lines[version_id] = lines |
|
377 |
||
0.9.35
by Aaron Bentley
Add build ranking |
378 |
def get_parents(self, version_id): |
379 |
return self._parents[version_id] |
|
380 |
||
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
381 |
def make_snapshot(self, version_id): |
382 |
snapdiff = MultiParent([NewText(self.cache_version(version_id))]) |
|
0.9.36
by Aaron Bentley
merge changes |
383 |
self.add_diff(snapdiff, version_id, self._parents[version_id]) |
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
384 |
self._snapshots.add(version_id) |
385 |
||
0.9.20
by Aaron Bentley
Convert to a plugin |
386 |
def import_versionedfile(self, vf, snapshots, no_cache=True, |
0.9.22
by Aaron Bentley
Fix restoration bug |
387 |
single_parent=False, verify=False): |
0.9.20
by Aaron Bentley
Convert to a plugin |
388 |
"""Import all revisions of a versionedfile
|
389 |
||
390 |
:param vf: The versionedfile to import
|
|
391 |
:param snapshots: If provided, the revisions to make snapshots of.
|
|
392 |
Otherwise, this will be auto-determined
|
|
393 |
:param no_cache: If true, clear the cache after every add.
|
|
394 |
:param single_parent: If true, omit all but one parent text, (but
|
|
395 |
retain parent metadata).
|
|
396 |
"""
|
|
3376.2.4
by Martin Pool
Remove every assert statement from bzrlib! |
397 |
if not (no_cache or not verify): |
398 |
raise ValueError() |
|
0.9.19
by Aaron Bentley
More tweakage |
399 |
revisions = set(vf.versions()) |
400 |
total = len(revisions) |
|
0.9.20
by Aaron Bentley
Convert to a plugin |
401 |
pb = ui.ui_factory.nested_progress_bar() |
402 |
try: |
|
403 |
while len(revisions) > 0: |
|
404 |
added = set() |
|
405 |
for revision in revisions: |
|
406 |
parents = vf.get_parents(revision) |
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
407 |
if [p for p in parents if p not in self._parents] != []: |
0.9.20
by Aaron Bentley
Convert to a plugin |
408 |
continue
|
409 |
lines = [a + ' ' + l for a, l in |
|
3316.2.13
by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this |
410 |
vf.annotate(revision)] |
0.9.21
by Aaron Bentley
finish converting ft_ to snapshots |
411 |
if snapshots is None: |
0.9.20
by Aaron Bentley
Convert to a plugin |
412 |
force_snapshot = None |
413 |
else: |
|
0.9.21
by Aaron Bentley
finish converting ft_ to snapshots |
414 |
force_snapshot = (revision in snapshots) |
0.9.20
by Aaron Bentley
Convert to a plugin |
415 |
self.add_version(lines, revision, parents, force_snapshot, |
416 |
single_parent) |
|
417 |
added.add(revision) |
|
418 |
if no_cache: |
|
419 |
self.clear_cache() |
|
0.9.25
by Aaron Bentley
More messy hacking |
420 |
vf.clear_cache() |
0.9.22
by Aaron Bentley
Fix restoration bug |
421 |
if verify: |
3376.2.4
by Martin Pool
Remove every assert statement from bzrlib! |
422 |
if not (lines == self.get_line_list([revision])[0]): |
423 |
raise AssertionError() |
|
0.9.22
by Aaron Bentley
Fix restoration bug |
424 |
self.clear_cache() |
0.9.20
by Aaron Bentley
Convert to a plugin |
425 |
pb.update('Importing revisions', |
426 |
(total - len(revisions)) + len(added), total) |
|
427 |
revisions = [r for r in revisions if r not in added] |
|
428 |
finally: |
|
429 |
pb.finished() |
|
0.9.19
by Aaron Bentley
More tweakage |
430 |
|
0.9.23
by Aaron Bentley
handle snapshots all at once |
431 |
def select_snapshots(self, vf): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
432 |
"""Determine which versions to add as snapshots"""
|
0.9.28
by Aaron Bentley
Update snapshot-picking to use sets of ancestors |
433 |
build_ancestors = {} |
0.9.23
by Aaron Bentley
handle snapshots all at once |
434 |
descendants = {} |
435 |
snapshots = set() |
|
0.9.26
by Aaron Bentley
Move topological iteration into an iterator |
436 |
for version_id in topo_iter(vf): |
0.9.28
by Aaron Bentley
Update snapshot-picking to use sets of ancestors |
437 |
potential_build_ancestors = set(vf.get_parents(version_id)) |
438 |
parents = vf.get_parents(version_id) |
|
439 |
if len(parents) == 0: |
|
0.9.26
by Aaron Bentley
Move topological iteration into an iterator |
440 |
snapshots.add(version_id) |
0.9.28
by Aaron Bentley
Update snapshot-picking to use sets of ancestors |
441 |
build_ancestors[version_id] = set() |
0.9.26
by Aaron Bentley
Move topological iteration into an iterator |
442 |
else: |
0.9.28
by Aaron Bentley
Update snapshot-picking to use sets of ancestors |
443 |
for parent in vf.get_parents(version_id): |
444 |
potential_build_ancestors.update(build_ancestors[parent]) |
|
445 |
if len(potential_build_ancestors) > self.snapshot_interval: |
|
446 |
snapshots.add(version_id) |
|
447 |
build_ancestors[version_id] = set() |
|
0.9.23
by Aaron Bentley
handle snapshots all at once |
448 |
else: |
0.9.28
by Aaron Bentley
Update snapshot-picking to use sets of ancestors |
449 |
build_ancestors[version_id] = potential_build_ancestors |
0.9.23
by Aaron Bentley
handle snapshots all at once |
450 |
return snapshots |
451 |
||
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
452 |
def select_by_size(self, num): |
0.9.35
by Aaron Bentley
Add build ranking |
453 |
"""Select snapshots for minimum output size"""
|
454 |
num -= len(self._snapshots) |
|
0.9.36
by Aaron Bentley
merge changes |
455 |
new_snapshots = self.get_size_ranking()[-num:] |
456 |
return [v for n, v in new_snapshots] |
|
0.9.35
by Aaron Bentley
Add build ranking |
457 |
|
458 |
def get_size_ranking(self): |
|
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
459 |
"""Get versions ranked by size"""
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
460 |
versions = [] |
461 |
new_snapshots = set() |
|
462 |
for version_id in self.versions(): |
|
463 |
if version_id in self._snapshots: |
|
464 |
continue
|
|
465 |
diff_len = self.get_diff(version_id).patch_len() |
|
466 |
snapshot_len = MultiParent([NewText( |
|
467 |
self.cache_version(version_id))]).patch_len() |
|
0.9.36
by Aaron Bentley
merge changes |
468 |
versions.append((snapshot_len - diff_len, version_id)) |
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
469 |
versions.sort() |
0.9.36
by Aaron Bentley
merge changes |
470 |
return versions |
471 |
||
472 |
def import_diffs(self, vf): |
|
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
473 |
"""Import the diffs from another pseudo-versionedfile"""
|
0.9.36
by Aaron Bentley
merge changes |
474 |
for version_id in vf.versions(): |
475 |
self.add_diff(vf.get_diff(version_id), version_id, |
|
476 |
vf._parents[version_id]) |
|
0.9.23
by Aaron Bentley
handle snapshots all at once |
477 |
|
0.9.35
by Aaron Bentley
Add build ranking |
478 |
def get_build_ranking(self): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
479 |
"""Return revisions sorted by how much they reduce build complexity"""
|
0.9.35
by Aaron Bentley
Add build ranking |
480 |
could_avoid = {} |
481 |
referenced_by = {} |
|
482 |
for version_id in topo_iter(self): |
|
483 |
could_avoid[version_id] = set() |
|
484 |
if version_id not in self._snapshots: |
|
485 |
for parent_id in self._parents[version_id]: |
|
486 |
could_avoid[version_id].update(could_avoid[parent_id]) |
|
487 |
could_avoid[version_id].update(self._parents) |
|
488 |
could_avoid[version_id].discard(version_id) |
|
489 |
for avoid_id in could_avoid[version_id]: |
|
490 |
referenced_by.setdefault(avoid_id, set()).add(version_id) |
|
491 |
available_versions = list(self.versions()) |
|
492 |
ranking = [] |
|
493 |
while len(available_versions) > 0: |
|
494 |
available_versions.sort(key=lambda x: |
|
495 |
len(could_avoid[x]) * |
|
496 |
len(referenced_by.get(x, []))) |
|
497 |
selected = available_versions.pop() |
|
498 |
ranking.append(selected) |
|
499 |
for version_id in referenced_by[selected]: |
|
500 |
could_avoid[version_id].difference_update( |
|
501 |
could_avoid[selected]) |
|
502 |
for version_id in could_avoid[selected]: |
|
503 |
referenced_by[version_id].difference_update( |
|
504 |
referenced_by[selected] |
|
505 |
)
|
|
506 |
return ranking |
|
507 |
||
0.9.8
by Aaron Bentley
get add_version working |
508 |
def clear_cache(self): |
509 |
self._lines.clear() |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
510 |
|
511 |
def get_line_list(self, version_ids): |
|
512 |
return [self.cache_version(v) for v in version_ids] |
|
513 |
||
514 |
def cache_version(self, version_id): |
|
515 |
try: |
|
516 |
return self._lines[version_id] |
|
517 |
except KeyError: |
|
518 |
pass
|
|
0.9.29
by Aaron Bentley
Support using disk for knit reconstruction |
519 |
diff = self.get_diff(version_id) |
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
520 |
lines = [] |
2520.4.144
by Aaron Bentley
Make Reconstructor use cached versions |
521 |
reconstructor = _Reconstructor(self, self._lines, self._parents) |
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
522 |
reconstructor.reconstruct_version(lines, version_id) |
0.9.33
by Aaron Bentley
Enable caching commandline param |
523 |
self._lines[version_id] = lines |
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
524 |
return lines |
525 |
||
0.9.33
by Aaron Bentley
Enable caching commandline param |
526 |
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
527 |
class MultiMemoryVersionedFile(BaseVersionedFile): |
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
528 |
"""Memory-backed pseudo-versionedfile"""
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
529 |
|
530 |
def __init__(self, snapshot_interval=25, max_snapshots=None): |
|
531 |
BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots) |
|
532 |
self._diffs = {} |
|
533 |
||
534 |
def add_diff(self, diff, version_id, parent_ids): |
|
535 |
self._diffs[version_id] = diff |
|
536 |
self._parents[version_id] = parent_ids |
|
537 |
||
538 |
def get_diff(self, version_id): |
|
3287.5.2
by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code. |
539 |
try: |
540 |
return self._diffs[version_id] |
|
541 |
except KeyError: |
|
542 |
raise errors.RevisionNotPresent(version_id, self) |
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
543 |
|
0.9.31
by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline |
544 |
def destroy(self): |
545 |
self._diffs = {} |
|
546 |
||
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
547 |
|
548 |
class MultiVersionedFile(BaseVersionedFile): |
|
2520.4.124
by Aaron Bentley
Add docs to multiparent.py |
549 |
"""Disk-backed pseudo-versionedfile"""
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
550 |
|
551 |
def __init__(self, filename, snapshot_interval=25, max_snapshots=None): |
|
552 |
BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots) |
|
553 |
self._filename = filename |
|
554 |
self._diff_offset = {} |
|
555 |
||
556 |
def get_diff(self, version_id): |
|
557 |
start, count = self._diff_offset[version_id] |
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
558 |
infile = open(self._filename + '.mpknit', 'rb') |
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
559 |
try: |
560 |
infile.seek(start) |
|
561 |
sio = StringIO(infile.read(count)) |
|
562 |
finally: |
|
563 |
infile.close() |
|
564 |
zip_file = GzipFile(None, mode='rb', fileobj=sio) |
|
565 |
try: |
|
566 |
file_version_id = zip_file.readline() |
|
2520.4.30
by Aaron Bentley
Do our own line splitting for mp-diffs |
567 |
return MultiParent.from_patch(zip_file.read()) |
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
568 |
finally: |
569 |
zip_file.close() |
|
570 |
||
571 |
def add_diff(self, diff, version_id, parent_ids): |
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
572 |
outfile = open(self._filename + '.mpknit', 'ab') |
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
573 |
try: |
2839.4.1
by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0 |
574 |
outfile.seek(0, 2) # workaround for windows bug: |
575 |
# .tell() for files opened in 'ab' mode
|
|
576 |
# before any write returns 0
|
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
577 |
start = outfile.tell() |
578 |
try: |
|
579 |
zipfile = GzipFile(None, mode='ab', fileobj=outfile) |
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
580 |
zipfile.writelines(itertools.chain( |
581 |
['version %s\n' % version_id], diff.to_patch())) |
|
0.9.30
by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile |
582 |
finally: |
583 |
zipfile.close() |
|
584 |
end = outfile.tell() |
|
585 |
finally: |
|
586 |
outfile.close() |
|
587 |
self._diff_offset[version_id] = (start, end-start) |
|
588 |
self._parents[version_id] = parent_ids |
|
589 |
||
0.9.31
by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline |
590 |
def destroy(self): |
591 |
try: |
|
0.9.34
by Aaron Bentley
Implement save, load, snapshot-by-size |
592 |
os.unlink(self._filename + '.mpknit') |
593 |
except OSError, e: |
|
594 |
if e.errno != errno.ENOENT: |
|
595 |
raise
|
|
596 |
try: |
|
597 |
os.unlink(self._filename + '.mpidx') |
|
598 |
except OSError, e: |
|
599 |
if e.errno != errno.ENOENT: |
|
600 |
raise
|
|
601 |
||
602 |
def save(self): |
|
603 |
open(self._filename + '.mpidx', 'wb').write(bencode.bencode( |
|
604 |
(self._parents, list(self._snapshots), self._diff_offset))) |
|
605 |
||
606 |
def load(self): |
|
607 |
self._parents, snapshots, self._diff_offset = bencode.bdecode( |
|
608 |
open(self._filename + '.mpidx', 'rb').read()) |
|
609 |
self._snapshots = set(snapshots) |
|
0.9.31
by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline |
610 |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
611 |
|
612 |
class _Reconstructor(object): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
613 |
"""Build a text from the diffs, ancestry graph and cached lines"""
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
614 |
|
615 |
def __init__(self, diffs, lines, parents): |
|
616 |
self.diffs = diffs |
|
617 |
self.lines = lines |
|
618 |
self.parents = parents |
|
619 |
self.cursor = {} |
|
620 |
||
621 |
def reconstruct(self, lines, parent_text, version_id): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
622 |
"""Append the lines referred to by a ParentText to lines"""
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
623 |
parent_id = self.parents[version_id][parent_text.parent] |
624 |
end = parent_text.parent_pos + parent_text.num_lines |
|
0.9.17
by Aaron Bentley
Dynamically select snapshots based on all parents |
625 |
return self._reconstruct(lines, parent_id, parent_text.parent_pos, |
626 |
end) |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
627 |
|
628 |
def _reconstruct(self, lines, req_version_id, req_start, req_end): |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
629 |
"""Append lines for the requested version_id range"""
|
630 |
# stack of pending range requests
|
|
2520.4.16
by Aaron Bentley
Handle empty versions correctly |
631 |
if req_start == req_end: |
632 |
return
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
633 |
pending_reqs = [(req_version_id, req_start, req_end)] |
634 |
while len(pending_reqs) > 0: |
|
635 |
req_version_id, req_start, req_end = pending_reqs.pop() |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
636 |
# lazily allocate cursors for versions
|
2520.4.144
by Aaron Bentley
Make Reconstructor use cached versions |
637 |
if req_version_id in self.lines: |
638 |
lines.extend(self.lines[req_version_id][req_start:req_end]) |
|
639 |
continue
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
640 |
try: |
641 |
start, end, kind, data, iterator = self.cursor[req_version_id] |
|
642 |
except KeyError: |
|
0.9.29
by Aaron Bentley
Support using disk for knit reconstruction |
643 |
iterator = self.diffs.get_diff(req_version_id).range_iterator() |
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
644 |
start, end, kind, data = iterator.next() |
0.9.22
by Aaron Bentley
Fix restoration bug |
645 |
if start > req_start: |
0.9.29
by Aaron Bentley
Support using disk for knit reconstruction |
646 |
iterator = self.diffs.get_diff(req_version_id).range_iterator() |
0.9.22
by Aaron Bentley
Fix restoration bug |
647 |
start, end, kind, data = iterator.next() |
648 |
||
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
649 |
# find the first hunk relevant to the request
|
650 |
while end <= req_start: |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
651 |
start, end, kind, data = iterator.next() |
652 |
self.cursor[req_version_id] = start, end, kind, data, iterator |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
653 |
# if the hunk can't satisfy the whole request, split it in two,
|
654 |
# and leave the second half for later.
|
|
655 |
if req_end > end: |
|
656 |
pending_reqs.append((req_version_id, end, req_end)) |
|
657 |
req_end = end |
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
658 |
if kind == 'new': |
659 |
lines.extend(data[req_start - start: (req_end - start)]) |
|
660 |
else: |
|
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
661 |
# If the hunk is a ParentText, rewrite it as a range request
|
662 |
# for the parent, and make it the next pending request.
|
|
0.9.9
by Aaron Bentley
Much progress on non-naive text reconstruction |
663 |
parent, parent_start, parent_end = data |
0.9.10
by Aaron Bentley
Text reconstruction seems to work |
664 |
new_version_id = self.parents[req_version_id][parent] |
665 |
new_start = parent_start + req_start - start |
|
666 |
new_end = parent_end + req_end - end |
|
667 |
pending_reqs.append((new_version_id, new_start, new_end)) |
|
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
668 |
|
669 |
def reconstruct_version(self, lines, version_id): |
|
0.9.29
by Aaron Bentley
Support using disk for knit reconstruction |
670 |
length = self.diffs.get_diff(version_id).num_lines() |
0.9.11
by Aaron Bentley
Implement reconstruct_version, handle all hunks through that |
671 |
return self._reconstruct(lines, version_id, 0, length) |
0.9.25
by Aaron Bentley
More messy hacking |
672 |
|
2520.4.6
by Aaron Bentley
Get installation started |
673 |
|
0.9.25
by Aaron Bentley
More messy hacking |
674 |
def gzip_string(lines): |
675 |
sio = StringIO() |
|
676 |
data_file = GzipFile(None, mode='wb', fileobj=sio) |
|
677 |
data_file.writelines(lines) |
|
678 |
data_file.close() |
|
679 |
return sio.getvalue() |