3
# Copyright (C) 2005 Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
# Author: Martin Pool <mbp@canonical.com>
22
"""Weave - storage of related text file versions"""
25
# XXX: If we do weaves this way, will a merge still behave the same
26
# way if it's done in a different order? That's a pretty desirable
29
# TODO: Nothing here so far assumes the lines are really \n newlines,
30
# rather than being split up in some other way. We could accommodate
31
# binaries, perhaps by naively splitting on \n or perhaps using
32
# something like a rolling checksum.
34
# TODO: End marker for each version so we can stop reading?
36
# TODO: Check that no insertion occurs inside a deletion that was
37
# active in the version of the insertion.
39
# TODO: In addition to the SHA-1 check, perhaps have some code that
40
# checks structural constraints of the weave: ie that insertions are
41
# properly nested, that there is no text outside of an insertion, that
42
# insertions or deletions are not repeated, etc.
44
# TODO: Parallel-extract that passes back each line along with a
45
# description of which revisions include it. Nice for checking all
46
# shas or calculating stats in parallel.
48
# TODO: Using a single _extract routine and then processing the output
49
# is probably inefficient. It's simple enough that we can afford to
50
# have slight specializations for different ways it's used: annotate,
51
# basis for add, get, etc.
53
# TODO: Probably the API should work only in names to hide the integer
54
# indexes from the user.
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Reweave can possibly be made faster by remembering diffs
61
# where the basis and destination are unchanged.
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
66
# be done fairly efficiently because the sequence numbers constrain
67
# the possible relationships.
69
# FIXME: the conflict markers should be *7* characters
72
from cStringIO import StringIO
73
from difflib import SequenceMatcher
78
from bzrlib.trace import mutter
79
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
80
RevisionAlreadyPresent,
82
WeaveRevisionAlreadyPresent,
83
WeaveRevisionNotPresent,
85
import bzrlib.errors as errors
86
from bzrlib.osutils import sha_strings
87
from bzrlib.patiencediff import SequenceMatcher, unified_diff
88
from bzrlib.symbol_versioning import *
89
from bzrlib.tsort import topo_sort
90
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
91
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
94
class Weave(VersionedFile):
95
"""weave - versioned text file storage.
97
A Weave manages versions of line-based text files, keeping track
98
of the originating version for each line.
100
To clients the "lines" of the file are represented as a list of strings.
101
These strings will typically have terminal newline characters, but
102
this is not required. In particular files commonly do not have a newline
103
at the end of the file.
105
Texts can be identified in either of two ways:
107
* a nonnegative index number.
109
* a version-id string.
111
Typically the index number will be valid only inside this weave and
112
the version-id is used to reference it in the larger world.
114
The weave is represented as a list mixing edit instructions and
115
literal text. Each entry in _weave can be either a string (or
116
unicode), or a tuple. If a string, it means that the given line
117
should be output in the currently active revisions.
119
If a tuple, it gives a processing instruction saying in which
120
revisions the enclosed lines are active. The tuple has the form
121
(instruction, version).
123
The instruction can be '{' or '}' for an insertion block, and '['
124
and ']' for a deletion block respectively. The version is the
125
integer version index. There is no replace operator, only deletes
126
and inserts. For '}', the end of an insertion, there is no
127
version parameter because it always closes the most recently
132
* A later version can delete lines that were introduced by any
133
number of ancestor versions; this implies that deletion
134
instructions can span insertion blocks without regard to the
135
insertion block's nesting.
137
* Similarly, deletions need not be properly nested with regard to
138
each other, because they might have been generated by
139
independent revisions.
141
* Insertions are always made by inserting a new bracketed block
142
into a single point in the previous weave. This implies they
143
can nest but not overlap, and the nesting must always have later
144
insertions on the inside.
146
* It doesn't seem very useful to have an active insertion
147
inside an inactive insertion, but it might happen.
149
* Therefore, all instructions are always "considered"; that
150
is passed onto and off the stack. An outer inactive block
151
doesn't disable an inner block.
153
* Lines are enabled if the most recent enclosing insertion is
154
active and none of the enclosing deletions are active.
156
* There is no point having a deletion directly inside its own
157
insertion; you might as well just not write it. And there
158
should be no way to get an earlier version deleting a later
162
Text of the weave; list of control instruction tuples and strings.
165
List of parents, indexed by version number.
166
It is only necessary to store the minimal set of parents for
167
each version; the parent's parents are implied.
170
List of hex SHA-1 of each version.
173
List of symbolic names for each version. Each should be unique.
176
For each name, the version number.
179
Descriptive name of this weave; typically the filename if known.
183
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
184
'_weave_name', '_matcher']
186
def __init__(self, weave_name=None, access_mode='w', matcher=None):
187
super(Weave, self).__init__(access_mode)
193
self._weave_name = weave_name
195
self._matcher = SequenceMatcher
197
self._matcher = matcher
200
return "Weave(%r)" % self._weave_name
203
"""Return a deep copy of self.
205
The copy can be modified without affecting the original weave."""
207
other._weave = self._weave[:]
208
other._parents = self._parents[:]
209
other._sha1s = self._sha1s[:]
210
other._names = self._names[:]
211
other._name_map = self._name_map.copy()
212
other._weave_name = self._weave_name
215
def __eq__(self, other):
216
if not isinstance(other, Weave):
218
return self._parents == other._parents \
219
and self._weave == other._weave \
220
and self._sha1s == other._sha1s
222
def __ne__(self, other):
223
return not self.__eq__(other)
225
@deprecated_method(zero_eight)
226
def idx_to_name(self, index):
227
"""Old public interface, the public interface is all names now."""
230
def _idx_to_name(self, version):
231
return self._names[version]
233
@deprecated_method(zero_eight)
234
def lookup(self, name):
235
"""Backwards compatability thunk:
237
Return name, as name is valid in the api now, and spew deprecation
242
def _lookup(self, name):
243
"""Convert symbolic version name to index."""
245
return self._name_map[name]
247
raise RevisionNotPresent(name, self._weave_name)
249
@deprecated_method(zero_eight)
def iter_names(self):
    """Deprecated convenience wrapper; please see VersionedFile.names().

    Returns an iterator over the value of self.names().
    """
    all_names = self.names()
    return iter(all_names)
254
@deprecated_method(zero_eight)
256
"""See Weave.versions for the current api."""
257
return self.versions()
260
"""See VersionedFile.versions."""
261
return self._names[:]
263
def has_version(self, version_id):
    """See VersionedFile.has_version.

    Return True if version_id is a key of this weave's name map.
    """
    # Use 'in' rather than dict.has_key(): has_key is deprecated (and
    # absent on Python 3), and _add() already tests membership this way.
    return version_id in self._name_map
267
__contains__ = has_version
269
def get_delta(self, version_id):
    """See VersionedFile.get_delta.

    Delegates to get_deltas() with a one-element list and picks out the
    entry for version_id.
    """
    deltas = self.get_deltas([version_id])
    return deltas[version_id]
273
def get_deltas(self, version_ids):
274
"""See VersionedFile.get_deltas."""
275
version_ids = self.get_ancestry(version_ids)
276
for version_id in version_ids:
277
if not self.has_version(version_id):
278
raise RevisionNotPresent(version_id, self)
279
# try extracting all versions; parallel extraction is used
280
nv = self.num_versions()
286
last_parent_lines = {}
288
parent_inclusions = {}
293
# it's simplest to generate a full set of prepared variables.
295
name = self._names[i]
296
sha1s[name] = self.get_sha1(name)
297
parents_list = self.get_parents(name)
299
parent = parents_list[0]
300
parents[name] = parent
301
parent_inclusions[name] = inclusions[parent]
304
parent_inclusions[name] = set()
305
# we want to emit start, finish, replacement_length, replacement_lines tuples.
306
diff_hunks[name] = []
307
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
308
parent_linenums[name] = 0
310
parent_noeols[name] = False
311
last_parent_lines[name] = None
312
new_inc = set([name])
313
for p in self._parents[i]:
314
new_inc.update(inclusions[self._idx_to_name(p)])
315
# debug only, known good so far.
316
#assert set(new_inc) == set(self.get_ancestry(name)), \
317
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
318
inclusions[name] = new_inc
320
nlines = len(self._weave)
322
for lineno, inserted, deletes, line in self._walk_internal():
323
# a line is active in a version if:
324
# insert is in the versions inclusions
326
# deleteset & the versions inclusions is an empty set.
327
# so - if we have a included by mapping - version is included by
328
# children, we get a list of children to examine for deletes affect
329
# ing them, which is less than the entire set of children.
330
for version_id in version_ids:
331
# The active inclusion must be an ancestor,
332
# and no ancestors must have deleted this line,
333
# because we don't support resurrection.
334
parent_inclusion = parent_inclusions[version_id]
335
inclusion = inclusions[version_id]
336
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
337
version_active = inserted in inclusion and not (deletes & inclusion)
338
if not parent_active and not version_active:
339
# unrelated line of ancestry
341
elif parent_active and version_active:
343
parent_linenum = parent_linenums[version_id]
344
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
345
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
347
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
348
parent_linenums[version_id] = parent_linenum
351
noeols[version_id] = True
354
elif parent_active and not version_active:
356
current_hunks[version_id][1] += 1
357
parent_linenums[version_id] += 1
358
last_parent_lines[version_id] = line
359
elif not parent_active and version_active:
361
# noeol only occurs at the end of a file because we
362
# diff linewise. We want to show noeol changes as a
363
# empty diff unless the actual eol-less content changed.
366
if last_parent_lines[version_id][-1] != '\n':
367
parent_noeols[version_id] = True
368
except (TypeError, IndexError):
371
if theline[-1] != '\n':
372
noeols[version_id] = True
376
parent_should_go = False
378
if parent_noeols[version_id] == noeols[version_id]:
379
# no noeol toggle, so trust the weaves statement
380
# that this line is changed.
382
if parent_noeols[version_id]:
383
theline = theline + '\n'
384
elif parent_noeols[version_id]:
385
# parent has no eol, we do:
386
# our line is new, report as such..
388
elif noeols[version_id]:
389
# append a eol so that it looks like
391
theline = theline + '\n'
392
if parents[version_id] is not None:
393
#if last_parent_lines[version_id] is not None:
394
parent_should_go = True
395
if last_parent_lines[version_id] != theline:
398
#parent_should_go = False
400
current_hunks[version_id][2] += 1
401
current_hunks[version_id][3].append((inserted, theline))
403
# last hunk last parent line is not eaten
404
current_hunks[version_id][1] -= 1
405
if current_hunks[version_id][1] < 0:
406
current_hunks[version_id][1] = 0
407
# import pdb;pdb.set_trace()
408
# assert current_hunks[version_id][1] >= 0
412
version = self._idx_to_name(i)
413
if current_hunks[version] != [0, 0, 0, []]:
414
diff_hunks[version].append(tuple(current_hunks[version]))
416
for version_id in version_ids:
417
result[version_id] = (
421
diff_hunks[version_id],
425
def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    Return the names of the direct parents of version_id as a list.
    """
    parent_indexes = self._parents[self._lookup(version_id)]
    # Build a concrete list rather than relying on map(): get_deltas()
    # indexes the result (parents_list[0]).
    return [self._idx_to_name(idx) for idx in parent_indexes]
429
def _check_repeated_add(self, name, parents, text, sha1):
430
"""Check that a duplicated add is OK.
432
If it is, return the (old) index; otherwise raise an exception.
434
idx = self._lookup(name)
435
if sorted(self._parents[idx]) != sorted(parents) \
436
or sha1 != self._sha1s[idx]:
437
raise RevisionAlreadyPresent(name, self._weave_name)
440
@deprecated_method(zero_eight)
def add_identical(self, old_rev_id, new_rev_id, parents):
    """Please use Weave.clone_text now.

    Forwards to clone_text(); note that clone_text takes the new
    revision id first and the old one second.
    """
    cloned = self.clone_text(new_rev_id, old_rev_id, parents)
    return cloned
445
def _add_lines(self, version_id, parents, lines, parent_texts):
446
"""See VersionedFile.add_lines."""
447
return self._add(version_id, lines, map(self._lookup, parents))
449
@deprecated_method(zero_eight)
def add(self, name, parents, text, sha1=None):
    """See VersionedFile.add_lines for the non deprecated api.

    Each parent is passed through _maybe_lookup() before the call is
    forwarded to _add().
    """
    # A concrete list is required: _add() copies it with parents[:] and
    # takes len(parents).
    parent_indexes = [self._maybe_lookup(parent) for parent in parents]
    return self._add(name, text, parent_indexes, sha1)
454
def _add(self, version_id, lines, parents, sha1=None):
455
"""Add a single text on top of the weave.
457
Returns the index number of the newly added version.
460
Symbolic name for this version.
461
(Typically the revision-id of the revision that added it.)
464
List or set of direct parent version numbers.
467
Sequence of lines to be added in the new version.
470
assert isinstance(version_id, basestring)
471
self._check_lines_not_unicode(lines)
472
self._check_lines_are_lines(lines)
474
sha1 = sha_strings(lines)
475
if version_id in self._name_map:
476
return self._check_repeated_add(version_id, parents, lines, sha1)
478
self._check_versions(parents)
479
## self._check_lines(lines)
480
new_version = len(self._parents)
482
# if we abort after here the (in-memory) weave will be corrupt because only
483
# some fields are updated
484
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
485
# - Robert Collins 20060226
486
self._parents.append(parents[:])
487
self._sha1s.append(sha1)
488
self._names.append(version_id)
489
self._name_map[version_id] = new_version
493
# special case; adding with no parents revision; can do
494
# this more quickly by just appending unconditionally.
495
# even more specially, if we're adding an empty text we
496
# need do nothing at all.
498
self._weave.append(('{', new_version))
499
self._weave.extend(lines)
500
self._weave.append(('}', None))
503
if len(parents) == 1:
504
pv = list(parents)[0]
505
if sha1 == self._sha1s[pv]:
506
# special case: same as the single parent
510
ancestors = self._inclusions(parents)
514
# basis a list of (origin, lineno, line)
517
for origin, lineno, line in self._extract(ancestors):
518
basis_lineno.append(lineno)
519
basis_lines.append(line)
521
# another small special case: a merge, producing the same text
523
if lines == basis_lines:
526
# add a sentinel, because we can also match against the final line
527
basis_lineno.append(len(self._weave))
529
# XXX: which line of the weave should we really consider
530
# matches the end of the file? the current code says it's the
531
# last line of the weave?
533
#print 'basis_lines:', basis_lines
534
#print 'new_lines: ', lines
536
s = self._matcher(None, basis_lines, lines)
538
# offset gives the number of lines that have been inserted
539
# into the weave up to the current point; if the original edit instruction
540
# says to change line A then we actually change (A+offset)
543
for tag, i1, i2, j1, j2 in s.get_opcodes():
544
# i1,i2 are given in offsets within basis_lines; we need to map them
545
# back to offsets within the entire weave
546
#print 'raw match', tag, i1, i2, j1, j2
550
i1 = basis_lineno[i1]
551
i2 = basis_lineno[i2]
553
assert 0 <= j1 <= j2 <= len(lines)
555
#print tag, i1, i2, j1, j2
557
# the deletion and insertion are handled separately.
558
# first delete the region.
560
self._weave.insert(i1+offset, ('[', new_version))
561
self._weave.insert(i2+offset+1, (']', new_version))
565
# there may have been a deletion spanning up to
566
# i2; we want to insert after this region to make sure
567
# we don't destroy ourselves
569
self._weave[i:i] = ([('{', new_version)]
572
offset += 2 + (j2 - j1)
575
def _clone_text(self, new_version_id, old_version_id, parents):
576
"""See VersionedFile.clone_text."""
577
old_lines = self.get_text(old_version_id)
578
self.add_lines(new_version_id, parents, old_lines)
580
def _inclusions(self, versions):
581
"""Return set of all ancestors of given version(s)."""
582
if not len(versions):
585
for v in xrange(max(versions), 0, -1):
587
# include all its parents
588
i.update(self._parents[v])
590
## except IndexError:
591
## raise ValueError("version %d not present in weave" % v)
593
@deprecated_method(zero_eight)
594
def inclusions(self, version_ids):
595
"""Deprecated - see VersionedFile.get_ancestry for the replacement."""
598
if isinstance(version_ids[0], int):
599
return [self._idx_to_name(v) for v in self._inclusions(version_ids)]
601
return self.get_ancestry(version_ids)
603
def get_ancestry(self, version_ids):
    """See VersionedFile.get_ancestry.

    Accepts a single version name or a sequence of names and returns the
    names of every version index that _inclusions() yields for them.
    """
    # A lone name is treated as a one-element list.
    if isinstance(version_ids, basestring):
        requested = [version_ids]
    else:
        requested = version_ids
    indexes = [self._lookup(name) for name in requested]
    return [self._idx_to_name(idx) for idx in self._inclusions(indexes)]
610
def _check_lines(self, text):
611
if not isinstance(text, list):
612
raise ValueError("text should be a list, not %s" % type(text))
615
if not isinstance(l, basestring):
616
raise ValueError("text line should be a string or unicode, not %s"
621
def _check_versions(self, indexes):
622
"""Check everything in the sequence of indexes is valid"""
627
raise IndexError("invalid version number %r" % i)
629
def _compatible_parents(self, my_parents, other_parents):
630
"""During join check that other_parents are joinable with my_parents.
632
Joinable is defined as 'is a subset of' - supersets may require
633
regeneration of diffs, but subsets do not.
635
return len(other_parents.difference(my_parents)) == 0
637
def annotate(self, version_id):
638
if isinstance(version_id, int):
639
warn('Weave.annotate(int) is deprecated. Please use version names'
640
' in all circumstances as of 0.8',
645
for origin, lineno, text in self._extract([version_id]):
646
result.append((origin, text))
649
return super(Weave, self).annotate(version_id)
651
def annotate_iter(self, version_id):
    """Yield (version-id, line) pairs for the specified version.

    The version-id in each pair is the name of the version that
    _extract() reports as the origin of that line.
    """
    wanted = [self._lookup(version_id)]
    for origin_idx, _weave_lineno, line_text in self._extract(wanted):
        yield self._idx_to_name(origin_idx), line_text
659
@deprecated_method(zero_eight)
661
"""_walk has become visit, a supported api."""
662
return self._walk_internal()
664
def iter_lines_added_or_present_in_versions(self, version_ids=None):
665
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
666
if version_ids is None:
667
version_ids = self.versions()
668
version_ids = set(version_ids)
669
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
670
# if inserted not in version_ids then it was inserted before the
671
# versions we care about, but because weaves cannot represent ghosts
672
# properly, we do not filter down to that
673
# if inserted not in version_ids: continue
679
#@deprecated_method(zero_eight)
680
def walk(self, version_ids=None):
    """See VersionedFile.walk.

    Thin public wrapper that returns whatever _walk_internal() produces
    for version_ids.
    """
    walker = self._walk_internal(version_ids)
    return walker
684
def _walk_internal(self, version_ids=None):
685
"""Helper method for weave actions."""
690
lineno = 0 # line of weave, 0-based
692
for l in self._weave:
693
if l.__class__ == tuple:
697
istack.append(self._names[v])
701
assert self._names[v] not in dset
702
dset.add(self._names[v])
704
dset.remove(self._names[v])
706
raise WeaveFormatError('unexpected instruction %r' % v)
708
assert l.__class__ in (str, unicode)
710
yield lineno, istack[-1], frozenset(dset), l
714
raise WeaveFormatError("unclosed insertion blocks "
715
"at end of weave: %s" % istack)
717
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
720
def plan_merge(self, ver_a, ver_b):
721
"""Return pseudo-annotation indicating how the two versions merge.
723
This is computed between versions a and b and their common
726
Weave lines present in none of them are skipped entirely.
728
inc_a = set(self.get_ancestry([ver_a]))
729
inc_b = set(self.get_ancestry([ver_b]))
730
inc_c = inc_a & inc_b
732
for lineno, insert, deleteset, line in\
733
self.walk([ver_a, ver_b]):
734
if deleteset & inc_c:
735
# killed in parent; can't be in either a or b
736
# not relevant to our work
737
yield 'killed-base', line
738
elif insert in inc_c:
739
# was inserted in base
740
killed_a = bool(deleteset & inc_a)
741
killed_b = bool(deleteset & inc_b)
742
if killed_a and killed_b:
743
yield 'killed-both', line
745
yield 'killed-a', line
747
yield 'killed-b', line
749
yield 'unchanged', line
750
elif insert in inc_a:
751
if deleteset & inc_a:
752
yield 'ghost-a', line
756
elif insert in inc_b:
757
if deleteset & inc_b:
758
yield 'ghost-b', line
762
# not in either revision
763
yield 'irrelevant', line
765
yield 'unchanged', '' # terminator
767
def _extract(self, versions):
768
"""Yield annotation of lines in included set.
770
Yields a sequence of tuples (origin, lineno, text), where
771
origin is the origin version, lineno the index in the weave,
772
and text the text of the line.
774
The set typically but not necessarily corresponds to a version.
777
if not isinstance(i, int):
780
included = self._inclusions(versions)
786
lineno = 0 # line of weave, 0-based
792
WFE = WeaveFormatError
795
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
796
# +285282 0 1676.8040 1676.8040 +<isinstance>
797
# 1.6 seconds in 'isinstance'.
798
# changing the first isinstance:
799
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
800
# +140414 0 762.8050 762.8050 +<isinstance>
801
# note that the inline time actually dropped (less function calls)
802
# and total processing time was halved.
803
# we're still spending ~1/4 of the method in isinstance though.
804
# so lets hard code the acceptable string classes we expect:
805
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
806
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
808
# yay, down to ~1/4 the initial extract time, and our inline time
809
# has shrunk again, with isinstance no longer dominating.
810
# tweaking the stack inclusion test to use a set gives:
811
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
812
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
814
# - a 5% win, or possibly just noise. However with large istacks that
815
# 'in' test could dominate, so I'm leaving this change in place -
816
# when its fast enough to consider profiling big datasets we can review.
821
for l in self._weave:
822
if l.__class__ == tuple:
830
iset.remove(istack.pop())
841
assert l.__class__ in (str, unicode)
843
isactive = (not dset) and istack and (istack[-1] in included)
845
result.append((istack[-1], lineno, l))
848
raise WeaveFormatError("unclosed insertion blocks "
849
"at end of weave: %s" % istack)
851
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
855
@deprecated_method(zero_eight)
856
def get_iter(self, name_or_index):
857
"""Deprecated, please do not use. Lookups are not not needed.
859
Please use get_lines now.
861
return iter(self.get_lines(self._maybe_lookup(name_or_index)))
863
@deprecated_method(zero_eight)
def maybe_lookup(self, name_or_index):
    """Deprecated, please do not use. Lookups are not needed.

    Forwards to the private _maybe_lookup().
    """
    resolved = self._maybe_lookup(name_or_index)
    return resolved
868
def _maybe_lookup(self, name_or_index):
869
"""Convert possible symbolic name to index, or pass through indexes.
873
if isinstance(name_or_index, (int, long)):
876
return self._lookup(name_or_index)
878
@deprecated_method(zero_eight)
def get(self, version_id):
    """Please use either Weave.get_text or Weave.get_lines as desired.

    This deprecated entry point forwards to get_lines().
    """
    lines = self.get_lines(version_id)
    return lines
883
def get_lines(self, version_id):
884
"""See VersionedFile.get_lines()."""
885
int_index = self._maybe_lookup(version_id)
886
result = [line for (origin, lineno, line) in self._extract([int_index])]
887
expected_sha1 = self._sha1s[int_index]
888
measured_sha1 = sha_strings(result)
889
if measured_sha1 != expected_sha1:
890
raise errors.WeaveInvalidChecksum(
891
'file %s, revision %s, expected: %s, measured %s'
892
% (self._weave_name, version_id,
893
expected_sha1, measured_sha1))
896
def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    Return the entry of the per-version SHA-1 list for version_id.
    """
    index = self._lookup(version_id)
    return self._sha1s[index]
900
@deprecated_method(zero_eight)
901
def numversions(self):
902
"""How many versions are in this weave?
904
Deprecated in favour of num_versions.
906
return self.num_versions()
908
def num_versions(self):
909
"""How many versions are in this weave?"""
910
l = len(self._parents)
911
assert l == len(self._sha1s)
914
__len__ = num_versions
916
def check(self, progress_bar=None):
917
# TODO evaluate performance hit of using string sets in this routine.
918
# TODO: check no circular inclusions
919
# TODO: create a nested progress bar
920
for version in range(self.num_versions()):
921
inclusions = list(self._parents[version])
924
if inclusions[-1] >= version:
925
raise WeaveFormatError("invalid included version %d for index %d"
926
% (inclusions[-1], version))
928
# try extracting all versions; parallel extraction is used
929
nv = self.num_versions()
934
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
935
# The problem is that set membership is much more expensive
936
name = self._idx_to_name(i)
937
sha1s[name] = sha.new()
939
new_inc = set([name])
940
for p in self._parents[i]:
941
new_inc.update(inclusions[self._idx_to_name(p)])
943
assert set(new_inc) == set(self.get_ancestry(name)), \
944
'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
945
inclusions[name] = new_inc
947
nlines = len(self._weave)
949
update_text = 'checking weave'
951
short_name = os.path.basename(self._weave_name)
952
update_text = 'checking %s' % (short_name,)
953
update_text = update_text[:25]
955
for lineno, insert, deleteset, line in self._walk_internal():
957
progress_bar.update(update_text, lineno, nlines)
959
for name, name_inclusions in inclusions.items():
960
# The active inclusion must be an ancestor,
961
# and no ancestors must have deleted this line,
962
# because we don't support resurrection.
963
if (insert in name_inclusions) and not (deleteset & name_inclusions):
964
sha1s[name].update(line)
967
version = self._idx_to_name(i)
968
hd = sha1s[version].hexdigest()
969
expected = self._sha1s[i]
971
raise errors.WeaveInvalidChecksum(
972
"mismatched sha1 for version %s: "
973
"got %s, expected %s"
974
% (version, hd, expected))
976
# TODO: check insertions are properly nested, that there are
977
# no lines outside of insertion blocks, that deletions are
978
# properly paired, etc.
980
def _join(self, other, pb, msg, version_ids, ignore_missing):
981
"""Worker routine for join()."""
982
if not other.versions():
983
return # nothing to update, easy
986
# versions is never none, InterWeave checks this.
989
# two loops so that we do not change ourselves before verifying it
991
# work through in index order to make sure we get all dependencies
994
# get the selected versions only that are in other.versions.
995
version_ids = set(other.versions()).intersection(set(version_ids))
996
# pull in the referenced graph.
997
version_ids = other.get_ancestry(version_ids)
998
pending_graph = [(version, other.get_parents(version)) for
999
version in version_ids]
1000
for name in topo_sort(pending_graph):
1001
other_idx = other._name_map[name]
1002
# returns True if we have it, False if we need it.
1003
if not self._check_version_consistent(other, other_idx, name):
1004
names_to_join.append((other_idx, name))
1013
for other_idx, name in names_to_join:
1014
# TODO: If all the parents of the other version are already
1015
# present then we can avoid some work by just taking the delta
1016
# and adjusting the offsets.
1017
new_parents = self._imported_parents(other, other_idx)
1018
sha1 = other._sha1s[other_idx]
1023
pb.update(msg, merged, len(names_to_join))
1025
lines = other.get_lines(other_idx)
1026
self._add(name, lines, new_parents, sha1)
1028
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1029
merged, processed, self._weave_name, time.time()-time0))
1031
def _imported_parents(self, other, other_idx):
1032
"""Return list of parents in self corresponding to indexes in other."""
1034
for parent_idx in other._parents[other_idx]:
1035
parent_name = other._names[parent_idx]
1036
if parent_name not in self._name_map:
1037
# should not be possible
1038
raise WeaveError("missing parent {%s} of {%s} in %r"
1039
% (parent_name, other._name_map[other_idx], self))
1040
new_parents.append(self._name_map[parent_name])
1043
def _check_version_consistent(self, other, other_idx, name):
1044
"""Check if a version in consistent in this and other.
1046
To be consistent it must have:
1049
* the same direct parents (by name, not index, and disregarding
1052
If present & correct return True;
1053
if not present in self return False;
1054
if inconsistent raise error."""
1055
this_idx = self._name_map.get(name, -1)
1057
if self._sha1s[this_idx] != other._sha1s[other_idx]:
1058
raise errors.WeaveTextDiffers(name, self, other)
1059
self_parents = self._parents[this_idx]
1060
other_parents = other._parents[other_idx]
1061
n1 = set([self._names[i] for i in self_parents])
1062
n2 = set([other._names[i] for i in other_parents])
1063
if not self._compatible_parents(n1, n2):
1064
raise WeaveParentMismatch("inconsistent parents "
1065
"for version {%s}: %s vs %s" % (name, n1, n2))
1071
@deprecated_method(zero_eight)
def reweave(self, other, pb=None, msg=None):
    """reweave has been superseded by plain use of join.

    Forwards other, pb and msg to join() and returns its result.
    """
    joined = self.join(other, pb, msg)
    return joined
1076
def _reweave(self, other, pb, msg):
1077
"""Reweave self with other - internal helper for join().
1079
:param other: The other weave to merge
1080
:param pb: An optional progress bar, indicating how far done we are
1081
:param msg: An optional message for the progress
1083
new_weave = _reweave(self, other, pb=pb, msg=msg)
1084
self._copy_weave_content(new_weave)
1086
def _copy_weave_content(self, otherweave):
1087
"""adsorb the content from otherweave."""
1088
for attr in self.__slots__:
1089
if attr != '_weave_name':
1090
setattr(self, attr, copy(getattr(otherweave, attr)))
1093
class WeaveFile(Weave):
    """A WeaveFile represents a Weave on disk and writes on change."""

    WEAVE_SUFFIX = '.weave'

    def __init__(self, name, transport, filemode=None, create=False, access_mode='w'):
        """Create a WeaveFile.

        :param name: Base name of the weave; WEAVE_SUFFIX is appended to
            form the file name on the transport.
        :param transport: Transport the weave is read from and written to.
        :param filemode: Mode handed to transport.put() when writing.
        :param create: If not True, only open an existing weave.
        :param access_mode: Passed through to Weave.__init__.
        """
        super(WeaveFile, self).__init__(name, access_mode)
        self._transport = transport
        self._filemode = filemode
        try:
            _read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
        except errors.NoSuchFile:
            # NOTE(review): handler body restored from the ``create``
            # contract in the docstring - confirm against upstream.
            if not create:
                raise
            # new file, save it
            self._save()

    def _add_lines(self, version_id, parents, lines, parent_texts):
        """Add a version and save the weave."""
        result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
                                                   parent_texts)
        self._save()
        return result

    def _clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text."""
        super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
        # Persist the change, as the other mutating methods do.
        self._save()

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        from cStringIO import StringIO
        # as we are all in memory always, just serialise to the new place.
        sio = StringIO()
        write_weave_v5(self, sio)
        # rewind so that put() reads the serialised weave from the start
        sio.seek(0)
        transport.put(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)

    def create_empty(self, name, transport, filemode=None):
        """Return a new, empty WeaveFile called name on transport."""
        return WeaveFile(name, transport, filemode, create=True)

    # NOTE(review): the ``def`` line of this method is absent from the
    # visible text; the name ``_save`` is reconstructed - confirm.
    def _save(self):
        """Save the weave."""
        from cStringIO import StringIO
        self._check_write_ok()
        sio = StringIO()
        write_weave_v5(self, sio)
        sio.seek(0)
        self._transport.put(self._weave_name + WeaveFile.WEAVE_SUFFIX,
                            sio,
                            self._filemode)

    # NOTE(review): header reconstructed from the docstring; static because
    # the body reads only the class constant - confirm.
    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [WeaveFile.WEAVE_SUFFIX]

    def join(self, other, pb=None, msg=None, version_ids=None,
             ignore_missing=False):
        """Join other into self and save."""
        super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
        self._save()
1159
@deprecated_function(zero_eight)
def reweave(wa, wb, pb=None, msg=None):
    """Reweaving is deprecated, please just use weave.join().

    :param wa: The first weave.
    :param wb: The second weave.
    :param pb: An optional progress bar, passed on to _reweave().
    :param msg: An optional progress message, passed on to _reweave().
    :return: The combined weave built by _reweave().
    """
    # Hand the combined weave back; previously the result was dropped and
    # callers received None.
    return _reweave(wa, wb, pb, msg)
1164
def _reweave(wa, wb, pb=None, msg=None):
    """Combine two weaves and return the result.

    This works even if a revision R has different parents in
    wa and wb.  In the resulting weave all the parents are given.

    This is done by just building up a new weave, maintaining ordering
    of the versions in the two inputs.  More efficient approaches
    might be possible but it should only be necessary to do
    this operation rarely, when a new previously ghost version is
    introduced.

    :param wa: The first weave.
    :param wb: The second weave.
    :param pb: An optional progress bar, indicating how far done we are
    :param msg: An optional message for the progress
    :return: A new weave holding every version of wa and wb.
    :raises errors.WeaveTextDiffers: if a version present in both weaves
        has different lines in each.
    """
    # NOTE(review): construction of the result weave is absent from the
    # visible text; a no-argument Weave() is assumed - confirm.
    wr = Weave()
    # first determine combined parents of all versions
    # map from version name -> all parent names
    combined_parents = _reweave_parent_graphs(wa, wb)
    mutter("combined parents: %r", combined_parents)
    order = topo_sort(combined_parents.iteritems())
    mutter("order to reweave: %r", order)

    if pb and not msg:
        msg = 'reweave'

    for idx, name in enumerate(order):
        if pb:
            pb.update(msg, idx, len(order))
        if name in wa._name_map:
            lines = wa.get_lines(name)
            if name in wb._name_map:
                lines_b = wb.get_lines(name)
                if lines != lines_b:
                    mutter('Weaves differ on content. rev_id {%s}', name)
                    mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
                    import difflib
                    # Log the difference, then refuse to merge the version.
                    diff_lines = list(difflib.unified_diff(lines, lines_b,
                            wa._weave_name, wb._weave_name))
                    mutter('lines:\n%s', ''.join(diff_lines))
                    raise errors.WeaveTextDiffers(name, wa, wb)
        else:
            lines = wb.get_lines(name)
        # Parents come before children in ``order``, so every parent
        # name can already be looked up in the new weave.
        wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
    return wr
1213
def _reweave_parent_graphs(wa, wb):
    """Return combined parent ancestry for two weaves.

    :param wa: The first weave.
    :param wb: The second weave.
    :return: A dict mapping each version name found in either weave to
        the set of all parent names recorded for it in wa and wb.
    """
    combined = {}
    for weave in [wa, wb]:
        for idx, name in enumerate(weave._names):
            # A version present in both weaves gets the union of its parents.
            p = combined.setdefault(name, set())
            p.update(map(weave._idx_to_name, weave._parents[idx]))
    return combined
1226
"""Show the weave's table-of-contents"""
1227
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1228
for i in (6, 50, 10, 10):
1231
for i in range(w.num_versions()):
1234
parent_str = ' '.join(map(str, w._parents[i]))
1235
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1239
def weave_stats(weave_file, pb):
1240
from bzrlib.weavefile import read_weave
1242
wf = file(weave_file, 'rb')
1243
w = read_weave(wf, WeaveVersionedFile)
1244
# FIXME: doesn't work on pipes
1245
weave_size = wf.tell()
1249
for i in range(vers):
1250
pb.update('checking sizes', i, vers)
1251
for origin, lineno, line in w._extract([i]):
1256
print 'versions %9d' % vers
1257
print 'weave file %9d bytes' % weave_size
1258
print 'total contents %9d bytes' % total
1259
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1262
print 'average size %9d bytes' % avg
1263
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1267
print """bzr weave tool
1269
Experimental tool for weave algorithm.
1272
weave init WEAVEFILE
1273
Create an empty weave file
1274
weave get WEAVEFILE VERSION
1275
Write out specified version.
1276
weave check WEAVEFILE
1277
Check consistency of all versions.
1279
Display table of contents.
1280
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1281
Add NEWTEXT, with specified parent versions.
1282
weave annotate WEAVEFILE VERSION
1283
Display origin of each line.
1284
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1285
Auto-merge two versions and display conflicts.
1286
weave diff WEAVEFILE VERSION1 VERSION2
1287
Show differences between two versions.
1291
% weave init foo.weave
1293
% weave add foo.weave ver0 < foo.txt
1296
(create updated version)
1298
% weave get foo.weave 0 | diff -u - foo.txt
1299
% weave add foo.weave ver1 0 < foo.txt
1302
% weave get foo.weave 0 > foo.txt (create forked version)
1304
% weave add foo.weave ver2 0 < foo.txt
1307
% weave merge foo.weave 1 2 > foo.txt (merge them)
1308
% vi foo.txt (resolve conflicts)
1309
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1321
# in case we're run directly from the subdirectory
1322
sys.path.append('..')
1324
from bzrlib.weavefile import write_weave, read_weave
1325
from bzrlib.progress import ProgressBar
1340
return read_weave(file(argv[2], 'rb'))
1346
# at the moment, based on everything in the file
1348
parents = map(int, argv[4:])
1349
lines = sys.stdin.readlines()
1350
ver = w.add(name, parents, lines)
1351
write_weave(w, file(argv[2], 'wb'))
1352
print 'added version %r %d' % (name, ver)
1355
if os.path.exists(fn):
1356
raise IOError("file exists")
1358
write_weave(w, file(fn, 'wb'))
1359
elif cmd == 'get': # get one version
1361
sys.stdout.writelines(w.get_iter(int(argv[3])))
1366
v1, v2 = map(int, argv[3:5])
1369
diff_gen = unified_diff(lines1, lines2,
1370
'%s version %d' % (fn, v1),
1371
'%s version %d' % (fn, v2))
1372
sys.stdout.writelines(diff_gen)
1374
elif cmd == 'annotate':
1376
# newline is added to all lines regardless; too hard to get
1377
# reasonable formatting otherwise
1379
for origin, text in w.annotate(int(argv[3])):
1380
text = text.rstrip('\r\n')
1382
print ' | %s' % (text)
1384
print '%5d | %s' % (origin, text)
1390
elif cmd == 'stats':
1391
weave_stats(argv[2], ProgressBar())
1393
elif cmd == 'check':
1398
print '%d versions ok' % w.num_versions()
1400
elif cmd == 'inclusions':
1402
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1404
elif cmd == 'parents':
1406
print ' '.join(map(str, w._parents[int(argv[3])]))
1408
elif cmd == 'plan-merge':
1409
# replaced by 'bzr weave-plan-merge'
1411
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1413
print '%14s | %s' % (state, line),
1414
elif cmd == 'merge':
1415
# replaced by 'bzr weave-merge-text'
1417
p = w.plan_merge(int(argv[3]), int(argv[4]))
1418
sys.stdout.writelines(w.weave_merge(p))
1420
raise ValueError('unknown command %r' % cmd)
1424
def profile_main(argv):
    """Run main(argv) under the hotshot profiler and print the hot spots.

    The profile log goes to a temporary file; the 20 entries with the
    highest cumulative time are then printed.

    :param argv: Argument list handed to main().
    :return: Whatever main() returned.
    """
    import tempfile
    import hotshot
    import hotshot.stats

    prof_f = tempfile.NamedTemporaryFile()
    try:
        prof = hotshot.Profile(prof_f.name)
        try:
            ret = prof.runcall(main, argv)
        finally:
            # the log has to be closed before the stats can be loaded
            prof.close()

        stats = hotshot.stats.load(prof_f.name)
        stats.sort_stats('cumulative')
        ## XXX: Might like to write to stderr or the trace file instead but
        ## print_stats seems hardcoded to stdout
        stats.print_stats(20)
    finally:
        prof_f.close()

    return ret
1444
def lsprofile_main(argv):
    """Run main(argv) under bzrlib.lsprof and report the collected stats.

    :param argv: Argument list handed to main().
    :return: Whatever main() returned.
    """
    from bzrlib.lsprof import profile
    ret, stats = profile(main, argv)
    # NOTE(review): sort()/pprint() are assumed from the bzrlib.lsprof
    # Stats object; the reporting lines are absent from the visible text.
    stats.sort()
    stats.pprint()
    return ret
1452
if __name__ == '__main__':
    import sys
    # The profiling switches are stripped from a copy of the argument
    # list so that main() never sees them.
    if '--profile' in sys.argv:
        args = sys.argv[:]
        args.remove('--profile')
        sys.exit(profile_main(args))
    elif '--lsprof' in sys.argv:
        args = sys.argv[:]
        args.remove('--lsprof')
        sys.exit(lsprofile_main(args))
    else:
        sys.exit(main(sys.argv))
1466
class InterWeave(InterVersionedFile):
    """Optimised code paths for weave to weave operations."""

    _matching_file_from_factory = staticmethod(WeaveFile)
    _matching_file_to_factory = staticmethod(WeaveFile)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves.

        :return: True only when both source and target are Weave
            instances.
        """
        try:
            return (isinstance(source, Weave) and
                    isinstance(target, Weave))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        If the target has no versions and no version subset is
        requested, the source content is copied wholesale.  Otherwise the
        versions are joined, falling back to a reweave when the two
        weaves disagree about the parents of a version.
        """
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if self.target.versions() == [] and version_ids is None:
            self.target._copy_weave_content(self.source)
            # NOTE(review): early return restored - after a wholesale copy
            # there is nothing left to join; confirm against upstream.
            return
        try:
            self.target._join(self.source, pb, msg, version_ids, ignore_missing)
        except errors.WeaveParentMismatch:
            self.target._reweave(self.source, pb, msg)
1493
# Register InterWeave with InterVersionedFile at import time; presumably
# this lets weave-to-weave operations use the optimised paths above -
# confirm against InterVersionedFile.register_optimiser.
InterVersionedFile.register_optimiser(InterWeave)