22
22
"""Weave - storage of related text file versions"""
24
# TODO: Perhaps have copy method for Weave instances?
25
26
# XXX: If we do weaves this way, will a merge still behave the same
26
27
# way if it's done in a different order? That's a pretty desirable
30
# TODO: How to write these to disk? One option is cPickle, which
31
# would be fast but less friendly to C, and perhaps not portable. Another is
29
33
# TODO: Nothing here so far assumes the lines are really \n newlines,
30
# rather than being split up in some other way. We could accommodate
34
# rather than being split up in some other way. We could accommodate
31
35
# binaries, perhaps by naively splitting on \n or perhaps using
32
36
# something like a rolling checksum.
38
# TODO: Perhaps track SHA-1 in the header for protection? This would
39
# be redundant with it being stored in the inventory, but perhaps
42
# TODO: Track version names as well as indexes.
44
# TODO: Probably do transitive expansion when specifying parents?
46
# TODO: Separate out some code to read and write weaves.
34
48
# TODO: End marker for each version so we can stop reading?
36
50
# TODO: Check that no insertion occurs inside a deletion that was
37
51
# active in the version of the insertion.
39
# TODO: In addition to the SHA-1 check, perhaps have some code that
40
# checks structural constraints of the weave: ie that insertions are
41
# properly nested, that there is no text outside of an insertion, that
42
# insertions or deletions are not repeated, etc.
44
# TODO: Parallel-extract that passes back each line along with a
45
# description of which revisions include it. Nice for checking all
46
# shas or calculating stats in parallel.
48
# TODO: Using a single _extract routine and then processing the output
49
# is probably inefficient. It's simple enough that we can afford to
50
# have slight specializations for different ways it's used: annotate,
51
# basis for add, get, etc.
53
# TODO: Probably the API should work only in names to hide the integer
54
# indexes from the user.
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Reweave can possibly be made faster by remembering diffs
61
# where the basis and destination are unchanged.
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
66
# be done fairly efficiently because the sequence numbers constrain
67
# the possible relationships.
69
# FIXME: the conflict markers should be *7* characters
72
from cStringIO import StringIO
77
from bzrlib.lazy_import import lazy_import
78
lazy_import(globals(), """
79
from bzrlib import tsort
84
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
85
RevisionAlreadyPresent,
87
UnavailableRepresentation,
88
WeaveRevisionAlreadyPresent,
89
WeaveRevisionNotPresent,
91
import bzrlib.errors as errors
92
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
93
import bzrlib.patiencediff
94
from bzrlib.revision import NULL_REVISION
95
from bzrlib.symbol_versioning import *
96
from bzrlib.trace import mutter
97
from bzrlib.versionedfile import (
103
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
106
class WeaveContentFactory(ContentFactory):
107
"""Content factory for streaming from weaves.
109
:seealso ContentFactory:
112
def __init__(self, version, weave):
113
"""Create a WeaveContentFactory for version from weave."""
114
ContentFactory.__init__(self)
115
self.sha1 = weave.get_sha1s([version])[version]
116
self.key = (version,)
117
parents = weave.get_parent_map([version])[version]
118
self.parents = tuple((parent,) for parent in parents)
119
self.storage_kind = 'fulltext'
122
def get_bytes_as(self, storage_kind):
123
if storage_kind == 'fulltext':
124
return self._weave.get_text(self.key[-1])
126
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
129
class Weave(VersionedFile):
53
# TODO: Perhaps a special slower check() method that verifies more
54
# nesting constraints and the MD5 of each version?
62
from sets import Set, ImmutableSet
64
frozenset = ImmutableSet
68
class WeaveError(Exception):
69
"""Exception in processing weave"""
72
class WeaveFormatError(WeaveError):
73
"""Weave invariant violated"""
130
77
"""weave - versioned text file storage.
132
79
A Weave manages versions of line-based text files, keeping track
193
138
should be no way to get an earlier version deleting a later
197
Text of the weave; list of control instruction tuples and strings.
200
List of parents, indexed by version number.
201
It is only necessary to store the minimal set of parents for
202
each version; the parent's parents are implied.
145
List of versions, indexed by index number.
147
For each version we store the set (included_versions), which
148
lists the previous versions also considered active; the
149
versions included in those versions are included transitively.
150
So new versions created from nothing list []; most versions
151
have a single entry; some have more.
205
List of hex SHA-1 of each version.
208
List of symbolic names for each version. Each should be unique.
211
For each name, the version number.
214
Descriptive name of this weave; typically the filename if known.
154
List of hex SHA-1 of each version, or None if not recorded.
218
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
219
'_weave_name', '_matcher', '_allow_reserved']
221
def __init__(self, weave_name=None, access_mode='w', matcher=None,
222
get_scope=None, allow_reserved=False):
225
:param get_scope: A callable that returns an opaque object to be used
226
for detecting when this weave goes out of scope (should stop
227
answering requests or allowing mutation).
229
super(Weave, self).__init__()
235
self._weave_name = weave_name
237
self._matcher = bzrlib.patiencediff.PatienceSequenceMatcher
239
self._matcher = matcher
240
if get_scope is None:
241
get_scope = lambda:None
242
self._get_scope = get_scope
243
self._scope = get_scope()
244
self._access_mode = access_mode
245
self._allow_reserved = allow_reserved
248
return "Weave(%r)" % self._weave_name
250
def _check_write_ok(self):
251
"""Is the versioned file marked as 'finished' ? Raise if it is."""
252
if self._get_scope() != self._scope:
253
raise errors.OutSideTransaction()
254
if self._access_mode != 'w':
255
raise errors.ReadOnlyObjectDirtiedError(self)
258
"""Return a deep copy of self.
260
The copy can be modified without affecting the original weave."""
262
other._weave = self._weave[:]
263
other._parents = self._parents[:]
264
other._sha1s = self._sha1s[:]
265
other._names = self._names[:]
266
other._name_map = self._name_map.copy()
267
other._weave_name = self._weave_name
270
162
def __eq__(self, other):
271
163
if not isinstance(other, Weave):
273
return self._parents == other._parents \
274
and self._weave == other._weave \
275
and self._sha1s == other._sha1s
165
return self._v == other._v \
166
and self._l == other._l
277
169
def __ne__(self, other):
278
170
return not self.__eq__(other)
280
def _idx_to_name(self, version):
281
return self._names[version]
283
def _lookup(self, name):
284
"""Convert symbolic version name to index."""
285
if not self._allow_reserved:
286
self.check_not_reserved_id(name)
288
return self._name_map[name]
290
raise RevisionNotPresent(name, self._weave_name)
293
"""See VersionedFile.versions."""
294
return self._names[:]
296
def has_version(self, version_id):
297
"""See VersionedFile.has_version."""
298
return (version_id in self._name_map)
300
__contains__ = has_version
302
def get_record_stream(self, versions, ordering, include_delta_closure):
303
"""Get a stream of records for versions.
305
:param versions: The versions to include. Each version is a tuple
307
:param ordering: Either 'unordered' or 'topological'. A topologically
308
sorted stream has compression parents strictly before their
310
:param include_delta_closure: If True then the closure across any
311
compression parents will be included (in the opaque data).
312
:return: An iterator of ContentFactory objects, each of which is only
313
valid until the iterator is advanced.
315
versions = [version[-1] for version in versions]
316
if ordering == 'topological':
317
parents = self.get_parent_map(versions)
318
new_versions = tsort.topo_sort(parents)
319
new_versions.extend(set(versions).difference(set(parents)))
320
versions = new_versions
321
for version in versions:
323
yield WeaveContentFactory(version, self)
325
yield AbsentContentFactory((version,))
327
def get_parent_map(self, version_ids):
328
"""See VersionedFile.get_parent_map."""
330
for version_id in version_ids:
331
if version_id == NULL_REVISION:
336
map(self._idx_to_name,
337
self._parents[self._lookup(version_id)]))
338
except RevisionNotPresent:
340
result[version_id] = parents
343
def get_parents_with_ghosts(self, version_id):
344
raise NotImplementedError(self.get_parents_with_ghosts)
346
def insert_record_stream(self, stream):
347
"""Insert a record stream into this versioned file.
349
:param stream: A stream of records to insert.
351
:seealso VersionedFile.get_record_stream:
354
for record in stream:
355
# Raise an error when a record is missing.
356
if record.storage_kind == 'absent':
357
raise RevisionNotPresent([record.key[0]], self)
358
# adapt to non-tuple interface
359
parents = [parent[0] for parent in record.parents]
360
if record.storage_kind == 'fulltext':
361
self.add_lines(record.key[0], parents,
362
split_lines(record.get_bytes_as('fulltext')))
364
adapter_key = record.storage_kind, 'fulltext'
366
adapter = adapters[adapter_key]
368
adapter_factory = adapter_registry.get(adapter_key)
369
adapter = adapter_factory(self)
370
adapters[adapter_key] = adapter
371
lines = split_lines(adapter.get_bytes(
372
record, record.get_bytes_as(record.storage_kind)))
374
self.add_lines(record.key[0], parents, lines)
375
except RevisionAlreadyPresent:
378
def _check_repeated_add(self, name, parents, text, sha1):
379
"""Check that a duplicated add is OK.
381
If it is, return the (old) index; otherwise raise an exception.
383
idx = self._lookup(name)
384
if sorted(self._parents[idx]) != sorted(parents) \
385
or sha1 != self._sha1s[idx]:
386
raise RevisionAlreadyPresent(name, self._weave_name)
389
def _add_lines(self, version_id, parents, lines, parent_texts,
390
left_matching_blocks, nostore_sha, random_id, check_content):
391
"""See VersionedFile.add_lines."""
392
idx = self._add(version_id, lines, map(self._lookup, parents),
393
nostore_sha=nostore_sha)
394
return sha_strings(lines), sum(map(len, lines)), idx
396
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
173
def add(self, parents, text):
397
174
"""Add a single text on top of the weave.
399
176
Returns the index number of the newly added version.
402
Symbolic name for this version.
403
(Typically the revision-id of the revision that added it.)
406
List or set of direct parent version numbers.
409
Sequence of lines to be added in the new version.
411
:param nostore_sha: See VersionedFile.add_lines.
413
self._check_lines_not_unicode(lines)
414
self._check_lines_are_lines(lines)
416
sha1 = sha_strings(lines)
417
if sha1 == nostore_sha:
418
raise errors.ExistingContent
419
if version_id in self._name_map:
420
return self._check_repeated_add(version_id, parents, lines, sha1)
422
self._check_versions(parents)
423
## self._check_lines(lines)
424
new_version = len(self._parents)
426
# if we abort after here the (in-memory) weave will be corrupt because only
427
# some fields are updated
428
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
429
# - Robert Collins 20060226
430
self._parents.append(parents[:])
179
List or set of parent version numbers. This must normally include
180
the parents and the parent's parents, or weird things might happen.
183
Sequence of lines to be added in the new version."""
184
## self._check_versions(parents)
185
## self._check_lines(text)
196
delta = self._delta(self.inclusions(parents), text)
198
# offset gives the number of lines that have been inserted
199
# into the weave up to the current point; if the original edit instruction
200
# says to change line A then we actually change (A+offset)
203
for i1, i2, newlines in delta:
206
assert i2 <= len(self._l)
208
# the deletion and insertion are handled separately.
209
# first delete the region.
211
self._l.insert(i1+offset, ('[', idx))
212
self._l.insert(i2+offset+1, (']', idx))
217
# there may have been a deletion spanning up to
218
# i2; we want to insert after this region to make sure
219
# we don't destroy ourselves
221
self._l[i:i] = [('{', idx)] \
224
offset += 2 + len(newlines)
226
self._addversion(parents)
228
# special case; adding with no parents revision; can do this
229
# more quickly by just appending unconditionally
230
self._l.append(('{', idx))
232
self._l.append(('}', idx))
234
self._addversion(None)
431
236
self._sha1s.append(sha1)
432
self._names.append(version_id)
433
self._name_map[version_id] = new_version
437
# special case; adding with no parents revision; can do
438
# this more quickly by just appending unconditionally.
439
# even more specially, if we're adding an empty text we
440
# need do nothing at all.
442
self._weave.append(('{', new_version))
443
self._weave.extend(lines)
444
self._weave.append(('}', None))
447
if len(parents) == 1:
448
pv = list(parents)[0]
449
if sha1 == self._sha1s[pv]:
450
# special case: same as the single parent
454
ancestors = self._inclusions(parents)
458
# basis a list of (origin, lineno, line)
461
for origin, lineno, line in self._extract(ancestors):
462
basis_lineno.append(lineno)
463
basis_lines.append(line)
465
# another small special case: a merge, producing the same text
467
if lines == basis_lines:
470
# add a sentinel, because we can also match against the final line
471
basis_lineno.append(len(self._weave))
473
# XXX: which line of the weave should we really consider
474
# matches the end of the file? the current code says it's the
475
# last line of the weave?
477
#print 'basis_lines:', basis_lines
478
#print 'new_lines: ', lines
480
s = self._matcher(None, basis_lines, lines)
482
# offset gives the number of lines that have been inserted
483
# into the weave up to the current point; if the original edit instruction
484
# says to change line A then we actually change (A+offset)
487
for tag, i1, i2, j1, j2 in s.get_opcodes():
488
# i1,i2 are given in offsets within basis_lines; we need to map them
489
# back to offsets within the entire weave
490
#print 'raw match', tag, i1, i2, j1, j2
493
i1 = basis_lineno[i1]
494
i2 = basis_lineno[i2]
495
# the deletion and insertion are handled separately.
496
# first delete the region.
498
self._weave.insert(i1+offset, ('[', new_version))
499
self._weave.insert(i2+offset+1, (']', new_version))
503
# there may have been a deletion spanning up to
504
# i2; we want to insert after this region to make sure
505
# we don't destroy ourselves
507
self._weave[i:i] = ([('{', new_version)]
510
offset += 2 + (j2 - j1)
513
def _inclusions(self, versions):
514
"""Return set of all ancestors of given version(s)."""
515
if not len(versions):
241
def inclusions(self, versions):
242
"""Expand out everything included by versions."""
517
243
i = set(versions)
518
for v in xrange(max(versions), 0, -1):
520
# include all its parents
521
i.update(self._parents[v])
523
## except IndexError:
524
## raise ValueError("version %d not present in weave" % v)
526
def get_ancestry(self, version_ids, topo_sorted=True):
527
"""See VersionedFile.get_ancestry."""
528
if isinstance(version_ids, basestring):
529
version_ids = [version_ids]
530
i = self._inclusions([self._lookup(v) for v in version_ids])
531
return [self._idx_to_name(v) for v in i]
249
def _addversion(self, parents):
251
self._v.append(frozenset(parents))
253
self._v.append(frozenset())
533
256
def _check_lines(self, text):
534
257
if not isinstance(text, list):
668
295
The set typically but not necessarily corresponds to a version.
671
if not isinstance(i, int):
674
included = self._inclusions(versions)
680
lineno = 0 # line of weave, 0-based
297
istack = [] # versions for which an insertion block is current
299
dset = set() # versions for which a deletion block is current
303
lineno = 0 # line of weave, 0-based
305
# TODO: Probably only need to put included revisions in the istack
307
# TODO: Could split this into two functions, one that updates
308
# the stack and the other that processes the results -- but
309
# I'm not sure it's really needed.
311
# TODO: In fact, I think we only need to store the *count* of
312
# active insertions and deletions, and we can maintain that by
313
# just counting as we go along.
686
315
WFE = WeaveFormatError
689
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
690
# +285282 0 1676.8040 1676.8040 +<isinstance>
691
# 1.6 seconds in 'isinstance'.
692
# changing the first isinstance:
693
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
694
# +140414 0 762.8050 762.8050 +<isinstance>
695
# note that the inline time actually dropped (less function calls)
696
# and total processing time was halved.
697
# we're still spending ~1/4 of the method in isinstance though.
698
# so let's hard code the acceptable string classes we expect:
699
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
700
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
702
# yay, down to ~1/4 the initial extract time, and our inline time
703
# has shrunk again, with isinstance no longer dominating.
704
# tweaking the stack inclusion test to use a set gives:
705
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
706
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
708
# - a 5% win, or possibly just noise. However with large istacks that
709
# 'in' test could dominate, so I'm leaving this change in place -
710
# when it's fast enough to consider profiling big datasets we can review.
715
for l in self._weave:
716
if l.__class__ == tuple:
318
if isinstance(l, tuple):
319
isactive = None # recalculate
322
if istack and (istack[-1] >= v):
323
raise WFE("improperly nested insertions %d>=%d on line %d"
324
% (istack[-1], v, lineno))
723
iset.remove(istack.pop())
330
raise WFE("unmatched close of insertion %d on line %d"
333
raise WFE("mismatched close of insertion %d!=%d on line %d"
338
raise WFE("repeated deletion marker for version %d on line %d"
342
raise WFE("version %d deletes own text on line %d"
350
raise WFE("unmatched close of deletion %d on line %d"
731
raise AssertionError()
353
raise WFE("invalid processing instruction %r on line %d"
734
isactive = (not dset) and istack and (istack[-1] in included)
356
assert isinstance(l, basestring)
358
raise WFE("literal at top level on line %d"
361
isactive = (istack[-1] in included) \
362
and not included.intersection(dset)
736
result.append((istack[-1], lineno, l))
365
yield origin, lineno, l
739
raise WeaveFormatError("unclosed insertion blocks "
740
"at end of weave: %s" % istack)
369
raise WFE("unclosed insertion blocks at end of weave",
742
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
746
def _maybe_lookup(self, name_or_index):
747
"""Convert possible symbolic name to index, or pass through indexes.
751
if isinstance(name_or_index, (int, long)):
754
return self._lookup(name_or_index)
756
def get_lines(self, version_id):
757
"""See VersionedFile.get_lines()."""
758
int_index = self._maybe_lookup(version_id)
759
result = [line for (origin, lineno, line) in self._extract([int_index])]
760
expected_sha1 = self._sha1s[int_index]
761
measured_sha1 = sha_strings(result)
762
if measured_sha1 != expected_sha1:
763
raise errors.WeaveInvalidChecksum(
764
'file %s, revision %s, expected: %s, measured %s'
765
% (self._weave_name, version_id,
766
expected_sha1, measured_sha1))
769
def get_sha1s(self, version_ids):
770
"""See VersionedFile.get_sha1s()."""
772
for v in version_ids:
773
result[v] = self._sha1s[self._lookup(v)]
776
def num_versions(self):
777
"""How many versions are in this weave?"""
778
l = len(self._parents)
372
raise WFE("unclosed deletion blocks at end of weave",
376
def get_iter(self, version):
377
"""Yield lines for the specified version."""
378
for origin, lineno, line in self._extract(self.inclusions([version])):
382
def get(self, index):
383
return list(self.get_iter(index))
386
def mash_iter(self, included):
387
"""Return composed version of multiple included versions."""
388
included = frozenset(included)
389
for origin, lineno, text in self._extract(included):
393
def dump(self, to_file):
394
from pprint import pprint
395
print >>to_file, "Weave._l = ",
396
pprint(self._l, to_file)
397
print >>to_file, "Weave._v = ",
398
pprint(self._v, to_file)
402
def numversions(self):
404
assert l == len(self._sha1s)
781
__len__ = num_versions
783
def check(self, progress_bar=None):
784
# TODO evaluate performance hit of using string sets in this routine.
785
# TODO: check no circular inclusions
786
# TODO: create a nested progress bar
787
for version in range(self.num_versions()):
788
inclusions = list(self._parents[version])
409
# check no circular inclusions
410
for version in range(self.numversions()):
411
inclusions = list(self._v[version])
790
413
inclusions.sort()
791
414
if inclusions[-1] >= version:
792
415
raise WeaveFormatError("invalid included version %d for index %d"
793
416
% (inclusions[-1], version))
795
# try extracting all versions; parallel extraction is used
796
nv = self.num_versions()
801
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
802
# The problem is that set membership is much more expensive
803
name = self._idx_to_name(i)
806
new_inc = set([name])
807
for p in self._parents[i]:
808
new_inc.update(inclusions[self._idx_to_name(p)])
810
if set(new_inc) != set(self.get_ancestry(name)):
811
raise AssertionError(
813
% (set(new_inc), set(self.get_ancestry(name))))
814
inclusions[name] = new_inc
816
nlines = len(self._weave)
818
update_text = 'checking weave'
820
short_name = os.path.basename(self._weave_name)
821
update_text = 'checking %s' % (short_name,)
822
update_text = update_text[:25]
824
for lineno, insert, deleteset, line in self._walk_internal():
826
progress_bar.update(update_text, lineno, nlines)
828
for name, name_inclusions in inclusions.items():
829
# The active inclusion must be an ancestor,
830
# and no ancestors must have deleted this line,
831
# because we don't support resurrection.
832
if (insert in name_inclusions) and not (deleteset & name_inclusions):
833
sha1s[name].update(line)
836
version = self._idx_to_name(i)
837
hd = sha1s[version].hexdigest()
838
expected = self._sha1s[i]
418
# try extracting all versions; this is a bit slow and parallel
419
# extraction could be used
421
for version in range(self.numversions()):
423
for l in self.get_iter(version):
426
expected = self._sha1s[version]
839
427
if hd != expected:
840
raise errors.WeaveInvalidChecksum(
841
"mismatched sha1 for version %s: "
842
"got %s, expected %s"
843
% (version, hd, expected))
845
# TODO: check insertions are properly nested, that there are
846
# no lines outside of insertion blocks, that deletions are
847
# properly paired, etc.
849
def _imported_parents(self, other, other_idx):
850
"""Return list of parents in self corresponding to indexes in other."""
852
for parent_idx in other._parents[other_idx]:
853
parent_name = other._names[parent_idx]
854
if parent_name not in self._name_map:
855
# should not be possible
856
raise WeaveError("missing parent {%s} of {%s} in %r"
857
% (parent_name, other._name_map[other_idx], self))
858
new_parents.append(self._name_map[parent_name])
861
def _check_version_consistent(self, other, other_idx, name):
862
"""Check if a version is consistent in this and other.
864
To be consistent it must have:
867
* the same direct parents (by name, not index, and disregarding
870
If present & correct return True;
871
if not present in self return False;
872
if inconsistent raise error."""
873
this_idx = self._name_map.get(name, -1)
875
if self._sha1s[this_idx] != other._sha1s[other_idx]:
876
raise errors.WeaveTextDiffers(name, self, other)
877
self_parents = self._parents[this_idx]
878
other_parents = other._parents[other_idx]
879
n1 = set([self._names[i] for i in self_parents])
880
n2 = set([other._names[i] for i in other_parents])
881
if not self._compatible_parents(n1, n2):
882
raise WeaveParentMismatch("inconsistent parents "
883
"for version {%s}: %s vs %s" % (name, n1, n2))
889
def _reweave(self, other, pb, msg):
890
"""Reweave self with other - internal helper for join().
892
:param other: The other weave to merge
893
:param pb: An optional progress bar, indicating how far done we are
894
:param msg: An optional message for the progress
896
new_weave = _reweave(self, other, pb=pb, msg=msg)
897
self._copy_weave_content(new_weave)
899
def _copy_weave_content(self, otherweave):
900
"""Absorb the content from otherweave."""
901
for attr in self.__slots__:
902
if attr != '_weave_name':
903
setattr(self, attr, copy(getattr(otherweave, attr)))
906
class WeaveFile(Weave):
907
"""A WeaveFile represents a Weave on disk and writes on change."""
909
WEAVE_SUFFIX = '.weave'
911
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
912
"""Create a WeaveFile.
914
:param create: If not True, only open an existing knit.
916
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
917
allow_reserved=False)
918
self._transport = transport
919
self._filemode = filemode
921
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
922
except errors.NoSuchFile:
928
def _add_lines(self, version_id, parents, lines, parent_texts,
929
left_matching_blocks, nostore_sha, random_id, check_content):
930
"""Add a version and save the weave."""
931
self.check_not_reserved_id(version_id)
932
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
933
parent_texts, left_matching_blocks, nostore_sha, random_id,
938
def copy_to(self, name, transport):
939
"""See VersionedFile.copy_to()."""
940
# as we are all in memory always, just serialise to the new place.
942
write_weave_v5(self, sio)
944
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
947
"""Save the weave."""
948
self._check_write_ok()
950
write_weave_v5(self, sio)
952
bytes = sio.getvalue()
953
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
955
self._transport.put_bytes(path, bytes, self._filemode)
956
except errors.NoSuchFile:
957
self._transport.mkdir(dirname(path))
958
self._transport.put_bytes(path, bytes, self._filemode)
962
"""See VersionedFile.get_suffixes()."""
963
return [WeaveFile.WEAVE_SUFFIX]
965
def insert_record_stream(self, stream):
966
super(WeaveFile, self).insert_record_stream(stream)
969
@deprecated_method(one_five)
970
def join(self, other, pb=None, msg=None, version_ids=None,
971
ignore_missing=False):
972
"""Join other into self and save."""
973
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
977
def _reweave(wa, wb, pb=None, msg=None):
978
"""Combine two weaves and return the result.
980
This works even if a revision R has different parents in
981
wa and wb. In the resulting weave all the parents are given.
983
This is done by just building up a new weave, maintaining ordering
984
of the versions in the two inputs. More efficient approaches
985
might be possible but it should only be necessary to do
986
this operation rarely, when a new previously ghost version is
989
:param pb: An optional progress bar, indicating how far done we are
990
:param msg: An optional message for the progress
994
queue_a = range(wa.num_versions())
995
queue_b = range(wb.num_versions())
996
# first determine combined parents of all versions
997
# map from version name -> all parent names
998
combined_parents = _reweave_parent_graphs(wa, wb)
999
mutter("combined parents: %r", combined_parents)
1000
order = tsort.topo_sort(combined_parents.iteritems())
1001
mutter("order to reweave: %r", order)
1006
for idx, name in enumerate(order):
1008
pb.update(msg, idx, len(order))
1009
if name in wa._name_map:
1010
lines = wa.get_lines(name)
1011
if name in wb._name_map:
1012
lines_b = wb.get_lines(name)
1013
if lines != lines_b:
1014
mutter('Weaves differ on content. rev_id {%s}', name)
1015
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
1017
lines = list(difflib.unified_diff(lines, lines_b,
1018
wa._weave_name, wb._weave_name))
1019
mutter('lines:\n%s', ''.join(lines))
1020
raise errors.WeaveTextDiffers(name, wa, wb)
1022
lines = wb.get_lines(name)
1023
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
1026
def _reweave_parent_graphs(wa, wb):
1027
"""Return combined parent ancestry for two weaves.
1029
Returned as a list of (version_name, set(parent_names))"""
1031
for weave in [wa, wb]:
1032
for idx, name in enumerate(weave._names):
1033
p = combined.setdefault(name, set())
1034
p.update(map(weave._idx_to_name, weave._parents[idx]))
1039
"""Show the weave's table-of-contents"""
1040
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1041
for i in (6, 50, 10, 10):
1044
for i in range(w.num_versions()):
1047
parent_str = ' '.join(map(str, w._parents[i]))
1048
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1052
def weave_stats(weave_file, pb):
1053
from bzrlib.weavefile import read_weave
1055
wf = file(weave_file, 'rb')
428
raise WeaveError("mismatched sha1 for version %d; "
429
"got %s, expected %s"
430
% (version, hd, expected))
434
def merge(self, merge_versions):
435
"""Automerge and mark conflicts between versions.
437
This returns a sequence, each entry describing alternatives
438
for a chunk of the file. Each of the alternatives is given as
441
If there is a chunk of the file where there's no disagreement,
442
only one alternative is given.
445
# approach: find the included versions common to all the
447
raise NotImplementedError()
451
def _delta(self, included, lines):
452
"""Return changes from basis to new revision.
454
The old text for comparison is the union of included revisions.
456
This is used in inserting a new text.
458
Delta is returned as a sequence of
459
(weave1, weave2, newlines).
461
This indicates that weave1:weave2 of the old weave should be
462
replaced by the sequence of lines in newlines. Note that
463
these line numbers are positions in the total weave and don't
464
correspond to the lines in any extracted version, or even the
465
extracted union of included versions.
467
If line1=line2, this is a pure insert; if newlines=[] this is a
468
pure delete. (Similar to difflib.)
470
# basis a list of (origin, lineno, line)
473
for origin, lineno, line in self._extract(included):
474
basis_lineno.append(lineno)
475
basis_lines.append(line)
477
# add a sentinel, because we can also match against the final line
478
basis_lineno.append(len(self._l))
480
# XXX: which line of the weave should we really consider
481
# matches the end of the file? the current code says it's the
482
# last line of the weave?
484
from difflib import SequenceMatcher
485
s = SequenceMatcher(None, basis_lines, lines)
487
# TODO: Perhaps return line numbers from composed weave as well?
489
for tag, i1, i2, j1, j2 in s.get_opcodes():
490
##print tag, i1, i2, j1, j2
495
# i1,i2 are given in offsets within basis_lines; we need to map them
496
# back to offsets within the entire weave
497
real_i1 = basis_lineno[i1]
498
real_i2 = basis_lineno[i2]
502
assert j2 <= len(lines)
504
yield real_i1, real_i2, lines[j1:j2]
508
def weave_info(filename, out):
509
"""Show some text information about the weave."""
510
from weavefile import read_weave
511
wf = file(filename, 'rb')
1056
512
w = read_weave(wf)
1057
513
# FIXME: doesn't work on pipes
1058
514
weave_size = wf.tell()
515
print >>out, "weave file size %d bytes" % weave_size
516
print >>out, "weave contains %d versions" % len(w._v)
1062
for i in range(vers):
1063
pb.update('checking sizes', i, vers)
1064
for origin, lineno, line in w._extract([i]):
1069
print 'versions %9d' % vers
1070
print 'weave file %9d bytes' % weave_size
1071
print 'total contents %9d bytes' % total
1072
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1075
print 'average size %9d bytes' % avg
1076
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1080
print """bzr weave tool
1082
Experimental tool for weave algorithm.
1085
weave init WEAVEFILE
1086
Create an empty weave file
1087
weave get WEAVEFILE VERSION
1088
Write out specified version.
1089
weave check WEAVEFILE
1090
Check consistency of all versions.
1092
Display table of contents.
1093
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1094
Add NEWTEXT, with specified parent versions.
1095
weave annotate WEAVEFILE VERSION
1096
Display origin of each line.
1097
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1098
Auto-merge two versions and display conflicts.
1099
weave diff WEAVEFILE VERSION1 VERSION2
1100
Show differences between two versions.
1104
% weave init foo.weave
1106
% weave add foo.weave ver0 < foo.txt
1109
(create updated version)
1111
% weave get foo.weave 0 | diff -u - foo.txt
1112
% weave add foo.weave ver1 0 < foo.txt
1115
% weave get foo.weave 0 > foo.txt (create forked version)
1117
% weave add foo.weave ver2 0 < foo.txt
1120
% weave merge foo.weave 1 2 > foo.txt (merge them)
1121
% vi foo.txt (resolve conflicts)
1122
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
519
print ' %8s %8s %8s %s' % ('version', 'lines', 'bytes', 'sha1')
520
print ' -------- -------- -------- ----------------------------------------'
521
for i in range(len(w._v)):
524
bytes = sum((len(a) for a in text))
526
print ' %8d %8d %8d %s' % (i, lines, bytes, sha1)
529
print >>out, "versions total %d bytes" % total
530
print >>out, "compression ratio %.3f" % (float(total)/float(weave_size))
1134
# in case we're run directly from the subdirectory
1135
sys.path.append('..')
1137
from bzrlib.weavefile import write_weave, read_weave
1138
from bzrlib.progress import ProgressBar
537
from weavefile import write_weave_v1, read_weave
1153
return read_weave(file(argv[2], 'rb'))
540
w = read_weave(file(argv[2], 'rb'))
1159
541
# at the moment, based on everything in the file
1161
parents = map(int, argv[4:])
542
parents = set(range(len(w._v)))
1162
543
lines = sys.stdin.readlines()
1163
ver = w.add(name, parents, lines)
1164
write_weave(w, file(argv[2], 'wb'))
1165
print 'added version %r %d' % (name, ver)
544
ver = w.add(parents, lines)
545
write_weave_v1(w, file(argv[2], 'wb'))
546
print 'added %d' % ver
1166
547
elif cmd == 'init':
1168
549
if os.path.exists(fn):
1169
550
raise IOError("file exists")
1171
write_weave(w, file(fn, 'wb'))
1172
elif cmd == 'get': # get one version
552
write_weave_v1(w, file(fn, 'wb'))
554
w = read_weave(file(argv[2], 'rb'))
1174
555
sys.stdout.writelines(w.get_iter(int(argv[3])))
1179
v1, v2 = map(int, argv[3:5])
1182
diff_gen = bzrlib.patiencediff.unified_diff(lines1, lines2,
1183
'%s version %d' % (fn, v1),
1184
'%s version %d' % (fn, v2))
1185
sys.stdout.writelines(diff_gen)
1187
556
elif cmd == 'annotate':
557
w = read_weave(file(argv[2], 'rb'))
1189
558
# newline is added to all lines regardless; too hard to get
1190
559
# reasonable formatting otherwise