71
71
from copy import copy
72
72
from cStringIO import StringIO
77
from bzrlib.lazy_import import lazy_import
78
lazy_import(globals(), """
79
from bzrlib import tsort
81
78
from bzrlib import (
81
from bzrlib.trace import mutter
86
82
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
87
83
RevisionAlreadyPresent,
88
84
RevisionNotPresent,
89
UnavailableRepresentation,
90
85
WeaveRevisionAlreadyPresent,
91
86
WeaveRevisionNotPresent,
93
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
88
import bzrlib.errors as errors
89
from bzrlib.osutils import sha_strings
94
90
import bzrlib.patiencediff
95
from bzrlib.revision import NULL_REVISION
96
from bzrlib.symbol_versioning import *
97
from bzrlib.trace import mutter
98
from bzrlib.versionedfile import (
91
from bzrlib.symbol_versioning import (deprecated_method,
95
from bzrlib.tsort import topo_sort
96
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
105
97
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
108
class WeaveContentFactory(ContentFactory):
109
"""Content factory for streaming from weaves.
111
:seealso ContentFactory:
114
def __init__(self, version, weave):
115
"""Create a WeaveContentFactory for version from weave."""
116
ContentFactory.__init__(self)
117
self.sha1 = weave.get_sha1s([version])[version]
118
self.key = (version,)
119
parents = weave.get_parent_map([version])[version]
120
self.parents = tuple((parent,) for parent in parents)
121
self.storage_kind = 'fulltext'
124
def get_bytes_as(self, storage_kind):
125
if storage_kind == 'fulltext':
126
return self._weave.get_text(self.key[-1])
127
elif storage_kind == 'chunked':
128
return self._weave.get_lines(self.key[-1])
130
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
133
100
class Weave(VersionedFile):
134
101
"""weave - versioned text file storage.
136
103
A Weave manages versions of line-based text files, keeping track
137
104
of the originating version for each line.
304
273
__contains__ = has_version
306
def get_record_stream(self, versions, ordering, include_delta_closure):
307
"""Get a stream of records for versions.
309
:param versions: The versions to include. Each version is a tuple
311
:param ordering: Either 'unordered' or 'topological'. A topologically
312
sorted stream has compression parents strictly before their
314
:param include_delta_closure: If True then the closure across any
315
compression parents will be included (in the opaque data).
316
:return: An iterator of ContentFactory objects, each of which is only
317
valid until the iterator is advanced.
319
versions = [version[-1] for version in versions]
320
if ordering == 'topological':
321
parents = self.get_parent_map(versions)
322
new_versions = tsort.topo_sort(parents)
323
new_versions.extend(set(versions).difference(set(parents)))
324
versions = new_versions
325
elif ordering == 'groupcompress':
326
parents = self.get_parent_map(versions)
327
new_versions = sort_groupcompress(parents)
328
new_versions.extend(set(versions).difference(set(parents)))
329
versions = new_versions
330
for version in versions:
332
yield WeaveContentFactory(version, self)
334
yield AbsentContentFactory((version,))
336
def get_parent_map(self, version_ids):
337
"""See VersionedFile.get_parent_map."""
275
def get_delta(self, version_id):
276
"""See VersionedFile.get_delta."""
277
return self.get_deltas([version_id])[version_id]
279
def get_deltas(self, version_ids):
280
"""See VersionedFile.get_deltas."""
281
version_ids = self.get_ancestry(version_ids)
339
282
for version_id in version_ids:
340
if version_id == NULL_REVISION:
345
map(self._idx_to_name,
346
self._parents[self._lookup(version_id)]))
347
except RevisionNotPresent:
283
if not self.has_version(version_id):
284
raise RevisionNotPresent(version_id, self)
285
# try extracting all versions; parallel extraction is used
286
nv = self.num_versions()
292
last_parent_lines = {}
294
parent_inclusions = {}
299
# its simplest to generate a full set of prepared variables.
301
name = self._names[i]
302
sha1s[name] = self.get_sha1(name)
303
parents_list = self.get_parents(name)
305
parent = parents_list[0]
306
parents[name] = parent
307
parent_inclusions[name] = inclusions[parent]
310
parent_inclusions[name] = set()
311
# we want to emit start, finish, replacement_length, replacement_lines tuples.
312
diff_hunks[name] = []
313
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
314
parent_linenums[name] = 0
316
parent_noeols[name] = False
317
last_parent_lines[name] = None
318
new_inc = set([name])
319
for p in self._parents[i]:
320
new_inc.update(inclusions[self._idx_to_name(p)])
321
# debug only, known good so far.
322
#assert set(new_inc) == set(self.get_ancestry(name)), \
323
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
324
inclusions[name] = new_inc
326
nlines = len(self._weave)
328
for lineno, inserted, deletes, line in self._walk_internal():
329
# a line is active in a version if:
330
# insert is in the versions inclusions
332
# deleteset & the versions inclusions is an empty set.
333
# so - if we have a included by mapping - version is included by
334
# children, we get a list of children to examine for deletes affect
335
# ing them, which is less than the entire set of children.
336
for version_id in version_ids:
337
# The active inclusion must be an ancestor,
338
# and no ancestors must have deleted this line,
339
# because we don't support resurrection.
340
parent_inclusion = parent_inclusions[version_id]
341
inclusion = inclusions[version_id]
342
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
343
version_active = inserted in inclusion and not (deletes & inclusion)
344
if not parent_active and not version_active:
345
# unrelated line of ancestry
349
result[version_id] = parents
347
elif parent_active and version_active:
349
parent_linenum = parent_linenums[version_id]
350
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
351
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
353
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
354
parent_linenums[version_id] = parent_linenum
357
noeols[version_id] = True
360
elif parent_active and not version_active:
362
current_hunks[version_id][1] += 1
363
parent_linenums[version_id] += 1
364
last_parent_lines[version_id] = line
365
elif not parent_active and version_active:
367
# noeol only occurs at the end of a file because we
368
# diff linewise. We want to show noeol changes as a
369
# empty diff unless the actual eol-less content changed.
372
if last_parent_lines[version_id][-1] != '\n':
373
parent_noeols[version_id] = True
374
except (TypeError, IndexError):
377
if theline[-1] != '\n':
378
noeols[version_id] = True
382
parent_should_go = False
384
if parent_noeols[version_id] == noeols[version_id]:
385
# no noeol toggle, so trust the weaves statement
386
# that this line is changed.
388
if parent_noeols[version_id]:
389
theline = theline + '\n'
390
elif parent_noeols[version_id]:
391
# parent has no eol, we do:
392
# our line is new, report as such..
394
elif noeols[version_id]:
395
# append a eol so that it looks like
397
theline = theline + '\n'
398
if parents[version_id] is not None:
399
#if last_parent_lines[version_id] is not None:
400
parent_should_go = True
401
if last_parent_lines[version_id] != theline:
404
#parent_should_go = False
406
current_hunks[version_id][2] += 1
407
current_hunks[version_id][3].append((inserted, theline))
409
# last hunk last parent line is not eaten
410
current_hunks[version_id][1] -= 1
411
if current_hunks[version_id][1] < 0:
412
current_hunks[version_id][1] = 0
413
# import pdb;pdb.set_trace()
414
# assert current_hunks[version_id][1] >= 0
418
version = self._idx_to_name(i)
419
if current_hunks[version] != [0, 0, 0, []]:
420
diff_hunks[version].append(tuple(current_hunks[version]))
422
for version_id in version_ids:
423
result[version_id] = (
427
diff_hunks[version_id],
352
def get_parents_with_ghosts(self, version_id):
353
raise NotImplementedError(self.get_parents_with_ghosts)
355
def insert_record_stream(self, stream):
356
"""Insert a record stream into this versioned file.
358
:param stream: A stream of records to insert.
360
:seealso VersionedFile.get_record_stream:
363
for record in stream:
364
# Raise an error when a record is missing.
365
if record.storage_kind == 'absent':
366
raise RevisionNotPresent([record.key[0]], self)
367
# adapt to non-tuple interface
368
parents = [parent[0] for parent in record.parents]
369
if (record.storage_kind == 'fulltext'
370
or record.storage_kind == 'chunked'):
371
self.add_lines(record.key[0], parents,
372
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
374
adapter_key = record.storage_kind, 'fulltext'
376
adapter = adapters[adapter_key]
378
adapter_factory = adapter_registry.get(adapter_key)
379
adapter = adapter_factory(self)
380
adapters[adapter_key] = adapter
381
lines = split_lines(adapter.get_bytes(record))
383
self.add_lines(record.key[0], parents, lines)
384
except RevisionAlreadyPresent:
431
def get_parents(self, version_id):
432
"""See VersionedFile.get_parent."""
433
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
387
435
def _check_repeated_add(self, name, parents, text, sha1):
388
436
"""Check that a duplicated add is OK.
395
443
raise RevisionAlreadyPresent(name, self._weave_name)
398
def _add_lines(self, version_id, parents, lines, parent_texts,
399
left_matching_blocks, nostore_sha, random_id, check_content):
446
@deprecated_method(zero_eight)
447
def add_identical(self, old_rev_id, new_rev_id, parents):
448
"""Please use Weave.clone_text now."""
449
return self.clone_text(new_rev_id, old_rev_id, parents)
451
def _add_lines(self, version_id, parents, lines, parent_texts):
400
452
"""See VersionedFile.add_lines."""
401
idx = self._add(version_id, lines, map(self._lookup, parents),
402
nostore_sha=nostore_sha)
403
return sha_strings(lines), sum(map(len, lines)), idx
405
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
453
return self._add(version_id, lines, map(self._lookup, parents))
455
@deprecated_method(zero_eight)
456
def add(self, name, parents, text, sha1=None):
457
"""See VersionedFile.add_lines for the non deprecated api."""
458
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
460
def _add(self, version_id, lines, parents, sha1=None):
406
461
"""Add a single text on top of the weave.
408
463
Returns the index number of the newly added version.
411
466
Symbolic name for this version.
412
467
(Typically the revision-id of the revision that added it.)
413
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
416
470
List or set of direct parent version numbers.
419
473
Sequence of lines to be added in the new version.
421
:param nostore_sha: See VersionedFile.add_lines.
476
assert isinstance(version_id, basestring)
423
477
self._check_lines_not_unicode(lines)
424
478
self._check_lines_are_lines(lines)
426
480
sha1 = sha_strings(lines)
427
if sha1 == nostore_sha:
428
raise errors.ExistingContent
429
if version_id is None:
430
version_id = "sha1:" + sha1
431
481
if version_id in self._name_map:
432
482
return self._check_repeated_add(version_id, parents, lines, sha1)
564
635
def _compatible_parents(self, my_parents, other_parents):
565
636
"""During join check that other_parents are joinable with my_parents.
567
Joinable is defined as 'is a subset of' - supersets may require
638
Joinable is defined as 'is a subset of' - supersets may require
568
639
regeneration of diffs, but subsets do not.
570
641
return len(other_parents.difference(my_parents)) == 0
572
643
def annotate(self, version_id):
573
"""Return a list of (version-id, line) tuples for version_id.
644
if isinstance(version_id, int):
645
warnings.warn('Weave.annotate(int) is deprecated. Please use version names'
646
' in all circumstances as of 0.8',
651
for origin, lineno, text in self._extract([version_id]):
652
result.append((origin, text))
655
return super(Weave, self).annotate(version_id)
657
def annotate_iter(self, version_id):
658
"""Yield list of (version-id, line) pairs for the specified version.
575
660
The index indicates when the line originated in the weave."""
576
661
incls = [self._lookup(version_id)]
577
return [(self._idx_to_name(origin), text) for origin, lineno, text in
578
self._extract(incls)]
662
for origin, lineno, text in self._extract(incls):
663
yield self._idx_to_name(origin), text
665
@deprecated_method(zero_eight)
667
"""_walk has become visit, a supported api."""
668
return self._walk_internal()
580
670
def iter_lines_added_or_present_in_versions(self, version_ids=None,
855
984
# no lines outside of insertion blocks, that deletions are
856
985
# properly paired, etc.
987
def _join(self, other, pb, msg, version_ids, ignore_missing):
988
"""Worker routine for join()."""
989
if not other.versions():
990
return # nothing to update, easy
993
# versions is never none, InterWeave checks this.
996
# two loops so that we do not change ourselves before verifying it
998
# work through in index order to make sure we get all dependencies
1001
# get the selected versions only that are in other.versions.
1002
version_ids = set(other.versions()).intersection(set(version_ids))
1003
# pull in the referenced graph.
1004
version_ids = other.get_ancestry(version_ids)
1005
pending_graph = [(version, other.get_parents(version)) for
1006
version in version_ids]
1007
for name in topo_sort(pending_graph):
1008
other_idx = other._name_map[name]
1009
# returns True if we have it, False if we need it.
1010
if not self._check_version_consistent(other, other_idx, name):
1011
names_to_join.append((other_idx, name))
1020
for other_idx, name in names_to_join:
1021
# TODO: If all the parents of the other version are already
1022
# present then we can avoid some work by just taking the delta
1023
# and adjusting the offsets.
1024
new_parents = self._imported_parents(other, other_idx)
1025
sha1 = other._sha1s[other_idx]
1030
pb.update(msg, merged, len(names_to_join))
1032
lines = other.get_lines(other_idx)
1033
self._add(name, lines, new_parents, sha1)
1035
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1036
merged, processed, self._weave_name, time.time()-time0))
858
1038
def _imported_parents(self, other, other_idx):
859
1039
"""Return list of parents in self corresponding to indexes in other."""
860
1040
new_parents = []
953
1139
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1141
def create_empty(self, name, transport, filemode=None):
1142
return WeaveFile(name, transport, filemode, create=True)
955
1144
def _save(self):
956
1145
"""Save the weave."""
957
1146
self._check_write_ok()
958
1147
sio = StringIO()
959
1148
write_weave_v5(self, sio)
961
bytes = sio.getvalue()
962
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
964
self._transport.put_bytes(path, bytes, self._filemode)
965
except errors.NoSuchFile:
966
self._transport.mkdir(dirname(path))
967
self._transport.put_bytes(path, bytes, self._filemode)
1150
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
970
1155
def get_suffixes():
971
1156
"""See VersionedFile.get_suffixes()."""
972
1157
return [WeaveFile.WEAVE_SUFFIX]
974
def insert_record_stream(self, stream):
975
super(WeaveFile, self).insert_record_stream(stream)
1159
def join(self, other, pb=None, msg=None, version_ids=None,
1160
ignore_missing=False):
1161
"""Join other into self and save."""
1162
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
1166
@deprecated_function(zero_eight)
1167
def reweave(wa, wb, pb=None, msg=None):
1168
"""reweaving is deprecation, please just use weave.join()."""
1169
_reweave(wa, wb, pb, msg)
979
1171
def _reweave(wa, wb, pb=None, msg=None):
980
1172
"""Combine two weaves and return the result.
982
This works even if a revision R has different parents in
1174
This works even if a revision R has different parents in
983
1175
wa and wb. In the resulting weave all the parents are given.
985
This is done by just building up a new weave, maintaining ordering
1177
This is done by just building up a new weave, maintaining ordering
986
1178
of the versions in the two inputs. More efficient approaches
987
might be possible but it should only be necessary to do
988
this operation rarely, when a new previously ghost version is
1179
might be possible but it should only be necessary to do
1180
this operation rarely, when a new previously ghost version is
991
1183
:param pb: An optional progress bar, indicating how far done we are