69
71
from copy import copy
70
72
from cStringIO import StringIO
73
from bzrlib.lazy_import import lazy_import
74
lazy_import(globals(), """
75
from bzrlib import tsort
77
78
from bzrlib import (
81
from bzrlib.trace import mutter
81
82
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
82
83
RevisionAlreadyPresent,
83
84
RevisionNotPresent,
84
UnavailableRepresentation,
85
WeaveRevisionAlreadyPresent,
86
WeaveRevisionNotPresent,
86
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
88
import bzrlib.errors as errors
89
from bzrlib.osutils import sha_strings
87
90
import bzrlib.patiencediff
88
from bzrlib.revision import NULL_REVISION
89
from bzrlib.symbol_versioning import *
90
from bzrlib.trace import mutter
91
from bzrlib.versionedfile import (
91
from bzrlib.symbol_versioning import (deprecated_method,
95
from bzrlib.tsort import topo_sort
96
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
98
97
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
101
class WeaveContentFactory(ContentFactory):
102
"""Content factory for streaming from weaves.
104
:seealso ContentFactory:
107
def __init__(self, version, weave):
108
"""Create a WeaveContentFactory for version from weave."""
109
ContentFactory.__init__(self)
110
self.sha1 = weave.get_sha1s([version])[version]
111
self.key = (version,)
112
parents = weave.get_parent_map([version])[version]
113
self.parents = tuple((parent,) for parent in parents)
114
self.storage_kind = 'fulltext'
117
def get_bytes_as(self, storage_kind):
118
if storage_kind == 'fulltext':
119
return self._weave.get_text(self.key[-1])
120
elif storage_kind == 'chunked':
121
return self._weave.get_lines(self.key[-1])
123
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
126
100
class Weave(VersionedFile):
127
101
"""weave - versioned text file storage.
129
103
A Weave manages versions of line-based text files, keeping track
130
104
of the originating version for each line.
297
273
__contains__ = has_version
299
def get_record_stream(self, versions, ordering, include_delta_closure):
300
"""Get a stream of records for versions.
302
:param versions: The versions to include. Each version is a tuple
304
:param ordering: Either 'unordered' or 'topological'. A topologically
305
sorted stream has compression parents strictly before their
307
:param include_delta_closure: If True then the closure across any
308
compression parents will be included (in the opaque data).
309
:return: An iterator of ContentFactory objects, each of which is only
310
valid until the iterator is advanced.
312
versions = [version[-1] for version in versions]
313
if ordering == 'topological':
314
parents = self.get_parent_map(versions)
315
new_versions = tsort.topo_sort(parents)
316
new_versions.extend(set(versions).difference(set(parents)))
317
versions = new_versions
318
elif ordering == 'groupcompress':
319
parents = self.get_parent_map(versions)
320
new_versions = sort_groupcompress(parents)
321
new_versions.extend(set(versions).difference(set(parents)))
322
versions = new_versions
323
for version in versions:
325
yield WeaveContentFactory(version, self)
327
yield AbsentContentFactory((version,))
329
def get_parent_map(self, version_ids):
330
"""See VersionedFile.get_parent_map."""
275
def get_delta(self, version_id):
276
"""See VersionedFile.get_delta."""
277
return self.get_deltas([version_id])[version_id]
279
def get_deltas(self, version_ids):
280
"""See VersionedFile.get_deltas."""
281
version_ids = self.get_ancestry(version_ids)
332
282
for version_id in version_ids:
333
if version_id == NULL_REVISION:
338
map(self._idx_to_name,
339
self._parents[self._lookup(version_id)]))
340
except RevisionNotPresent:
283
if not self.has_version(version_id):
284
raise RevisionNotPresent(version_id, self)
285
# try extracting all versions; parallel extraction is used
286
nv = self.num_versions()
292
last_parent_lines = {}
294
parent_inclusions = {}
299
# its simplest to generate a full set of prepared variables.
301
name = self._names[i]
302
sha1s[name] = self.get_sha1(name)
303
parents_list = self.get_parents(name)
305
parent = parents_list[0]
306
parents[name] = parent
307
parent_inclusions[name] = inclusions[parent]
310
parent_inclusions[name] = set()
311
# we want to emit start, finish, replacement_length, replacement_lines tuples.
312
diff_hunks[name] = []
313
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
314
parent_linenums[name] = 0
316
parent_noeols[name] = False
317
last_parent_lines[name] = None
318
new_inc = set([name])
319
for p in self._parents[i]:
320
new_inc.update(inclusions[self._idx_to_name(p)])
321
# debug only, known good so far.
322
#assert set(new_inc) == set(self.get_ancestry(name)), \
323
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
324
inclusions[name] = new_inc
326
nlines = len(self._weave)
328
for lineno, inserted, deletes, line in self._walk_internal():
329
# a line is active in a version if:
330
# insert is in the versions inclusions
332
# deleteset & the versions inclusions is an empty set.
333
# so - if we have a included by mapping - version is included by
334
# children, we get a list of children to examine for deletes affect
335
# ing them, which is less than the entire set of children.
336
for version_id in version_ids:
337
# The active inclusion must be an ancestor,
338
# and no ancestors must have deleted this line,
339
# because we don't support resurrection.
340
parent_inclusion = parent_inclusions[version_id]
341
inclusion = inclusions[version_id]
342
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
343
version_active = inserted in inclusion and not (deletes & inclusion)
344
if not parent_active and not version_active:
345
# unrelated line of ancestry
342
result[version_id] = parents
347
elif parent_active and version_active:
349
parent_linenum = parent_linenums[version_id]
350
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
351
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
353
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
354
parent_linenums[version_id] = parent_linenum
357
noeols[version_id] = True
360
elif parent_active and not version_active:
362
current_hunks[version_id][1] += 1
363
parent_linenums[version_id] += 1
364
last_parent_lines[version_id] = line
365
elif not parent_active and version_active:
367
# noeol only occurs at the end of a file because we
368
# diff linewise. We want to show noeol changes as a
369
# empty diff unless the actual eol-less content changed.
372
if last_parent_lines[version_id][-1] != '\n':
373
parent_noeols[version_id] = True
374
except (TypeError, IndexError):
377
if theline[-1] != '\n':
378
noeols[version_id] = True
382
parent_should_go = False
384
if parent_noeols[version_id] == noeols[version_id]:
385
# no noeol toggle, so trust the weaves statement
386
# that this line is changed.
388
if parent_noeols[version_id]:
389
theline = theline + '\n'
390
elif parent_noeols[version_id]:
391
# parent has no eol, we do:
392
# our line is new, report as such..
394
elif noeols[version_id]:
395
# append a eol so that it looks like
397
theline = theline + '\n'
398
if parents[version_id] is not None:
399
#if last_parent_lines[version_id] is not None:
400
parent_should_go = True
401
if last_parent_lines[version_id] != theline:
404
#parent_should_go = False
406
current_hunks[version_id][2] += 1
407
current_hunks[version_id][3].append((inserted, theline))
409
# last hunk last parent line is not eaten
410
current_hunks[version_id][1] -= 1
411
if current_hunks[version_id][1] < 0:
412
current_hunks[version_id][1] = 0
413
# import pdb;pdb.set_trace()
414
# assert current_hunks[version_id][1] >= 0
418
version = self._idx_to_name(i)
419
if current_hunks[version] != [0, 0, 0, []]:
420
diff_hunks[version].append(tuple(current_hunks[version]))
422
for version_id in version_ids:
423
result[version_id] = (
427
diff_hunks[version_id],
345
def get_parents_with_ghosts(self, version_id):
346
raise NotImplementedError(self.get_parents_with_ghosts)
348
def insert_record_stream(self, stream):
349
"""Insert a record stream into this versioned file.
351
:param stream: A stream of records to insert.
353
:seealso VersionedFile.get_record_stream:
356
for record in stream:
357
# Raise an error when a record is missing.
358
if record.storage_kind == 'absent':
359
raise RevisionNotPresent([record.key[0]], self)
360
# adapt to non-tuple interface
361
parents = [parent[0] for parent in record.parents]
362
if (record.storage_kind == 'fulltext'
363
or record.storage_kind == 'chunked'):
364
self.add_lines(record.key[0], parents,
365
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
367
adapter_key = record.storage_kind, 'fulltext'
369
adapter = adapters[adapter_key]
371
adapter_factory = adapter_registry.get(adapter_key)
372
adapter = adapter_factory(self)
373
adapters[adapter_key] = adapter
374
lines = split_lines(adapter.get_bytes(record))
376
self.add_lines(record.key[0], parents, lines)
377
except RevisionAlreadyPresent:
431
def get_parents(self, version_id):
432
"""See VersionedFile.get_parent."""
433
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
380
435
def _check_repeated_add(self, name, parents, text, sha1):
381
436
"""Check that a duplicated add is OK.
388
443
raise RevisionAlreadyPresent(name, self._weave_name)
391
def _add_lines(self, version_id, parents, lines, parent_texts,
392
left_matching_blocks, nostore_sha, random_id, check_content):
446
@deprecated_method(zero_eight)
447
def add_identical(self, old_rev_id, new_rev_id, parents):
448
"""Please use Weave.clone_text now."""
449
return self.clone_text(new_rev_id, old_rev_id, parents)
451
def _add_lines(self, version_id, parents, lines, parent_texts):
393
452
"""See VersionedFile.add_lines."""
394
idx = self._add(version_id, lines, map(self._lookup, parents),
395
nostore_sha=nostore_sha)
396
return sha_strings(lines), sum(map(len, lines)), idx
398
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
453
return self._add(version_id, lines, map(self._lookup, parents))
455
@deprecated_method(zero_eight)
456
def add(self, name, parents, text, sha1=None):
457
"""See VersionedFile.add_lines for the non deprecated api."""
458
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
460
def _add(self, version_id, lines, parents, sha1=None):
399
461
"""Add a single text on top of the weave.
401
463
Returns the index number of the newly added version.
404
466
Symbolic name for this version.
405
467
(Typically the revision-id of the revision that added it.)
406
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
409
470
List or set of direct parent version numbers.
412
473
Sequence of lines to be added in the new version.
414
:param nostore_sha: See VersionedFile.add_lines.
476
assert isinstance(version_id, basestring)
416
477
self._check_lines_not_unicode(lines)
417
478
self._check_lines_are_lines(lines)
419
480
sha1 = sha_strings(lines)
420
if sha1 == nostore_sha:
421
raise errors.ExistingContent
422
if version_id is None:
423
version_id = "sha1:" + sha1
424
481
if version_id in self._name_map:
425
482
return self._check_repeated_add(version_id, parents, lines, sha1)
557
635
def _compatible_parents(self, my_parents, other_parents):
558
636
"""During join check that other_parents are joinable with my_parents.
560
Joinable is defined as 'is a subset of' - supersets may require
638
Joinable is defined as 'is a subset of' - supersets may require
561
639
regeneration of diffs, but subsets do not.
563
641
return len(other_parents.difference(my_parents)) == 0
565
643
def annotate(self, version_id):
566
"""Return a list of (version-id, line) tuples for version_id.
644
if isinstance(version_id, int):
645
warnings.warn('Weave.annotate(int) is deprecated. Please use version names'
646
' in all circumstances as of 0.8',
651
for origin, lineno, text in self._extract([version_id]):
652
result.append((origin, text))
655
return super(Weave, self).annotate(version_id)
657
def annotate_iter(self, version_id):
658
"""Yield list of (version-id, line) pairs for the specified version.
568
660
The index indicates when the line originated in the weave."""
569
661
incls = [self._lookup(version_id)]
570
return [(self._idx_to_name(origin), text) for origin, lineno, text in
571
self._extract(incls)]
662
for origin, lineno, text in self._extract(incls):
663
yield self._idx_to_name(origin), text
665
@deprecated_method(zero_eight)
667
"""_walk has become visit, a supported api."""
668
return self._walk_internal()
573
670
def iter_lines_added_or_present_in_versions(self, version_ids=None,
848
984
# no lines outside of insertion blocks, that deletions are
849
985
# properly paired, etc.
987
def _join(self, other, pb, msg, version_ids, ignore_missing):
988
"""Worker routine for join()."""
989
if not other.versions():
990
return # nothing to update, easy
993
# versions is never none, InterWeave checks this.
996
# two loops so that we do not change ourselves before verifying it
998
# work through in index order to make sure we get all dependencies
1001
# get the selected versions only that are in other.versions.
1002
version_ids = set(other.versions()).intersection(set(version_ids))
1003
# pull in the referenced graph.
1004
version_ids = other.get_ancestry(version_ids)
1005
pending_graph = [(version, other.get_parents(version)) for
1006
version in version_ids]
1007
for name in topo_sort(pending_graph):
1008
other_idx = other._name_map[name]
1009
# returns True if we have it, False if we need it.
1010
if not self._check_version_consistent(other, other_idx, name):
1011
names_to_join.append((other_idx, name))
1020
for other_idx, name in names_to_join:
1021
# TODO: If all the parents of the other version are already
1022
# present then we can avoid some work by just taking the delta
1023
# and adjusting the offsets.
1024
new_parents = self._imported_parents(other, other_idx)
1025
sha1 = other._sha1s[other_idx]
1030
pb.update(msg, merged, len(names_to_join))
1032
lines = other.get_lines(other_idx)
1033
self._add(name, lines, new_parents, sha1)
1035
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1036
merged, processed, self._weave_name, time.time()-time0))
851
1038
def _imported_parents(self, other, other_idx):
852
1039
"""Return list of parents in self corresponding to indexes in other."""
853
1040
new_parents = []
946
1139
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1141
def create_empty(self, name, transport, filemode=None):
1142
return WeaveFile(name, transport, filemode, create=True)
948
1144
def _save(self):
949
1145
"""Save the weave."""
950
1146
self._check_write_ok()
951
1147
sio = StringIO()
952
1148
write_weave_v5(self, sio)
954
bytes = sio.getvalue()
955
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
957
self._transport.put_bytes(path, bytes, self._filemode)
958
except errors.NoSuchFile:
959
self._transport.mkdir(dirname(path))
960
self._transport.put_bytes(path, bytes, self._filemode)
1150
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
963
1155
def get_suffixes():
964
1156
"""See VersionedFile.get_suffixes()."""
965
1157
return [WeaveFile.WEAVE_SUFFIX]
967
def insert_record_stream(self, stream):
968
super(WeaveFile, self).insert_record_stream(stream)
1159
def join(self, other, pb=None, msg=None, version_ids=None,
1160
ignore_missing=False):
1161
"""Join other into self and save."""
1162
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
1166
@deprecated_function(zero_eight)
1167
def reweave(wa, wb, pb=None, msg=None):
1168
"""reweaving is deprecation, please just use weave.join()."""
1169
_reweave(wa, wb, pb, msg)
972
1171
def _reweave(wa, wb, pb=None, msg=None):
973
1172
"""Combine two weaves and return the result.
975
This works even if a revision R has different parents in
1174
This works even if a revision R has different parents in
976
1175
wa and wb. In the resulting weave all the parents are given.
978
This is done by just building up a new weave, maintaining ordering
1177
This is done by just building up a new weave, maintaining ordering
979
1178
of the versions in the two inputs. More efficient approaches
980
might be possible but it should only be necessary to do
981
this operation rarely, when a new previously ghost version is
1179
might be possible but it should only be necessary to do
1180
this operation rarely, when a new previously ghost version is
984
1183
:param pb: An optional progress bar, indicating how far done we are
1029
1227
p = combined.setdefault(name, set())
1030
1228
p.update(map(weave._idx_to_name, weave._parents[idx]))
1031
1229
return combined
1233
"""Show the weave's table-of-contents"""
1234
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1235
for i in (6, 50, 10, 10):
1238
for i in range(w.num_versions()):
1241
parent_str = ' '.join(map(str, w._parents[i]))
1242
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1246
def weave_stats(weave_file, pb):
1247
from bzrlib.weavefile import read_weave
1249
wf = file(weave_file, 'rb')
1251
# FIXME: doesn't work on pipes
1252
weave_size = wf.tell()
1256
for i in range(vers):
1257
pb.update('checking sizes', i, vers)
1258
for origin, lineno, line in w._extract([i]):
1263
print 'versions %9d' % vers
1264
print 'weave file %9d bytes' % weave_size
1265
print 'total contents %9d bytes' % total
1266
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1269
print 'average size %9d bytes' % avg
1270
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1274
print """bzr weave tool
1276
Experimental tool for weave algorithm.
1279
weave init WEAVEFILE
1280
Create an empty weave file
1281
weave get WEAVEFILE VERSION
1282
Write out specified version.
1283
weave check WEAVEFILE
1284
Check consistency of all versions.
1286
Display table of contents.
1287
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1288
Add NEWTEXT, with specified parent versions.
1289
weave annotate WEAVEFILE VERSION
1290
Display origin of each line.
1291
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1292
Auto-merge two versions and display conflicts.
1293
weave diff WEAVEFILE VERSION1 VERSION2
1294
Show differences between two versions.
1298
% weave init foo.weave
1300
% weave add foo.weave ver0 < foo.txt
1303
(create updated version)
1305
% weave get foo.weave 0 | diff -u - foo.txt
1306
% weave add foo.weave ver1 0 < foo.txt
1309
% weave get foo.weave 0 > foo.txt (create forked version)
1311
% weave add foo.weave ver2 0 < foo.txt
1314
% weave merge foo.weave 1 2 > foo.txt (merge them)
1315
% vi foo.txt (resolve conflicts)
1316
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1328
# in case we're run directly from the subdirectory
1329
sys.path.append('..')
1331
from bzrlib.weavefile import write_weave, read_weave
1332
from bzrlib.progress import ProgressBar
1347
return read_weave(file(argv[2], 'rb'))
1353
# at the moment, based on everything in the file
1355
parents = map(int, argv[4:])
1356
lines = sys.stdin.readlines()
1357
ver = w.add(name, parents, lines)
1358
write_weave(w, file(argv[2], 'wb'))
1359
print 'added version %r %d' % (name, ver)
1362
if os.path.exists(fn):
1363
raise IOError("file exists")
1365
write_weave(w, file(fn, 'wb'))
1366
elif cmd == 'get': # get one version
1368
sys.stdout.writelines(w.get_iter(int(argv[3])))
1373
v1, v2 = map(int, argv[3:5])
1376
diff_gen = bzrlib.patiencediff.unified_diff(lines1, lines2,
1377
'%s version %d' % (fn, v1),
1378
'%s version %d' % (fn, v2))
1379
sys.stdout.writelines(diff_gen)
1381
elif cmd == 'annotate':
1383
# newline is added to all lines regardless; too hard to get
1384
# reasonable formatting otherwise
1386
for origin, text in w.annotate(int(argv[3])):
1387
text = text.rstrip('\r\n')
1389
print ' | %s' % (text)
1391
print '%5d | %s' % (origin, text)
1397
elif cmd == 'stats':
1398
weave_stats(argv[2], ProgressBar())
1400
elif cmd == 'check':
1405
print '%d versions ok' % w.num_versions()
1407
elif cmd == 'inclusions':
1409
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1411
elif cmd == 'parents':
1413
print ' '.join(map(str, w._parents[int(argv[3])]))
1415
elif cmd == 'plan-merge':
1416
# replaced by 'bzr weave-plan-merge'
1418
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1420
print '%14s | %s' % (state, line),
1421
elif cmd == 'merge':
1422
# replaced by 'bzr weave-merge-text'
1424
p = w.plan_merge(int(argv[3]), int(argv[4]))
1425
sys.stdout.writelines(w.weave_merge(p))
1427
raise ValueError('unknown command %r' % cmd)
1430
if __name__ == '__main__':
1432
sys.exit(main(sys.argv))
1435
class InterWeave(InterVersionedFile):
1436
"""Optimised code paths for weave to weave operations."""
1438
_matching_file_from_factory = staticmethod(WeaveFile)
1439
_matching_file_to_factory = staticmethod(WeaveFile)
1442
def is_compatible(source, target):
1443
"""Be compatible with weaves."""
1445
return (isinstance(source, Weave) and
1446
isinstance(target, Weave))
1447
except AttributeError:
1450
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1451
"""See InterVersionedFile.join."""
1452
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1453
if self.target.versions() == [] and version_ids is None:
1454
self.target._copy_weave_content(self.source)
1457
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1458
except errors.WeaveParentMismatch:
1459
self.target._reweave(self.source, pb, msg)
1462
InterVersionedFile.register_optimiser(InterWeave)