71
69
from copy import copy
72
70
from cStringIO import StringIO
73
from bzrlib.lazy_import import lazy_import
74
lazy_import(globals(), """
75
from bzrlib import tsort
78
77
from bzrlib import (
81
from bzrlib.trace import mutter
82
81
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
83
82
RevisionAlreadyPresent,
84
83
RevisionNotPresent,
85
WeaveRevisionAlreadyPresent,
86
WeaveRevisionNotPresent,
84
UnavailableRepresentation,
88
import bzrlib.errors as errors
89
from bzrlib.osutils import sha_strings
86
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
90
87
import bzrlib.patiencediff
91
from bzrlib.symbol_versioning import (deprecated_method,
95
from bzrlib.tsort import topo_sort
96
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
88
from bzrlib.revision import NULL_REVISION
89
from bzrlib.symbol_versioning import *
90
from bzrlib.trace import mutter
91
from bzrlib.versionedfile import (
97
98
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
101
class WeaveContentFactory(ContentFactory):
102
"""Content factory for streaming from weaves.
104
:seealso ContentFactory:
107
def __init__(self, version, weave):
108
"""Create a WeaveContentFactory for version from weave."""
109
ContentFactory.__init__(self)
110
self.sha1 = weave.get_sha1s([version])[version]
111
self.key = (version,)
112
parents = weave.get_parent_map([version])[version]
113
self.parents = tuple((parent,) for parent in parents)
114
self.storage_kind = 'fulltext'
117
def get_bytes_as(self, storage_kind):
118
if storage_kind == 'fulltext':
119
return self._weave.get_text(self.key[-1])
120
elif storage_kind == 'chunked':
121
return self._weave.get_lines(self.key[-1])
123
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
100
126
class Weave(VersionedFile):
101
127
"""weave - versioned text file storage.
103
129
A Weave manages versions of line-based text files, keeping track
104
130
of the originating version for each line.
273
297
__contains__ = has_version
275
def get_delta(self, version_id):
276
"""See VersionedFile.get_delta."""
277
return self.get_deltas([version_id])[version_id]
279
def get_deltas(self, version_ids):
280
"""See VersionedFile.get_deltas."""
281
version_ids = self.get_ancestry(version_ids)
299
def get_record_stream(self, versions, ordering, include_delta_closure):
300
"""Get a stream of records for versions.
302
:param versions: The versions to include. Each version is a tuple
304
:param ordering: Either 'unordered' or 'topological'. A topologically
305
sorted stream has compression parents strictly before their
307
:param include_delta_closure: If True then the closure across any
308
compression parents will be included (in the opaque data).
309
:return: An iterator of ContentFactory objects, each of which is only
310
valid until the iterator is advanced.
312
versions = [version[-1] for version in versions]
313
if ordering == 'topological':
314
parents = self.get_parent_map(versions)
315
new_versions = tsort.topo_sort(parents)
316
new_versions.extend(set(versions).difference(set(parents)))
317
versions = new_versions
318
elif ordering == 'groupcompress':
319
parents = self.get_parent_map(versions)
320
new_versions = sort_groupcompress(parents)
321
new_versions.extend(set(versions).difference(set(parents)))
322
versions = new_versions
323
for version in versions:
325
yield WeaveContentFactory(version, self)
327
yield AbsentContentFactory((version,))
329
def get_parent_map(self, version_ids):
330
"""See VersionedFile.get_parent_map."""
282
332
for version_id in version_ids:
283
if not self.has_version(version_id):
284
raise RevisionNotPresent(version_id, self)
285
# try extracting all versions; parallel extraction is used
286
nv = self.num_versions()
292
last_parent_lines = {}
294
parent_inclusions = {}
299
# its simplest to generate a full set of prepared variables.
301
name = self._names[i]
302
sha1s[name] = self.get_sha1(name)
303
parents_list = self.get_parents(name)
305
parent = parents_list[0]
306
parents[name] = parent
307
parent_inclusions[name] = inclusions[parent]
310
parent_inclusions[name] = set()
311
# we want to emit start, finish, replacement_length, replacement_lines tuples.
312
diff_hunks[name] = []
313
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
314
parent_linenums[name] = 0
316
parent_noeols[name] = False
317
last_parent_lines[name] = None
318
new_inc = set([name])
319
for p in self._parents[i]:
320
new_inc.update(inclusions[self._idx_to_name(p)])
321
# debug only, known good so far.
322
#assert set(new_inc) == set(self.get_ancestry(name)), \
323
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
324
inclusions[name] = new_inc
326
nlines = len(self._weave)
328
for lineno, inserted, deletes, line in self._walk_internal():
329
# a line is active in a version if:
330
# insert is in the versions inclusions
332
# deleteset & the versions inclusions is an empty set.
333
# so - if we have a included by mapping - version is included by
334
# children, we get a list of children to examine for deletes affect
335
# ing them, which is less than the entire set of children.
336
for version_id in version_ids:
337
# The active inclusion must be an ancestor,
338
# and no ancestors must have deleted this line,
339
# because we don't support resurrection.
340
parent_inclusion = parent_inclusions[version_id]
341
inclusion = inclusions[version_id]
342
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
343
version_active = inserted in inclusion and not (deletes & inclusion)
344
if not parent_active and not version_active:
345
# unrelated line of ancestry
333
if version_id == NULL_REVISION:
338
map(self._idx_to_name,
339
self._parents[self._lookup(version_id)]))
340
except RevisionNotPresent:
347
elif parent_active and version_active:
349
parent_linenum = parent_linenums[version_id]
350
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
351
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
353
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
354
parent_linenums[version_id] = parent_linenum
357
noeols[version_id] = True
360
elif parent_active and not version_active:
362
current_hunks[version_id][1] += 1
363
parent_linenums[version_id] += 1
364
last_parent_lines[version_id] = line
365
elif not parent_active and version_active:
367
# noeol only occurs at the end of a file because we
368
# diff linewise. We want to show noeol changes as a
369
# empty diff unless the actual eol-less content changed.
372
if last_parent_lines[version_id][-1] != '\n':
373
parent_noeols[version_id] = True
374
except (TypeError, IndexError):
377
if theline[-1] != '\n':
378
noeols[version_id] = True
382
parent_should_go = False
384
if parent_noeols[version_id] == noeols[version_id]:
385
# no noeol toggle, so trust the weaves statement
386
# that this line is changed.
388
if parent_noeols[version_id]:
389
theline = theline + '\n'
390
elif parent_noeols[version_id]:
391
# parent has no eol, we do:
392
# our line is new, report as such..
394
elif noeols[version_id]:
395
# append a eol so that it looks like
397
theline = theline + '\n'
398
if parents[version_id] is not None:
399
#if last_parent_lines[version_id] is not None:
400
parent_should_go = True
401
if last_parent_lines[version_id] != theline:
404
#parent_should_go = False
406
current_hunks[version_id][2] += 1
407
current_hunks[version_id][3].append((inserted, theline))
409
# last hunk last parent line is not eaten
410
current_hunks[version_id][1] -= 1
411
if current_hunks[version_id][1] < 0:
412
current_hunks[version_id][1] = 0
413
# import pdb;pdb.set_trace()
414
# assert current_hunks[version_id][1] >= 0
418
version = self._idx_to_name(i)
419
if current_hunks[version] != [0, 0, 0, []]:
420
diff_hunks[version].append(tuple(current_hunks[version]))
422
for version_id in version_ids:
423
result[version_id] = (
427
diff_hunks[version_id],
342
result[version_id] = parents
431
def get_parents(self, version_id):
432
"""See VersionedFile.get_parent."""
433
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
345
def get_parents_with_ghosts(self, version_id):
346
raise NotImplementedError(self.get_parents_with_ghosts)
348
def insert_record_stream(self, stream):
349
"""Insert a record stream into this versioned file.
351
:param stream: A stream of records to insert.
353
:seealso VersionedFile.get_record_stream:
356
for record in stream:
357
# Raise an error when a record is missing.
358
if record.storage_kind == 'absent':
359
raise RevisionNotPresent([record.key[0]], self)
360
# adapt to non-tuple interface
361
parents = [parent[0] for parent in record.parents]
362
if (record.storage_kind == 'fulltext'
363
or record.storage_kind == 'chunked'):
364
self.add_lines(record.key[0], parents,
365
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
367
adapter_key = record.storage_kind, 'fulltext'
369
adapter = adapters[adapter_key]
371
adapter_factory = adapter_registry.get(adapter_key)
372
adapter = adapter_factory(self)
373
adapters[adapter_key] = adapter
374
lines = split_lines(adapter.get_bytes(record))
376
self.add_lines(record.key[0], parents, lines)
377
except RevisionAlreadyPresent:
435
380
def _check_repeated_add(self, name, parents, text, sha1):
436
381
"""Check that a duplicated add is OK.
443
388
raise RevisionAlreadyPresent(name, self._weave_name)
446
@deprecated_method(zero_eight)
447
def add_identical(self, old_rev_id, new_rev_id, parents):
448
"""Please use Weave.clone_text now."""
449
return self.clone_text(new_rev_id, old_rev_id, parents)
451
def _add_lines(self, version_id, parents, lines, parent_texts):
391
def _add_lines(self, version_id, parents, lines, parent_texts,
392
left_matching_blocks, nostore_sha, random_id, check_content):
452
393
"""See VersionedFile.add_lines."""
453
return self._add(version_id, lines, map(self._lookup, parents))
455
@deprecated_method(zero_eight)
456
def add(self, name, parents, text, sha1=None):
457
"""See VersionedFile.add_lines for the non deprecated api."""
458
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
460
def _add(self, version_id, lines, parents, sha1=None):
394
idx = self._add(version_id, lines, map(self._lookup, parents),
395
nostore_sha=nostore_sha)
396
return sha_strings(lines), sum(map(len, lines)), idx
398
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
461
399
"""Add a single text on top of the weave.
463
401
Returns the index number of the newly added version.
466
404
Symbolic name for this version.
467
405
(Typically the revision-id of the revision that added it.)
406
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
470
409
List or set of direct parent version numbers.
473
412
Sequence of lines to be added in the new version.
414
:param nostore_sha: See VersionedFile.add_lines.
476
assert isinstance(version_id, basestring)
477
416
self._check_lines_not_unicode(lines)
478
417
self._check_lines_are_lines(lines)
480
419
sha1 = sha_strings(lines)
420
if sha1 == nostore_sha:
421
raise errors.ExistingContent
422
if version_id is None:
423
version_id = "sha1:" + sha1
481
424
if version_id in self._name_map:
482
425
return self._check_repeated_add(version_id, parents, lines, sha1)
635
557
def _compatible_parents(self, my_parents, other_parents):
636
558
"""During join check that other_parents are joinable with my_parents.
638
Joinable is defined as 'is a subset of' - supersets may require
560
Joinable is defined as 'is a subset of' - supersets may require
639
561
regeneration of diffs, but subsets do not.
641
563
return len(other_parents.difference(my_parents)) == 0
643
565
def annotate(self, version_id):
644
if isinstance(version_id, int):
645
warnings.warn('Weave.annotate(int) is deprecated. Please use version names'
646
' in all circumstances as of 0.8',
651
for origin, lineno, text in self._extract([version_id]):
652
result.append((origin, text))
655
return super(Weave, self).annotate(version_id)
657
def annotate_iter(self, version_id):
658
"""Yield list of (version-id, line) pairs for the specified version.
566
"""Return a list of (version-id, line) tuples for version_id.
660
568
The index indicates when the line originated in the weave."""
661
569
incls = [self._lookup(version_id)]
662
for origin, lineno, text in self._extract(incls):
663
yield self._idx_to_name(origin), text
665
@deprecated_method(zero_eight)
667
"""_walk has become visit, a supported api."""
668
return self._walk_internal()
570
return [(self._idx_to_name(origin), text) for origin, lineno, text in
571
self._extract(incls)]
670
573
def iter_lines_added_or_present_in_versions(self, version_ids=None,
984
848
# no lines outside of insertion blocks, that deletions are
985
849
# properly paired, etc.
987
def _join(self, other, pb, msg, version_ids, ignore_missing):
988
"""Worker routine for join()."""
989
if not other.versions():
990
return # nothing to update, easy
993
# versions is never none, InterWeave checks this.
996
# two loops so that we do not change ourselves before verifying it
998
# work through in index order to make sure we get all dependencies
1001
# get the selected versions only that are in other.versions.
1002
version_ids = set(other.versions()).intersection(set(version_ids))
1003
# pull in the referenced graph.
1004
version_ids = other.get_ancestry(version_ids)
1005
pending_graph = [(version, other.get_parents(version)) for
1006
version in version_ids]
1007
for name in topo_sort(pending_graph):
1008
other_idx = other._name_map[name]
1009
# returns True if we have it, False if we need it.
1010
if not self._check_version_consistent(other, other_idx, name):
1011
names_to_join.append((other_idx, name))
1020
for other_idx, name in names_to_join:
1021
# TODO: If all the parents of the other version are already
1022
# present then we can avoid some work by just taking the delta
1023
# and adjusting the offsets.
1024
new_parents = self._imported_parents(other, other_idx)
1025
sha1 = other._sha1s[other_idx]
1030
pb.update(msg, merged, len(names_to_join))
1032
lines = other.get_lines(other_idx)
1033
self._add(name, lines, new_parents, sha1)
1035
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1036
merged, processed, self._weave_name, time.time()-time0))
1038
851
def _imported_parents(self, other, other_idx):
1039
852
"""Return list of parents in self corresponding to indexes in other."""
1040
853
new_parents = []
1139
946
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1141
def create_empty(self, name, transport, filemode=None):
1142
return WeaveFile(name, transport, filemode, create=True)
1144
948
def _save(self):
1145
949
"""Save the weave."""
1146
950
self._check_write_ok()
1147
951
sio = StringIO()
1148
952
write_weave_v5(self, sio)
1150
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
954
bytes = sio.getvalue()
955
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
957
self._transport.put_bytes(path, bytes, self._filemode)
958
except errors.NoSuchFile:
959
self._transport.mkdir(dirname(path))
960
self._transport.put_bytes(path, bytes, self._filemode)
1155
963
def get_suffixes():
1156
964
"""See VersionedFile.get_suffixes()."""
1157
965
return [WeaveFile.WEAVE_SUFFIX]
1159
def join(self, other, pb=None, msg=None, version_ids=None,
1160
ignore_missing=False):
1161
"""Join other into self and save."""
1162
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
967
def insert_record_stream(self, stream):
968
super(WeaveFile, self).insert_record_stream(stream)
1166
@deprecated_function(zero_eight)
1167
def reweave(wa, wb, pb=None, msg=None):
1168
"""reweaving is deprecation, please just use weave.join()."""
1169
_reweave(wa, wb, pb, msg)
1171
972
def _reweave(wa, wb, pb=None, msg=None):
1172
973
"""Combine two weaves and return the result.
1174
This works even if a revision R has different parents in
975
This works even if a revision R has different parents in
1175
976
wa and wb. In the resulting weave all the parents are given.
1177
This is done by just building up a new weave, maintaining ordering
978
This is done by just building up a new weave, maintaining ordering
1178
979
of the versions in the two inputs. More efficient approaches
1179
might be possible but it should only be necessary to do
1180
this operation rarely, when a new previously ghost version is
980
might be possible but it should only be necessary to do
981
this operation rarely, when a new previously ghost version is
1183
984
:param pb: An optional progress bar, indicating how far done we are
1227
1029
p = combined.setdefault(name, set())
1228
1030
p.update(map(weave._idx_to_name, weave._parents[idx]))
1229
1031
return combined
1233
"""Show the weave's table-of-contents"""
1234
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1235
for i in (6, 50, 10, 10):
1238
for i in range(w.num_versions()):
1241
parent_str = ' '.join(map(str, w._parents[i]))
1242
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1246
def weave_stats(weave_file, pb):
1247
from bzrlib.weavefile import read_weave
1249
wf = file(weave_file, 'rb')
1251
# FIXME: doesn't work on pipes
1252
weave_size = wf.tell()
1256
for i in range(vers):
1257
pb.update('checking sizes', i, vers)
1258
for origin, lineno, line in w._extract([i]):
1263
print 'versions %9d' % vers
1264
print 'weave file %9d bytes' % weave_size
1265
print 'total contents %9d bytes' % total
1266
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1269
print 'average size %9d bytes' % avg
1270
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1274
print """bzr weave tool
1276
Experimental tool for weave algorithm.
1279
weave init WEAVEFILE
1280
Create an empty weave file
1281
weave get WEAVEFILE VERSION
1282
Write out specified version.
1283
weave check WEAVEFILE
1284
Check consistency of all versions.
1286
Display table of contents.
1287
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1288
Add NEWTEXT, with specified parent versions.
1289
weave annotate WEAVEFILE VERSION
1290
Display origin of each line.
1291
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1292
Auto-merge two versions and display conflicts.
1293
weave diff WEAVEFILE VERSION1 VERSION2
1294
Show differences between two versions.
1298
% weave init foo.weave
1300
% weave add foo.weave ver0 < foo.txt
1303
(create updated version)
1305
% weave get foo.weave 0 | diff -u - foo.txt
1306
% weave add foo.weave ver1 0 < foo.txt
1309
% weave get foo.weave 0 > foo.txt (create forked version)
1311
% weave add foo.weave ver2 0 < foo.txt
1314
% weave merge foo.weave 1 2 > foo.txt (merge them)
1315
% vi foo.txt (resolve conflicts)
1316
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1328
# in case we're run directly from the subdirectory
1329
sys.path.append('..')
1331
from bzrlib.weavefile import write_weave, read_weave
1332
from bzrlib.progress import ProgressBar
1347
return read_weave(file(argv[2], 'rb'))
1353
# at the moment, based on everything in the file
1355
parents = map(int, argv[4:])
1356
lines = sys.stdin.readlines()
1357
ver = w.add(name, parents, lines)
1358
write_weave(w, file(argv[2], 'wb'))
1359
print 'added version %r %d' % (name, ver)
1362
if os.path.exists(fn):
1363
raise IOError("file exists")
1365
write_weave(w, file(fn, 'wb'))
1366
elif cmd == 'get': # get one version
1368
sys.stdout.writelines(w.get_iter(int(argv[3])))
1373
v1, v2 = map(int, argv[3:5])
1376
diff_gen = bzrlib.patiencediff.unified_diff(lines1, lines2,
1377
'%s version %d' % (fn, v1),
1378
'%s version %d' % (fn, v2))
1379
sys.stdout.writelines(diff_gen)
1381
elif cmd == 'annotate':
1383
# newline is added to all lines regardless; too hard to get
1384
# reasonable formatting otherwise
1386
for origin, text in w.annotate(int(argv[3])):
1387
text = text.rstrip('\r\n')
1389
print ' | %s' % (text)
1391
print '%5d | %s' % (origin, text)
1397
elif cmd == 'stats':
1398
weave_stats(argv[2], ProgressBar())
1400
elif cmd == 'check':
1405
print '%d versions ok' % w.num_versions()
1407
elif cmd == 'inclusions':
1409
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1411
elif cmd == 'parents':
1413
print ' '.join(map(str, w._parents[int(argv[3])]))
1415
elif cmd == 'plan-merge':
1416
# replaced by 'bzr weave-plan-merge'
1418
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1420
print '%14s | %s' % (state, line),
1421
elif cmd == 'merge':
1422
# replaced by 'bzr weave-merge-text'
1424
p = w.plan_merge(int(argv[3]), int(argv[4]))
1425
sys.stdout.writelines(w.weave_merge(p))
1427
raise ValueError('unknown command %r' % cmd)
1430
if __name__ == '__main__':
1432
sys.exit(main(sys.argv))
1435
class InterWeave(InterVersionedFile):
1436
"""Optimised code paths for weave to weave operations."""
1438
_matching_file_from_factory = staticmethod(WeaveFile)
1439
_matching_file_to_factory = staticmethod(WeaveFile)
1442
def is_compatible(source, target):
1443
"""Be compatible with weaves."""
1445
return (isinstance(source, Weave) and
1446
isinstance(target, Weave))
1447
except AttributeError:
1450
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1451
"""See InterVersionedFile.join."""
1452
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1453
if self.target.versions() == [] and version_ids is None:
1454
self.target._copy_weave_content(self.source)
1457
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1458
except errors.WeaveParentMismatch:
1459
self.target._reweave(self.source, pb, msg)
1462
InterVersionedFile.register_optimiser(InterWeave)