71
71
from copy import copy
72
72
from cStringIO import StringIO
73
from difflib import SequenceMatcher
78
from bzrlib.trace import mutter
77
from bzrlib.lazy_import import lazy_import
78
lazy_import(globals(), """
79
from bzrlib import tsort
79
86
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
80
87
RevisionAlreadyPresent,
81
88
RevisionNotPresent,
89
UnavailableRepresentation,
82
90
WeaveRevisionAlreadyPresent,
83
91
WeaveRevisionNotPresent,
85
import bzrlib.errors as errors
86
from bzrlib.osutils import sha_strings
87
from bzrlib.patiencediff import SequenceMatcher, unified_diff
93
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
94
import bzrlib.patiencediff
95
from bzrlib.revision import NULL_REVISION
88
96
from bzrlib.symbol_versioning import *
89
from bzrlib.tsort import topo_sort
90
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
97
from bzrlib.trace import mutter
98
from bzrlib.versionedfile import (
91
105
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
108
class WeaveContentFactory(ContentFactory):
109
"""Content factory for streaming from weaves.
111
:seealso ContentFactory:
114
def __init__(self, version, weave):
115
"""Create a WeaveContentFactory for version from weave."""
116
ContentFactory.__init__(self)
117
self.sha1 = weave.get_sha1s([version])[version]
118
self.key = (version,)
119
parents = weave.get_parent_map([version])[version]
120
self.parents = tuple((parent,) for parent in parents)
121
self.storage_kind = 'fulltext'
124
def get_bytes_as(self, storage_kind):
125
if storage_kind == 'fulltext':
126
return self._weave.get_text(self.key[-1])
127
elif storage_kind == 'chunked':
128
return self._weave.get_lines(self.key[-1])
130
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
94
133
class Weave(VersionedFile):
95
134
"""weave - versioned text file storage.
97
136
A Weave manages versions of line-based text files, keeping track
98
137
of the originating version for each line.
218
277
return self._parents == other._parents \
219
278
and self._weave == other._weave \
220
and self._sha1s == other._sha1s
279
and self._sha1s == other._sha1s
222
281
def __ne__(self, other):
223
282
return not self.__eq__(other)
225
@deprecated_method(zero_eight)
226
def idx_to_name(self, index):
227
"""Old public interface, the public interface is all names now."""
230
284
def _idx_to_name(self, version):
231
285
return self._names[version]
233
@deprecated_method(zero_eight)
234
def lookup(self, name):
235
"""Backwards compatability thunk:
237
Return name, as name is valid in the api now, and spew deprecation
242
287
def _lookup(self, name):
243
288
"""Convert symbolic version name to index."""
289
if not self._allow_reserved:
290
self.check_not_reserved_id(name)
245
292
return self._name_map[name]
247
294
raise RevisionNotPresent(name, self._weave_name)
249
@deprecated_method(zero_eight)
250
def iter_names(self):
251
"""Deprecated convenience function, please see VersionedFile.names()."""
252
return iter(self.names())
254
@deprecated_method(zero_eight)
256
"""See Weave.versions for the current api."""
257
return self.versions()
259
296
def versions(self):
260
297
"""See VersionedFile.versions."""
261
298
return self._names[:]
263
300
def has_version(self, version_id):
264
301
"""See VersionedFile.has_version."""
265
return self._name_map.has_key(version_id)
302
return (version_id in self._name_map)
267
304
__contains__ = has_version
269
def get_delta(self, version_id):
270
"""See VersionedFile.get_delta."""
271
return self.get_deltas([version_id])[version_id]
273
def get_deltas(self, version_ids):
274
"""See VersionedFile.get_deltas."""
275
version_ids = self.get_ancestry(version_ids)
306
def get_record_stream(self, versions, ordering, include_delta_closure):
307
"""Get a stream of records for versions.
309
:param versions: The versions to include. Each version is a tuple
311
:param ordering: Either 'unordered' or 'topological'. A topologically
312
sorted stream has compression parents strictly before their
314
:param include_delta_closure: If True then the closure across any
315
compression parents will be included (in the opaque data).
316
:return: An iterator of ContentFactory objects, each of which is only
317
valid until the iterator is advanced.
319
versions = [version[-1] for version in versions]
320
if ordering == 'topological':
321
parents = self.get_parent_map(versions)
322
new_versions = tsort.topo_sort(parents)
323
new_versions.extend(set(versions).difference(set(parents)))
324
versions = new_versions
325
elif ordering == 'groupcompress':
326
parents = self.get_parent_map(versions)
327
new_versions = sort_groupcompress(parents)
328
new_versions.extend(set(versions).difference(set(parents)))
329
versions = new_versions
330
for version in versions:
332
yield WeaveContentFactory(version, self)
334
yield AbsentContentFactory((version,))
336
def get_parent_map(self, version_ids):
337
"""See VersionedFile.get_parent_map."""
276
339
for version_id in version_ids:
277
if not self.has_version(version_id):
278
raise RevisionNotPresent(version_id, self)
279
# try extracting all versions; parallel extraction is used
280
nv = self.num_versions()
286
last_parent_lines = {}
288
parent_inclusions = {}
293
# its simplest to generate a full set of prepared variables.
295
name = self._names[i]
296
sha1s[name] = self.get_sha1(name)
297
parents_list = self.get_parents(name)
299
parent = parents_list[0]
300
parents[name] = parent
301
parent_inclusions[name] = inclusions[parent]
304
parent_inclusions[name] = set()
305
# we want to emit start, finish, replacement_length, replacement_lines tuples.
306
diff_hunks[name] = []
307
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
308
parent_linenums[name] = 0
310
parent_noeols[name] = False
311
last_parent_lines[name] = None
312
new_inc = set([name])
313
for p in self._parents[i]:
314
new_inc.update(inclusions[self._idx_to_name(p)])
315
# debug only, known good so far.
316
#assert set(new_inc) == set(self.get_ancestry(name)), \
317
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
318
inclusions[name] = new_inc
320
nlines = len(self._weave)
322
for lineno, inserted, deletes, line in self._walk_internal():
323
# a line is active in a version if:
324
# insert is in the versions inclusions
326
# deleteset & the versions inclusions is an empty set.
327
# so - if we have a included by mapping - version is included by
328
# children, we get a list of children to examine for deletes affect
329
# ing them, which is less than the entire set of children.
330
for version_id in version_ids:
331
# The active inclusion must be an ancestor,
332
# and no ancestors must have deleted this line,
333
# because we don't support resurrection.
334
parent_inclusion = parent_inclusions[version_id]
335
inclusion = inclusions[version_id]
336
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
337
version_active = inserted in inclusion and not (deletes & inclusion)
338
if not parent_active and not version_active:
339
# unrelated line of ancestry
340
if version_id == NULL_REVISION:
345
map(self._idx_to_name,
346
self._parents[self._lookup(version_id)]))
347
except RevisionNotPresent:
341
elif parent_active and version_active:
343
parent_linenum = parent_linenums[version_id]
344
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
345
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
347
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
348
parent_linenums[version_id] = parent_linenum
351
noeols[version_id] = True
354
elif parent_active and not version_active:
356
current_hunks[version_id][1] += 1
357
parent_linenums[version_id] += 1
358
last_parent_lines[version_id] = line
359
elif not parent_active and version_active:
361
# noeol only occurs at the end of a file because we
362
# diff linewise. We want to show noeol changes as a
363
# empty diff unless the actual eol-less content changed.
366
if last_parent_lines[version_id][-1] != '\n':
367
parent_noeols[version_id] = True
368
except (TypeError, IndexError):
371
if theline[-1] != '\n':
372
noeols[version_id] = True
376
parent_should_go = False
378
if parent_noeols[version_id] == noeols[version_id]:
379
# no noeol toggle, so trust the weaves statement
380
# that this line is changed.
382
if parent_noeols[version_id]:
383
theline = theline + '\n'
384
elif parent_noeols[version_id]:
385
# parent has no eol, we do:
386
# our line is new, report as such..
388
elif noeols[version_id]:
389
# append a eol so that it looks like
391
theline = theline + '\n'
392
if parents[version_id] is not None:
393
#if last_parent_lines[version_id] is not None:
394
parent_should_go = True
395
if last_parent_lines[version_id] != theline:
398
#parent_should_go = False
400
current_hunks[version_id][2] += 1
401
current_hunks[version_id][3].append((inserted, theline))
403
# last hunk last parent line is not eaten
404
current_hunks[version_id][1] -= 1
405
if current_hunks[version_id][1] < 0:
406
current_hunks[version_id][1] = 0
407
# import pdb;pdb.set_trace()
408
# assert current_hunks[version_id][1] >= 0
412
version = self._idx_to_name(i)
413
if current_hunks[version] != [0, 0, 0, []]:
414
diff_hunks[version].append(tuple(current_hunks[version]))
416
for version_id in version_ids:
417
result[version_id] = (
421
diff_hunks[version_id],
349
result[version_id] = parents
425
def get_parents(self, version_id):
426
"""See VersionedFile.get_parent."""
427
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
352
def get_parents_with_ghosts(self, version_id):
353
raise NotImplementedError(self.get_parents_with_ghosts)
355
def insert_record_stream(self, stream):
356
"""Insert a record stream into this versioned file.
358
:param stream: A stream of records to insert.
360
:seealso VersionedFile.get_record_stream:
363
for record in stream:
364
# Raise an error when a record is missing.
365
if record.storage_kind == 'absent':
366
raise RevisionNotPresent([record.key[0]], self)
367
# adapt to non-tuple interface
368
parents = [parent[0] for parent in record.parents]
369
if (record.storage_kind == 'fulltext'
370
or record.storage_kind == 'chunked'):
371
self.add_lines(record.key[0], parents,
372
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
374
adapter_key = record.storage_kind, 'fulltext'
376
adapter = adapters[adapter_key]
378
adapter_factory = adapter_registry.get(adapter_key)
379
adapter = adapter_factory(self)
380
adapters[adapter_key] = adapter
381
lines = split_lines(adapter.get_bytes(record))
383
self.add_lines(record.key[0], parents, lines)
384
except RevisionAlreadyPresent:
429
387
def _check_repeated_add(self, name, parents, text, sha1):
430
388
"""Check that a duplicated add is OK.
437
395
raise RevisionAlreadyPresent(name, self._weave_name)
440
@deprecated_method(zero_eight)
441
def add_identical(self, old_rev_id, new_rev_id, parents):
442
"""Please use Weave.clone_text now."""
443
return self.clone_text(new_rev_id, old_rev_id, parents)
445
def _add_lines(self, version_id, parents, lines, parent_texts):
398
def _add_lines(self, version_id, parents, lines, parent_texts,
399
left_matching_blocks, nostore_sha, random_id, check_content):
446
400
"""See VersionedFile.add_lines."""
447
return self._add(version_id, lines, map(self._lookup, parents))
449
@deprecated_method(zero_eight)
450
def add(self, name, parents, text, sha1=None):
451
"""See VersionedFile.add_lines for the non deprecated api."""
452
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
454
def _add(self, version_id, lines, parents, sha1=None):
401
idx = self._add(version_id, lines, map(self._lookup, parents),
402
nostore_sha=nostore_sha)
403
return sha_strings(lines), sum(map(len, lines)), idx
405
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
455
406
"""Add a single text on top of the weave.
457
408
Returns the index number of the newly added version.
629
561
def _compatible_parents(self, my_parents, other_parents):
630
562
"""During join check that other_parents are joinable with my_parents.
632
Joinable is defined as 'is a subset of' - supersets may require
564
Joinable is defined as 'is a subset of' - supersets may require
633
565
regeneration of diffs, but subsets do not.
635
567
return len(other_parents.difference(my_parents)) == 0
637
569
def annotate(self, version_id):
638
if isinstance(version_id, int):
639
warn('Weave.annotate(int) is deprecated. Please use version names'
640
' in all circumstances as of 0.8',
645
for origin, lineno, text in self._extract([version_id]):
646
result.append((origin, text))
649
return super(Weave, self).annotate(version_id)
651
def annotate_iter(self, version_id):
652
"""Yield list of (version-id, line) pairs for the specified version.
570
"""Return a list of (version-id, line) tuples for version_id.
654
572
The index indicates when the line originated in the weave."""
655
573
incls = [self._lookup(version_id)]
656
for origin, lineno, text in self._extract(incls):
657
yield self._idx_to_name(origin), text
659
@deprecated_method(zero_eight)
661
"""_walk has become visit, a supported api."""
662
return self._walk_internal()
664
def iter_lines_added_or_present_in_versions(self, version_ids=None):
574
return [(self._idx_to_name(origin), text) for origin, lineno, text in
575
self._extract(incls)]
577
def iter_lines_added_or_present_in_versions(self, version_ids=None,
665
579
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
666
580
if version_ids is None:
667
581
version_ids = self.versions()
668
582
version_ids = set(version_ids)
669
583
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
670
# if inserted not in version_ids then it was inserted before the
671
# versions we care about, but because weaves cannot represent ghosts
672
# properly, we do not filter down to that
673
# if inserted not in version_ids: continue
584
if inserted not in version_ids: continue
674
585
if line[-1] != '\n':
586
yield line + '\n', inserted
679
#@deprecated_method(zero_eight)
680
def walk(self, version_ids=None):
681
"""See VersionedFile.walk."""
682
return self._walk_internal(version_ids)
684
590
def _walk_internal(self, version_ids=None):
685
591
"""Helper method for weave actions."""
977
852
# no lines outside of insertion blocks, that deletions are
978
853
# properly paired, etc.
980
def _join(self, other, pb, msg, version_ids, ignore_missing):
981
"""Worker routine for join()."""
982
if not other.versions():
983
return # nothing to update, easy
986
# versions is never none, InterWeave checks this.
989
# two loops so that we do not change ourselves before verifying it
991
# work through in index order to make sure we get all dependencies
994
# get the selected versions only that are in other.versions.
995
version_ids = set(other.versions()).intersection(set(version_ids))
996
# pull in the referenced graph.
997
version_ids = other.get_ancestry(version_ids)
998
pending_graph = [(version, other.get_parents(version)) for
999
version in version_ids]
1000
for name in topo_sort(pending_graph):
1001
other_idx = other._name_map[name]
1002
# returns True if we have it, False if we need it.
1003
if not self._check_version_consistent(other, other_idx, name):
1004
names_to_join.append((other_idx, name))
1013
for other_idx, name in names_to_join:
1014
# TODO: If all the parents of the other version are already
1015
# present then we can avoid some work by just taking the delta
1016
# and adjusting the offsets.
1017
new_parents = self._imported_parents(other, other_idx)
1018
sha1 = other._sha1s[other_idx]
1023
pb.update(msg, merged, len(names_to_join))
1025
lines = other.get_lines(other_idx)
1026
self._add(name, lines, new_parents, sha1)
1028
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1029
merged, processed, self._weave_name, time.time()-time0))
1031
855
def _imported_parents(self, other, other_idx):
1032
856
"""Return list of parents in self corresponding to indexes in other."""
1033
857
new_parents = []
1140
955
sio = StringIO()
1141
956
write_weave_v5(self, sio)
1143
self._transport.put(self._weave_name + WeaveFile.WEAVE_SUFFIX,
958
bytes = sio.getvalue()
959
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
961
self._transport.put_bytes(path, bytes, self._filemode)
962
except errors.NoSuchFile:
963
self._transport.mkdir(dirname(path))
964
self._transport.put_bytes(path, bytes, self._filemode)
1148
967
def get_suffixes():
1149
968
"""See VersionedFile.get_suffixes()."""
1150
969
return [WeaveFile.WEAVE_SUFFIX]
1152
def join(self, other, pb=None, msg=None, version_ids=None,
1153
ignore_missing=False):
1154
"""Join other into self and save."""
1155
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
971
def insert_record_stream(self, stream):
972
super(WeaveFile, self).insert_record_stream(stream)
1159
@deprecated_function(zero_eight)
1160
def reweave(wa, wb, pb=None, msg=None):
1161
"""reweaving is deprecation, please just use weave.join()."""
1162
_reweave(wa, wb, pb, msg)
1164
976
def _reweave(wa, wb, pb=None, msg=None):
1165
977
"""Combine two weaves and return the result.
1167
This works even if a revision R has different parents in
979
This works even if a revision R has different parents in
1168
980
wa and wb. In the resulting weave all the parents are given.
1170
This is done by just building up a new weave, maintaining ordering
982
This is done by just building up a new weave, maintaining ordering
1171
983
of the versions in the two inputs. More efficient approaches
1172
might be possible but it should only be necessary to do
1173
this operation rarely, when a new previously ghost version is
984
might be possible but it should only be necessary to do
985
this operation rarely, when a new previously ghost version is
1176
988
:param pb: An optional progress bar, indicating how far done we are
1418
1230
sys.stdout.writelines(w.weave_merge(p))
1420
1232
raise ValueError('unknown command %r' % cmd)
1424
def profile_main(argv):
1425
import tempfile, hotshot, hotshot.stats
1427
prof_f = tempfile.NamedTemporaryFile()
1429
prof = hotshot.Profile(prof_f.name)
1431
ret = prof.runcall(main, argv)
1434
stats = hotshot.stats.load(prof_f.name)
1436
stats.sort_stats('cumulative')
1437
## XXX: Might like to write to stderr or the trace file instead but
1438
## print_stats seems hardcoded to stdout
1439
stats.print_stats(20)
1444
def lsprofile_main(argv):
1445
from bzrlib.lsprof import profile
1446
ret,stats = profile(main, argv)
1452
1235
if __name__ == '__main__':
1454
if '--profile' in sys.argv:
1456
args.remove('--profile')
1457
sys.exit(profile_main(args))
1458
elif '--lsprof' in sys.argv:
1460
args.remove('--lsprof')
1461
sys.exit(lsprofile_main(args))
1463
sys.exit(main(sys.argv))
1466
class InterWeave(InterVersionedFile):
1467
"""Optimised code paths for weave to weave operations."""
1469
_matching_file_from_factory = staticmethod(WeaveFile)
1470
_matching_file_to_factory = staticmethod(WeaveFile)
1473
def is_compatible(source, target):
1474
"""Be compatible with weaves."""
1476
return (isinstance(source, Weave) and
1477
isinstance(target, Weave))
1478
except AttributeError:
1481
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1482
"""See InterVersionedFile.join."""
1483
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1484
if self.target.versions() == [] and version_ids is None:
1485
self.target._copy_weave_content(self.source)
1488
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1489
except errors.WeaveParentMismatch:
1490
self.target._reweave(self.source, pb, msg)
1493
InterVersionedFile.register_optimiser(InterWeave)
1237
sys.exit(main(sys.argv))