71
71
from copy import copy
72
72
from cStringIO import StringIO
73
from difflib import SequenceMatcher
78
from bzrlib.trace import mutter
79
81
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
80
82
RevisionAlreadyPresent,
81
83
RevisionNotPresent,
84
UnavailableRepresentation,
82
85
WeaveRevisionAlreadyPresent,
83
86
WeaveRevisionNotPresent,
85
88
import bzrlib.errors as errors
86
from bzrlib.osutils import sha_strings
87
from bzrlib.patiencediff import SequenceMatcher, unified_diff
89
from bzrlib.osutils import dirname, sha_strings, split_lines
90
import bzrlib.patiencediff
91
from bzrlib.revision import NULL_REVISION
88
92
from bzrlib.symbol_versioning import *
93
from bzrlib.trace import mutter
89
94
from bzrlib.tsort import topo_sort
90
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
95
from bzrlib.versionedfile import (
91
101
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
104
class WeaveContentFactory(ContentFactory):
    """Content factory for streaming from weaves.

    :seealso ContentFactory:
    """

    def __init__(self, version, weave):
        """Create a WeaveContentFactory for version from weave.

        :param version: The version whose sha1 and parents are looked up in
            ``weave``; it also becomes the single element of ``self.key``.
        :param weave: The weave queried for the sha1 and parent map now, and
            for the text when get_bytes_as() is called.
        """
        ContentFactory.__init__(self)
        self.sha1 = weave.get_sha1s([version])[version]
        self.key = (version,)
        parents = weave.get_parent_map([version])[version]
        # Parents are exposed in the same 1-tuple key form as self.key.
        self.parents = tuple((parent,) for parent in parents)
        self.storage_kind = 'fulltext'
        # get_bytes_as() fetches the text lazily, so keep hold of the weave.
        self._weave = weave

    def get_bytes_as(self, storage_kind):
        """Return this version's content in the requested representation.

        :param storage_kind: The representation wanted; only 'fulltext' is
            available.
        :raises UnavailableRepresentation: for any other storage kind.
        """
        if storage_kind == 'fulltext':
            return self._weave.get_text(self.key[-1])
        raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
94
127
class Weave(VersionedFile):
95
128
"""weave - versioned text file storage.
222
275
def __ne__(self, other):
    """Return the logical negation of ``self.__eq__(other)``."""
    is_equal = self.__eq__(other)
    return not is_equal
225
@deprecated_method(zero_eight)
226
def idx_to_name(self, index):
227
"""Old public interface, the public interface is all names now."""
230
278
def _idx_to_name(self, version):
    """Return the entry of ``self._names`` found at index ``version``."""
    names = self._names
    return names[version]
233
@deprecated_method(zero_eight)
234
def lookup(self, name):
235
"""Backwards compatability thunk:
237
Return name, as name is valid in the api now, and spew deprecation
242
281
def _lookup(self, name):
243
282
"""Convert symbolic version name to index."""
283
if not self._allow_reserved:
284
self.check_not_reserved_id(name)
245
286
return self._name_map[name]
247
288
raise RevisionNotPresent(name, self._weave_name)
249
@deprecated_method(zero_eight)
def iter_names(self):
    """Deprecated convenience function, please see VersionedFile.names()."""
    all_names = self.names()
    return iter(all_names)
254
@deprecated_method(zero_eight)
256
"""See Weave.versions for the current api."""
257
return self.versions()
259
290
def versions(self):
    """See VersionedFile.versions."""
    # Slice so the caller gets a separate object from self._names.
    names_copy = self._names[:]
    return names_copy
263
294
def has_version(self, version_id):
    """See VersionedFile.has_version.

    :param version_id: The version name to look for.
    :return: True if ``version_id`` is a key of ``self._name_map``,
        otherwise False.
    """
    # Single membership test with ``in``; dict.has_key() is deprecated and
    # does not exist on Python 3.
    return version_id in self._name_map
267
298
__contains__ = has_version
269
def get_delta(self, version_id):
    """See VersionedFile.get_delta."""
    # Delegate to the bulk call with a one-element list, then pick our entry.
    deltas = self.get_deltas([version_id])
    return deltas[version_id]
273
def get_deltas(self, version_ids):
274
"""See VersionedFile.get_deltas."""
275
version_ids = self.get_ancestry(version_ids)
300
def get_record_stream(self, versions, ordering, include_delta_closure):
301
"""Get a stream of records for versions.
303
:param versions: The versions to include. Each version is a tuple
305
:param ordering: Either 'unordered' or 'topological'. A topologically
306
sorted stream has compression parents strictly before their
308
:param include_delta_closure: If True then the closure across any
309
compression parents will be included (in the opaque data).
310
:return: An iterator of ContentFactory objects, each of which is only
311
valid until the iterator is advanced.
313
versions = [version[-1] for version in versions]
314
if ordering == 'topological':
315
parents = self.get_parent_map(versions)
316
new_versions = topo_sort(parents)
317
new_versions.extend(set(versions).difference(set(parents)))
318
versions = new_versions
319
for version in versions:
321
yield WeaveContentFactory(version, self)
323
yield AbsentContentFactory((version,))
325
def get_parent_map(self, version_ids):
326
"""See VersionedFile.get_parent_map."""
276
328
for version_id in version_ids:
277
if not self.has_version(version_id):
278
raise RevisionNotPresent(version_id, self)
279
# try extracting all versions; parallel extraction is used
280
nv = self.num_versions()
286
last_parent_lines = {}
288
parent_inclusions = {}
293
# it's simplest to generate a full set of prepared variables.
295
name = self._names[i]
296
sha1s[name] = self.get_sha1(name)
297
parents_list = self.get_parents(name)
299
parent = parents_list[0]
300
parents[name] = parent
301
parent_inclusions[name] = inclusions[parent]
304
parent_inclusions[name] = set()
305
# we want to emit start, finish, replacement_length, replacement_lines tuples.
306
diff_hunks[name] = []
307
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
308
parent_linenums[name] = 0
310
parent_noeols[name] = False
311
last_parent_lines[name] = None
312
new_inc = set([name])
313
for p in self._parents[i]:
314
new_inc.update(inclusions[self._idx_to_name(p)])
315
# debug only, known good so far.
316
#assert set(new_inc) == set(self.get_ancestry(name)), \
317
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
318
inclusions[name] = new_inc
320
nlines = len(self._weave)
322
for lineno, inserted, deletes, line in self._walk_internal():
323
# a line is active in a version if:
324
# insert is in the versions inclusions
326
# deleteset & the versions inclusions is an empty set.
327
# so - if we have a included by mapping - version is included by
328
# children, we get a list of children to examine for deletes affect
329
# ing them, which is less than the entire set of children.
330
for version_id in version_ids:
331
# The active inclusion must be an ancestor,
332
# and no ancestors must have deleted this line,
333
# because we don't support resurrection.
334
parent_inclusion = parent_inclusions[version_id]
335
inclusion = inclusions[version_id]
336
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
337
version_active = inserted in inclusion and not (deletes & inclusion)
338
if not parent_active and not version_active:
339
# unrelated line of ancestry
329
if version_id == NULL_REVISION:
334
map(self._idx_to_name,
335
self._parents[self._lookup(version_id)]))
336
except RevisionNotPresent:
341
elif parent_active and version_active:
343
parent_linenum = parent_linenums[version_id]
344
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
345
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
347
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
348
parent_linenums[version_id] = parent_linenum
351
noeols[version_id] = True
354
elif parent_active and not version_active:
356
current_hunks[version_id][1] += 1
357
parent_linenums[version_id] += 1
358
last_parent_lines[version_id] = line
359
elif not parent_active and version_active:
361
# noeol only occurs at the end of a file because we
362
# diff linewise. We want to show noeol changes as an
363
# empty diff unless the actual eol-less content changed.
366
if last_parent_lines[version_id][-1] != '\n':
367
parent_noeols[version_id] = True
368
except (TypeError, IndexError):
371
if theline[-1] != '\n':
372
noeols[version_id] = True
376
parent_should_go = False
378
if parent_noeols[version_id] == noeols[version_id]:
379
# no noeol toggle, so trust the weaves statement
380
# that this line is changed.
382
if parent_noeols[version_id]:
383
theline = theline + '\n'
384
elif parent_noeols[version_id]:
385
# parent has no eol, we do:
386
# our line is new, report as such..
388
elif noeols[version_id]:
389
# append an eol so that it looks like
391
theline = theline + '\n'
392
if parents[version_id] is not None:
393
#if last_parent_lines[version_id] is not None:
394
parent_should_go = True
395
if last_parent_lines[version_id] != theline:
398
#parent_should_go = False
400
current_hunks[version_id][2] += 1
401
current_hunks[version_id][3].append((inserted, theline))
403
# last hunk last parent line is not eaten
404
current_hunks[version_id][1] -= 1
405
if current_hunks[version_id][1] < 0:
406
current_hunks[version_id][1] = 0
407
# import pdb;pdb.set_trace()
408
# assert current_hunks[version_id][1] >= 0
412
version = self._idx_to_name(i)
413
if current_hunks[version] != [0, 0, 0, []]:
414
diff_hunks[version].append(tuple(current_hunks[version]))
416
for version_id in version_ids:
417
result[version_id] = (
421
diff_hunks[version_id],
338
result[version_id] = parents
425
def get_parents(self, version_id):
    """See VersionedFile.get_parent."""
    to_name = self._idx_to_name
    # Parents are stored by index; look up this version's index first.
    version_index = self._lookup(version_id)
    parent_indexes = self._parents[version_index]
    return map(to_name, parent_indexes)
341
def get_parents_with_ghosts(self, version_id):
    """Always raise NotImplementedError.

    :param version_id: Unused.
    :raises NotImplementedError: always; the exception argument is this
        bound method.
    """
    unsupported = self.get_parents_with_ghosts
    raise NotImplementedError(unsupported)
344
def insert_record_stream(self, stream):
345
"""Insert a record stream into this versioned file.
347
:param stream: A stream of records to insert.
349
:seealso VersionedFile.get_record_stream:
352
for record in stream:
353
# Raise an error when a record is missing.
354
if record.storage_kind == 'absent':
355
raise RevisionNotPresent([record.key[0]], self)
356
# adapt to non-tuple interface
357
parents = [parent[0] for parent in record.parents]
358
if record.storage_kind == 'fulltext':
359
self.add_lines(record.key[0], parents,
360
split_lines(record.get_bytes_as('fulltext')))
362
adapter_key = record.storage_kind, 'fulltext'
364
adapter = adapters[adapter_key]
366
adapter_factory = adapter_registry.get(adapter_key)
367
adapter = adapter_factory(self)
368
adapters[adapter_key] = adapter
369
lines = split_lines(adapter.get_bytes(
370
record, record.get_bytes_as(record.storage_kind)))
372
self.add_lines(record.key[0], parents, lines)
373
except RevisionAlreadyPresent:
429
376
def _check_repeated_add(self, name, parents, text, sha1):
430
377
"""Check that a duplicated add is OK.
437
384
raise RevisionAlreadyPresent(name, self._weave_name)
440
@deprecated_method(zero_eight)
def add_identical(self, old_rev_id, new_rev_id, parents):
    """Please use Weave.clone_text now."""
    # clone_text is called with the new id first, so the two ids swap order.
    cloned = self.clone_text(new_rev_id, old_rev_id, parents)
    return cloned
445
def _add_lines(self, version_id, parents, lines, parent_texts):
387
def _add_lines(self, version_id, parents, lines, parent_texts,
388
left_matching_blocks, nostore_sha, random_id, check_content):
446
389
"""See VersionedFile.add_lines."""
447
return self._add(version_id, lines, map(self._lookup, parents))
449
@deprecated_method(zero_eight)
450
def add(self, name, parents, text, sha1=None):
451
"""See VersionedFile.add_lines for the non deprecated api."""
452
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
454
def _add(self, version_id, lines, parents, sha1=None):
390
idx = self._add(version_id, lines, map(self._lookup, parents),
391
nostore_sha=nostore_sha)
392
return sha_strings(lines), sum(map(len, lines)), idx
394
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
455
395
"""Add a single text on top of the weave.
457
397
Returns the index number of the newly added version.
977
844
# no lines outside of insertion blocks, that deletions are
978
845
# properly paired, etc.
980
def _join(self, other, pb, msg, version_ids, ignore_missing):
981
"""Worker routine for join()."""
982
if not other.versions():
983
return # nothing to update, easy
986
# versions is never none, InterWeave checks this.
989
# two loops so that we do not change ourselves before verifying it
991
# work through in index order to make sure we get all dependencies
994
# get the selected versions only that are in other.versions.
995
version_ids = set(other.versions()).intersection(set(version_ids))
996
# pull in the referenced graph.
997
version_ids = other.get_ancestry(version_ids)
998
pending_graph = [(version, other.get_parents(version)) for
999
version in version_ids]
1000
for name in topo_sort(pending_graph):
1001
other_idx = other._name_map[name]
1002
# returns True if we have it, False if we need it.
1003
if not self._check_version_consistent(other, other_idx, name):
1004
names_to_join.append((other_idx, name))
1013
for other_idx, name in names_to_join:
1014
# TODO: If all the parents of the other version are already
1015
# present then we can avoid some work by just taking the delta
1016
# and adjusting the offsets.
1017
new_parents = self._imported_parents(other, other_idx)
1018
sha1 = other._sha1s[other_idx]
1023
pb.update(msg, merged, len(names_to_join))
1025
lines = other.get_lines(other_idx)
1026
self._add(name, lines, new_parents, sha1)
1028
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1029
merged, processed, self._weave_name, time.time()-time0))
1031
847
def _imported_parents(self, other, other_idx):
1032
848
"""Return list of parents in self corresponding to indexes in other."""
1033
849
new_parents = []
1111
923
# new file, save it
1114
def _add_lines(self, version_id, parents, lines, parent_texts):
926
def _add_lines(self, version_id, parents, lines, parent_texts,
927
left_matching_blocks, nostore_sha, random_id, check_content):
1115
928
"""Add a version and save the weave."""
929
self.check_not_reserved_id(version_id)
1116
930
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
931
parent_texts, left_matching_blocks, nostore_sha, random_id,
1121
def _clone_text(self, new_version_id, old_version_id, parents):
1122
"""See VersionedFile.clone_text."""
1123
super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
1126
936
def copy_to(self, name, transport):
1127
937
"""See VersionedFile.copy_to()."""
1128
938
# as we are all in memory always, just serialise to the new place.
1129
939
sio = StringIO()
1130
940
write_weave_v5(self, sio)
1132
transport.put(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1134
def create_empty(self, name, transport, filemode=None):
    """Return a WeaveFile built from the arguments with ``create=True``."""
    empty_weave = WeaveFile(name, transport, filemode, create=True)
    return empty_weave
942
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1137
944
def _save(self):
1138
945
"""Save the weave."""
1420
1231
raise ValueError('unknown command %r' % cmd)
1424
def profile_main(argv):
1425
import tempfile, hotshot, hotshot.stats
1427
prof_f = tempfile.NamedTemporaryFile()
1429
prof = hotshot.Profile(prof_f.name)
1431
ret = prof.runcall(main, argv)
1434
stats = hotshot.stats.load(prof_f.name)
1436
stats.sort_stats('cumulative')
1437
## XXX: Might like to write to stderr or the trace file instead but
1438
## print_stats seems hardcoded to stdout
1439
stats.print_stats(20)
1444
def lsprofile_main(argv):
1445
from bzrlib.lsprof import profile
1446
ret,stats = profile(main, argv)
1452
1234
if __name__ == '__main__':
1454
if '--profile' in sys.argv:
1456
args.remove('--profile')
1457
sys.exit(profile_main(args))
1458
elif '--lsprof' in sys.argv:
1460
args.remove('--lsprof')
1461
sys.exit(lsprofile_main(args))
1463
sys.exit(main(sys.argv))
1466
class InterWeave(InterVersionedFile):
1467
"""Optimised code paths for weave to weave operations."""
1469
_matching_file_from_factory = staticmethod(WeaveFile)
1470
_matching_file_to_factory = staticmethod(WeaveFile)
1473
def is_compatible(source, target):
1474
"""Be compatible with weaves."""
1476
return (isinstance(source, Weave) and
1477
isinstance(target, Weave))
1478
except AttributeError:
1481
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1482
"""See InterVersionedFile.join."""
1483
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1484
if self.target.versions() == [] and version_ids is None:
1485
self.target._copy_weave_content(self.source)
1488
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1489
except errors.WeaveParentMismatch:
1490
self.target._reweave(self.source, pb, msg)
1493
InterVersionedFile.register_optimiser(InterWeave)
1236
sys.exit(main(sys.argv))