77
from bzrlib.trace import mutter
81
78
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
82
79
RevisionAlreadyPresent,
83
80
RevisionNotPresent,
84
UnavailableRepresentation,
85
81
WeaveRevisionAlreadyPresent,
86
82
WeaveRevisionNotPresent,
88
84
import bzrlib.errors as errors
89
from bzrlib.osutils import dirname, sha_strings, split_lines
85
from bzrlib.osutils import sha_strings
90
86
import bzrlib.patiencediff
91
from bzrlib.revision import NULL_REVISION
92
87
from bzrlib.symbol_versioning import *
93
from bzrlib.trace import mutter
94
88
from bzrlib.tsort import topo_sort
95
from bzrlib.versionedfile import (
89
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
101
90
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
104
class WeaveContentFactory(ContentFactory):
105
"""Content factory for streaming from weaves.
107
:seealso ContentFactory:
110
def __init__(self, version, weave):
111
"""Create a WeaveContentFactory for version from weave."""
112
ContentFactory.__init__(self)
113
self.sha1 = weave.get_sha1s([version])[version]
114
self.key = (version,)
115
parents = weave.get_parent_map([version])[version]
116
self.parents = tuple((parent,) for parent in parents)
117
self.storage_kind = 'fulltext'
120
def get_bytes_as(self, storage_kind):
121
if storage_kind == 'fulltext':
122
return self._weave.get_text(self.key[-1])
124
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
127
93
class Weave(VersionedFile):
128
94
"""weave - versioned text file storage.
275
221
def __ne__(self, other):
276
222
return not self.__eq__(other)
224
@deprecated_method(zero_eight)
225
def idx_to_name(self, index):
226
"""Old public interface, the public interface is all names now."""
278
229
def _idx_to_name(self, version):
279
230
return self._names[version]
232
@deprecated_method(zero_eight)
233
def lookup(self, name):
234
"""Backwards compatibility thunk:
236
Return name, as name is valid in the api now, and spew deprecation
281
241
def _lookup(self, name):
282
242
"""Convert symbolic version name to index."""
283
if not self._allow_reserved:
284
self.check_not_reserved_id(name)
286
244
return self._name_map[name]
288
246
raise RevisionNotPresent(name, self._weave_name)
248
@deprecated_method(zero_eight)
249
def iter_names(self):
250
"""Deprecated convenience function, please see VersionedFile.names()."""
251
return iter(self.names())
253
@deprecated_method(zero_eight)
255
"""See Weave.versions for the current api."""
256
return self.versions()
290
258
def versions(self):
291
259
"""See VersionedFile.versions."""
292
260
return self._names[:]
294
262
def has_version(self, version_id):
295
263
"""See VersionedFile.has_version."""
296
return (version_id in self._name_map)
264
return self._name_map.has_key(version_id)
298
266
__contains__ = has_version
300
def get_record_stream(self, versions, ordering, include_delta_closure):
301
"""Get a stream of records for versions.
303
:param versions: The versions to include. Each version is a tuple
305
:param ordering: Either 'unordered' or 'topological'. A topologically
306
sorted stream has compression parents strictly before their
308
:param include_delta_closure: If True then the closure across any
309
compression parents will be included (in the opaque data).
310
:return: An iterator of ContentFactory objects, each of which is only
311
valid until the iterator is advanced.
313
versions = [version[-1] for version in versions]
314
if ordering == 'topological':
315
parents = self.get_parent_map(versions)
316
new_versions = topo_sort(parents)
317
new_versions.extend(set(versions).difference(set(parents)))
318
versions = new_versions
319
for version in versions:
321
yield WeaveContentFactory(version, self)
323
yield AbsentContentFactory((version,))
325
def get_parent_map(self, version_ids):
326
"""See VersionedFile.get_parent_map."""
268
def get_delta(self, version_id):
269
"""See VersionedFile.get_delta."""
270
return self.get_deltas([version_id])[version_id]
272
def get_deltas(self, version_ids):
273
"""See VersionedFile.get_deltas."""
274
version_ids = self.get_ancestry(version_ids)
328
275
for version_id in version_ids:
329
if version_id == NULL_REVISION:
334
map(self._idx_to_name,
335
self._parents[self._lookup(version_id)]))
336
except RevisionNotPresent:
276
if not self.has_version(version_id):
277
raise RevisionNotPresent(version_id, self)
278
# try extracting all versions; parallel extraction is used
279
nv = self.num_versions()
285
last_parent_lines = {}
287
parent_inclusions = {}
292
# its simplest to generate a full set of prepared variables.
294
name = self._names[i]
295
sha1s[name] = self.get_sha1(name)
296
parents_list = self.get_parents(name)
298
parent = parents_list[0]
299
parents[name] = parent
300
parent_inclusions[name] = inclusions[parent]
303
parent_inclusions[name] = set()
304
# we want to emit start, finish, replacement_length, replacement_lines tuples.
305
diff_hunks[name] = []
306
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
307
parent_linenums[name] = 0
309
parent_noeols[name] = False
310
last_parent_lines[name] = None
311
new_inc = set([name])
312
for p in self._parents[i]:
313
new_inc.update(inclusions[self._idx_to_name(p)])
314
# debug only, known good so far.
315
#assert set(new_inc) == set(self.get_ancestry(name)), \
316
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
317
inclusions[name] = new_inc
319
nlines = len(self._weave)
321
for lineno, inserted, deletes, line in self._walk_internal():
322
# a line is active in a version if:
323
# insert is in the versions inclusions
325
# deleteset & the versions inclusions is an empty set.
326
# so - if we have a included by mapping - version is included by
327
# children, we get a list of children to examine for deletes affect
328
# ing them, which is less than the entire set of children.
329
for version_id in version_ids:
330
# The active inclusion must be an ancestor,
331
# and no ancestors must have deleted this line,
332
# because we don't support resurrection.
333
parent_inclusion = parent_inclusions[version_id]
334
inclusion = inclusions[version_id]
335
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
336
version_active = inserted in inclusion and not (deletes & inclusion)
337
if not parent_active and not version_active:
338
# unrelated line of ancestry
338
result[version_id] = parents
340
elif parent_active and version_active:
342
parent_linenum = parent_linenums[version_id]
343
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
344
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
346
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
347
parent_linenums[version_id] = parent_linenum
350
noeols[version_id] = True
353
elif parent_active and not version_active:
355
current_hunks[version_id][1] += 1
356
parent_linenums[version_id] += 1
357
last_parent_lines[version_id] = line
358
elif not parent_active and version_active:
360
# noeol only occurs at the end of a file because we
361
# diff linewise. We want to show noeol changes as a
362
# empty diff unless the actual eol-less content changed.
365
if last_parent_lines[version_id][-1] != '\n':
366
parent_noeols[version_id] = True
367
except (TypeError, IndexError):
370
if theline[-1] != '\n':
371
noeols[version_id] = True
375
parent_should_go = False
377
if parent_noeols[version_id] == noeols[version_id]:
378
# no noeol toggle, so trust the weaves statement
379
# that this line is changed.
381
if parent_noeols[version_id]:
382
theline = theline + '\n'
383
elif parent_noeols[version_id]:
384
# parent has no eol, we do:
385
# our line is new, report as such..
387
elif noeols[version_id]:
388
# append a eol so that it looks like
390
theline = theline + '\n'
391
if parents[version_id] is not None:
392
#if last_parent_lines[version_id] is not None:
393
parent_should_go = True
394
if last_parent_lines[version_id] != theline:
397
#parent_should_go = False
399
current_hunks[version_id][2] += 1
400
current_hunks[version_id][3].append((inserted, theline))
402
# last hunk last parent line is not eaten
403
current_hunks[version_id][1] -= 1
404
if current_hunks[version_id][1] < 0:
405
current_hunks[version_id][1] = 0
406
# import pdb;pdb.set_trace()
407
# assert current_hunks[version_id][1] >= 0
411
version = self._idx_to_name(i)
412
if current_hunks[version] != [0, 0, 0, []]:
413
diff_hunks[version].append(tuple(current_hunks[version]))
415
for version_id in version_ids:
416
result[version_id] = (
420
diff_hunks[version_id],
341
def get_parents_with_ghosts(self, version_id):
342
raise NotImplementedError(self.get_parents_with_ghosts)
344
def insert_record_stream(self, stream):
345
"""Insert a record stream into this versioned file.
347
:param stream: A stream of records to insert.
349
:seealso VersionedFile.get_record_stream:
352
for record in stream:
353
# Raise an error when a record is missing.
354
if record.storage_kind == 'absent':
355
raise RevisionNotPresent([record.key[0]], self)
356
# adapt to non-tuple interface
357
parents = [parent[0] for parent in record.parents]
358
if record.storage_kind == 'fulltext':
359
self.add_lines(record.key[0], parents,
360
split_lines(record.get_bytes_as('fulltext')))
362
adapter_key = record.storage_kind, 'fulltext'
364
adapter = adapters[adapter_key]
366
adapter_factory = adapter_registry.get(adapter_key)
367
adapter = adapter_factory(self)
368
adapters[adapter_key] = adapter
369
lines = split_lines(adapter.get_bytes(
370
record, record.get_bytes_as(record.storage_kind)))
372
self.add_lines(record.key[0], parents, lines)
373
except RevisionAlreadyPresent:
424
def get_parents(self, version_id):
425
"""See VersionedFile.get_parent."""
426
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
376
428
def _check_repeated_add(self, name, parents, text, sha1):
377
429
"""Check that a duplicated add is OK.
384
436
raise RevisionAlreadyPresent(name, self._weave_name)
387
def _add_lines(self, version_id, parents, lines, parent_texts,
388
left_matching_blocks, nostore_sha, random_id, check_content):
439
@deprecated_method(zero_eight)
440
def add_identical(self, old_rev_id, new_rev_id, parents):
441
"""Please use Weave.clone_text now."""
442
return self.clone_text(new_rev_id, old_rev_id, parents)
444
def _add_lines(self, version_id, parents, lines, parent_texts):
389
445
"""See VersionedFile.add_lines."""
390
idx = self._add(version_id, lines, map(self._lookup, parents),
391
nostore_sha=nostore_sha)
392
return sha_strings(lines), sum(map(len, lines)), idx
394
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
446
return self._add(version_id, lines, map(self._lookup, parents))
448
@deprecated_method(zero_eight)
449
def add(self, name, parents, text, sha1=None):
450
"""See VersionedFile.add_lines for the non deprecated api."""
451
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
453
def _add(self, version_id, lines, parents, sha1=None):
395
454
"""Add a single text on top of the weave.
397
456
Returns the index number of the newly added version.
844
976
# no lines outside of insertion blocks, that deletions are
845
977
# properly paired, etc.
979
def _join(self, other, pb, msg, version_ids, ignore_missing):
980
"""Worker routine for join()."""
981
if not other.versions():
982
return # nothing to update, easy
985
# versions is never none, InterWeave checks this.
988
# two loops so that we do not change ourselves before verifying it
990
# work through in index order to make sure we get all dependencies
993
# get the selected versions only that are in other.versions.
994
version_ids = set(other.versions()).intersection(set(version_ids))
995
# pull in the referenced graph.
996
version_ids = other.get_ancestry(version_ids)
997
pending_graph = [(version, other.get_parents(version)) for
998
version in version_ids]
999
for name in topo_sort(pending_graph):
1000
other_idx = other._name_map[name]
1001
# returns True if we have it, False if we need it.
1002
if not self._check_version_consistent(other, other_idx, name):
1003
names_to_join.append((other_idx, name))
1012
for other_idx, name in names_to_join:
1013
# TODO: If all the parents of the other version are already
1014
# present then we can avoid some work by just taking the delta
1015
# and adjusting the offsets.
1016
new_parents = self._imported_parents(other, other_idx)
1017
sha1 = other._sha1s[other_idx]
1022
pb.update(msg, merged, len(names_to_join))
1024
lines = other.get_lines(other_idx)
1025
self._add(name, lines, new_parents, sha1)
1027
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1028
merged, processed, self._weave_name, time.time()-time0))
847
1030
def _imported_parents(self, other, other_idx):
848
1031
"""Return list of parents in self corresponding to indexes in other."""
849
1032
new_parents = []
923
1110
# new file, save it
926
def _add_lines(self, version_id, parents, lines, parent_texts,
927
left_matching_blocks, nostore_sha, random_id, check_content):
1113
def _add_lines(self, version_id, parents, lines, parent_texts):
928
1114
"""Add a version and save the weave."""
929
self.check_not_reserved_id(version_id)
930
1115
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
931
parent_texts, left_matching_blocks, nostore_sha, random_id,
1120
def _clone_text(self, new_version_id, old_version_id, parents):
1121
"""See VersionedFile.clone_text."""
1122
super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
936
1125
def copy_to(self, name, transport):
937
1126
"""See VersionedFile.copy_to()."""
938
1127
# as we are all in memory always, just serialise to the new place.
939
1128
sio = StringIO()
940
1129
write_weave_v5(self, sio)
942
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1131
transport.put(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1133
def create_empty(self, name, transport, filemode=None):
1134
return WeaveFile(name, transport, filemode, create=True)
944
1136
def _save(self):
945
1137
"""Save the weave."""
1231
1419
raise ValueError('unknown command %r' % cmd)
1423
def profile_main(argv):
1424
import tempfile, hotshot, hotshot.stats
1426
prof_f = tempfile.NamedTemporaryFile()
1428
prof = hotshot.Profile(prof_f.name)
1430
ret = prof.runcall(main, argv)
1433
stats = hotshot.stats.load(prof_f.name)
1435
stats.sort_stats('cumulative')
1436
## XXX: Might like to write to stderr or the trace file instead but
1437
## print_stats seems hardcoded to stdout
1438
stats.print_stats(20)
1443
def lsprofile_main(argv):
1444
from bzrlib.lsprof import profile
1445
ret,stats = profile(main, argv)
1234
1451
if __name__ == '__main__':
1236
sys.exit(main(sys.argv))
1453
if '--profile' in sys.argv:
1455
args.remove('--profile')
1456
sys.exit(profile_main(args))
1457
elif '--lsprof' in sys.argv:
1459
args.remove('--lsprof')
1460
sys.exit(lsprofile_main(args))
1462
sys.exit(main(sys.argv))
1465
class InterWeave(InterVersionedFile):
1466
"""Optimised code paths for weave to weave operations."""
1468
_matching_file_from_factory = staticmethod(WeaveFile)
1469
_matching_file_to_factory = staticmethod(WeaveFile)
1472
def is_compatible(source, target):
1473
"""Be compatible with weaves."""
1475
return (isinstance(source, Weave) and
1476
isinstance(target, Weave))
1477
except AttributeError:
1480
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1481
"""See InterVersionedFile.join."""
1482
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1483
if self.target.versions() == [] and version_ids is None:
1484
self.target._copy_weave_content(self.source)
1487
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1488
except errors.WeaveParentMismatch:
1489
self.target._reweave(self.source, pb, msg)
1492
InterVersionedFile.register_optimiser(InterWeave)