from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
        RevisionAlreadyPresent,
        RevisionNotPresent,
        UnavailableRepresentation,
        WeaveRevisionAlreadyPresent,
        WeaveRevisionNotPresent,
        )
import bzrlib.errors as errors
from bzrlib.osutils import dirname, sha_strings, split_lines
import bzrlib.patiencediff
from bzrlib.revision import NULL_REVISION
from bzrlib.symbol_versioning import *
from bzrlib.trace import mutter
from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import (
    AbsentContentFactory,
    adapter_registry,
    ContentFactory,
    InterVersionedFile,
    VersionedFile,
    )
from bzrlib.weavefile import _read_weave_v5, write_weave_v5


class WeaveContentFactory(ContentFactory):
    """Content factory for streaming from weaves.

    :seealso ContentFactory:
    """

    def __init__(self, version, weave):
        """Create a WeaveContentFactory for version from weave."""
        ContentFactory.__init__(self)
        self.sha1 = weave.get_sha1s([version])[version]
        self.key = (version,)
        parents = weave.get_parent_map([version])[version]
        self.parents = tuple((parent,) for parent in parents)
        self.storage_kind = 'fulltext'
        self._weave = weave

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'fulltext':
            return self._weave.get_text(self.key[-1])
        else:
            raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
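

# Note on WeaveContentFactory (descriptive comment, not from the original
# file): the factory records key, parents, sha1 and storage_kind up front,
# but the text itself is only read from the weave when
# get_bytes_as('fulltext') is called; any other storage_kind raises
# UnavailableRepresentation, because weaves only hold fulltexts.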


class Weave(VersionedFile):
    """weave - versioned text file storage."""

    def __ne__(self, other):
        return not self.__eq__(other)

    @deprecated_method(zero_eight)
    def idx_to_name(self, index):
        """Old public interface, the public interface is all names now."""
        return index

    def _idx_to_name(self, version):
        return self._names[version]

    @deprecated_method(zero_eight)
    def lookup(self, name):
        """Backwards compatibility thunk:

        Return name, as name is valid in the api now, and spew deprecation
        warnings everywhere.
        """
        return name

    def _lookup(self, name):
        """Convert symbolic version name to index."""
        if not self._allow_reserved:
            self.check_not_reserved_id(name)
        try:
            return self._name_map[name]
        except KeyError:
            raise RevisionNotPresent(name, self._weave_name)

    @deprecated_method(zero_eight)
    def iter_names(self):
        """Deprecated convenience function, please see VersionedFile.names()."""
        return iter(self.names())

    @deprecated_method(zero_eight)
    def names(self):
        """See Weave.versions for the current api."""
        return self.versions()

    def versions(self):
        """See VersionedFile.versions."""
        return self._names[:]

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return (version_id in self._name_map)

    __contains__ = has_version

    def get_delta(self, version_id):
        """See VersionedFile.get_delta."""
        return self.get_deltas([version_id])[version_id]

    def get_deltas(self, version_ids):
        """See VersionedFile.get_deltas."""
        version_ids = self.get_ancestry(version_ids)
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self)
        # try extracting all versions; parallel extraction is used
        nv = self.num_versions()
        sha1s = {}
        parents = {}
        inclusions = {}
        noeols = {}
        last_parent_lines = {}
        parent_inclusions = {}
        parent_linenums = {}
        parent_noeols = {}
        current_hunks = {}
        diff_hunks = {}
        # its simplest to generate a full set of prepared variables.
        for i in range(nv):
            name = self._names[i]
            sha1s[name] = self.get_sha1(name)
            parents_list = self.get_parents(name)
            try:
                parent = parents_list[0]
                parents[name] = parent
                parent_inclusions[name] = inclusions[parent]
            except IndexError:
                parents[name] = None
                parent_inclusions[name] = set()
            # we want to emit start, finish, replacement_length, replacement_lines tuples.
            diff_hunks[name] = []
            current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
            parent_linenums[name] = 0
            noeols[name] = False
            parent_noeols[name] = False
            last_parent_lines[name] = None
            new_inc = set([name])
            for p in self._parents[i]:
                new_inc.update(inclusions[self._idx_to_name(p)])
            # debug only, known good so far.
            #assert set(new_inc) == set(self.get_ancestry(name)), \
            #    'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
            inclusions[name] = new_inc

        nlines = len(self._weave)

        for lineno, inserted, deletes, line in self._walk_internal():
            # a line is active in a version if:
            # insert is in the versions inclusions
            # and
            # deleteset & the versions inclusions is an empty set.
            # so - if we have a included by mapping - version is included by
            # children, we get a list of children to examine for deletes affect
            # ing them, which is less than the entire set of children.
            for version_id in version_ids:
                # The active inclusion must be an ancestor,
                # and no ancestors must have deleted this line,
                # because we don't support resurrection.
                parent_inclusion = parent_inclusions[version_id]
                inclusion = inclusions[version_id]
                parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
                version_active = inserted in inclusion and not (deletes & inclusion)
                if not parent_active and not version_active:
                    # unrelated line of ancestry
                    continue
                elif parent_active and version_active:
                    # shared line
                    parent_linenum = parent_linenums[version_id]
                    if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
                        diff_hunks[version_id].append(tuple(current_hunks[version_id]))
                    parent_linenum += 1
                    current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
                    parent_linenums[version_id] = parent_linenum
                    if line[-1] != '\n':
                        noeols[version_id] = True
                    last_parent_lines[version_id] = line
                elif parent_active and not version_active:
                    # deleted line
                    current_hunks[version_id][1] += 1
                    parent_linenums[version_id] += 1
                    last_parent_lines[version_id] = line
                elif not parent_active and version_active:
                    # replacement line
                    # noeol only occurs at the end of a file because we
                    # diff linewise. We want to show noeol changes as a
                    # empty diff unless the actual eol-less content changed.
                    theline = line
                    try:
                        if last_parent_lines[version_id][-1] != '\n':
                            parent_noeols[version_id] = True
                    except (TypeError, IndexError):
                        pass
                    try:
                        if theline[-1] != '\n':
                            noeols[version_id] = True
                    except IndexError:
                        pass
                    new_line = False
                    parent_should_go = False

                    if parent_noeols[version_id] == noeols[version_id]:
                        # no noeol toggle, so trust the weaves statement
                        # that this line is changed.
                        new_line = True
                        if parent_noeols[version_id]:
                            theline = theline + '\n'
                    elif parent_noeols[version_id]:
                        # parent has no eol, we do:
                        # our line is new, report as such..
                        new_line = True
                    elif noeols[version_id]:
                        # append a eol so that it looks like
                        # a normally terminated line.
                        new_line = True
                        theline = theline + '\n'

                    if parents[version_id] is not None:
                        #if last_parent_lines[version_id] is not None:
                        parent_should_go = True
                        if last_parent_lines[version_id] != theline:
                            new_line = True
                        #parent_should_go = False

                    if new_line:
                        current_hunks[version_id][2] += 1
                        current_hunks[version_id][3].append((inserted, theline))
                    if parent_should_go:
                        # last hunk last parent line is not eaten
                        current_hunks[version_id][1] -= 1
                        if current_hunks[version_id][1] < 0:
                            current_hunks[version_id][1] = 0
                            # import pdb;pdb.set_trace()
                    # assert current_hunks[version_id][1] >= 0

        # flush any pending hunks
        for i in range(nv):
            version = self._idx_to_name(i)
            if current_hunks[version] != [0, 0, 0, []]:
                diff_hunks[version].append(tuple(current_hunks[version]))

        result = {}
        for version_id in version_ids:
            result[version_id] = (
                                  parents[version_id],
                                  sha1s[version_id],
                                  noeols[version_id],
                                  diff_hunks[version_id],
                                  )
        return result

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        versions = [version[-1] for version in versions]
        if ordering == 'topological':
            parents = self.get_parent_map(versions)
            new_versions = topo_sort(parents)
            new_versions.extend(set(versions).difference(set(parents)))
            versions = new_versions
        for version in versions:
            if version in self:
                yield WeaveContentFactory(version, self)
            else:
                yield AbsentContentFactory((version,))

    def get_parent_map(self, version_ids):
        """See VersionedFile.get_parent_map."""
        result = {}
        for version_id in version_ids:
            if version_id == NULL_REVISION:
                parents = ()
            else:
                try:
                    parents = tuple(
                        map(self._idx_to_name,
                            self._parents[self._lookup(version_id)]))
                except RevisionNotPresent:
                    continue
            result[version_id] = parents
        return result
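
    # Usage sketch (comments only, not from the original file; the version
    # ids 'v1' and 'v2' are hypothetical):
    #
    #     w.get_parent_map(['v2'])        # e.g. {'v2': ('v1',)}
    #     stream = w.get_record_stream([('v2',)], 'topological', False)
    #     for factory in stream:
    #         if factory.storage_kind != 'absent':
    #             text = factory.get_bytes_as('fulltext')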

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        return map(self._idx_to_name, self._parents[self._lookup(version_id)])

    def get_parents_with_ghosts(self, version_id):
        raise NotImplementedError(self.get_parents_with_ghosts)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        adapters = {}
        for record in stream:
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise RevisionNotPresent([record.key[0]], self)
            # adapt to non-tuple interface
            parents = [parent[0] for parent in record.parents]
            if record.storage_kind == 'fulltext':
                self.add_lines(record.key[0], parents,
                    split_lines(record.get_bytes_as('fulltext')))
            else:
                adapter_key = record.storage_kind, 'fulltext'
                try:
                    adapter = adapters[adapter_key]
                except KeyError:
                    adapter_factory = adapter_registry.get(adapter_key)
                    adapter = adapter_factory(self)
                    adapters[adapter_key] = adapter
                lines = split_lines(adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind)))
                try:
                    self.add_lines(record.key[0], parents, lines)
                except RevisionAlreadyPresent:
                    pass
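
    # Sketch of copying every text from one weave to another with the stream
    # API (comments only, not from the original file; 'source' and 'target'
    # are hypothetical Weave instances).  'topological' ordering guarantees
    # parents arrive before their children, which add_lines() requires:
    #
    #     target.insert_record_stream(source.get_record_stream(
    #         [(v,) for v in source.versions()], 'topological', False))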

    def _check_repeated_add(self, name, parents, text, sha1):
        """Check that a duplicated add is OK.

        If it is, return the (old) index; otherwise raise an exception.
        """
        idx = self._lookup(name)
        if sorted(self._parents[idx]) != sorted(parents) \
            or sha1 != self._sha1s[idx]:
            raise RevisionAlreadyPresent(name, self._weave_name)
        return idx

    @deprecated_method(zero_eight)
    def add_identical(self, old_rev_id, new_rev_id, parents):
        """Please use Weave.clone_text now."""
        return self.clone_text(new_rev_id, old_rev_id, parents)

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """See VersionedFile.add_lines."""
        idx = self._add(version_id, lines, map(self._lookup, parents),
            nostore_sha=nostore_sha)
        return sha_strings(lines), sum(map(len, lines)), idx

    @deprecated_method(zero_eight)
    def add(self, name, parents, text, sha1=None):
        """See VersionedFile.add_lines for the non deprecated api."""
        return self._add(name, text, map(self._maybe_lookup, parents), sha1)

    def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
        """Add a single text on top of the weave.

        Returns the index number of the newly added version.
        """
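
    # Hedged example of the public entry point these helpers back (comments
    # only; 'v1'/'v2' are hypothetical version ids).  add_lines() returns the
    # sha1 of the text, its length in bytes, and an opaque third value:
    #
    #     sha1, length, _ = w.add_lines('v1', [], ['hello\n'])
    #     w.add_lines('v2', ['v1'], ['hello\n', 'world\n'])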

        # no lines outside of insertion blocks, that deletions are
        # properly paired, etc.

    def _join(self, other, pb, msg, version_ids, ignore_missing):
        """Worker routine for join()."""
        if not other.versions():
            return          # nothing to update, easy
        if not version_ids:
            # versions is never none, InterWeave checks this.
            return 0
        # two loops so that we do not change ourselves before verifying it
        # will be ok.
        # work through in index order to make sure we get all dependencies
        names_to_join = []
        processed = 0
        # get the selected versions only that are in other.versions.
        version_ids = set(other.versions()).intersection(set(version_ids))
        # pull in the referenced graph.
        version_ids = other.get_ancestry(version_ids)
        pending_graph = [(version, other.get_parents(version)) for
                         version in version_ids]
        for name in topo_sort(pending_graph):
            other_idx = other._name_map[name]
            # returns True if we have it, False if we need it.
            if not self._check_version_consistent(other, other_idx, name):
                names_to_join.append((other_idx, name))
            processed += 1

        merged = 0
        time0 = time.time()
        for other_idx, name in names_to_join:
            # TODO: If all the parents of the other version are already
            # present then we can avoid some work by just taking the delta
            # and adjusting the offsets.
            new_parents = self._imported_parents(other, other_idx)
            sha1 = other._sha1s[other_idx]
            merged += 1
            if pb:
                pb.update(msg, merged, len(names_to_join))
            lines = other.get_lines(other_idx)
            self._add(name, lines, new_parents, sha1)

        mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
                merged, processed, self._weave_name, time.time()-time0))

    def _imported_parents(self, other, other_idx):
        """Return list of parents in self corresponding to indexes in other."""
        new_parents = []
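
    # _join copies versions in topo_sort order, so every parent named in
    # new_parents already exists locally by the time a child is added with
    # self._add(); _imported_parents only translates the other weave's
    # indexes into names known to this weave.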


class WeaveFile(Weave):
    """A WeaveFile represents a Weave on disk and writes on change."""

        # new file, save it

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Add a version and save the weave."""
        self.check_not_reserved_id(version_id)
        result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
            parent_texts, left_matching_blocks, nostore_sha, random_id,
            check_content)
        self.save()
        return result

    def _clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text."""
        super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
        self.save()

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # as we are all in memory always, just serialise to the new place.
        sio = StringIO()
        write_weave_v5(self, sio)
        sio.seek(0)
        transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)

    def create_empty(self, name, transport, filemode=None):
        return WeaveFile(name, transport, filemode, create=True)

    def _save(self):
        """Save the weave."""

        raise ValueError('unknown command %r' % cmd)


def profile_main(argv):
    import tempfile, hotshot, hotshot.stats

    prof_f = tempfile.NamedTemporaryFile()

    prof = hotshot.Profile(prof_f.name)

    ret = prof.runcall(main, argv)
    prof.close()

    stats = hotshot.stats.load(prof_f.name)
    stats.sort_stats('cumulative')
    ## XXX: Might like to write to stderr or the trace file instead but
    ## print_stats seems hardcoded to stdout
    stats.print_stats(20)

    return ret


def lsprofile_main(argv):
    from bzrlib.lsprof import profile
    ret, stats = profile(main, argv)
    stats.sort()
    stats.pprint()
    return ret


if __name__ == '__main__':
    if '--profile' in sys.argv:
        args = sys.argv[:]
        args.remove('--profile')
        sys.exit(profile_main(args))
    elif '--lsprof' in sys.argv:
        args = sys.argv[:]
        args.remove('--lsprof')
        sys.exit(lsprofile_main(args))
    else:
        sys.exit(main(sys.argv))
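
# Hypothetical invocation (not from the original file): running the module
# directly with --profile or --lsprof wraps the weave command in a profiler,
# e.g.
#
#     python bzrlib/weave.py --lsprof <command> <args>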


class InterWeave(InterVersionedFile):
    """Optimised code paths for weave to weave operations."""

    _matching_file_from_factory = staticmethod(WeaveFile)
    _matching_file_to_factory = staticmethod(WeaveFile)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves."""
        try:
            return (isinstance(source, Weave) and
                    isinstance(target, Weave))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if self.target.versions() == [] and version_ids is None:
            self.target._copy_weave_content(self.source)
            return
        try:
            self.target._join(self.source, pb, msg, version_ids, ignore_missing)
        except errors.WeaveParentMismatch:
            self.target._reweave(self.source, pb, msg)


InterVersionedFile.register_optimiser(InterWeave)
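
# Sketch of how the optimiser gets used (comments only; 'source' and 'target'
# are hypothetical Weave/WeaveFile instances, and the lookup assumes the
# usual InterObject.get() behaviour):
#
#     inter = InterVersionedFile.get(source, target)
#     inter.join()    # falls back to _reweave() on WeaveParentMismatch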