from copy import copy
from cStringIO import StringIO
import sys
import time

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import tsort
""")
from bzrlib import osutils
import bzrlib.errors as errors
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
        RevisionAlreadyPresent,
        RevisionNotPresent,
        UnavailableRepresentation,
        WeaveRevisionAlreadyPresent,
        WeaveRevisionNotPresent,
        )
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
import bzrlib.patiencediff
from bzrlib.revision import NULL_REVISION
from bzrlib.symbol_versioning import *
from bzrlib.trace import mutter
from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import (
    AbsentContentFactory,
    adapter_registry,
    ContentFactory,
    InterVersionedFile,
    VersionedFile,
    )
from bzrlib.weavefile import _read_weave_v5, write_weave_v5


class WeaveContentFactory(ContentFactory):
    """Content factory for streaming from weaves.

    :seealso ContentFactory:
    """

    def __init__(self, version, weave):
        """Create a WeaveContentFactory for version from weave."""
        ContentFactory.__init__(self)
        self.sha1 = weave.get_sha1s([version])[version]
        self.key = (version,)
        parents = weave.get_parent_map([version])[version]
        self.parents = tuple((parent,) for parent in parents)
        self.storage_kind = 'fulltext'
        self._weave = weave

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'fulltext':
            return self._weave.get_text(self.key[-1])
        elif storage_kind == 'chunked':
            return self._weave.get_lines(self.key[-1])
        else:
            raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
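

# Illustrative sketch only (not part of the original module): the attributes
# and representations a consumer can rely on from a WeaveContentFactory.
# The weave instance 'w' and the version id are assumptions for the example.
def _example_content_factory(w, version_id):
    factory = WeaveContentFactory(version_id, w)
    fulltext = factory.get_bytes_as('fulltext')   # whole text as one string
    lines = factory.get_bytes_as('chunked')       # same text as a list of lines
    return factory.sha1, factory.parents, fulltext, lines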


class Weave(VersionedFile):
    """weave - versioned text file storage.

    def __ne__(self, other):
        return not self.__eq__(other)

    def _idx_to_name(self, version):
        return self._names[version]

    def _lookup(self, name):
        """Convert symbolic version name to index."""
        if not self._allow_reserved:
            self.check_not_reserved_id(name)
        try:
            return self._name_map[name]
        except KeyError:
            raise RevisionNotPresent(name, self._weave_name)

    def versions(self):
        """See VersionedFile.versions."""
        return self._names[:]

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return (version_id in self._name_map)

    __contains__ = has_version

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        versions = [version[-1] for version in versions]
        if ordering == 'topological':
            parents = self.get_parent_map(versions)
            new_versions = tsort.topo_sort(parents)
            new_versions.extend(set(versions).difference(set(parents)))
            versions = new_versions
        for version in versions:
            if version in self:
                yield WeaveContentFactory(version, self)
            else:
                yield AbsentContentFactory((version,))
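
    # Illustrative sketch only (not part of the original class): one way a
    # caller might consume the stream, collecting full texts in topological
    # order and skipping versions this weave does not have.
    def _example_iter_fulltexts(self, version_ids):
        keys = [(v,) for v in version_ids]
        for record in self.get_record_stream(keys, 'topological', False):
            if record.storage_kind == 'absent':
                continue
            yield record.key[-1], record.get_bytes_as('fulltext')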

    def get_parent_map(self, version_ids):
        """See VersionedFile.get_parent_map."""
        result = {}
        for version_id in version_ids:
            if version_id == NULL_REVISION:
                parents = ()
            else:
                try:
                    parents = tuple(
                        map(self._idx_to_name,
                            self._parents[self._lookup(version_id)]))
                except RevisionNotPresent:
                    continue
            result[version_id] = parents
        return result
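
    # Illustrative sketch only: get_parent_map answers parent queries in bulk,
    # so ancestry can be walked iteratively. The starting version id is an
    # assumption made up for the example.
    def _example_ancestry(self, version_id):
        seen = set()
        pending = set([version_id])
        while pending:
            parent_map = self.get_parent_map(pending)
            seen.update(parent_map)
            pending = set()
            for parents in parent_map.itervalues():
                pending.update(p for p in parents if p not in seen)
        return seen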

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        return map(self._idx_to_name, self._parents[self._lookup(version_id)])

    def get_parents_with_ghosts(self, version_id):
        raise NotImplementedError(self.get_parents_with_ghosts)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :seealso VersionedFile.get_record_stream:
        """
        adapters = {}
        for record in stream:
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise RevisionNotPresent([record.key[0]], self)
            # adapt to non-tuple interface
            parents = [parent[0] for parent in record.parents]
            if (record.storage_kind == 'fulltext'
                or record.storage_kind == 'chunked'):
                self.add_lines(record.key[0], parents,
                    osutils.chunks_to_lines(record.get_bytes_as('chunked')))
            else:
                adapter_key = record.storage_kind, 'fulltext'
                try:
                    adapter = adapters[adapter_key]
                except KeyError:
                    adapter_factory = adapter_registry.get(adapter_key)
                    adapter = adapter_factory(self)
                    adapters[adapter_key] = adapter
                lines = split_lines(adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind)))
                try:
                    self.add_lines(record.key[0], parents, lines)
                except RevisionAlreadyPresent:
                    pass
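
    # Illustrative sketch only: insert_record_stream pairs with
    # get_record_stream to copy versions between two weaves; asking for
    # 'topological' ordering guarantees parents arrive before children, so
    # add_lines never sees a missing parent. 'source' is an assumed Weave.
    def _example_fetch_from(self, source):
        keys = [(v,) for v in source.versions() if not self.has_version(v)]
        self.insert_record_stream(
            source.get_record_stream(keys, 'topological', False))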

    def _check_repeated_add(self, name, parents, text, sha1):
        """Check that a duplicated add is OK.

        If it is, return the (old) index; otherwise raise an exception.
        """
        idx = self._lookup(name)
        if sorted(self._parents[idx]) != sorted(parents) \
            or sha1 != self._sha1s[idx]:
            raise RevisionAlreadyPresent(name, self._weave_name)
        return idx

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """See VersionedFile.add_lines."""
        idx = self._add(version_id, lines, map(self._lookup, parents),
            nostore_sha=nostore_sha)
        return sha_strings(lines), sum(map(len, lines)), idx
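
    # Illustrative sketch only: callers normally go through the public
    # add_lines API (VersionedFile routes it to _add_lines above). The
    # version ids and texts are assumptions made up for the example.
    def _example_add_two_versions(self):
        self.add_lines('rev-1', [], ['hello\n'])
        self.add_lines('rev-2', ['rev-1'], ['hello\n', 'world\n'])
        return self.get_text('rev-2')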

    def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
        """Add a single text on top of the weave.

        Returns the index number of the newly added version.

        # no lines outside of insertion blocks, that deletions are
        # properly paired, etc.

    def _join(self, other, pb, msg, version_ids, ignore_missing):
        """Worker routine for join()."""
        if not other.versions():
            return          # nothing to update, easy
        if not version_ids:
            # versions is never none, InterWeave checks this.
            return

        # two loops so that we do not change ourselves before verifying it
        # will be ok.
        # work through in index order to make sure we get all dependencies
        names_to_join = []
        processed = 0
        # get the selected versions only that are in other.versions.
        version_ids = set(other.versions()).intersection(set(version_ids))
        # pull in the referenced graph.
        version_ids = other.get_ancestry(version_ids)
        pending_graph = [(version, other.get_parents(version)) for
                         version in version_ids]
        for name in topo_sort(pending_graph):
            other_idx = other._name_map[name]
            # returns True if we have it, False if we need it.
            if not self._check_version_consistent(other, other_idx, name):
                names_to_join.append((other_idx, name))
            processed += 1

        merged = 0
        time0 = time.time()
        for other_idx, name in names_to_join:
            # TODO: If all the parents of the other version are already
            # present then we can avoid some work by just taking the delta
            # and adjusting the offsets.
            new_parents = self._imported_parents(other, other_idx)
            sha1 = other._sha1s[other_idx]
            merged += 1
            if pb:
                pb.update(msg, merged, len(names_to_join))
            lines = other.get_lines(other_idx)
            self._add(name, lines, new_parents, sha1)

        mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
                merged, processed, self._weave_name, time.time()-time0))
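
    # Illustrative sketch only: topo_sort takes (name, parents) pairs and
    # returns the names with every parent before its children, which is why
    # _join above can add the incoming versions in a single pass.
    def _example_join_order(self):
        graph = [('rev-1', []), ('rev-3', ['rev-2']), ('rev-2', ['rev-1'])]
        return topo_sort(graph)   # -> ['rev-1', 'rev-2', 'rev-3']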

    def _imported_parents(self, other, other_idx):
        """Return list of parents in self corresponding to indexes in other."""
        new_parents = []

            # new file, save it

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Add a version and save the weave."""
        self.check_not_reserved_id(version_id)
        result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
            parent_texts, left_matching_blocks, nostore_sha, random_id,
            check_content)
        self._save()
        return result

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # as we are all in memory always, just serialise to the new place.
        sio = StringIO()
        write_weave_v5(self, sio)
        sio.seek(0)
        transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
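
    # Illustrative sketch only: copy_to serialises the whole in-memory weave
    # through any bzrlib transport. The transport location and target name
    # are assumptions made up for the example.
    def _example_backup(self):
        from bzrlib.transport import get_transport
        self.copy_to('backup', get_transport('.'))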

    def _save(self):
        """Save the weave."""

        raise ValueError('unknown command %r' % cmd)


def profile_main(argv):
    import tempfile, hotshot, hotshot.stats

    prof_f = tempfile.NamedTemporaryFile()

    prof = hotshot.Profile(prof_f.name)

    ret = prof.runcall(main, argv)
    prof.close()

    stats = hotshot.stats.load(prof_f.name)
    stats.sort_stats('cumulative')
    ## XXX: Might like to write to stderr or the trace file instead but
    ## print_stats seems hardcoded to stdout
    stats.print_stats(20)

    return ret


def lsprofile_main(argv):
    from bzrlib.lsprof import profile
    ret, stats = profile(main, argv)
    return ret


if __name__ == '__main__':
    if '--profile' in sys.argv:
        args = sys.argv[:]
        args.remove('--profile')
        sys.exit(profile_main(args))
    elif '--lsprof' in sys.argv:
        args = sys.argv[:]
        args.remove('--lsprof')
        sys.exit(lsprofile_main(args))
    else:
        sys.exit(main(sys.argv))


class InterWeave(InterVersionedFile):
    """Optimised code paths for weave to weave operations."""

    _matching_file_from_factory = staticmethod(WeaveFile)
    _matching_file_to_factory = staticmethod(WeaveFile)

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves."""
        try:
            return (isinstance(source, Weave) and
                    isinstance(target, Weave))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if self.target.versions() == [] and version_ids is None:
            self.target._copy_weave_content(self.source)
            return
        try:
            self.target._join(self.source, pb, msg, version_ids, ignore_missing)
        except errors.WeaveParentMismatch:
            self.target._reweave(self.source, pb, msg)


InterVersionedFile.register_optimiser(InterWeave)
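

# Illustrative sketch only: with the optimiser registered, joining two weaves
# through the InterVersionedFile machinery dispatches to InterWeave.join.
# The weave arguments are assumptions made up for the example.
def _example_optimised_join(source_weave, target_weave):
    inter = InterVersionedFile.get(source_weave, target_weave)
    inter.join(version_ids=None, ignore_missing=False)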