from copy import copy
from cStringIO import StringIO
import time

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import tsort
""")
from bzrlib import (
    osutils,
    )
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
        RevisionAlreadyPresent,
        RevisionNotPresent,
        UnavailableRepresentation,
        WeaveRevisionAlreadyPresent,
        WeaveRevisionNotPresent,
        )
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
import bzrlib.patiencediff
from bzrlib.revision import NULL_REVISION
from bzrlib.symbol_versioning import (deprecated_method,
        zero_eight,
        )
from bzrlib.trace import mutter
from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import (
    AbsentContentFactory,
    adapter_registry,
    ContentFactory,
    InterVersionedFile,
    VersionedFile,
    )
from bzrlib.weavefile import _read_weave_v5, write_weave_v5


class WeaveContentFactory(ContentFactory):
    """Content factory for streaming from weaves.

    :seealso ContentFactory:
    """

    def __init__(self, version, weave):
        """Create a WeaveContentFactory for version from weave."""
        ContentFactory.__init__(self)
        self.sha1 = weave.get_sha1s([version])[version]
        self.key = (version,)
        parents = weave.get_parent_map([version])[version]
        self.parents = tuple((parent,) for parent in parents)
        self.storage_kind = 'fulltext'
        self._weave = weave

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'fulltext':
            return self._weave.get_text(self.key[-1])
        elif storage_kind == 'chunked':
            return self._weave.get_lines(self.key[-1])
        else:
            raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
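

# A hedged usage sketch (not part of the original module): assuming a Weave
# `w` that already stores a version 'v1', the factory hands back whichever
# representation is requested by storage kind; anything else raises
# UnavailableRepresentation.
#
#     factory = WeaveContentFactory('v1', w)
#     factory.get_bytes_as('fulltext')   # -> the full text as one string
#     factory.get_bytes_as('chunked')    # -> the text as a list of lines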


class Weave(VersionedFile):
    """weave - versioned text file storage."""

    __contains__ = has_version

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        versions = [version[-1] for version in versions]
        if ordering == 'topological':
            parents = self.get_parent_map(versions)
            new_versions = tsort.topo_sort(parents)
            new_versions.extend(set(versions).difference(set(parents)))
            versions = new_versions
        for version in versions:
            if version in self:
                yield WeaveContentFactory(version, self)
            else:
                yield AbsentContentFactory((version,))
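
    # A hedged usage sketch (not part of the original module): keys are
    # one-element tuples, and 'topological' ordering yields parents before
    # their children:
    #
    #     for record in w.get_record_stream([('v1',), ('v2',)],
    #                                       'topological', False):
    #         if record.storage_kind != 'absent':
    #             lines = record.get_bytes_as('chunked')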

    def get_parent_map(self, version_ids):
        """See VersionedFile.get_parent_map."""
        result = {}
        for version_id in version_ids:
            if version_id == NULL_REVISION:
                parents = ()
            else:
                try:
                    parents = tuple(
                        map(self._idx_to_name,
                            self._parents[self._lookup(version_id)]))
                except RevisionNotPresent:
                    continue
            result[version_id] = parents
        return result

    def get_delta(self, version_id):
        """See VersionedFile.get_delta."""
        return self.get_deltas([version_id])[version_id]

    def get_deltas(self, version_ids):
        """See VersionedFile.get_deltas."""
        version_ids = self.get_ancestry(version_ids)
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self)
        # try extracting all versions; parallel extraction is used
        nv = self.num_versions()
        sha1s = {}
        noeols = {}
        parents = {}
        inclusions = {}
        diff_hunks = {}
        current_hunks = {}
        parent_linenums = {}
        parent_noeols = {}
        last_parent_lines = {}
        parent_inclusions = {}
        # it's simplest to generate a full set of prepared variables.
        for i in range(nv):
            name = self._names[i]
            sha1s[name] = self.get_sha1(name)
            parents_list = self.get_parents(name)
            try:
                parent = parents_list[0]
                parents[name] = parent
                parent_inclusions[name] = inclusions[parent]
            except IndexError:
                parents[name] = None
                parent_inclusions[name] = set()
            # we want to emit start, finish, replacement_length,
            # replacement_lines tuples.
            diff_hunks[name] = []
            current_hunks[name] = [0, 0, 0, []]  # start, finish, repl_length, repl_tuples
            parent_linenums[name] = 0
            noeols[name] = False
            parent_noeols[name] = False
            last_parent_lines[name] = None
            new_inc = set([name])
            for p in self._parents[i]:
                new_inc.update(inclusions[self._idx_to_name(p)])
            # debug only, known good so far.
            #assert set(new_inc) == set(self.get_ancestry(name)), \
            #    'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
            inclusions[name] = new_inc

        nlines = len(self._weave)
        for lineno, inserted, deletes, line in self._walk_internal():
            # a line is active in a version if:
            # insert is in the version's inclusions
            # and
            # deleteset & the version's inclusions is an empty set.
            # so - if we have an included-by mapping - version is included by
            # children, we get a list of children to examine for deletes
            # affecting them, which is less than the entire set of children.
            for version_id in version_ids:
                # The active inclusion must be an ancestor,
                # and no ancestors must have deleted this line,
                # because we don't support resurrection.
                parent_inclusion = parent_inclusions[version_id]
                inclusion = inclusions[version_id]
                parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
                version_active = inserted in inclusion and not (deletes & inclusion)
                if not parent_active and not version_active:
                    # unrelated line of ancestry
                    continue
                elif parent_active and version_active:
                    # shared line
                    parent_linenum = parent_linenums[version_id]
                    if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
                        diff_hunks[version_id].append(tuple(current_hunks[version_id]))
                    parent_linenum += 1
                    current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
                    parent_linenums[version_id] = parent_linenum
                    if line[-1] != '\n':
                        noeols[version_id] = True
                elif parent_active and not version_active:
                    # deleted line
                    current_hunks[version_id][1] += 1
                    parent_linenums[version_id] += 1
                    last_parent_lines[version_id] = line
                elif not parent_active and version_active:
                    # replacement line
                    # noeol only occurs at the end of a file because we
                    # diff linewise. We want to show noeol changes as an
                    # empty diff unless the actual eol-less content changed.
                    theline = line
                    try:
                        if last_parent_lines[version_id][-1] != '\n':
                            parent_noeols[version_id] = True
                    except (TypeError, IndexError):
                        pass
                    try:
                        if theline[-1] != '\n':
                            noeols[version_id] = True
                    except IndexError:
                        pass
                    new_line = False
                    parent_should_go = False

                    if parent_noeols[version_id] == noeols[version_id]:
                        # no noeol toggle, so trust the weave's statement
                        # that this line is changed.
                        new_line = True
                        if parent_noeols[version_id]:
                            theline = theline + '\n'
                    elif parent_noeols[version_id]:
                        # parent has no eol, we do:
                        # our line is new, report as such..
                        new_line = True
                    elif noeols[version_id]:
                        # append an eol so that it looks like
                        # a normalised delta
                        theline = theline + '\n'
                        if parents[version_id] is not None:
                        #if last_parent_lines[version_id] is not None:
                            parent_should_go = True
                        if last_parent_lines[version_id] != theline:
                            # but changed anyway
                            new_line = True
                            # parent_should_go = False
                    if new_line:
                        current_hunks[version_id][2] += 1
                        current_hunks[version_id][3].append((inserted, theline))
                    if parent_should_go:
                        # last hunk's last parent line is not eaten
                        current_hunks[version_id][1] -= 1
                    if current_hunks[version_id][1] < 0:
                        current_hunks[version_id][1] = 0
                        # import pdb; pdb.set_trace()
                    # assert current_hunks[version_id][1] >= 0
        # the last hunk for each version is not emitted inside the loop above.
        for i in range(nv):
            version = self._idx_to_name(i)
            if current_hunks[version] != [0, 0, 0, []]:
                diff_hunks[version].append(tuple(current_hunks[version]))
        result = {}
        for version_id in version_ids:
            result[version_id] = (self._lookup(version_id),
                                  sha1s[version_id],
                                  noeols[version_id],
                                  diff_hunks[version_id],
                                  )
        return result
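
    # A hedged illustration (not part of the original module): as assembled
    # above, each get_deltas() entry maps a version id to
    # (version_index, sha1, noeol, diff_hunks), where every hunk is a
    # (start, finish, replacement_length, replacement_lines) tuple against
    # the parent text.  A single replaced first line might look like:
    #
    #     (0, 1, 1, [(insert_version_index, 'new first line\n')])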

    def get_parents_with_ghosts(self, version_id):
        raise NotImplementedError(self.get_parents_with_ghosts)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        adapters = {}
        for record in stream:
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise RevisionNotPresent([record.key[0]], self)
            # adapt to non-tuple interface
            parents = [parent[0] for parent in record.parents]
            if (record.storage_kind == 'fulltext'
                or record.storage_kind == 'chunked'):
                self.add_lines(record.key[0], parents,
                    osutils.chunks_to_lines(record.get_bytes_as('chunked')))
            else:
                adapter_key = record.storage_kind, 'fulltext'
                try:
                    adapter = adapters[adapter_key]
                except KeyError:
                    adapter_factory = adapter_registry.get(adapter_key)
                    adapter = adapter_factory(self)
                    adapters[adapter_key] = adapter
                lines = split_lines(adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind)))
                try:
                    self.add_lines(record.key[0], parents, lines)
                except RevisionAlreadyPresent:
                    pass
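
    # A hedged usage sketch (not part of the original module): copying every
    # version from one weave into another by chaining the two stream APIs:
    #
    #     source = Weave(weave_name='source')
    #     target = Weave(weave_name='target')
    #     source.add_lines('v1', [], ['hello\n'])
    #     stream = source.get_record_stream(
    #         [(v,) for v in source.versions()], 'topological', False)
    #     target.insert_record_stream(stream)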

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        return map(self._idx_to_name, self._parents[self._lookup(version_id)])

    def _check_repeated_add(self, name, parents, text, sha1):
        """Check that a duplicated add is OK.

        If it is, return the (old) index; otherwise raise an exception.
        """
        idx = self._lookup(name)
        if sorted(self._parents[idx]) != sorted(parents) \
            or sha1 != self._sha1s[idx]:
            raise RevisionAlreadyPresent(name, self._weave_name)
        return idx

    @deprecated_method(zero_eight)
    def add_identical(self, old_rev_id, new_rev_id, parents):
        """Please use Weave.clone_text now."""
        return self.clone_text(new_rev_id, old_rev_id, parents)

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """See VersionedFile.add_lines."""
        idx = self._add(version_id, lines, map(self._lookup, parents),
            nostore_sha=nostore_sha)
        return sha_strings(lines), sum(map(len, lines)), idx

    @deprecated_method(zero_eight)
    def add(self, name, parents, text, sha1=None):
        """See VersionedFile.add_lines for the non deprecated api."""
        return self._add(name, text, map(self._maybe_lookup, parents), sha1)

    def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
        """Add a single text on top of the weave.

        Returns the index number of the newly added version.
        """
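
        # A hedged illustration (not part of the original docstring): through
        # the public VersionedFile API this is reached as, e.g.
        #
        #     w.add_lines('v1', [], ['hello\n'])
        #     w.add_lines('v2', ['v1'], ['hello\n', 'world\n'])
        #
        # with _add_lines above returning (sha1, text_length, index).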

        # no lines outside of insertion blocks, that deletions are
        # properly paired, etc.
def _join(self, other, pb, msg, version_ids, ignore_missing):
988
"""Worker routine for join()."""
989
if not other.versions():
990
return # nothing to update, easy
993
# versions is never none, InterWeave checks this.
996
# two loops so that we do not change ourselves before verifying it
998
# work through in index order to make sure we get all dependencies
1001
# get the selected versions only that are in other.versions.
1002
version_ids = set(other.versions()).intersection(set(version_ids))
1003
# pull in the referenced graph.
1004
version_ids = other.get_ancestry(version_ids)
1005
pending_graph = [(version, other.get_parents(version)) for
1006
version in version_ids]
1007
for name in topo_sort(pending_graph):
1008
other_idx = other._name_map[name]
1009
# returns True if we have it, False if we need it.
1010
if not self._check_version_consistent(other, other_idx, name):
1011
names_to_join.append((other_idx, name))
1020
for other_idx, name in names_to_join:
1021
# TODO: If all the parents of the other version are already
1022
# present then we can avoid some work by just taking the delta
1023
# and adjusting the offsets.
1024
new_parents = self._imported_parents(other, other_idx)
1025
sha1 = other._sha1s[other_idx]
1030
pb.update(msg, merged, len(names_to_join))
1032
lines = other.get_lines(other_idx)
1033
self._add(name, lines, new_parents, sha1)
1035
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1036
merged, processed, self._weave_name, time.time()-time0))
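
    # A hedged usage sketch (not part of the original module): _join() is
    # normally reached through the public join() API, which delegates to
    # InterVersionedFile and supplies the progress bar, message and optional
    # version filter:
    #
    #     w1 = Weave(weave_name='a')
    #     w2 = Weave(weave_name='b')
    #     w2.add_lines('v1', [], ['hello\n'])
    #     w1.join(w2)        # pulls 'v1' and its ancestry into w1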

    def _imported_parents(self, other, other_idx):
        """Return list of parents in self corresponding to indexes in other."""
        new_parents = []