15
15
# You should have received a copy of the GNU General Public License
16
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
19
# Author: Martin Pool <mbp@canonical.com>
61
61
# where the basis and destination are unchanged.
63
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
66
66
# be done fairly efficiently because the sequence numbers constrain
67
67
# the possible relationships.
90
85
WeaveRevisionAlreadyPresent,
91
86
WeaveRevisionNotPresent,
93
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
88
import bzrlib.errors as errors
89
from bzrlib.osutils import sha_strings, split_lines
94
90
import bzrlib.patiencediff
95
from bzrlib.revision import NULL_REVISION
96
91
from bzrlib.symbol_versioning import *
97
92
from bzrlib.trace import mutter
93
from bzrlib.tsort import topo_sort
98
94
from bzrlib.versionedfile import (
99
95
AbsentContentFactory,
105
101
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
114
110
def __init__(self, version, weave):
115
111
"""Create a WeaveContentFactory for version from weave."""
116
112
ContentFactory.__init__(self)
117
self.sha1 = weave.get_sha1s([version])[version]
113
self.sha1 = weave.get_sha1s([version])[0]
118
114
self.key = (version,)
119
115
parents = weave.get_parent_map([version])[version]
120
116
self.parents = tuple((parent,) for parent in parents)
124
120
def get_bytes_as(self, storage_kind):
125
121
if storage_kind == 'fulltext':
126
return self._weave.get_text(self.key[-1])
127
elif storage_kind == 'chunked':
128
return self._weave.get_lines(self.key[-1])
122
return self._weave.get_text(self.key[0])
130
124
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
133
127
class Weave(VersionedFile):
134
128
"""weave - versioned text file storage.
136
130
A Weave manages versions of line-based text files, keeping track
137
131
of the originating version for each line.
185
179
* It doesn't seem very useful to have an active insertion
186
180
inside an inactive insertion, but it might happen.
188
182
* Therefore, all instructions are always"considered"; that
189
183
is passed onto and off the stack. An outer inactive block
190
184
doesn't disable an inner block.
222
216
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
223
'_weave_name', '_matcher', '_allow_reserved']
225
def __init__(self, weave_name=None, access_mode='w', matcher=None,
226
get_scope=None, allow_reserved=False):
217
'_weave_name', '_matcher']
219
def __init__(self, weave_name=None, access_mode='w', matcher=None, get_scope=None):
227
220
"""Create a weave.
229
222
:param get_scope: A callable that returns an opaque object to be used
230
223
for detecting when this weave goes out of scope (should stop
231
224
answering requests or allowing mutation).
233
super(Weave, self).__init__()
226
super(Weave, self).__init__(access_mode)
235
228
self._parents = []
316
307
:return: An iterator of ContentFactory objects, each of which is only
317
308
valid until the iterator is advanced.
319
versions = [version[-1] for version in versions]
320
310
if ordering == 'topological':
321
311
parents = self.get_parent_map(versions)
322
new_versions = tsort.topo_sort(parents)
323
new_versions.extend(set(versions).difference(set(parents)))
324
versions = new_versions
325
elif ordering == 'groupcompress':
326
parents = self.get_parent_map(versions)
327
new_versions = sort_groupcompress(parents)
312
new_versions = topo_sort(parents)
328
313
new_versions.extend(set(versions).difference(set(parents)))
329
314
versions = new_versions
330
315
for version in versions:
337
322
"""See VersionedFile.get_parent_map."""
339
324
for version_id in version_ids:
340
if version_id == NULL_REVISION:
345
map(self._idx_to_name,
346
self._parents[self._lookup(version_id)]))
347
except RevisionNotPresent:
349
result[version_id] = parents
326
result[version_id] = tuple(
327
map(self._idx_to_name, self._parents[self._lookup(version_id)]))
328
except RevisionNotPresent:
352
332
def get_parents_with_ghosts(self, version_id):
366
346
raise RevisionNotPresent([record.key[0]], self)
367
347
# adapt to non-tuple interface
368
348
parents = [parent[0] for parent in record.parents]
369
if (record.storage_kind == 'fulltext'
370
or record.storage_kind == 'chunked'):
349
if record.storage_kind == 'fulltext':
371
350
self.add_lines(record.key[0], parents,
372
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
351
split_lines(record.get_bytes_as('fulltext')))
374
353
adapter_key = record.storage_kind, 'fulltext'
378
357
adapter_factory = adapter_registry.get(adapter_key)
379
358
adapter = adapter_factory(self)
380
359
adapters[adapter_key] = adapter
381
lines = split_lines(adapter.get_bytes(record))
360
lines = split_lines(adapter.get_bytes(
361
record, record.get_bytes_as(record.storage_kind)))
383
363
self.add_lines(record.key[0], parents, lines)
384
364
except RevisionAlreadyPresent:
405
385
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
406
386
"""Add a single text on top of the weave.
408
388
Returns the index number of the newly added version.
411
391
Symbolic name for this version.
412
392
(Typically the revision-id of the revision that added it.)
413
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
416
395
List or set of direct parent version numbers.
419
398
Sequence of lines to be added in the new version.
426
405
sha1 = sha_strings(lines)
427
406
if sha1 == nostore_sha:
428
407
raise errors.ExistingContent
429
if version_id is None:
430
version_id = "sha1:" + sha1
431
408
if version_id in self._name_map:
432
409
return self._check_repeated_add(version_id, parents, lines, sha1)
564
541
def _compatible_parents(self, my_parents, other_parents):
565
542
"""During join check that other_parents are joinable with my_parents.
567
Joinable is defined as 'is a subset of' - supersets may require
544
Joinable is defined as 'is a subset of' - supersets may require
568
545
regeneration of diffs, but subsets do not.
570
547
return len(other_parents.difference(my_parents)) == 0
584
561
version_ids = self.versions()
585
562
version_ids = set(version_ids)
586
563
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
587
if inserted not in version_ids: continue
564
# if inserted not in version_ids then it was inserted before the
565
# versions we care about, but because weaves cannot represent ghosts
566
# properly, we do not filter down to that
567
# if inserted not in version_ids: continue
588
568
if line[-1] != '\n':
589
569
yield line + '\n', inserted
706
688
# we're still spending ~1/4 of the method in isinstance though.
707
689
# so lets hard code the acceptable string classes we expect:
708
690
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
709
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
691
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
711
693
# yay, down to ~1/4 the initial extract time, and our inline time
712
694
# has shrunk again, with isinstance no longer dominating.
713
695
# tweaking the stack inclusion test to use a set gives:
714
696
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
715
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
697
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
717
699
# - a 5% win, or possibly just noise. However with large istacks that
718
700
# 'in' test could dominate, so I'm leaving this change in place -
719
701
# when its fast enough to consider profiling big datasets we can review.
724
706
for l in self._weave:
725
707
if l.__class__ == tuple:
770
752
measured_sha1 = sha_strings(result)
771
753
if measured_sha1 != expected_sha1:
772
754
raise errors.WeaveInvalidChecksum(
773
'file %s, revision %s, expected: %s, measured %s'
755
'file %s, revision %s, expected: %s, measured %s'
774
756
% (self._weave_name, version_id,
775
757
expected_sha1, measured_sha1))
778
760
def get_sha1s(self, version_ids):
779
761
"""See VersionedFile.get_sha1s()."""
781
for v in version_ids:
782
result[v] = self._sha1s[self._lookup(v)]
762
return [self._sha1s[self._lookup(v)] for v in version_ids]
785
764
def num_versions(self):
786
765
"""How many versions are in this weave?"""
855
834
# no lines outside of insertion blocks, that deletions are
856
835
# properly paired, etc.
837
def _join(self, other, pb, msg, version_ids, ignore_missing):
838
"""Worker routine for join()."""
839
if not other.versions():
840
return # nothing to update, easy
843
# versions is never none, InterWeave checks this.
846
# two loops so that we do not change ourselves before verifying it
848
# work through in index order to make sure we get all dependencies
851
# get the selected versions only that are in other.versions.
852
version_ids = set(other.versions()).intersection(set(version_ids))
853
# pull in the referenced graph.
854
version_ids = other.get_ancestry(version_ids)
855
pending_parents = other.get_parent_map(version_ids)
856
pending_graph = pending_parents.items()
857
if len(pending_graph) != len(version_ids):
858
raise RevisionNotPresent(
859
set(version_ids) - set(pending_parents.keys()), self)
860
for name in topo_sort(pending_graph):
861
other_idx = other._name_map[name]
862
# returns True if we have it, False if we need it.
863
if not self._check_version_consistent(other, other_idx, name):
864
names_to_join.append((other_idx, name))
872
for other_idx, name in names_to_join:
873
# TODO: If all the parents of the other version are already
874
# present then we can avoid some work by just taking the delta
875
# and adjusting the offsets.
876
new_parents = self._imported_parents(other, other_idx)
877
sha1 = other._sha1s[other_idx]
882
pb.update(msg, merged, len(names_to_join))
884
lines = other.get_lines(other_idx)
885
self._add(name, lines, new_parents, sha1)
887
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
888
merged, processed, self._weave_name, time.time()-time0))
858
890
def _imported_parents(self, other, other_idx):
859
891
"""Return list of parents in self corresponding to indexes in other."""
862
894
parent_name = other._names[parent_idx]
863
895
if parent_name not in self._name_map:
864
896
# should not be possible
865
raise WeaveError("missing parent {%s} of {%s} in %r"
897
raise WeaveError("missing parent {%s} of {%s} in %r"
866
898
% (parent_name, other._name_map[other_idx], self))
867
899
new_parents.append(self._name_map[parent_name])
868
900
return new_parents
876
908
* the same direct parents (by name, not index, and disregarding
879
911
If present & correct return True;
880
if not present in self return False;
912
if not present in self return False;
881
913
if inconsistent raise error."""
882
914
this_idx = self._name_map.get(name, -1)
883
915
if this_idx != -1:
916
948
"""A WeaveFile represents a Weave on disk and writes on change."""
918
950
WEAVE_SUFFIX = '.weave'
920
952
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
921
953
"""Create a WeaveFile.
923
955
:param create: If not True, only open an existing knit.
925
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
926
allow_reserved=False)
957
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope)
927
958
self._transport = transport
928
959
self._filemode = filemode
959
990
write_weave_v5(self, sio)
961
bytes = sio.getvalue()
962
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
964
self._transport.put_bytes(path, bytes, self._filemode)
965
except errors.NoSuchFile:
966
self._transport.mkdir(dirname(path))
967
self._transport.put_bytes(path, bytes, self._filemode)
992
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
970
997
def get_suffixes():
975
1002
super(WeaveFile, self).insert_record_stream(stream)
1005
@deprecated_method(one_five)
1006
def join(self, other, pb=None, msg=None, version_ids=None,
1007
ignore_missing=False):
1008
"""Join other into self and save."""
1009
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
979
1013
def _reweave(wa, wb, pb=None, msg=None):
980
1014
"""Combine two weaves and return the result.
982
This works even if a revision R has different parents in
1016
This works even if a revision R has different parents in
983
1017
wa and wb. In the resulting weave all the parents are given.
985
This is done by just building up a new weave, maintaining ordering
1019
This is done by just building up a new weave, maintaining ordering
986
1020
of the versions in the two inputs. More efficient approaches
987
might be possible but it should only be necessary to do
988
this operation rarely, when a new previously ghost version is
1021
might be possible but it should only be necessary to do
1022
this operation rarely, when a new previously ghost version is
991
1025
:param pb: An optional progress bar, indicating how far done we are
999
1033
# map from version name -> all parent names
1000
1034
combined_parents = _reweave_parent_graphs(wa, wb)
1001
1035
mutter("combined parents: %r", combined_parents)
1002
order = tsort.topo_sort(combined_parents.iteritems())
1036
order = topo_sort(combined_parents.iteritems())
1003
1037
mutter("order to reweave: %r", order)
1005
1039
if pb and not msg:
1098
1132
Display origin of each line.
1099
1133
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1100
1134
Auto-merge two versions and display conflicts.
1101
weave diff WEAVEFILE VERSION1 VERSION2
1135
weave diff WEAVEFILE VERSION1 VERSION2
1102
1136
Show differences between two versions.
1233
1267
sys.stdout.writelines(w.weave_merge(p))
1235
1269
raise ValueError('unknown command %r' % cmd)
1238
1272
if __name__ == '__main__':
1240
1274
sys.exit(main(sys.argv))
1277
class InterWeave(InterVersionedFile):
1278
"""Optimised code paths for weave to weave operations."""
1280
_matching_file_from_factory = staticmethod(WeaveFile)
1281
_matching_file_to_factory = staticmethod(WeaveFile)
1284
def is_compatible(source, target):
1285
"""Be compatible with weaves."""
1287
return (isinstance(source, Weave) and
1288
isinstance(target, Weave))
1289
except AttributeError:
1292
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1293
"""See InterVersionedFile.join."""
1294
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1295
if self.target.versions() == [] and version_ids is None:
1296
self.target._copy_weave_content(self.source)
1298
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1301
InterVersionedFile.register_optimiser(InterWeave)