27
27
# created, but it's not for now.
28
28
ROOT_ID = "TREE_ROOT"
34
from bzrlib.lazy_import import lazy_import
35
lazy_import(globals(), """
37
from warnings import warn
49
from bzrlib.errors import (
53
from bzrlib.symbol_versioning import deprecated_method
40
from bzrlib import errors, osutils
41
from bzrlib.osutils import (pumpfile, quotefn, splitpath, joinpath,
42
pathjoin, sha_strings)
43
from bzrlib.errors import (NotVersionedError, InvalidEntryName,
44
BzrError, BzrCheckError, BinaryFile)
54
45
from bzrlib.trace import mutter
91
82
InventoryDirectory('123', 'src', parent_id='TREE_ROOT', revision=None)
92
83
>>> i.add(InventoryFile('2323', 'hello.c', parent_id='123'))
93
84
InventoryFile('2323', 'hello.c', parent_id='123', sha1=None, len=None)
94
>>> shouldbe = {0: '', 1: 'src', 2: 'src/hello.c'}
85
>>> shouldbe = {0: '', 1: 'src', 2: pathjoin('src','hello.c')}
95
86
>>> for ix, j in enumerate(i.iter_entries()):
96
87
... print (j[0] == shouldbe[ix], j[1])
98
(True, InventoryDirectory('TREE_ROOT', u'', parent_id=None, revision=None))
89
(True, InventoryDirectory('TREE_ROOT', '', parent_id=None, revision=None))
99
90
(True, InventoryDirectory('123', 'src', parent_id='TREE_ROOT', revision=None))
100
91
(True, InventoryFile('2323', 'hello.c', parent_id='123', sha1=None, len=None))
92
>>> i.add(InventoryFile('2323', 'bye.c', '123'))
93
Traceback (most recent call last):
95
BzrError: inventory already contains entry with id {2323}
101
96
>>> i.add(InventoryFile('2324', 'bye.c', '123'))
102
97
InventoryFile('2324', 'bye.c', parent_id='123', sha1=None, len=None)
103
98
>>> i.add(InventoryDirectory('2325', 'wibble', '123'))
142
138
return False, False
140
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
141
output_to, reverse=False):
142
"""Perform a diff from this to to_entry.
144
text_diff will be used for textual difference calculation.
145
This is a template method, override _diff in child classes.
147
self._read_tree_state(tree.id2path(self.file_id), tree)
149
# cannot diff from one kind to another - you must do a removal
150
# and an add if they do not match.
151
assert self.kind == to_entry.kind
152
to_entry._read_tree_state(to_tree.id2path(to_entry.file_id),
154
self._diff(text_diff, from_label, tree, to_label, to_entry, to_tree,
144
157
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
145
158
output_to, reverse=False):
146
159
"""Perform a diff between two entries of the same kind."""
148
def parent_candidates(self, previous_inventories):
149
"""Find possible per-file graph parents.
151
This is currently defined by:
152
- Select the last changed revision in the parent inventory.
153
- To deal with a short-lived bug in bzr 0.8's development, two entries
154
that have the same last changed but different 'x' bit settings are
161
def find_previous_heads(self, previous_inventories,
162
versioned_file_store,
165
"""Return the revisions and entries that directly precede this.
167
Returned as a map from revision to inventory entry.
169
This is a map containing the file revisions in all parents
170
for which the file exists, and its revision is not a parent of
171
any other. If the file is new, the set will be empty.
173
:param versioned_file_store: A store where ancestry data on this
174
file id can be queried.
175
:param transaction: The transaction that queries to the versioned
176
file store should be completed under.
177
:param entry_vf: The entry versioned file, if it's already available.
179
def get_ancestors(weave, entry):
180
return set(weave.get_ancestry(entry.revision))
157
181
# revision:ie mapping for each ie found in previous_inventories.
183
# revision:ie mapping with one revision for each head.
185
# revision: ancestor list for each head
159
187
# identify candidate head revision ids.
160
188
for inv in previous_inventories:
161
189
if self.file_id in inv:
162
190
ie = inv[self.file_id]
191
assert ie.file_id == self.file_id
163
192
if ie.revision in candidates:
164
193
# same revision value in two different inventories:
165
194
# correct possible inconsistencies:
171
200
ie.executable = False
172
201
except AttributeError:
203
# must now be the same.
204
assert candidates[ie.revision] == ie
175
206
# add this revision as a candidate.
176
207
candidates[ie.revision] = ie
209
# common case optimisation
210
if len(candidates) == 1:
211
# if there is only one candidate revision found
212
# then we can avoid opening the versioned file to access ancestry:
213
# there cannot be any ancestors to eliminate when there is
214
# only one revision available.
215
heads[ie.revision] = ie
218
# eliminate ancestors amongst the available candidates:
219
# heads are those that are not an ancestor of any other candidate
220
# - this provides convergence at a per-file level.
221
for ie in candidates.values():
222
# may be an ancestor of a known head:
223
already_present = 0 != len(
224
[head for head in heads
225
if ie.revision in head_ancestors[head]])
227
# an ancestor of an analyzed candidate.
229
# not an ancestor of a known head:
230
# load the versioned file for this file id if needed
232
entry_vf = versioned_file_store.get_weave_or_empty(
233
self.file_id, transaction)
234
ancestors = get_ancestors(entry_vf, ie)
235
# may knock something else out:
236
check_heads = list(heads.keys())
237
for head in check_heads:
238
if head in ancestors:
239
# this previously discovered 'head' is not
240
# really a head - it's an ancestor of the newly
243
head_ancestors[ie.revision] = ancestors
244
heads[ie.revision] = ie
179
247
def get_tar_item(self, root, dp, now, tree):
180
248
"""Get a tarfile item and a file stream for its content."""
181
item = tarfile.TarInfo(osutils.pathjoin(root, dp).encode('utf8'))
249
item = tarfile.TarInfo(pathjoin(root, dp))
182
250
# TODO: would be cool to actually set it to the timestamp of the
183
251
# revision it was last changed
407
def snapshot(self, revision, path, previous_entries,
408
work_tree, commit_builder):
409
"""Make a snapshot of this entry which may or may not have changed.
411
This means that all its fields are populated, that it has its
412
text stored in the text store or weave.
414
# mutter('new parents of %s are %r', path, previous_entries)
415
self._read_tree_state(path, work_tree)
416
# TODO: Where should we determine whether to reuse a
417
# previous revision id or create a new revision? 20060606
418
if len(previous_entries) == 1:
419
# cannot be unchanged unless there is only one parent file rev.
420
parent_ie = previous_entries.values()[0]
421
if self._unchanged(parent_ie):
422
# mutter("found unchanged entry")
423
self.revision = parent_ie.revision
425
return self._snapshot_into_revision(revision, previous_entries,
426
work_tree, commit_builder)
428
def _snapshot_into_revision(self, revision, previous_entries, work_tree,
430
"""Record this revision unconditionally into a store.
432
The entry's last-changed revision property (`revision`) is updated to
433
that of the new revision.
435
:param revision: id of the new revision that is being recorded.
437
:returns: String description of the commit (e.g. "merged", "modified"), etc.
439
# mutter('new revision {%s} for {%s}', revision, self.file_id)
440
self.revision = revision
441
self._snapshot_text(previous_entries, work_tree, commit_builder)
443
def _snapshot_text(self, file_parents, work_tree, commit_builder):
444
"""Record the 'text' of this entry, whatever form that takes.
446
This default implementation raises NotImplementedError; subclasses must override it.
448
raise NotImplementedError(self._snapshot_text)
341
450
def __eq__(self, other):
342
451
if not isinstance(other, InventoryEntry):
343
452
return NotImplemented
395
501
class RootEntry(InventoryEntry):
397
503
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
398
'text_id', 'parent_id', 'children', 'executable',
399
'revision', 'symlink_target', 'reference_revision']
504
'text_id', 'parent_id', 'children', 'executable',
505
'revision', 'symlink_target']
401
507
def _check(self, checker, rev_id, tree):
402
508
"""See InventoryEntry._check"""
424
530
"""A directory in an inventory."""
426
532
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
427
'text_id', 'parent_id', 'children', 'executable',
428
'revision', 'symlink_target', 'reference_revision']
533
'text_id', 'parent_id', 'children', 'executable',
534
'revision', 'symlink_target']
430
536
def _check(self, checker, rev_id, tree):
431
537
"""See InventoryEntry._check"""
462
568
"""See InventoryEntry._put_on_disk."""
463
569
os.mkdir(fullpath)
571
def _snapshot_text(self, file_parents, work_tree, commit_builder):
572
"""See InventoryEntry._snapshot_text."""
573
commit_builder.modified_directory(self.file_id, file_parents)
466
576
class InventoryFile(InventoryEntry):
467
577
"""A file in an inventory."""
469
579
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
470
'text_id', 'parent_id', 'children', 'executable',
471
'revision', 'symlink_target', 'reference_revision']
580
'text_id', 'parent_id', 'children', 'executable',
581
'revision', 'symlink_target']
473
583
def _check(self, checker, tree_revision_id, tree):
474
584
"""See InventoryEntry._check"""
476
586
if t in checker.checked_texts:
477
587
prev_sha = checker.checked_texts[t]
478
588
if prev_sha != self.text_sha1:
480
'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
481
(self.file_id, tree_revision_id, prev_sha, self.text_sha1,
589
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
590
(self.file_id, tree_revision_id))
484
592
checker.repeated_text_cnt += 1
487
595
if self.file_id not in checker.checked_weaves:
488
596
mutter('check weave {%s}', self.file_id)
489
w = tree._get_weave(self.file_id)
597
w = tree.get_weave(self.file_id)
490
598
# Not passing a progress bar, because it creates a new
491
599
# progress, which overwrites the current progress,
492
600
# and doesn't look nice
494
602
checker.checked_weaves[self.file_id] = True
496
w = tree._get_weave(self.file_id)
604
w = tree.get_weave(self.file_id)
498
606
mutter('check version {%s} of {%s}', tree_revision_id, self.file_id)
499
607
checker.checked_text_cnt += 1
500
608
# We can't check the length, because Weave doesn't store that
501
609
# information, and the whole point of looking at the weave's
502
610
# sha1sum is that we don't have to extract the text.
503
if self.text_sha1 != w.get_sha1s([self.revision])[0]:
611
if self.text_sha1 != w.get_sha1(self.revision):
504
612
raise BzrCheckError('text {%s} version {%s} wrong sha1'
505
613
% (self.file_id, self.revision))
506
614
checker.checked_texts[t] = self.text_sha1
523
633
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
524
634
output_to, reverse=False):
525
635
"""See InventoryEntry._diff."""
526
from bzrlib.diff import DiffText
527
from_file_id = self.file_id
529
to_file_id = to_entry.file_id
533
to_file_id, from_file_id = from_file_id, to_file_id
534
tree, to_tree = to_tree, tree
535
from_label, to_label = to_label, from_label
536
differ = DiffText(tree, to_tree, output_to, 'utf-8', '', '',
538
return differ.diff_text(from_file_id, to_file_id, from_label, to_label)
637
from_text = tree.get_file(self.file_id).readlines()
639
to_text = to_tree.get_file(to_entry.file_id).readlines()
643
text_diff(from_label, from_text,
644
to_label, to_text, output_to)
646
text_diff(to_label, to_text,
647
from_label, from_text, output_to)
650
label_pair = (to_label, from_label)
652
label_pair = (from_label, to_label)
653
print >> output_to, "Binary files %s and %s differ" % label_pair
540
655
def has_text(self):
541
656
"""See InventoryEntry.has_text."""
585
700
def _forget_tree_state(self):
586
701
self.text_sha1 = None
703
def _snapshot_text(self, file_parents, work_tree, commit_builder):
704
"""See InventoryEntry._snapshot_text."""
705
def get_content_byte_lines():
706
return work_tree.get_file(self.file_id).readlines()
707
self.text_sha1, self.text_size = commit_builder.modified_file_text(
708
self.file_id, file_parents, get_content_byte_lines, self.text_sha1, self.text_size)
588
710
def _unchanged(self, previous_ie):
589
711
"""See InventoryEntry._unchanged."""
590
712
compatible = super(InventoryFile, self)._unchanged(previous_ie)
603
725
"""A file in an inventory."""
605
727
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
606
'text_id', 'parent_id', 'children', 'executable',
607
'revision', 'symlink_target', 'reference_revision']
728
'text_id', 'parent_id', 'children', 'executable',
729
'revision', 'symlink_target']
609
731
def _check(self, checker, rev_id, tree):
610
732
"""See InventoryEntry._check"""
633
755
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
634
756
output_to, reverse=False):
635
757
"""See InventoryEntry._diff."""
636
from bzrlib.diff import DiffSymlink
637
old_target = self.symlink_target
758
from_text = self.symlink_target
638
759
if to_entry is not None:
639
new_target = to_entry.symlink_target
648
new_target, old_target = old_target, new_target
649
differ = DiffSymlink(old_tree, new_tree, output_to)
650
return differ.diff_symlink(old_target, new_target)
760
to_text = to_entry.symlink_target
765
print >>output_to, '=== target changed %r => %r' % (from_text, to_text)
768
print >>output_to, '=== target was %r' % self.symlink_target
770
print >>output_to, '=== target is %r' % self.symlink_target
652
772
def __init__(self, file_id, name, parent_id):
653
773
super(InventoryLink, self).__init__(file_id, name, parent_id)
687
807
compatible = False
688
808
return compatible
691
class TreeReference(InventoryEntry):
693
kind = 'tree-reference'
695
def __init__(self, file_id, name, parent_id, revision=None,
696
reference_revision=None):
697
InventoryEntry.__init__(self, file_id, name, parent_id)
698
self.revision = revision
699
self.reference_revision = reference_revision
702
return TreeReference(self.file_id, self.name, self.parent_id,
703
self.revision, self.reference_revision)
705
def _read_tree_state(self, path, work_tree):
706
"""Populate fields in the inventory entry from the given tree.
708
self.reference_revision = work_tree.get_reference_revision(
711
def _forget_tree_state(self):
712
self.reference_revision = None
714
def _unchanged(self, previous_ie):
715
"""See InventoryEntry._unchanged."""
716
compatible = super(TreeReference, self)._unchanged(previous_ie)
717
if self.reference_revision != previous_ie.reference_revision:
810
def _snapshot_text(self, file_parents, work_tree, commit_builder):
811
"""See InventoryEntry._snapshot_text."""
812
commit_builder.modified_link(
813
self.file_id, file_parents, self.symlink_target)
722
816
class Inventory(object):
770
861
The inventory is created with a default root directory, with
864
# We are letting Branch.create() create a unique inventory
865
# root id. Rather than generating a random one here.
867
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
773
868
if root_id is not None:
774
self._set_root(InventoryDirectory(root_id, u'', None))
869
self._set_root(InventoryDirectory(root_id, '', None))
873
# FIXME: this isn't ever used, changing it to self.revision may break
874
# things. TODO make everything use self.revision_id
778
875
self.revision_id = revision_id
781
return "<Inventory object at %x, contents=%r>" % (id(self), self._byid)
783
def apply_delta(self, delta):
784
"""Apply a delta to this inventory.
786
:param delta: A list of changes to apply. After all the changes are
787
applied the final inventory must be internally consistent, but it
788
is ok to supply changes which, if only half-applied would have an
789
invalid result - such as supplying two changes which rename two
790
files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
791
('B', 'A', 'B-id', b_entry)].
793
Each change is a tuple, of the form (old_path, new_path, file_id,
796
When new_path is None, the change indicates the removal of an entry
797
from the inventory and new_entry will be ignored (using None is
798
appropriate). If new_path is not None, then new_entry must be an
799
InventoryEntry instance, which will be incorporated into the
800
inventory (and replace any existing entry with the same file id).
802
When old_path is None, the change indicates the addition of
803
a new entry to the inventory.
805
When neither new_path nor old_path are None, the change is a
806
modification to an entry, such as a rename, reparent, kind change
809
The children attribute of new_entry is ignored. This is because
810
this method preserves children automatically across alterations to
811
the parent of the children, and cases where the parent id of a
812
child is changing require the child to be passed in as a separate
813
change regardless. E.g. in the recursive deletion of a directory -
814
the directory's children must be included in the delta, or the
815
final inventory will be invalid.
818
# Remove all affected items which were in the original inventory,
819
# starting with the longest paths, thus ensuring parents are examined
820
# after their children, which means that everything we examine has no
821
# modified children remaining by the time we examine it.
822
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
823
if op is not None), reverse=True):
824
if file_id not in self:
827
# Preserve unaltered children of file_id for later reinsertion.
828
children[file_id] = getattr(self[file_id], 'children', {})
829
# Remove file_id and the unaltered children. If file_id is not
830
# being deleted it will be reinserted back later.
831
self.remove_recursive_id(file_id)
832
# Insert all affected which should be in the new inventory, reattaching
833
# their children if they had any. This is done from shortest path to
834
# longest, ensuring that items which were modified and whose parents in
835
# the resulting inventory were also modified, are inserted after their
837
for new_path, new_entry in sorted((np, e) for op, np, f, e in
838
delta if np is not None):
839
if new_entry.kind == 'directory':
840
new_entry.children = children.get(new_entry.file_id, {})
843
877
def _set_root(self, ie):
845
879
self._byid = {self.root.file_id: self.root}
916
946
lexicographically sorted order, and is a hybrid between
917
947
depth-first and breadth-first.
919
:param yield_parents: If True, yield the parents from the root leading
920
down to specific_file_ids that have been requested. This has no
921
impact if specific_file_ids is None.
922
949
:return: This yields (path, entry) pairs
924
if specific_file_ids and not isinstance(specific_file_ids, set):
925
specific_file_ids = set(specific_file_ids)
926
951
# TODO? Perhaps this should return the from_dir so that the root is
927
952
# yielded? or maybe an option?
928
953
if from_dir is None:
929
if self.root is None:
931
# Optimize a common case
932
if (not yield_parents and specific_file_ids is not None and
933
len(specific_file_ids) == 1):
934
file_id = list(specific_file_ids)[0]
936
yield self.id2path(file_id), self[file_id]
938
955
from_dir = self.root
939
if (specific_file_ids is None or yield_parents or
940
self.root.file_id in specific_file_ids):
942
957
elif isinstance(from_dir, basestring):
943
958
from_dir = self._byid[from_dir]
945
if specific_file_ids is not None:
946
# TODO: jam 20070302 This could really be done as a loop rather
947
# than a bunch of recursive calls.
950
def add_ancestors(file_id):
951
if file_id not in byid:
953
parent_id = byid[file_id].parent_id
954
if parent_id is None:
956
if parent_id not in parents:
957
parents.add(parent_id)
958
add_ancestors(parent_id)
959
for file_id in specific_file_ids:
960
add_ancestors(file_id)
964
960
stack = [(u'', from_dir)]
971
967
child_relpath = cur_relpath + child_name
973
if (specific_file_ids is None or
974
child_ie.file_id in specific_file_ids or
975
(yield_parents and child_ie.file_id in parents)):
976
yield child_relpath, child_ie
969
yield child_relpath, child_ie
978
971
if child_ie.kind == 'directory':
979
if parents is None or child_ie.file_id in parents:
980
child_dirs.append((child_relpath+'/', child_ie))
972
child_dirs.append((child_relpath+'/', child_ie))
981
973
stack.extend(reversed(child_dirs))
983
def make_entry(self, kind, name, parent_id, file_id=None):
984
"""Simple thunk to bzrlib.inventory.make_entry."""
985
return make_entry(kind, name, parent_id, file_id)
987
975
def entries(self):
988
976
"""Return list of (path, ie) for all entries except the root.
1071
1051
Returns the new entry object.
1073
1053
if entry.file_id in self._byid:
1074
raise errors.DuplicateFileId(entry.file_id,
1075
self._byid[entry.file_id])
1054
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
1077
1056
if entry.parent_id is None:
1081
parent = self._byid[entry.parent_id]
1083
raise BzrError("parent_id {%s} not in inventory" %
1086
if entry.name in parent.children:
1087
raise BzrError("%s is already versioned" %
1088
osutils.pathjoin(self.id2path(parent.file_id),
1089
entry.name).encode('utf-8'))
1090
parent.children[entry.name] = entry
1091
return self._add_child(entry)
1057
assert self.root is None and len(self._byid) == 0
1058
self._set_root(entry)
1060
if entry.parent_id == ROOT_ID:
1061
assert self.root is not None, self
1062
entry.parent_id = self.root.file_id
1065
parent = self._byid[entry.parent_id]
1067
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
1069
if entry.name in parent.children:
1070
raise BzrError("%s is already versioned" %
1071
pathjoin(self.id2path(parent.file_id), entry.name))
1073
self._byid[entry.file_id] = entry
1074
parent.children[entry.name] = entry
1093
1077
def add_path(self, relpath, kind, file_id=None, parent_id=None):
1094
1078
"""Add entry from a path.
1230
1215
return bool(self.path2id(names))
1232
1217
def has_id(self, file_id):
1233
return (file_id in self._byid)
1235
def remove_recursive_id(self, file_id):
1236
"""Remove file_id, and children, from the inventory.
1238
:param file_id: A file_id to remove.
1240
to_find_delete = [self._byid[file_id]]
1242
while to_find_delete:
1243
ie = to_find_delete.pop()
1244
to_delete.append(ie.file_id)
1245
if ie.kind == 'directory':
1246
to_find_delete.extend(ie.children.values())
1247
for file_id in reversed(to_delete):
1249
del self._byid[file_id]
1250
if ie.parent_id is not None:
1251
del self[ie.parent_id].children[ie.name]
1218
return self._byid.has_key(file_id)
1255
1220
def rename(self, file_id, new_parent_id, new_name):
1256
1221
"""Move a file within the inventory.
1258
1223
This can change either the name, or the parent, or both.
1260
This does not move the working file.
1262
new_name = ensure_normalized_name(new_name)
1225
This does not move the working file."""
1263
1226
if not is_valid_name(new_name):
1264
1227
raise BzrError("not an acceptable filename: %r" % new_name)
1303
1256
:param file_id: the file_id to use. if None, one will be created.
1305
1258
if file_id is None:
1306
file_id = generate_ids.gen_file_id(name)
1307
name = ensure_normalized_name(name)
1309
factory = entry_factory[kind]
1311
raise BzrError("unknown kind %r" % kind)
1312
return factory(file_id, name, parent_id)
1315
def ensure_normalized_name(name):
1318
:raises InvalidNormalization: When name is not normalized, and cannot be
1319
accessed on this platform by the normalized path.
1320
:return: The NFC normalised version of name.
1322
#------- This has been copied to bzrlib.dirstate.DirState.add, please
1323
# keep them synchronised.
1324
# we don't import normalized_filename directly because we want to be
1325
# able to change the implementation at runtime for tests.
1259
file_id = bzrlib.workingtree.gen_file_id(name)
1326
1261
norm_name, can_access = osutils.normalized_filename(name)
1327
1262
if norm_name != name:
1331
1266
# TODO: jam 20060701 This would probably be more useful
1332
1267
# if the error was raised with the full path
1333
1268
raise errors.InvalidNormalization(name)
1270
if kind == 'directory':
1271
return InventoryDirectory(file_id, name, parent_id)
1272
elif kind == 'file':
1273
return InventoryFile(file_id, name, parent_id)
1274
elif kind == 'symlink':
1275
return InventoryLink(file_id, name, parent_id)
1277
raise BzrError("unknown kind %r" % kind)
1337
1280
_NAME_RE = None