90
78
>>> i.add(InventoryDirectory('123', 'src', ROOT_ID))
91
InventoryDirectory('123', 'src', parent_id='TREE_ROOT', revision=None)
79
InventoryDirectory('123', 'src', parent_id='TREE_ROOT')
92
80
>>> i.add(InventoryFile('2323', 'hello.c', parent_id='123'))
93
InventoryFile('2323', 'hello.c', parent_id='123', sha1=None, len=None)
94
>>> shouldbe = {0: '', 1: 'src', 2: 'src/hello.c'}
81
InventoryFile('2323', 'hello.c', parent_id='123')
82
>>> shouldbe = {0: 'src', 1: pathjoin('src','hello.c')}
95
83
>>> for ix, j in enumerate(i.iter_entries()):
96
84
... print (j[0] == shouldbe[ix], j[1])
98
(True, InventoryDirectory('TREE_ROOT', u'', parent_id=None, revision=None))
99
(True, InventoryDirectory('123', 'src', parent_id='TREE_ROOT', revision=None))
100
(True, InventoryFile('2323', 'hello.c', parent_id='123', sha1=None, len=None))
86
(True, InventoryDirectory('123', 'src', parent_id='TREE_ROOT'))
87
(True, InventoryFile('2323', 'hello.c', parent_id='123'))
88
>>> i.add(InventoryFile('2323', 'bye.c', '123'))
89
Traceback (most recent call last):
91
BzrError: inventory already contains entry with id {2323}
101
92
>>> i.add(InventoryFile('2324', 'bye.c', '123'))
102
InventoryFile('2324', 'bye.c', parent_id='123', sha1=None, len=None)
93
InventoryFile('2324', 'bye.c', parent_id='123')
103
94
>>> i.add(InventoryDirectory('2325', 'wibble', '123'))
104
InventoryDirectory('2325', 'wibble', parent_id='123', revision=None)
95
InventoryDirectory('2325', 'wibble', parent_id='123')
105
96
>>> i.path2id('src/wibble')
109
100
>>> i.add(InventoryFile('2326', 'wibble.c', '2325'))
110
InventoryFile('2326', 'wibble.c', parent_id='2325', sha1=None, len=None)
101
InventoryFile('2326', 'wibble.c', parent_id='2325')
112
InventoryFile('2326', 'wibble.c', parent_id='2325', sha1=None, len=None)
103
InventoryFile('2326', 'wibble.c', parent_id='2325')
113
104
>>> for path, entry in i.iter_entries():
106
... assert i.path2id(path)
142
141
return False, False
143
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
144
output_to, reverse=False):
145
"""Perform a diff from this to to_entry.
147
text_diff will be used for textual difference calculation.
148
This is a template method, override _diff in child classes.
150
self._read_tree_state(tree.id2path(self.file_id), tree)
152
# cannot diff from one kind to another - you must do a removal
153
# and an add if they do not match.
154
assert self.kind == to_entry.kind
155
to_entry._read_tree_state(to_tree.id2path(to_entry.file_id),
157
self._diff(text_diff, from_label, tree, to_label, to_entry, to_tree,
144
160
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
145
161
output_to, reverse=False):
146
162
"""Perform a diff between two entries of the same kind."""
148
def parent_candidates(self, previous_inventories):
149
"""Find possible per-file graph parents.
151
This is currently defined by:
152
- Select the last changed revision in the parent inventory.
153
- To deal with a short lived bug in bzr 0.8's development, two entries
154
that have the same last changed but different 'x' bit settings are
164
def find_previous_heads(self, previous_inventories,
165
versioned_file_store,
168
"""Return the revisions and entries that directly preceed this.
170
Returned as a map from revision to inventory entry.
172
This is a map containing the file revisions in all parents
173
for which the file exists, and its revision is not a parent of
174
any other. If the file is new, the set will be empty.
176
:param versioned_file_store: A store where ancestry data on this
177
file id can be queried.
178
:param transaction: The transaction that queries to the versioned
179
file store should be completed under.
180
:param entry_vf: The entry versioned file, if it's already available.
182
def get_ancestors(weave, entry):
183
return set(weave.get_ancestry(entry.revision))
157
184
# revision:ie mapping for each ie found in previous_inventories.
186
# revision:ie mapping with one revision for each head.
188
# revision: ancestor list for each head
159
190
# identify candidate head revision ids.
160
191
for inv in previous_inventories:
161
192
if self.file_id in inv:
162
193
ie = inv[self.file_id]
194
assert ie.file_id == self.file_id
163
195
if ie.revision in candidates:
164
196
# same revision value in two different inventories:
165
197
# correct possible inconsistencies:
171
203
ie.executable = False
172
204
except AttributeError:
206
# must now be the same.
207
assert candidates[ie.revision] == ie
175
209
# add this revision as a candidate.
176
210
candidates[ie.revision] = ie
179
@deprecated_method(deprecated_in((1, 6, 0)))
212
# common case optimisation
213
if len(candidates) == 1:
214
# if there is only one candidate revision found
215
# then we can avoid opening the versioned file to access ancestry:
216
# there cannot be any ancestors to eliminate when there is
217
# only one revision available.
218
heads[ie.revision] = ie
221
# eliminate ancestors amongst the available candidates:
222
# heads are those that are not an ancestor of any other candidate
223
# - this provides convergence at a per-file level.
224
for ie in candidates.values():
225
# may be an ancestor of a known head:
226
already_present = 0 != len(
227
[head for head in heads
228
if ie.revision in head_ancestors[head]])
230
# an ancestor of an analyzed candidate.
232
# not an ancestor of a known head:
233
# load the versioned file for this file id if needed
235
entry_vf = versioned_file_store.get_weave_or_empty(
236
self.file_id, transaction)
237
ancestors = get_ancestors(entry_vf, ie)
238
# may knock something else out:
239
check_heads = list(heads.keys())
240
for head in check_heads:
241
if head in ancestors:
242
# this previously discovered 'head' is not
243
# really a head - it's an ancestor of the newly
246
head_ancestors[ie.revision] = ancestors
247
heads[ie.revision] = ie
180
250
def get_tar_item(self, root, dp, now, tree):
181
251
"""Get a tarfile item and a file stream for its content."""
182
item = tarfile.TarInfo(osutils.pathjoin(root, dp).encode('utf8'))
252
item = tarfile.TarInfo(pathjoin(root, dp))
183
253
# TODO: would be cool to actually set it to the timestamp of the
184
254
# revision it was last changed
333
402
return 'unchanged'
335
404
def __repr__(self):
336
return ("%s(%r, %r, parent_id=%r, revision=%r)"
405
return ("%s(%r, %r, parent_id=%r)"
337
406
% (self.__class__.__name__,
411
def snapshot(self, revision, path, previous_entries,
412
work_tree, weave_store, transaction):
413
"""Make a snapshot of this entry which may or may not have changed.
415
This means that all its fields are populated, that it has its
416
text stored in the text store or weave.
418
mutter('new parents of %s are %r', path, previous_entries)
419
self._read_tree_state(path, work_tree)
420
if len(previous_entries) == 1:
421
# cannot be unchanged unless there is only one parent file rev.
422
parent_ie = previous_entries.values()[0]
423
if self._unchanged(parent_ie):
424
mutter("found unchanged entry")
425
self.revision = parent_ie.revision
427
return self._snapshot_into_revision(revision, previous_entries,
428
work_tree, weave_store, transaction)
430
def _snapshot_into_revision(self, revision, previous_entries, work_tree,
431
weave_store, transaction):
432
"""Record this revision unconditionally into a store.
434
The entry's last-changed revision property (`revision`) is updated to
435
that of the new revision.
437
:param revision: id of the new revision that is being recorded.
439
:returns: String description of the commit (e.g. "merged", "modified"), etc.
441
mutter('new revision {%s} for {%s}', revision, self.file_id)
442
self.revision = revision
443
self._snapshot_text(previous_entries, work_tree, weave_store,
446
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
447
"""Record the 'text' of this entry, whatever form that takes.
449
This default implementation simply adds an empty text.
451
mutter('storing file {%s} in revision {%s}',
452
self.file_id, self.revision)
453
self._add_text_to_weave([], file_parents.keys(), weave_store, transaction)
343
455
def __eq__(self, other):
344
456
if not isinstance(other, InventoryEntry):
468
565
class InventoryFile(InventoryEntry):
469
566
"""A file in an inventory."""
471
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
472
'text_id', 'parent_id', 'children', 'executable',
473
'revision', 'symlink_target', 'reference_revision']
475
568
def _check(self, checker, tree_revision_id, tree):
476
569
"""See InventoryEntry._check"""
477
key = (self.file_id, self.revision)
478
if key in checker.checked_texts:
479
prev_sha = checker.checked_texts[key]
570
t = (self.file_id, self.revision)
571
if t in checker.checked_texts:
572
prev_sha = checker.checked_texts[t]
480
573
if prev_sha != self.text_sha1:
482
'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
483
(self.file_id, tree_revision_id, prev_sha, self.text_sha1,
574
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
575
(self.file_id, tree_revision_id))
486
577
checker.repeated_text_cnt += 1
580
if self.file_id not in checker.checked_weaves:
581
mutter('check weave {%s}', self.file_id)
582
w = tree.get_weave(self.file_id)
583
# Not passing a progress bar, because it creates a new
584
# progress, which overwrites the current progress,
585
# and doesn't look nice
587
checker.checked_weaves[self.file_id] = True
589
w = tree.get_weave(self.file_id)
489
591
mutter('check version {%s} of {%s}', tree_revision_id, self.file_id)
490
592
checker.checked_text_cnt += 1
491
593
# We can't check the length, because Weave doesn't store that
492
594
# information, and the whole point of looking at the weave's
493
595
# sha1sum is that we don't have to extract the text.
494
if (self.text_sha1 != tree._repository.texts.get_sha1s([key])[key]):
495
raise BzrCheckError('text {%s} version {%s} wrong sha1' % key)
496
checker.checked_texts[key] = self.text_sha1
596
if self.text_sha1 != w.get_sha1(self.revision):
597
raise BzrCheckError('text {%s} version {%s} wrong sha1'
598
% (self.file_id, self.revision))
599
checker.checked_texts[t] = self.text_sha1
499
602
other = InventoryFile(self.file_id, self.name, self.parent_id)
513
618
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
514
619
output_to, reverse=False):
515
620
"""See InventoryEntry._diff."""
516
from bzrlib.diff import DiffText
517
from_file_id = self.file_id
519
to_file_id = to_entry.file_id
523
to_file_id, from_file_id = from_file_id, to_file_id
524
tree, to_tree = to_tree, tree
525
from_label, to_label = to_label, from_label
526
differ = DiffText(tree, to_tree, output_to, 'utf-8', '', '',
528
return differ.diff_text(from_file_id, to_file_id, from_label, to_label)
622
from_text = tree.get_file(self.file_id).readlines()
624
to_text = to_tree.get_file(to_entry.file_id).readlines()
628
text_diff(from_label, from_text,
629
to_label, to_text, output_to)
631
text_diff(to_label, to_text,
632
from_label, from_text, output_to)
635
label_pair = (to_label, from_label)
637
label_pair = (from_label, to_label)
638
print >> output_to, "Binary files %s and %s differ" % label_pair
530
640
def has_text(self):
531
641
"""See InventoryEntry.has_text."""
553
663
def _put_on_disk(self, fullpath, tree):
554
664
"""See InventoryEntry._put_on_disk."""
555
osutils.pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
665
pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
556
666
if tree.is_executable(self.file_id):
557
667
os.chmod(fullpath, 0755)
559
669
def _read_tree_state(self, path, work_tree):
560
670
"""See InventoryEntry._read_tree_state."""
561
self.text_sha1 = work_tree.get_file_sha1(self.file_id, path=path)
562
# FIXME: 20050930 probe for the text size when getting sha1
563
# in _read_tree_state
564
self.executable = work_tree.is_executable(self.file_id, path=path)
567
return ("%s(%r, %r, parent_id=%r, sha1=%r, len=%s)"
568
% (self.__class__.__name__,
671
self.text_sha1 = work_tree.get_file_sha1(self.file_id)
672
self.executable = work_tree.is_executable(self.file_id)
575
674
def _forget_tree_state(self):
576
675
self.text_sha1 = None
676
self.executable = None
678
def _snapshot_text(self, file_parents, work_tree, versionedfile_store, transaction):
679
"""See InventoryEntry._snapshot_text."""
680
mutter('storing text of file {%s} in revision {%s} into %r',
681
self.file_id, self.revision, versionedfile_store)
682
# special case to avoid diffing on renames or
684
if (len(file_parents) == 1
685
and self.text_sha1 == file_parents.values()[0].text_sha1
686
and self.text_size == file_parents.values()[0].text_size):
687
previous_ie = file_parents.values()[0]
688
versionedfile = versionedfile_store.get_weave(self.file_id, transaction)
689
versionedfile.clone_text(self.revision, previous_ie.revision, file_parents.keys())
691
new_lines = work_tree.get_file(self.file_id).readlines()
692
self._add_text_to_weave(new_lines, file_parents.keys(), versionedfile_store,
694
self.text_sha1 = sha_strings(new_lines)
695
self.text_size = sum(map(len, new_lines))
578
698
def _unchanged(self, previous_ie):
579
699
"""See InventoryEntry._unchanged."""
760
842
The inventory is created with a default root directory, with
763
if root_id is not None:
764
self._set_root(InventoryDirectory(root_id, u'', None))
845
# We are letting Branch.create() create a unique inventory
846
# root id, rather than generating a random one here.
848
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
849
self.root = RootEntry(root_id)
768
850
self.revision_id = revision_id
771
return "<Inventory object at %x, contents=%r>" % (id(self), self._byid)
773
def apply_delta(self, delta):
774
"""Apply a delta to this inventory.
776
:param delta: A list of changes to apply. After all the changes are
777
applied the final inventory must be internally consistent, but it
778
is ok to supply changes which, if only half-applied would have an
779
invalid result - such as supplying two changes which rename two
780
files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
781
('B', 'A', 'B-id', b_entry)].
783
Each change is a tuple, of the form (old_path, new_path, file_id,
786
When new_path is None, the change indicates the removal of an entry
787
from the inventory and new_entry will be ignored (using None is
788
appropriate). If new_path is not None, then new_entry must be an
789
InventoryEntry instance, which will be incorporated into the
790
inventory (and replace any existing entry with the same file id).
792
When old_path is None, the change indicates the addition of
793
a new entry to the inventory.
795
When neither new_path nor old_path are None, the change is a
796
modification to an entry, such as a rename, reparent, kind change
799
The children attribute of new_entry is ignored. This is because
800
this method preserves children automatically across alterations to
801
the parent of the children, and cases where the parent id of a
802
child is changing require the child to be passed in as a separate
803
change regardless. E.g. in the recursive deletion of a directory -
804
the directory's children must be included in the delta, or the
805
final inventory will be invalid.
808
# Remove all affected items which were in the original inventory,
809
# starting with the longest paths, thus ensuring parents are examined
810
# after their children, which means that everything we examine has no
811
# modified children remaining by the time we examine it.
812
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
813
if op is not None), reverse=True):
814
if file_id not in self:
817
# Preserve unaltered children of file_id for later reinsertion.
818
file_id_children = getattr(self[file_id], 'children', {})
819
if len(file_id_children):
820
children[file_id] = file_id_children
821
# Remove file_id and the unaltered children. If file_id is not
822
# being deleted it will be reinserted back later.
823
self.remove_recursive_id(file_id)
824
# Insert all affected which should be in the new inventory, reattaching
825
# their children if they had any. This is done from shortest path to
826
# longest, ensuring that items which were modified and whose parents in
827
# the resulting inventory were also modified, are inserted after their
829
for new_path, new_entry in sorted((np, e) for op, np, f, e in
830
delta if np is not None):
831
if new_entry.kind == 'directory':
832
# Pop the children to allow detection of children whose
833
# parents were deleted and which were not reattached to a new
835
new_entry.children = children.pop(new_entry.file_id, {})
838
# Get the parent id that was deleted
839
parent_id, children = children.popitem()
840
raise errors.InconsistentDelta("<deleted>", parent_id,
841
"The file id was deleted but its children were not deleted.")
843
def _set_root(self, ie):
845
851
self._byid = {self.root.file_id: self.root}
848
855
# TODO: jam 20051218 Should copy also copy the revision_id?
849
entries = self.iter_entries()
850
if self.root is None:
851
return Inventory(root_id=None)
852
other = Inventory(entries.next()[1].file_id)
853
other.root.revision = self.root.revision
856
other = Inventory(self.root.file_id)
854
857
# copy recursively so we know directories will be added before
855
858
# their children. There are more efficient ways than this...
856
for path, entry in entries:
859
for path, entry in self.iter_entries():
860
if entry == self.root:
857
862
other.add(entry.copy())
860
866
def __iter__(self):
861
867
return iter(self._byid)
863
870
def __len__(self):
864
871
"""Returns number of entries."""
865
872
return len(self._byid)
867
875
def iter_entries(self, from_dir=None):
868
876
"""Return (path, entry) pairs, in order by name."""
870
if self.root is None:
874
elif isinstance(from_dir, basestring):
875
from_dir = self._byid[from_dir]
877
# unrolling the recursive call changed the time from
878
# 440ms/663ms (inline/total) to 116ms/116ms
879
children = from_dir.children.items()
881
children = collections.deque(children)
882
stack = [(u'', children)]
884
from_dir_relpath, children = stack[-1]
887
name, ie = children.popleft()
889
# we know that from_dir_relpath never ends in a slash
890
# and 'f' doesn't begin with one, we can do a string op, rather
891
# than the checks of pathjoin(), though this means that all paths
893
path = from_dir_relpath + '/' + name
897
if ie.kind != 'directory':
900
# But do this child first
901
new_children = ie.children.items()
903
new_children = collections.deque(new_children)
904
stack.append((path, new_children))
905
# Break out of inner loop, so that we start outer loop with child
908
# if we finished all children, pop it off the stack
911
def iter_entries_by_dir(self, from_dir=None, specific_file_ids=None,
912
yield_parents=False):
913
"""Iterate over the entries in a directory first order.
915
This returns all entries for a directory before returning
916
the entries for children of a directory. This is not
917
lexicographically sorted order, and is a hybrid between
918
depth-first and breadth-first.
920
:param yield_parents: If True, yield the parents from the root leading
921
down to specific_file_ids that have been requested. This has no
922
impact if specific_file_ids is None.
923
:return: This yields (path, entry) pairs
925
if specific_file_ids and not isinstance(specific_file_ids, set):
926
specific_file_ids = set(specific_file_ids)
927
# TODO? Perhaps this should return the from_dir so that the root is
928
# yielded? or maybe an option?
930
if self.root is None:
932
# Optimize a common case
933
if (not yield_parents and specific_file_ids is not None and
934
len(specific_file_ids) == 1):
935
file_id = list(specific_file_ids)[0]
937
yield self.id2path(file_id), self[file_id]
940
if (specific_file_ids is None or yield_parents or
941
self.root.file_id in specific_file_ids):
943
elif isinstance(from_dir, basestring):
944
from_dir = self._byid[from_dir]
946
if specific_file_ids is not None:
947
# TODO: jam 20070302 This could really be done as a loop rather
948
# than a bunch of recursive calls.
951
def add_ancestors(file_id):
952
if file_id not in byid:
954
parent_id = byid[file_id].parent_id
955
if parent_id is None:
957
if parent_id not in parents:
958
parents.add(parent_id)
959
add_ancestors(parent_id)
960
for file_id in specific_file_ids:
961
add_ancestors(file_id)
965
stack = [(u'', from_dir)]
967
cur_relpath, cur_dir = stack.pop()
970
for child_name, child_ie in sorted(cur_dir.children.iteritems()):
972
child_relpath = cur_relpath + child_name
974
if (specific_file_ids is None or
975
child_ie.file_id in specific_file_ids or
976
(yield_parents and child_ie.file_id in parents)):
977
yield child_relpath, child_ie
979
if child_ie.kind == 'directory':
980
if parents is None or child_ie.file_id in parents:
981
child_dirs.append((child_relpath+'/', child_ie))
982
stack.extend(reversed(child_dirs))
984
def make_entry(self, kind, name, parent_id, file_id=None):
985
"""Simple thunk to bzrlib.inventory.make_entry."""
986
return make_entry(kind, name, parent_id, file_id)
880
elif isinstance(from_dir, basestring):
881
from_dir = self._byid[from_dir]
883
kids = from_dir.children.items()
885
for name, ie in kids:
887
if ie.kind == 'directory':
888
for cn, cie in self.iter_entries(from_dir=ie.file_id):
889
yield pathjoin(name, cn), cie
988
892
def entries(self):
989
893
"""Return list of (path, ie) for all entries except the root.