272
272
:param rev_id: Revision id from which this InventoryEntry was loaded.
273
273
Not necessarily the last-changed revision for this file.
274
274
:param inv: Inventory from which the entry was loaded.
275
:param tree: RevisionTree for this entry.
277
276
if self.parent_id is not None:
278
277
if not inv.has_id(self.parent_id):
279
278
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
280
279
% (self.parent_id, rev_id))
281
self._check(checker, rev_id, tree)
280
checker._add_entry_to_text_key_references(inv, self)
281
self._check(checker, rev_id)
283
def _check(self, checker, rev_id, tree):
283
def _check(self, checker, rev_id):
284
284
"""Check this inventory entry for kind specific errors."""
285
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
285
checker._report_items.append(
286
'unknown entry kind %r in revision {%s}' % (self.kind, rev_id))
289
289
"""Clone this inventory entry."""
431
431
'text_id', 'parent_id', 'children', 'executable',
432
432
'revision', 'symlink_target', 'reference_revision']
434
def _check(self, checker, rev_id, tree):
434
def _check(self, checker, rev_id):
435
435
"""See InventoryEntry._check"""
436
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
437
raise BzrCheckError('directory {%s} has text in revision {%s}'
436
if (self.text_sha1 is not None or self.text_size is not None or
437
self.text_id is not None):
438
checker._report_items.append('directory {%s} has text in revision {%s}'
438
439
% (self.file_id, rev_id))
440
# In non rich root repositories we do not expect a file graph for the
442
if self.name == '' and not checker.rich_roots:
444
# Directories are stored as an empty file, but the file should exist
445
# to provide a per-fileid log. The hash of every directory content is
446
# "da..." below (the sha1sum of '').
447
checker.add_pending_item(rev_id,
448
('texts', self.file_id, self.revision), 'text',
449
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
441
452
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
474
485
'text_id', 'parent_id', 'children', 'executable',
475
486
'revision', 'symlink_target', 'reference_revision']
477
def _check(self, checker, tree_revision_id, tree):
488
def _check(self, checker, tree_revision_id):
478
489
"""See InventoryEntry._check"""
479
key = (self.file_id, self.revision)
480
if key in checker.checked_texts:
481
prev_sha = checker.checked_texts[key]
482
if prev_sha != self.text_sha1:
484
'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
485
(self.file_id, tree_revision_id, prev_sha, self.text_sha1,
488
checker.repeated_text_cnt += 1
491
checker.checked_text_cnt += 1
492
# We can't check the length, because Weave doesn't store that
493
# information, and the whole point of looking at the weave's
494
# sha1sum is that we don't have to extract the text.
495
if (self.text_sha1 != tree._repository.texts.get_sha1s([key])[key]):
496
raise BzrCheckError('text {%s} version {%s} wrong sha1' % key)
497
checker.checked_texts[key] = self.text_sha1
490
# TODO: check size too.
491
checker.add_pending_item(tree_revision_id,
492
('texts', self.file_id, self.revision), 'text',
494
if self.text_size is None:
495
checker._report_items.append(
496
'fileid {%s} in {%s} has None for text_size' % (self.file_id,
500
500
other = InventoryFile(self.file_id, self.name, self.parent_id)
598
598
'text_id', 'parent_id', 'children', 'executable',
599
599
'revision', 'symlink_target', 'reference_revision']
601
def _check(self, checker, rev_id, tree):
601
def _check(self, checker, tree_revision_id):
602
602
"""See InventoryEntry._check"""
603
603
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
604
raise BzrCheckError('symlink {%s} has text in revision {%s}'
605
% (self.file_id, rev_id))
604
checker._report_items.append(
605
'symlink {%s} has text in revision {%s}'
606
% (self.file_id, tree_revision_id))
606
607
if self.symlink_target is None:
607
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
608
% (self.file_id, rev_id))
608
checker._report_items.append(
609
'symlink {%s} has no target in revision {%s}'
610
% (self.file_id, tree_revision_id))
611
# Symlinks are stored as ''
612
checker.add_pending_item(tree_revision_id,
613
('texts', self.file_id, self.revision), 'text',
614
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
611
617
other = InventoryLink(self.file_id, self.name, self.parent_id)
1539
1555
raise ValueError("unknown kind %r" % entry.kind)
1557
def _expand_fileids_to_parents_and_children(self, file_ids):
1558
"""Give a more wholistic view starting with the given file_ids.
1560
For any file_id which maps to a directory, we will include all children
1561
of that directory. We will also include all directories which are
1562
parents of the given file_ids, but we will not include their children.
1569
fringle # fringle-id
1573
if given [foo-id] we will include
1574
TREE_ROOT as interesting parents
1576
foo-id, baz-id, frob-id, fringle-id
1580
# TODO: Pre-pass over the list of fileids to see if anything is already
1581
# deserialized in self._fileid_to_entry_cache
1583
directories_to_expand = set()
1584
children_of_parent_id = {}
1585
# It is okay if some of the fileids are missing
1586
for entry in self._getitems(file_ids):
1587
if entry.kind == 'directory':
1588
directories_to_expand.add(entry.file_id)
1589
interesting.add(entry.parent_id)
1590
children_of_parent_id.setdefault(entry.parent_id, []
1591
).append(entry.file_id)
1593
# Now, interesting has all of the direct parents, but not the
1594
# parents of those parents. It also may have some duplicates with
1596
remaining_parents = interesting.difference(file_ids)
1597
# When we hit the TREE_ROOT, we'll get an interesting parent of None,
1598
# but we don't actually want to recurse into that
1599
interesting.add(None) # this will auto-filter it in the loop
1600
remaining_parents.discard(None)
1601
while remaining_parents:
1602
if None in remaining_parents:
1603
import pdb; pdb.set_trace()
1604
next_parents = set()
1605
for entry in self._getitems(remaining_parents):
1606
next_parents.add(entry.parent_id)
1607
children_of_parent_id.setdefault(entry.parent_id, []
1608
).append(entry.file_id)
1609
# Remove any search tips we've already processed
1610
remaining_parents = next_parents.difference(interesting)
1611
interesting.update(remaining_parents)
1612
# We should probably also .difference(directories_to_expand)
1613
interesting.update(file_ids)
1614
interesting.discard(None)
1615
while directories_to_expand:
1616
# Expand directories by looking in the
1617
# parent_id_basename_to_file_id map
1618
keys = [(f,) for f in directories_to_expand]
1619
directories_to_expand = set()
1620
items = self.parent_id_basename_to_file_id.iteritems(keys)
1621
next_file_ids = set([item[1] for item in items])
1622
next_file_ids = next_file_ids.difference(interesting)
1623
interesting.update(next_file_ids)
1624
for entry in self._getitems(next_file_ids):
1625
if entry.kind == 'directory':
1626
directories_to_expand.add(entry.file_id)
1627
children_of_parent_id.setdefault(entry.parent_id, []
1628
).append(entry.file_id)
1629
return interesting, children_of_parent_id
1631
def filter(self, specific_fileids):
1632
"""Get an inventory view filtered against a set of file-ids.
1634
Children of directories and parents are included.
1636
The result may or may not reference the underlying inventory
1637
so it should be treated as immutable.
1640
parent_to_children) = self._expand_fileids_to_parents_and_children(
1642
# There is some overlap here, but we assume that all interesting items
1643
# are in the _fileid_to_entry_cache because we had to read them to
1644
# determine if they were a dir we wanted to recurse, or just a file
1645
# This should give us all the entries we'll want to add, so start
1647
other = Inventory(self.root_id)
1648
other.root.revision = self.root.revision
1649
other.revision_id = self.revision_id
1650
if not interesting or not parent_to_children:
1651
# empty filter, or filtering entries that don't exist
1652
# (if even 1 existed, then we would have populated
1653
# parent_to_children with at least the tree root.)
1655
cache = self._fileid_to_entry_cache
1657
remaining_children = collections.deque(parent_to_children[self.root_id])
1659
import pdb; pdb.set_trace()
1661
while remaining_children:
1662
file_id = remaining_children.popleft()
1664
if ie.kind == 'directory':
1665
ie = ie.copy() # We create a copy to depopulate the .children attribute
1666
# TODO: depending on the uses of 'other' we should probably always
1667
# '.copy()' to prevent someone from mutating other and
1668
# invalidating our internal cache
1670
if file_id in parent_to_children:
1671
remaining_children.extend(parent_to_children[file_id])
1542
1675
def _bytes_to_utf8name_key(bytes):
1543
1676
"""Get the file_id, revision_id key out of bytes."""
1877
2010
# really we're passing an inventory, not a tree...
1878
2011
raise errors.NoSuchId(self, file_id)
2013
def _getitems(self, file_ids):
2014
"""Similar to __getitem__, but lets you query for multiple.
2016
The returned order is undefined. And currently if an item doesn't
2017
exist, it isn't included in the output.
2021
for file_id in file_ids:
2022
entry = self._fileid_to_entry_cache.get(file_id, None)
2024
remaining.append(file_id)
2026
result.append(entry)
2027
file_keys = [(f,) for f in remaining]
2028
for file_key, value in self.id_to_entry.iteritems(file_keys):
2029
entry = self._bytes_to_entry(value)
2030
result.append(entry)
2031
self._fileid_to_entry_cache[entry.file_id] = entry
1880
2034
def has_id(self, file_id):
1881
2035
# Perhaps have an explicit 'contains' method on CHKMap ?
1882
2036
if self._fileid_to_entry_cache.get(file_id, None) is not None: