272
273
:param rev_id: Revision id from which this InventoryEntry was loaded.
273
274
Not necessarily the last-changed revision for this file.
274
275
:param inv: Inventory from which the entry was loaded.
275
:param tree: RevisionTree for this entry.
277
277
if self.parent_id is not None:
278
278
if not inv.has_id(self.parent_id):
279
279
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
280
280
% (self.parent_id, rev_id))
281
self._check(checker, rev_id, tree)
281
checker._add_entry_to_text_key_references(inv, self)
282
self._check(checker, rev_id)
283
def _check(self, checker, rev_id, tree):
284
def _check(self, checker, rev_id):
284
285
"""Check this inventory entry for kind specific errors."""
285
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
286
checker._report_items.append(
287
'unknown entry kind %r in revision {%s}' % (self.kind, rev_id))
289
290
"""Clone this inventory entry."""
431
432
'text_id', 'parent_id', 'children', 'executable',
432
433
'revision', 'symlink_target', 'reference_revision']
434
def _check(self, checker, rev_id, tree):
435
def _check(self, checker, rev_id):
435
436
"""See InventoryEntry._check"""
436
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
437
raise BzrCheckError('directory {%s} has text in revision {%s}'
437
if (self.text_sha1 is not None or self.text_size is not None or
438
self.text_id is not None):
439
checker._report_items.append('directory {%s} has text in revision {%s}'
438
440
% (self.file_id, rev_id))
441
# In non rich root repositories we do not expect a file graph for the
443
if self.name == '' and not checker.rich_roots:
445
# Directories are stored as an empty file, but the file should exist
446
# to provide a per-fileid log. The hash of every directory content is
447
# "da..." below (the sha1sum of '').
448
checker.add_pending_item(rev_id,
449
('texts', self.file_id, self.revision), 'text',
450
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
441
453
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
474
486
'text_id', 'parent_id', 'children', 'executable',
475
487
'revision', 'symlink_target', 'reference_revision']
477
def _check(self, checker, tree_revision_id, tree):
489
def _check(self, checker, tree_revision_id):
478
490
"""See InventoryEntry._check"""
479
key = (self.file_id, self.revision)
480
if key in checker.checked_texts:
481
prev_sha = checker.checked_texts[key]
482
if prev_sha != self.text_sha1:
484
'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
485
(self.file_id, tree_revision_id, prev_sha, self.text_sha1,
488
checker.repeated_text_cnt += 1
491
checker.checked_text_cnt += 1
492
# We can't check the length, because Weave doesn't store that
493
# information, and the whole point of looking at the weave's
494
# sha1sum is that we don't have to extract the text.
495
if (self.text_sha1 != tree._repository.texts.get_sha1s([key])[key]):
496
raise BzrCheckError('text {%s} version {%s} wrong sha1' % key)
497
checker.checked_texts[key] = self.text_sha1
491
# TODO: check size too.
492
checker.add_pending_item(tree_revision_id,
493
('texts', self.file_id, self.revision), 'text',
495
if self.text_size is None:
496
checker._report_items.append(
497
'fileid {%s} in {%s} has None for text_size' % (self.file_id,
500
501
other = InventoryFile(self.file_id, self.name, self.parent_id)
598
599
'text_id', 'parent_id', 'children', 'executable',
599
600
'revision', 'symlink_target', 'reference_revision']
601
def _check(self, checker, rev_id, tree):
602
def _check(self, checker, tree_revision_id):
602
603
"""See InventoryEntry._check"""
603
604
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
604
raise BzrCheckError('symlink {%s} has text in revision {%s}'
605
% (self.file_id, rev_id))
605
checker._report_items.append(
606
'symlink {%s} has text in revision {%s}'
607
% (self.file_id, tree_revision_id))
606
608
if self.symlink_target is None:
607
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
608
% (self.file_id, rev_id))
609
checker._report_items.append(
610
'symlink {%s} has no target in revision {%s}'
611
% (self.file_id, tree_revision_id))
612
# Symlinks are stored as ''
613
checker.add_pending_item(tree_revision_id,
614
('texts', self.file_id, self.revision), 'text',
615
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
611
618
other = InventoryLink(self.file_id, self.name, self.parent_id)
947
959
descend(self.root, u'')
950
def path2id(self, name):
962
def path2id(self, relpath):
951
963
"""Walk down through directories to return entry of last component.
953
names may be either a list of path components, or a single
954
string, in which case it is automatically split.
965
:param relpath: may be either a list of path components, or a single
966
string, in which case it is automatically split.
956
968
This returns the entry of the last component in the path,
957
969
which may be either a file or a directory.
959
971
Returns None IFF the path is not found.
961
if isinstance(name, basestring):
962
name = osutils.splitpath(name)
964
# mutter("lookup path %r" % name)
973
if isinstance(relpath, basestring):
974
names = osutils.splitpath(relpath)
967
979
parent = self.root
1539
1556
raise ValueError("unknown kind %r" % entry.kind)
1558
def _expand_fileids_to_parents_and_children(self, file_ids):
1559
"""Give a more wholistic view starting with the given file_ids.
1561
For any file_id which maps to a directory, we will include all children
1562
of that directory. We will also include all directories which are
1563
parents of the given file_ids, but we will not include their children.
1570
fringle # fringle-id
1574
if given [foo-id] we will include
1575
TREE_ROOT as interesting parents
1577
foo-id, baz-id, frob-id, fringle-id
1581
# TODO: Pre-pass over the list of fileids to see if anything is already
1582
# deserialized in self._fileid_to_entry_cache
1584
directories_to_expand = set()
1585
children_of_parent_id = {}
1586
# It is okay if some of the fileids are missing
1587
for entry in self._getitems(file_ids):
1588
if entry.kind == 'directory':
1589
directories_to_expand.add(entry.file_id)
1590
interesting.add(entry.parent_id)
1591
children_of_parent_id.setdefault(entry.parent_id, []
1592
).append(entry.file_id)
1594
# Now, interesting has all of the direct parents, but not the
1595
# parents of those parents. It also may have some duplicates with
1597
remaining_parents = interesting.difference(file_ids)
1598
# When we hit the TREE_ROOT, we'll get an interesting parent of None,
1599
# but we don't actually want to recurse into that
1600
interesting.add(None) # this will auto-filter it in the loop
1601
remaining_parents.discard(None)
1602
while remaining_parents:
1603
next_parents = set()
1604
for entry in self._getitems(remaining_parents):
1605
next_parents.add(entry.parent_id)
1606
children_of_parent_id.setdefault(entry.parent_id, []
1607
).append(entry.file_id)
1608
# Remove any search tips we've already processed
1609
remaining_parents = next_parents.difference(interesting)
1610
interesting.update(remaining_parents)
1611
# We should probably also .difference(directories_to_expand)
1612
interesting.update(file_ids)
1613
interesting.discard(None)
1614
while directories_to_expand:
1615
# Expand directories by looking in the
1616
# parent_id_basename_to_file_id map
1617
keys = [StaticTuple(f,).intern() for f in directories_to_expand]
1618
directories_to_expand = set()
1619
items = self.parent_id_basename_to_file_id.iteritems(keys)
1620
next_file_ids = set([item[1] for item in items])
1621
next_file_ids = next_file_ids.difference(interesting)
1622
interesting.update(next_file_ids)
1623
for entry in self._getitems(next_file_ids):
1624
if entry.kind == 'directory':
1625
directories_to_expand.add(entry.file_id)
1626
children_of_parent_id.setdefault(entry.parent_id, []
1627
).append(entry.file_id)
1628
return interesting, children_of_parent_id
1630
def filter(self, specific_fileids):
1631
"""Get an inventory view filtered against a set of file-ids.
1633
Children of directories and parents are included.
1635
The result may or may not reference the underlying inventory
1636
so it should be treated as immutable.
1639
parent_to_children) = self._expand_fileids_to_parents_and_children(
1641
# There is some overlap here, but we assume that all interesting items
1642
# are in the _fileid_to_entry_cache because we had to read them to
1643
# determine if they were a dir we wanted to recurse, or just a file
1644
# This should give us all the entries we'll want to add, so start
1646
other = Inventory(self.root_id)
1647
other.root.revision = self.root.revision
1648
other.revision_id = self.revision_id
1649
if not interesting or not parent_to_children:
1650
# empty filter, or filtering entries that don't exist
1651
# (if even 1 existed, then we would have populated
1652
# parent_to_children with at least the tree root.)
1654
cache = self._fileid_to_entry_cache
1656
remaining_children = collections.deque(parent_to_children[self.root_id])
1658
import pdb; pdb.set_trace()
1660
while remaining_children:
1661
file_id = remaining_children.popleft()
1663
if ie.kind == 'directory':
1664
ie = ie.copy() # We create a copy to depopulate the .children attribute
1665
# TODO: depending on the uses of 'other' we should probably always
1666
# '.copy()' to prevent someone from mutating other and
1667
# invalidating our internal cache
1669
if file_id in parent_to_children:
1670
remaining_children.extend(parent_to_children[file_id])
1542
1674
def _bytes_to_utf8name_key(bytes):
1543
1675
"""Get the file_id, revision_id key out of bytes."""
1785
1918
raise errors.BzrError('Duplicate key in inventory: %r\n%r'
1786
1919
% (key, bytes))
1787
1920
info[key] = value
1788
revision_id = info['revision_id']
1789
root_id = info['root_id']
1790
search_key_name = info.get('search_key_name', 'plain')
1791
parent_id_basename_to_file_id = info.get(
1792
'parent_id_basename_to_file_id', None)
1921
revision_id = intern(info['revision_id'])
1922
root_id = intern(info['root_id'])
1923
search_key_name = intern(info.get('search_key_name', 'plain'))
1924
parent_id_basename_to_file_id = intern(info.get(
1925
'parent_id_basename_to_file_id', None))
1926
if not parent_id_basename_to_file_id.startswith('sha1:'):
1927
raise ValueError('parent_id_basename_to_file_id should be a sha1'
1928
' key not %r' % (parent_id_basename_to_file_id,))
1793
1929
id_to_entry = info['id_to_entry']
1930
if not id_to_entry.startswith('sha1:'):
1931
raise ValueError('id_to_entry should be a sha1'
1932
' key not %r' % (id_to_entry,))
1795
1934
result = CHKInventory(search_key_name)
1796
1935
result.revision_id = revision_id
1799
1938
result._search_key_name)
1800
1939
if parent_id_basename_to_file_id is not None:
1801
1940
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1802
chk_store, (parent_id_basename_to_file_id,),
1941
chk_store, StaticTuple(parent_id_basename_to_file_id,),
1803
1942
search_key_func=search_key_func)
1805
1944
result.parent_id_basename_to_file_id = None
1807
result.id_to_entry = chk_map.CHKMap(chk_store, (id_to_entry,),
1946
result.id_to_entry = chk_map.CHKMap(chk_store,
1947
StaticTuple(id_to_entry,),
1808
1948
search_key_func=search_key_func)
1809
1949
if (result.revision_id,) != expected_revision_id:
1810
1950
raise ValueError("Mismatched revision id and expected: %r, %r" %
1874
2015
return self._bytes_to_entry(
1875
self.id_to_entry.iteritems([(file_id,)]).next()[1])
2016
self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1])
1876
2017
except StopIteration:
1877
2018
# really we're passing an inventory, not a tree...
1878
2019
raise errors.NoSuchId(self, file_id)
2021
def _getitems(self, file_ids):
2022
"""Similar to __getitem__, but lets you query for multiple.
2024
The returned order is undefined. And currently if an item doesn't
2025
exist, it isn't included in the output.
2029
for file_id in file_ids:
2030
entry = self._fileid_to_entry_cache.get(file_id, None)
2032
remaining.append(file_id)
2034
result.append(entry)
2035
file_keys = [StaticTuple(f,).intern() for f in remaining]
2036
for file_key, value in self.id_to_entry.iteritems(file_keys):
2037
entry = self._bytes_to_entry(value)
2038
result.append(entry)
2039
self._fileid_to_entry_cache[entry.file_id] = entry
1880
2042
def has_id(self, file_id):
1881
2043
# Perhaps have an explicit 'contains' method on CHKMap ?
1882
2044
if self._fileid_to_entry_cache.get(file_id, None) is not None:
1884
return len(list(self.id_to_entry.iteritems([(file_id,)]))) == 1
2047
self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1
1886
2049
def is_root(self, file_id):
1887
2050
return file_id == self.root_id
2016
2179
delta.append((old_path, new_path, file_id, entry))
2019
def path2id(self, name):
2182
def path2id(self, relpath):
2020
2183
"""See CommonInventory.path2id()."""
2021
2184
# TODO: perhaps support negative hits?
2022
result = self._path_to_fileid_cache.get(name, None)
2185
result = self._path_to_fileid_cache.get(relpath, None)
2023
2186
if result is not None:
2025
if isinstance(name, basestring):
2026
names = osutils.splitpath(name)
2188
if isinstance(relpath, basestring):
2189
names = osutils.splitpath(relpath)
2029
2192
current_id = self.root_id
2030
2193
if current_id is None:
2032
2195
parent_id_index = self.parent_id_basename_to_file_id
2033
2197
for basename in names:
2034
# TODO: Cache each path we figure out in this function.
2198
if cur_path is None:
2201
cur_path = cur_path + '/' + basename
2035
2202
basename_utf8 = basename.encode('utf8')
2036
key_filter = [(current_id, basename_utf8)]
2038
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
2039
key_filter=key_filter):
2040
if parent_id != current_id or name_utf8 != basename_utf8:
2041
raise errors.BzrError("corrupt inventory lookup! "
2042
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2203
file_id = self._path_to_fileid_cache.get(cur_path, None)
2044
2204
if file_id is None:
2205
key_filter = [StaticTuple(current_id, basename_utf8)]
2206
items = parent_id_index.iteritems(key_filter)
2207
for (parent_id, name_utf8), file_id in items:
2208
if parent_id != current_id or name_utf8 != basename_utf8:
2209
raise errors.BzrError("corrupt inventory lookup! "
2210
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2215
self._path_to_fileid_cache[cur_path] = file_id
2046
2216
current_id = file_id
2047
self._path_to_fileid_cache[name] = current_id
2048
2217
return current_id
2050
2219
def to_lines(self):
2055
2224
lines.append('search_key_name: %s\n' % (self._search_key_name,))
2056
2225
lines.append("root_id: %s\n" % self.root_id)
2057
2226
lines.append('parent_id_basename_to_file_id: %s\n' %
2058
self.parent_id_basename_to_file_id.key())
2227
(self.parent_id_basename_to_file_id.key()[0],))
2059
2228
lines.append("revision_id: %s\n" % self.revision_id)
2060
lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
2229
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
2062
2231
lines.append("revision_id: %s\n" % self.revision_id)
2063
2232
lines.append("root_id: %s\n" % self.root_id)
2064
2233
if self.parent_id_basename_to_file_id is not None:
2065
2234
lines.append('parent_id_basename_to_file_id: %s\n' %
2066
self.parent_id_basename_to_file_id.key())
2067
lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
2235
(self.parent_id_basename_to_file_id.key()[0],))
2236
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))