273
274
:param rev_id: Revision id from which this InventoryEntry was loaded.
274
275
Not necessarily the last-changed revision for this file.
275
276
:param inv: Inventory from which the entry was loaded.
277
:param tree: RevisionTree for this entry.
277
279
if self.parent_id is not None:
278
280
if not inv.has_id(self.parent_id):
279
281
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
280
282
% (self.parent_id, rev_id))
281
checker._add_entry_to_text_key_references(inv, self)
282
self._check(checker, rev_id)
283
self._check(checker, rev_id, tree)
284
def _check(self, checker, rev_id):
285
def _check(self, checker, rev_id, tree):
285
286
"""Check this inventory entry for kind specific errors."""
286
checker._report_items.append(
287
'unknown entry kind %r in revision {%s}' % (self.kind, rev_id))
287
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
290
291
"""Clone this inventory entry."""
432
433
'text_id', 'parent_id', 'children', 'executable',
433
434
'revision', 'symlink_target', 'reference_revision']
435
def _check(self, checker, rev_id):
436
def _check(self, checker, rev_id, tree):
436
437
"""See InventoryEntry._check"""
437
if (self.text_sha1 is not None or self.text_size is not None or
438
self.text_id is not None):
439
checker._report_items.append('directory {%s} has text in revision {%s}'
438
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
439
raise BzrCheckError('directory {%s} has text in revision {%s}'
440
440
% (self.file_id, rev_id))
441
# In non rich root repositories we do not expect a file graph for the
443
if self.name == '' and not checker.rich_roots:
445
# Directories are stored as an empty file, but the file should exist
446
# to provide a per-fileid log. The hash of every directory content is
447
# "da..." below (the sha1sum of '').
448
checker.add_pending_item(rev_id,
449
('texts', self.file_id, self.revision), 'text',
450
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
453
443
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
486
476
'text_id', 'parent_id', 'children', 'executable',
487
477
'revision', 'symlink_target', 'reference_revision']
489
def _check(self, checker, tree_revision_id):
479
def _check(self, checker, tree_revision_id, tree):
490
480
"""See InventoryEntry._check"""
491
# TODO: check size too.
492
checker.add_pending_item(tree_revision_id,
493
('texts', self.file_id, self.revision), 'text',
495
if self.text_size is None:
496
checker._report_items.append(
497
'fileid {%s} in {%s} has None for text_size' % (self.file_id,
481
key = (self.file_id, self.revision)
482
if key in checker.checked_texts:
483
prev_sha = checker.checked_texts[key]
484
if prev_sha != self.text_sha1:
486
'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
487
(self.file_id, tree_revision_id, prev_sha, self.text_sha1,
490
checker.repeated_text_cnt += 1
493
checker.checked_text_cnt += 1
494
# We can't check the length, because Weave doesn't store that
495
# information, and the whole point of looking at the weave's
496
# sha1sum is that we don't have to extract the text.
497
if (self.text_sha1 != tree._repository.texts.get_sha1s([key])[key]):
498
raise BzrCheckError('text {%s} version {%s} wrong sha1' % key)
499
checker.checked_texts[key] = self.text_sha1
501
502
other = InventoryFile(self.file_id, self.name, self.parent_id)
599
600
'text_id', 'parent_id', 'children', 'executable',
600
601
'revision', 'symlink_target', 'reference_revision']
602
def _check(self, checker, tree_revision_id):
603
def _check(self, checker, rev_id, tree):
603
604
"""See InventoryEntry._check"""
604
605
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
605
checker._report_items.append(
606
'symlink {%s} has text in revision {%s}'
607
% (self.file_id, tree_revision_id))
606
raise BzrCheckError('symlink {%s} has text in revision {%s}'
607
% (self.file_id, rev_id))
608
608
if self.symlink_target is None:
609
checker._report_items.append(
610
'symlink {%s} has no target in revision {%s}'
611
% (self.file_id, tree_revision_id))
612
# Symlinks are stored as ''
613
checker.add_pending_item(tree_revision_id,
614
('texts', self.file_id, self.revision), 'text',
615
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
609
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
610
% (self.file_id, rev_id))
618
613
other = InventoryLink(self.file_id, self.name, self.parent_id)
721
716
class CommonInventory(object):
722
"""Basic inventory logic, defined in terms of primitives like has_id.
724
An inventory is the metadata about the contents of a tree.
726
This is broadly a map from file_id to entries such as directories, files,
727
symlinks and tree references. Each entry maintains its own metadata like
728
SHA1 and length for files, or children for a directory.
730
Entries can be looked up either by path or by file_id.
732
InventoryEntry objects must not be modified after they are
733
inserted, other than through the Inventory API.
717
"""Basic inventory logic, defined in terms of primitives like has_id."""
736
719
def __contains__(self, file_id):
737
720
"""True if this entry contains a file with given id.
761
741
>>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
762
742
>>> print i.id2path('foo-id')
765
:raises NoSuchId: If file_id is not present in the inventory.
767
745
# get all names, skipping root
768
746
return '/'.join(reversed(
769
747
[parent.name for parent in
770
748
self._iter_file_id_parents(file_id)][:-1]))
772
def iter_entries(self, from_dir=None, recursive=True):
773
"""Return (path, entry) pairs, in order by name.
775
:param from_dir: if None, start from the root,
776
otherwise start from this directory (either file-id or entry)
777
:param recursive: recurse into directories or not
750
def iter_entries(self, from_dir=None):
751
"""Return (path, entry) pairs, in order by name."""
779
752
if from_dir is None:
780
753
if self.root is None:
959
928
descend(self.root, u'')
962
def path2id(self, relpath):
931
def path2id(self, name):
963
932
"""Walk down through directories to return entry of last component.
965
:param relpath: may be either a list of path components, or a single
966
string, in which case it is automatically split.
934
names may be either a list of path components, or a single
935
string, in which case it is automatically split.
968
937
This returns the entry of the last component in the path,
969
938
which may be either a file or a directory.
971
940
Returns None IFF the path is not found.
973
if isinstance(relpath, basestring):
974
names = osutils.splitpath(relpath)
942
if isinstance(name, basestring):
943
name = osutils.splitpath(name)
945
# mutter("lookup path %r" % name)
979
948
parent = self.root
1045
1014
class Inventory(CommonInventory):
1046
"""Mutable dict based in-memory inventory.
1048
We never store the full path to a file, because renaming a directory
1049
implicitly moves all of its contents. This class internally maintains a
1015
"""Inventory of versioned files in a tree.
1017
This describes which file_id is present at each point in the tree,
1018
and possibly the SHA-1 or other information about the file.
1019
Entries can be looked up either by path or by file_id.
1021
The inventory represents a typical unix file tree, with
1022
directories containing files and subdirectories. We never store
1023
the full path to a file, because renaming a directory implicitly
1024
moves all of its contents. This class internally maintains a
1050
1025
lookup tree that allows the children under a directory to be
1051
1026
returned quickly.
1028
InventoryEntry objects must not be modified after they are
1029
inserted, other than through the Inventory API.
1053
1031
>>> inv = Inventory()
1054
1032
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1055
1033
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
1056
1034
>>> inv['123-123'].name
1059
Id's may be looked up from paths:
1037
May be treated as an iterator or set to look up file ids:
1061
>>> inv.path2id('hello.c')
1039
>>> bool(inv.path2id('hello.c'))
1063
1041
>>> '123-123' in inv
1066
There are iterators over the contents:
1044
May also look up by name:
1068
>>> [entry[0] for entry in inv.iter_entries()]
1046
>>> [x[0] for x in inv.iter_entries()]
1069
1047
['', u'hello.c']
1048
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
1049
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1050
Traceback (most recent call last):
1051
BzrError: parent_id {TREE_ROOT} not in inventory
1052
>>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
1053
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None, revision=None)
1072
1055
def __init__(self, root_id=ROOT_ID, revision_id=None):
1073
1056
"""Create or read an inventory.
1155
1131
# modified children remaining by the time we examine it.
1156
1132
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1157
1133
if op is not None), reverse=True):
1134
if file_id not in self:
1158
1137
# Preserve unaltered children of file_id for later reinsertion.
1159
1138
file_id_children = getattr(self[file_id], 'children', {})
1160
1139
if len(file_id_children):
1161
1140
children[file_id] = file_id_children
1162
if self.id2path(file_id) != old_path:
1163
raise errors.InconsistentDelta(old_path, file_id,
1164
"Entry was at wrong other path %r." % self.id2path(file_id))
1165
1141
# Remove file_id and the unaltered children. If file_id is not
1166
1142
# being deleted it will be reinserted back later.
1167
1143
self.remove_recursive_id(file_id)
1181
1157
replacement.revision = new_entry.revision
1182
1158
replacement.children = children.pop(replacement.file_id, {})
1183
1159
new_entry = replacement
1186
except errors.DuplicateFileId:
1187
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1188
"New id is already present in target.")
1189
except AttributeError:
1190
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1191
"Parent is not a directory.")
1192
if self.id2path(new_entry.file_id) != new_path:
1193
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1194
"New path is not consistent with parent path.")
1195
1161
if len(children):
1196
1162
# Get the parent id that was deleted
1197
1163
parent_id, children = children.popitem()
1198
1164
raise errors.InconsistentDelta("<deleted>", parent_id,
1199
1165
"The file id was deleted but its children were not deleted.")
1201
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1202
propagate_caches=False):
1203
"""See CHKInventory.create_by_apply_delta()"""
1204
new_inv = self.copy()
1205
new_inv.apply_delta(inventory_delta)
1206
new_inv.revision_id = new_revision_id
1209
1167
def _set_root(self, ie):
1211
1169
self._byid = {self.root.file_id: self.root}
1285
1243
To add a file to a branch ready to be committed, use Branch.add,
1286
1244
which calls this.
1246
Returns the new entry object.
1290
1248
if entry.file_id in self._byid:
1291
1249
raise errors.DuplicateFileId(entry.file_id,
1292
1250
self._byid[entry.file_id])
1293
1252
if entry.parent_id is None:
1294
1253
self.root = entry
1297
1256
parent = self._byid[entry.parent_id]
1298
1257
except KeyError:
1299
raise errors.InconsistentDelta("<unknown>", entry.parent_id,
1300
"Parent not in inventory.")
1258
raise BzrError("parent_id {%s} not in inventory" %
1301
1261
if entry.name in parent.children:
1302
raise errors.InconsistentDelta(
1303
self.id2path(parent.children[entry.name].file_id),
1305
"Path already versioned")
1262
raise BzrError("%s is already versioned" %
1263
osutils.pathjoin(self.id2path(parent.file_id),
1264
entry.name).encode('utf-8'))
1306
1265
parent.children[entry.name] = entry
1307
1266
return self._add_child(entry)
1498
1460
self._fileid_to_entry_cache = {}
1499
1461
self._path_to_fileid_cache = {}
1500
1462
self._search_key_name = search_key_name
1503
def __eq__(self, other):
1504
"""Compare two sets by comparing their contents."""
1505
if not isinstance(other, CHKInventory):
1506
return NotImplemented
1508
this_key = self.id_to_entry.key()
1509
other_key = other.id_to_entry.key()
1510
this_pid_key = self.parent_id_basename_to_file_id.key()
1511
other_pid_key = other.parent_id_basename_to_file_id.key()
1512
if None in (this_key, this_pid_key, other_key, other_pid_key):
1514
return this_key == other_key and this_pid_key == other_pid_key
1516
1464
def _entry_to_bytes(self, entry):
1517
1465
"""Serialise entry as a single bytestring.
1556
1504
raise ValueError("unknown kind %r" % entry.kind)
1558
def _expand_fileids_to_parents_and_children(self, file_ids):
1559
"""Give a more wholistic view starting with the given file_ids.
1561
For any file_id which maps to a directory, we will include all children
1562
of that directory. We will also include all directories which are
1563
parents of the given file_ids, but we will not include their children.
1570
fringle # fringle-id
1574
if given [foo-id] we will include
1575
TREE_ROOT as interesting parents
1577
foo-id, baz-id, frob-id, fringle-id
1581
# TODO: Pre-pass over the list of fileids to see if anything is already
1582
# deserialized in self._fileid_to_entry_cache
1584
directories_to_expand = set()
1585
children_of_parent_id = {}
1586
# It is okay if some of the fileids are missing
1587
for entry in self._getitems(file_ids):
1588
if entry.kind == 'directory':
1589
directories_to_expand.add(entry.file_id)
1590
interesting.add(entry.parent_id)
1591
children_of_parent_id.setdefault(entry.parent_id, []
1592
).append(entry.file_id)
1594
# Now, interesting has all of the direct parents, but not the
1595
# parents of those parents. It also may have some duplicates with
1597
remaining_parents = interesting.difference(file_ids)
1598
# When we hit the TREE_ROOT, we'll get an interesting parent of None,
1599
# but we don't actually want to recurse into that
1600
interesting.add(None) # this will auto-filter it in the loop
1601
remaining_parents.discard(None)
1602
while remaining_parents:
1603
next_parents = set()
1604
for entry in self._getitems(remaining_parents):
1605
next_parents.add(entry.parent_id)
1606
children_of_parent_id.setdefault(entry.parent_id, []
1607
).append(entry.file_id)
1608
# Remove any search tips we've already processed
1609
remaining_parents = next_parents.difference(interesting)
1610
interesting.update(remaining_parents)
1611
# We should probably also .difference(directories_to_expand)
1612
interesting.update(file_ids)
1613
interesting.discard(None)
1614
while directories_to_expand:
1615
# Expand directories by looking in the
1616
# parent_id_basename_to_file_id map
1617
keys = [StaticTuple(f,).intern() for f in directories_to_expand]
1618
directories_to_expand = set()
1619
items = self.parent_id_basename_to_file_id.iteritems(keys)
1620
next_file_ids = set([item[1] for item in items])
1621
next_file_ids = next_file_ids.difference(interesting)
1622
interesting.update(next_file_ids)
1623
for entry in self._getitems(next_file_ids):
1624
if entry.kind == 'directory':
1625
directories_to_expand.add(entry.file_id)
1626
children_of_parent_id.setdefault(entry.parent_id, []
1627
).append(entry.file_id)
1628
return interesting, children_of_parent_id
1630
def filter(self, specific_fileids):
1631
"""Get an inventory view filtered against a set of file-ids.
1633
Children of directories and parents are included.
1635
The result may or may not reference the underlying inventory
1636
so it should be treated as immutable.
1639
parent_to_children) = self._expand_fileids_to_parents_and_children(
1641
# There is some overlap here, but we assume that all interesting items
1642
# are in the _fileid_to_entry_cache because we had to read them to
1643
# determine if they were a dir we wanted to recurse, or just a file
1644
# This should give us all the entries we'll want to add, so start
1646
other = Inventory(self.root_id)
1647
other.root.revision = self.root.revision
1648
other.revision_id = self.revision_id
1649
if not interesting or not parent_to_children:
1650
# empty filter, or filtering entrys that don't exist
1651
# (if even 1 existed, then we would have populated
1652
# parent_to_children with at least the tree root.)
1654
cache = self._fileid_to_entry_cache
1656
remaining_children = collections.deque(parent_to_children[self.root_id])
1658
import pdb; pdb.set_trace()
1660
while remaining_children:
1661
file_id = remaining_children.popleft()
1663
if ie.kind == 'directory':
1664
ie = ie.copy() # We create a copy to depopulate the .children attribute
1665
# TODO: depending on the uses of 'other' we should probably alwyas
1666
# '.copy()' to prevent someone from mutating other and
1667
# invaliding our internal cache
1669
if file_id in parent_to_children:
1670
remaining_children.extend(parent_to_children[file_id])
1674
1507
def _bytes_to_utf8name_key(bytes):
1675
1508
"""Get the file_id, revision_id key out of bytes."""
1749
1577
search_key_func=search_key_func)
1750
1578
result.id_to_entry._ensure_root()
1751
1579
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1752
# Change to apply to the parent_id_basename delta. The dict maps
1753
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1754
# when a path has its id replaced (e.g. the root is changed, or someone
1755
# does bzr mv a b, bzr mv c a, we should output a single change to this
1756
# map rather than two.
1757
parent_id_basename_delta = {}
1580
parent_id_basename_delta = []
1758
1581
if self.parent_id_basename_to_file_id is not None:
1759
1582
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1760
1583
self.parent_id_basename_to_file_id._store,
1770
1593
result.parent_id_basename_to_file_id = None
1771
1594
result.root_id = self.root_id
1772
1595
id_to_entry_delta = []
1773
# inventory_delta is only traversed once, so we just update the
1775
# Check for repeated file ids
1776
inventory_delta = _check_delta_unique_ids(inventory_delta)
1777
# Repeated old paths
1778
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1779
# Check for repeated new paths
1780
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1781
# Check for entries that don't match the fileid
1782
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1783
# Check for nonsense fileids
1784
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1785
# Check for new_path <-> entry consistency
1786
inventory_delta = _check_delta_new_path_entry_both_or_None(
1788
# All changed entries need to have their parents be directories and be
1789
# at the right path. This set contains (path, id) tuples.
1791
# When we delete an item, all the children of it must be either deleted
1792
# or altered in their own right. As we batch process the change via
1793
# CHKMap.apply_delta, we build a set of things to use to validate the
1797
1596
for old_path, new_path, file_id, entry in inventory_delta:
1798
1597
# file id changes
1799
1598
if new_path == '':
1808
1607
del result._path_to_fileid_cache[old_path]
1809
1608
except KeyError:
1811
deletes.add(file_id)
1813
new_key = StaticTuple(file_id,)
1611
new_key = (file_id,)
1814
1612
new_value = result._entry_to_bytes(entry)
1815
1613
# Update caches. It's worth doing this whether
1816
1614
# we're propagating the old caches or not.
1817
1615
result._path_to_fileid_cache[new_path] = file_id
1818
parents.add((split(new_path)[0], entry.parent_id))
1819
1616
if old_path is None:
1822
old_key = StaticTuple(file_id,)
1823
if self.id2path(file_id) != old_path:
1824
raise errors.InconsistentDelta(old_path, file_id,
1825
"Entry was at wrong other path %r." %
1826
self.id2path(file_id))
1827
altered.add(file_id)
1828
id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value))
1619
old_key = (file_id,)
1620
id_to_entry_delta.append((old_key, new_key, new_value))
1829
1621
if result.parent_id_basename_to_file_id is not None:
1830
1622
# parent_id, basename changes
1831
1623
if old_path is None:
1840
1632
new_key = self._parent_id_basename_key(entry)
1841
1633
new_value = file_id
1842
# If the two keys are the same, the value will be unchanged
1843
# as its always the file id for this entry.
1844
1634
if old_key != new_key:
1845
# Transform a change into explicit delete/add preserving
1846
# a possible match on the key from a different file id.
1847
if old_key is not None:
1848
parent_id_basename_delta.setdefault(
1849
old_key, [None, None])[0] = old_key
1850
if new_key is not None:
1851
parent_id_basename_delta.setdefault(
1852
new_key, [None, None])[1] = new_value
1853
# validate that deletes are complete.
1854
for file_id in deletes:
1855
entry = self[file_id]
1856
if entry.kind != 'directory':
1858
# This loop could potentially be better by using the id_basename
1859
# map to just get the child file ids.
1860
for child in entry.children.values():
1861
if child.file_id not in altered:
1862
raise errors.InconsistentDelta(self.id2path(child.file_id),
1863
child.file_id, "Child not deleted or reparented when "
1635
# If the two keys are the same, the value will be unchanged
1636
# as its always the file id.
1637
parent_id_basename_delta.append((old_key, new_key, new_value))
1865
1638
result.id_to_entry.apply_delta(id_to_entry_delta)
1866
1639
if parent_id_basename_delta:
1867
# Transform the parent_id_basename delta data into a linear delta
1868
# with only one record for a given key. Optimally this would allow
1869
# re-keying, but its simpler to just output that as a delete+add
1870
# to spend less time calculating the delta.
1872
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1873
if value is not None:
1874
delta_list.append((old_key, key, value))
1876
delta_list.append((old_key, None, None))
1877
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1878
parents.discard(('', None))
1879
for parent_path, parent in parents:
1881
if result[parent].kind != 'directory':
1882
raise errors.InconsistentDelta(result.id2path(parent), parent,
1883
'Not a directory, but given children')
1884
except errors.NoSuchId:
1885
raise errors.InconsistentDelta("<unknown>", parent,
1886
"Parent is not present in resulting inventory.")
1887
if result.path2id(parent_path) != parent:
1888
raise errors.InconsistentDelta(parent_path, parent,
1889
"Parent has wrong path %r." % result.path2id(parent_path))
1640
result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
1918
1669
raise errors.BzrError('Duplicate key in inventory: %r\n%r'
1919
1670
% (key, bytes))
1920
1671
info[key] = value
1921
revision_id = intern(info['revision_id'])
1922
root_id = intern(info['root_id'])
1923
search_key_name = intern(info.get('search_key_name', 'plain'))
1924
parent_id_basename_to_file_id = intern(info.get(
1925
'parent_id_basename_to_file_id', None))
1926
if not parent_id_basename_to_file_id.startswith('sha1:'):
1927
raise ValueError('parent_id_basename_to_file_id should be a sha1'
1928
' key not %r' % (parent_id_basename_to_file_id,))
1672
revision_id = info['revision_id']
1673
root_id = info['root_id']
1674
search_key_name = info.get('search_key_name', 'plain')
1675
parent_id_basename_to_file_id = info.get(
1676
'parent_id_basename_to_file_id', None)
1929
1677
id_to_entry = info['id_to_entry']
1930
if not id_to_entry.startswith('sha1:'):
1931
raise ValueError('id_to_entry should be a sha1'
1932
' key not %r' % (id_to_entry,))
1934
1679
result = CHKInventory(search_key_name)
1935
1680
result.revision_id = revision_id
1938
1683
result._search_key_name)
1939
1684
if parent_id_basename_to_file_id is not None:
1940
1685
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1941
chk_store, StaticTuple(parent_id_basename_to_file_id,),
1686
chk_store, (parent_id_basename_to_file_id,),
1942
1687
search_key_func=search_key_func)
1944
1689
result.parent_id_basename_to_file_id = None
1946
result.id_to_entry = chk_map.CHKMap(chk_store,
1947
StaticTuple(id_to_entry,),
1691
result.id_to_entry = chk_map.CHKMap(chk_store, (id_to_entry,),
1948
1692
search_key_func=search_key_func)
1949
1693
if (result.revision_id,) != expected_revision_id:
1950
1694
raise ValueError("Mismatched revision id and expected: %r, %r" %
1963
1707
:param maximum_size: The CHKMap node size limit.
1964
1708
:param search_key_name: The identifier for the search key function
1966
result = klass(search_key_name)
1710
result = CHKInventory(search_key_name)
1967
1711
result.revision_id = inventory.revision_id
1968
1712
result.root_id = inventory.root.file_id
1970
entry_to_bytes = result._entry_to_bytes
1971
parent_id_basename_key = result._parent_id_basename_key
1972
id_to_entry_dict = {}
1973
parent_id_basename_dict = {}
1713
search_key_func = chk_map.search_key_registry.get(search_key_name)
1714
result.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
1715
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1717
result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1718
None, search_key_func)
1719
result.parent_id_basename_to_file_id._root_node.set_maximum_size(
1721
result.parent_id_basename_to_file_id._root_node._key_width = 2
1722
parent_id_delta = []
1974
1723
for path, entry in inventory.iter_entries():
1975
key = StaticTuple(entry.file_id,).intern()
1976
id_to_entry_dict[key] = entry_to_bytes(entry)
1977
p_id_key = parent_id_basename_key(entry)
1978
parent_id_basename_dict[p_id_key] = entry.file_id
1980
result._populate_from_dicts(chk_store, id_to_entry_dict,
1981
parent_id_basename_dict, maximum_size=maximum_size)
1724
file_id_delta.append((None, (entry.file_id,),
1725
result._entry_to_bytes(entry)))
1726
parent_id_delta.append(
1727
(None, result._parent_id_basename_key(entry),
1729
result.id_to_entry.apply_delta(file_id_delta)
1730
result.parent_id_basename_to_file_id.apply_delta(parent_id_delta)
1984
def _populate_from_dicts(self, chk_store, id_to_entry_dict,
1985
parent_id_basename_dict, maximum_size):
1986
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1987
root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
1988
maximum_size=maximum_size, key_width=1,
1989
search_key_func=search_key_func)
1990
self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
1992
root_key = chk_map.CHKMap.from_dict(chk_store,
1993
parent_id_basename_dict,
1994
maximum_size=maximum_size, key_width=2,
1995
search_key_func=search_key_func)
1996
self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1997
root_key, search_key_func)
1999
1733
def _parent_id_basename_key(self, entry):
2000
1734
"""Create a key for a entry in a parent_id_basename_to_file_id index."""
2001
1735
if entry.parent_id is not None:
2002
1736
parent_id = entry.parent_id
2005
return StaticTuple(parent_id, entry.name.encode('utf8')).intern()
1739
return parent_id, entry.name.encode('utf8')
2007
1741
def __getitem__(self, file_id):
2008
1742
"""map a single file_id -> InventoryEntry."""
2015
1749
return self._bytes_to_entry(
2016
self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1])
1750
self.id_to_entry.iteritems([(file_id,)]).next()[1])
2017
1751
except StopIteration:
2018
1752
# really we're passing an inventory, not a tree...
2019
1753
raise errors.NoSuchId(self, file_id)
2021
def _getitems(self, file_ids):
2022
"""Similar to __getitem__, but lets you query for multiple.
2024
The returned order is undefined. And currently if an item doesn't
2025
exist, it isn't included in the output.
2029
for file_id in file_ids:
2030
entry = self._fileid_to_entry_cache.get(file_id, None)
2032
remaining.append(file_id)
2034
result.append(entry)
2035
file_keys = [StaticTuple(f,).intern() for f in remaining]
2036
for file_key, value in self.id_to_entry.iteritems(file_keys):
2037
entry = self._bytes_to_entry(value)
2038
result.append(entry)
2039
self._fileid_to_entry_cache[entry.file_id] = entry
2042
1755
def has_id(self, file_id):
2043
1756
# Perhaps have an explicit 'contains' method on CHKMap ?
2044
1757
if self._fileid_to_entry_cache.get(file_id, None) is not None:
2047
self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1
1759
return len(list(self.id_to_entry.iteritems([(file_id,)]))) == 1
2049
1761
def is_root(self, file_id):
2050
1762
return file_id == self.root_id
2179
1891
delta.append((old_path, new_path, file_id, entry))
2182
def path2id(self, relpath):
1894
def path2id(self, name):
2183
1895
"""See CommonInventory.path2id()."""
2184
# TODO: perhaps support negative hits?
2185
result = self._path_to_fileid_cache.get(relpath, None)
2186
if result is not None:
2188
if isinstance(relpath, basestring):
2189
names = osutils.splitpath(relpath)
2192
current_id = self.root_id
2193
if current_id is None:
2195
parent_id_index = self.parent_id_basename_to_file_id
2197
for basename in names:
2198
if cur_path is None:
2201
cur_path = cur_path + '/' + basename
2202
basename_utf8 = basename.encode('utf8')
2203
file_id = self._path_to_fileid_cache.get(cur_path, None)
2205
key_filter = [StaticTuple(current_id, basename_utf8)]
2206
items = parent_id_index.iteritems(key_filter)
2207
for (parent_id, name_utf8), file_id in items:
2208
if parent_id != current_id or name_utf8 != basename_utf8:
2209
raise errors.BzrError("corrupt inventory lookup! "
2210
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2215
self._path_to_fileid_cache[cur_path] = file_id
2216
current_id = file_id
1896
result = self._path_to_fileid_cache.get(name, None)
1898
result = CommonInventory.path2id(self, name)
1899
self._path_to_fileid_cache[name] = result
2219
1902
def to_lines(self):
2220
1903
"""Serialise the inventory to lines."""
2224
1907
lines.append('search_key_name: %s\n' % (self._search_key_name,))
2225
1908
lines.append("root_id: %s\n" % self.root_id)
2226
1909
lines.append('parent_id_basename_to_file_id: %s\n' %
2227
(self.parent_id_basename_to_file_id.key()[0],))
1910
self.parent_id_basename_to_file_id.key())
2228
1911
lines.append("revision_id: %s\n" % self.revision_id)
2229
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
1912
lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
2231
1914
lines.append("revision_id: %s\n" % self.revision_id)
2232
1915
lines.append("root_id: %s\n" % self.root_id)
2233
1916
if self.parent_id_basename_to_file_id is not None:
2234
1917
lines.append('parent_id_basename_to_file_id: %s\n' %
2235
(self.parent_id_basename_to_file_id.key()[0],))
2236
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
1918
self.parent_id_basename_to_file_id.key())
1919
lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
2354
2037
_NAME_RE = re.compile(r'^[^/\\]+$')
2356
2039
return bool(_NAME_RE.match(name))
2359
def _check_delta_unique_ids(delta):
2360
"""Decorate a delta and check that the file ids in it are unique.
2362
:return: A generator over delta.
2366
length = len(ids) + 1
2368
if len(ids) != length:
2369
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2374
def _check_delta_unique_new_paths(delta):
2375
"""Decorate a delta and check that the new paths in it are unique.
2377
:return: A generator over delta.
2381
length = len(paths) + 1
2383
if path is not None:
2385
if len(paths) != length:
2386
raise errors.InconsistentDelta(path, item[2], "repeated path")
2390
def _check_delta_unique_old_paths(delta):
2391
"""Decorate a delta and check that the old paths in it are unique.
2393
:return: A generator over delta.
2397
length = len(paths) + 1
2399
if path is not None:
2401
if len(paths) != length:
2402
raise errors.InconsistentDelta(path, item[2], "repeated path")
2406
def _check_delta_ids_are_valid(delta):
2407
"""Decorate a delta and check that the ids in it are valid.
2409
:return: A generator over delta.
2414
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2415
"entry with file_id None %r" % entry)
2416
if type(item[2]) != str:
2417
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2418
"entry with non bytes file_id %r" % entry)
2422
def _check_delta_ids_match_entry(delta):
2423
"""Decorate a delta and check that the ids in it match the entry.file_id.
2425
:return: A generator over delta.
2429
if entry is not None:
2430
if entry.file_id != item[2]:
2431
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2432
"mismatched id with %r" % entry)
2436
def _check_delta_new_path_entry_both_or_None(delta):
2437
"""Decorate a delta and check that the new_path and entry are paired.
2439
:return: A generator over delta.
2444
if new_path is None and entry is not None:
2445
raise errors.InconsistentDelta(item[0], item[1],
2446
"Entry with no new_path")
2447
if new_path is not None and entry is None:
2448
raise errors.InconsistentDelta(new_path, item[1],
2449
"new_path with no entry")