714
714
class CommonInventory(object):
715
"""Basic inventory logic, defined in terms of primitives like has_id."""
715
"""Basic inventory logic, defined in terms of primitives like has_id.
717
An inventory is the metadata about the contents of a tree.
719
This is broadly a map from file_id to entries such as directories, files,
720
symlinks and tree references. Each entry maintains its own metadata like
721
SHA1 and length for files, or children for a directory.
723
Entries can be looked up either by path or by file_id.
725
InventoryEntry objects must not be modified after they are
726
inserted, other than through the Inventory API.
717
729
def __contains__(self, file_id):
718
730
"""True if this entry contains a file with given id.
1021
1033
class Inventory(CommonInventory):
1022
"""Inventory of versioned files in a tree.
1024
This describes which file_id is present at each point in the tree,
1025
and possibly the SHA-1 or other information about the file.
1026
Entries can be looked up either by path or by file_id.
1028
The inventory represents a typical unix file tree, with
1029
directories containing files and subdirectories. We never store
1030
the full path to a file, because renaming a directory implicitly
1031
moves all of its contents. This class internally maintains a
1034
"""Mutable dict based in-memory inventory.
1036
We never store the full path to a file, because renaming a directory
1037
implicitly moves all of its contents. This class internally maintains a
1032
1038
lookup tree that allows the children under a directory to be
1033
1039
returned quickly.
1035
InventoryEntry objects must not be modified after they are
1036
inserted, other than through the Inventory API.
1038
1041
>>> inv = Inventory()
1039
1042
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1040
1043
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
1041
1044
>>> inv['123-123'].name
1044
May be treated as an iterator or set to look up file ids:
1047
Id's may be looked up from paths:
1046
>>> bool(inv.path2id('hello.c'))
1049
>>> inv.path2id('hello.c')
1048
1051
>>> '123-123' in inv
1051
May also look up by name:
1054
There are iterators over the contents:
1053
>>> [x[0] for x in inv.iter_entries()]
1056
>>> [entry[0] for entry in inv.iter_entries()]
1054
1057
['', u'hello.c']
1055
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
1056
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1057
Traceback (most recent call last):
1058
BzrError: parent_id {TREE_ROOT} not in inventory
1059
>>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
1060
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None, revision=None)
1062
1060
def __init__(self, root_id=ROOT_ID, revision_id=None):
1063
1061
"""Create or read an inventory.
1091
1089
See the inventory developers documentation for the theory behind
1092
1090
inventory deltas.
1092
If delta application fails the inventory is left in an indeterminate
1093
state and must not be used.
1094
1095
:param delta: A list of changes to apply. After all the changes are
1095
1096
applied the final inventory must be internally consistent, but it
1096
1097
is ok to supply changes which, if only half-applied would have an
1128
1129
# Check that the delta is legal. It would be nice if this could be
1129
1130
# done within the loops below but it's safer to validate the delta
1130
# before starting to mutate the inventory.
1131
unique_file_ids = set([f for _, _, f, _ in delta])
1132
if len(unique_file_ids) != len(delta):
1133
raise AssertionError("a file-id appears multiple times in %r"
1131
# before starting to mutate the inventory, as there isn't a rollback
1133
list(_check_delta_unique_ids(_check_delta_unique_new_paths(
1134
_check_delta_unique_old_paths(_check_delta_ids_match_entry(
1135
_check_delta_ids_are_valid(
1136
_check_delta_new_path_entry_both_or_None(
1138
1140
# Remove all affected items which were in the original inventory,
1141
1143
# modified children remaining by the time we examine it.
1142
1144
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1143
1145
if op is not None), reverse=True):
1144
if file_id not in self:
1147
1146
# Preserve unaltered children of file_id for later reinsertion.
1148
1147
file_id_children = getattr(self[file_id], 'children', {})
1149
1148
if len(file_id_children):
1150
1149
children[file_id] = file_id_children
1150
if self.id2path(file_id) != old_path:
1151
raise errors.InconsistentDelta(old_path, file_id,
1152
"Entry was at wrong other path %r." % self.id2path(file_id))
1151
1153
# Remove file_id and the unaltered children. If file_id is not
1152
1154
# being deleted it will be reinserted back later.
1153
1155
self.remove_recursive_id(file_id)
1156
1158
# longest, ensuring that items which were modified and whose parents in
1157
1159
# the resulting inventory were also modified, are inserted after their
1159
for new_path, new_entry in sorted((np, e) for op, np, f, e in
1161
for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
1160
1162
delta if np is not None):
1161
1163
if new_entry.kind == 'directory':
1162
1164
# Pop the child which to allow detection of children whose
1167
1169
replacement.revision = new_entry.revision
1168
1170
replacement.children = children.pop(replacement.file_id, {})
1169
1171
new_entry = replacement
1174
except errors.DuplicateFileId:
1175
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1176
"New id is already present in target.")
1177
except AttributeError:
1178
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1179
"Parent is not a directory.")
1180
if self.id2path(new_entry.file_id) != new_path:
1181
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1182
"New path is not consistent with parent path.")
1171
1183
if len(children):
1172
1184
# Get the parent id that was deleted
1173
1185
parent_id, children = children.popitem()
1253
1265
To add a file to a branch ready to be committed, use Branch.add,
1254
1266
which calls this.
1256
Returns the new entry object.
1258
1270
if entry.file_id in self._byid:
1259
1271
raise errors.DuplicateFileId(entry.file_id,
1260
1272
self._byid[entry.file_id])
1262
1273
if entry.parent_id is None:
1263
1274
self.root = entry
1266
1277
parent = self._byid[entry.parent_id]
1267
1278
except KeyError:
1268
raise BzrError("parent_id {%s} not in inventory" %
1279
raise errors.InconsistentDelta("<unknown>", entry.parent_id,
1280
"Parent not in inventory.")
1271
1281
if entry.name in parent.children:
1272
raise BzrError("%s is already versioned" %
1273
osutils.pathjoin(self.id2path(parent.file_id),
1274
entry.name).encode('utf-8'))
1282
raise errors.InconsistentDelta(
1283
self.id2path(parent.children[entry.name].file_id),
1285
"Path already versioned")
1275
1286
parent.children[entry.name] = entry
1276
1287
return self._add_child(entry)
1603
1616
search_key_func=search_key_func)
1604
1617
result.id_to_entry._ensure_root()
1605
1618
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1606
parent_id_basename_delta = []
1619
# Change to apply to the parent_id_basename delta. The dict maps
1620
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1621
# when a path has its id replaced (e.g. the root is changed, or someone
1622
# does bzr mv a b, bzr mv c a, we should output a single change to this
1623
# map rather than two.
1624
parent_id_basename_delta = {}
1607
1625
if self.parent_id_basename_to_file_id is not None:
1608
1626
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1609
1627
self.parent_id_basename_to_file_id._store,
1619
1637
result.parent_id_basename_to_file_id = None
1620
1638
result.root_id = self.root_id
1621
1639
id_to_entry_delta = []
1640
# inventory_delta is only traversed once, so we just update the
1642
# Check for repeated file ids
1643
inventory_delta = _check_delta_unique_ids(inventory_delta)
1644
# Repeated old paths
1645
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1646
# Check for repeated new paths
1647
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1648
# Check for entries that don't match the fileid
1649
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1650
# Check for nonsense fileids
1651
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1652
# Check for new_path <-> entry consistency
1653
inventory_delta = _check_delta_new_path_entry_both_or_None(
1655
# All changed entries need to have their parents be directories and be
1656
# at the right path. This set contains (path, id) tuples.
1658
# When we delete an item, all the children of it must be either deleted
1659
# or altered in their own right. As we batch process the change via
1660
# CHKMap.apply_delta, we build a set of things to use to validate the
1622
1664
for old_path, new_path, file_id, entry in inventory_delta:
1623
1665
# file id changes
1624
1666
if new_path == '':
1633
1675
del result._path_to_fileid_cache[old_path]
1634
1676
except KeyError:
1678
deletes.add(file_id)
1637
1680
new_key = (file_id,)
1638
1681
new_value = result._entry_to_bytes(entry)
1639
1682
# Update caches. It's worth doing this whether
1640
1683
# we're propagating the old caches or not.
1641
1684
result._path_to_fileid_cache[new_path] = file_id
1685
parents.add((split(new_path)[0], entry.parent_id))
1642
1686
if old_path is None:
1645
1689
old_key = (file_id,)
1690
if self.id2path(file_id) != old_path:
1691
raise errors.InconsistentDelta(old_path, file_id,
1692
"Entry was at wrong other path %r." %
1693
self.id2path(file_id))
1694
altered.add(file_id)
1646
1695
id_to_entry_delta.append((old_key, new_key, new_value))
1647
1696
if result.parent_id_basename_to_file_id is not None:
1648
1697
# parent_id, basename changes
1658
1707
new_key = self._parent_id_basename_key(entry)
1659
1708
new_value = file_id
1709
# If the two keys are the same, the value will be unchanged
1710
# as its always the file id for this entry.
1660
1711
if old_key != new_key:
1661
# If the two keys are the same, the value will be unchanged
1662
# as its always the file id.
1663
parent_id_basename_delta.append((old_key, new_key, new_value))
1712
# Transform a change into explicit delete/add preserving
1713
# a possible match on the key from a different file id.
1714
if old_key is not None:
1715
parent_id_basename_delta.setdefault(
1716
old_key, [None, None])[0] = old_key
1717
if new_key is not None:
1718
parent_id_basename_delta.setdefault(
1719
new_key, [None, None])[1] = new_value
1720
# validate that deletes are complete.
1721
for file_id in deletes:
1722
entry = self[file_id]
1723
if entry.kind != 'directory':
1725
# This loop could potentially be better by using the id_basename
1726
# map to just get the child file ids.
1727
for child in entry.children.values():
1728
if child.file_id not in altered:
1729
raise errors.InconsistentDelta(self.id2path(child.file_id),
1730
child.file_id, "Child not deleted or reparented when "
1664
1732
result.id_to_entry.apply_delta(id_to_entry_delta)
1665
1733
if parent_id_basename_delta:
1666
result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
1734
# Transform the parent_id_basename delta data into a linear delta
1735
# with only one record for a given key. Optimally this would allow
1736
# re-keying, but its simpler to just output that as a delete+add
1737
# to spend less time calculating the delta.
1739
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1740
if value is not None:
1741
delta_list.append((old_key, key, value))
1743
delta_list.append((old_key, None, None))
1744
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1745
parents.discard(('', None))
1746
for parent_path, parent in parents:
1748
if result[parent].kind != 'directory':
1749
raise errors.InconsistentDelta(result.id2path(parent), parent,
1750
'Not a directory, but given children')
1751
except errors.NoSuchId:
1752
raise errors.InconsistentDelta("<unknown>", parent,
1753
"Parent is not present in resulting inventory.")
1754
if result.path2id(parent_path) != parent:
1755
raise errors.InconsistentDelta(parent_path, parent,
1756
"Parent has wrong path %r." % result.path2id(parent_path))
1929
2019
def path2id(self, name):
1930
2020
"""See CommonInventory.path2id()."""
2021
# TODO: perhaps support negative hits?
1931
2022
result = self._path_to_fileid_cache.get(name, None)
1933
result = CommonInventory.path2id(self, name)
1934
self._path_to_fileid_cache[name] = result
2023
if result is not None:
2025
if isinstance(name, basestring):
2026
names = osutils.splitpath(name)
2029
current_id = self.root_id
2030
if current_id is None:
2032
parent_id_index = self.parent_id_basename_to_file_id
2033
for basename in names:
2034
# TODO: Cache each path we figure out in this function.
2035
basename_utf8 = basename.encode('utf8')
2036
key_filter = [(current_id, basename_utf8)]
2038
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
2039
key_filter=key_filter):
2040
if parent_id != current_id or name_utf8 != basename_utf8:
2041
raise errors.BzrError("corrupt inventory lookup! "
2042
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2046
current_id = file_id
2047
self._path_to_fileid_cache[name] = current_id
1937
2050
def to_lines(self):
1938
2051
"""Serialise the inventory to lines."""
2072
2185
_NAME_RE = re.compile(r'^[^/\\]+$')
2074
2187
return bool(_NAME_RE.match(name))
2190
def _check_delta_unique_ids(delta):
2191
"""Decorate a delta and check that the file ids in it are unique.
2193
:return: A generator over delta.
2197
length = len(ids) + 1
2199
if len(ids) != length:
2200
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2205
def _check_delta_unique_new_paths(delta):
2206
"""Decorate a delta and check that the new paths in it are unique.
2208
:return: A generator over delta.
2212
length = len(paths) + 1
2214
if path is not None:
2216
if len(paths) != length:
2217
raise errors.InconsistentDelta(path, item[2], "repeated path")
2221
def _check_delta_unique_old_paths(delta):
2222
"""Decorate a delta and check that the old paths in it are unique.
2224
:return: A generator over delta.
2228
length = len(paths) + 1
2230
if path is not None:
2232
if len(paths) != length:
2233
raise errors.InconsistentDelta(path, item[2], "repeated path")
2237
def _check_delta_ids_are_valid(delta):
2238
"""Decorate a delta and check that the ids in it are valid.
2240
:return: A generator over delta.
2245
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2246
"entry with file_id None %r" % entry)
2247
if type(item[2]) != str:
2248
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2249
"entry with non bytes file_id %r" % entry)
2253
def _check_delta_ids_match_entry(delta):
2254
"""Decorate a delta and check that the ids in it match the entry.file_id.
2256
:return: A generator over delta.
2260
if entry is not None:
2261
if entry.file_id != item[2]:
2262
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2263
"mismatched id with %r" % entry)
2267
def _check_delta_new_path_entry_both_or_None(delta):
2268
"""Decorate a delta and check that the new_path and entry are paired.
2270
:return: A generator over delta.
2275
if new_path is None and entry is not None:
2276
raise errors.InconsistentDelta(item[0], item[1],
2277
"Entry with no new_path")
2278
if new_path is not None and entry is None:
2279
raise errors.InconsistentDelta(new_path, item[1],
2280
"new_path with no entry")