714
716
class CommonInventory(object):
715
"""Basic inventory logic, defined in terms of primitives like has_id.
717
An inventory is the metadata about the contents of a tree.
719
This is broadly a map from file_id to entries such as directories, files,
720
symlinks and tree references. Each entry maintains its own metadata like
721
SHA1 and length for files, or children for a directory.
723
Entries can be looked up either by path or by file_id.
725
InventoryEntry objects must not be modified after they are
726
inserted, other than through the Inventory API.
717
"""Basic inventory logic, defined in terms of primitives like has_id."""
729
719
def __contains__(self, file_id):
730
720
"""True if this entry contains a file with given id.
1033
1023
class Inventory(CommonInventory):
1034
"""Mutable dict based in-memory inventory.
1036
We never store the full path to a file, because renaming a directory
1037
implicitly moves all of its contents. This class internally maintains a
1024
"""Inventory of versioned files in a tree.
1026
This describes which file_id is present at each point in the tree,
1027
and possibly the SHA-1 or other information about the file.
1028
Entries can be looked up either by path or by file_id.
1030
The inventory represents a typical unix file tree, with
1031
directories containing files and subdirectories. We never store
1032
the full path to a file, because renaming a directory implicitly
1033
moves all of its contents. This class internally maintains a
1038
1034
lookup tree that allows the children under a directory to be
1039
1035
returned quickly.
1037
InventoryEntry objects must not be modified after they are
1038
inserted, other than through the Inventory API.
1041
1040
>>> inv = Inventory()
1042
1041
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1043
1042
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
1044
1043
>>> inv['123-123'].name
1047
Ids may be looked up from paths:
1046
May be treated as an iterator or set to look up file ids:
1049
>>> inv.path2id('hello.c')
1048
>>> bool(inv.path2id('hello.c'))
1051
1050
>>> '123-123' in inv
1054
There are iterators over the contents:
1053
May also look up by name:
1056
>>> [entry[0] for entry in inv.iter_entries()]
1055
>>> [x[0] for x in inv.iter_entries()]
1057
1056
['', u'hello.c']
1057
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
1058
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1059
Traceback (most recent call last):
1060
BzrError: parent_id {TREE_ROOT} not in inventory
1061
>>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
1062
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None, revision=None)
1060
1064
def __init__(self, root_id=ROOT_ID, revision_id=None):
1061
1065
"""Create or read an inventory.
1086
1090
def apply_delta(self, delta):
1087
1091
"""Apply a delta to this inventory.
1089
See the inventory developers documentation for the theory behind
1092
If delta application fails the inventory is left in an indeterminate
1093
state and must not be used.
1095
1093
:param delta: A list of changes to apply. After all the changes are
1096
1094
applied the final inventory must be internally consistent, but it
1097
1095
is ok to supply changes which, if only half-applied would have an
1129
1127
# Check that the delta is legal. It would be nice if this could be
1130
1128
# done within the loops below but it's safer to validate the delta
1131
# before starting to mutate the inventory, as there isn't a rollback
1133
list(_check_delta_unique_ids(_check_delta_unique_new_paths(
1134
_check_delta_unique_old_paths(_check_delta_ids_match_entry(
1135
_check_delta_ids_are_valid(
1136
_check_delta_new_path_entry_both_or_None(
1129
# before starting to mutate the inventory.
1130
unique_file_ids = set([f for _, _, f, _ in delta])
1131
if len(unique_file_ids) != len(delta):
1132
raise AssertionError("a file-id appears multiple times in %r"
1140
1137
# Remove all affected items which were in the original inventory,
1143
1140
# modified children remaining by the time we examine it.
1144
1141
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1145
1142
if op is not None), reverse=True):
1143
if file_id not in self:
1146
1146
# Preserve unaltered children of file_id for later reinsertion.
1147
1147
file_id_children = getattr(self[file_id], 'children', {})
1148
1148
if len(file_id_children):
1149
1149
children[file_id] = file_id_children
1150
if self.id2path(file_id) != old_path:
1151
raise errors.InconsistentDelta(old_path, file_id,
1152
"Entry was at wrong other path %r." % self.id2path(file_id))
1153
1150
# Remove file_id and the unaltered children. If file_id is not
1154
1151
# being deleted it will be reinserted back later.
1155
1152
self.remove_recursive_id(file_id)
1158
1155
# longest, ensuring that items which were modified and whose parents in
1159
1156
# the resulting inventory were also modified, are inserted after their
1161
for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
1158
for new_path, new_entry in sorted((np, e) for op, np, f, e in
1162
1159
delta if np is not None):
1163
1160
if new_entry.kind == 'directory':
1164
1161
# Pop the children to allow detection of children whose
1169
1166
replacement.revision = new_entry.revision
1170
1167
replacement.children = children.pop(replacement.file_id, {})
1171
1168
new_entry = replacement
1174
except errors.DuplicateFileId:
1175
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1176
"New id is already present in target.")
1177
except AttributeError:
1178
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1179
"Parent is not a directory.")
1180
if self.id2path(new_entry.file_id) != new_path:
1181
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1182
"New path is not consistent with parent path.")
1183
1170
if len(children):
1184
1171
# Get the parent id that was deleted
1185
1172
parent_id, children = children.popitem()
1265
1252
To add a file to a branch ready to be committed, use Branch.add,
1266
1253
which calls this.
1255
Returns the new entry object.
1270
1257
if entry.file_id in self._byid:
1271
1258
raise errors.DuplicateFileId(entry.file_id,
1272
1259
self._byid[entry.file_id])
1273
1261
if entry.parent_id is None:
1274
1262
self.root = entry
1277
1265
parent = self._byid[entry.parent_id]
1278
1266
except KeyError:
1279
raise errors.InconsistentDelta("<unknown>", entry.parent_id,
1280
"Parent not in inventory.")
1267
raise BzrError("parent_id {%s} not in inventory" %
1281
1270
if entry.name in parent.children:
1282
raise errors.InconsistentDelta(
1283
self.id2path(parent.children[entry.name].file_id),
1285
"Path already versioned")
1271
raise BzrError("%s is already versioned" %
1272
osutils.pathjoin(self.id2path(parent.file_id),
1273
entry.name).encode('utf-8'))
1286
1274
parent.children[entry.name] = entry
1287
1275
return self._add_child(entry)
1616
1599
search_key_func=search_key_func)
1617
1600
result.id_to_entry._ensure_root()
1618
1601
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1619
# Change to apply to the parent_id_basename delta. The dict maps
1620
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1621
# when a path has its id replaced (e.g. the root is changed, or someone
1622
# does bzr mv a b, bzr mv c a), we should output a single change to this
1623
# map rather than two.
1624
parent_id_basename_delta = {}
1602
parent_id_basename_delta = []
1625
1603
if self.parent_id_basename_to_file_id is not None:
1626
1604
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1627
1605
self.parent_id_basename_to_file_id._store,
1637
1615
result.parent_id_basename_to_file_id = None
1638
1616
result.root_id = self.root_id
1639
1617
id_to_entry_delta = []
1640
# inventory_delta is only traversed once, so we just update the
1642
# Check for repeated file ids
1643
inventory_delta = _check_delta_unique_ids(inventory_delta)
1644
# Repeated old paths
1645
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1646
# Check for repeated new paths
1647
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1648
# Check for entries that don't match the fileid
1649
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1650
# Check for nonsense fileids
1651
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1652
# Check for new_path <-> entry consistency
1653
inventory_delta = _check_delta_new_path_entry_both_or_None(
1655
# All changed entries need to have their parents be directories and be
1656
# at the right path. This set contains (path, id) tuples.
1658
# When we delete an item, all the children of it must be either deleted
1659
# or altered in their own right. As we batch process the change via
1660
# CHKMap.apply_delta, we build a set of things to use to validate the
1664
1618
for old_path, new_path, file_id, entry in inventory_delta:
1665
1619
# file id changes
1666
1620
if new_path == '':
1675
1629
del result._path_to_fileid_cache[old_path]
1676
1630
except KeyError:
1678
deletes.add(file_id)
1680
1633
new_key = (file_id,)
1681
1634
new_value = result._entry_to_bytes(entry)
1682
1635
# Update caches. It's worth doing this whether
1683
1636
# we're propagating the old caches or not.
1684
1637
result._path_to_fileid_cache[new_path] = file_id
1685
parents.add((split(new_path)[0], entry.parent_id))
1686
1638
if old_path is None:
1689
1641
old_key = (file_id,)
1690
if self.id2path(file_id) != old_path:
1691
raise errors.InconsistentDelta(old_path, file_id,
1692
"Entry was at wrong other path %r." %
1693
self.id2path(file_id))
1694
altered.add(file_id)
1695
1642
id_to_entry_delta.append((old_key, new_key, new_value))
1696
1643
if result.parent_id_basename_to_file_id is not None:
1697
1644
# parent_id, basename changes
1707
1654
new_key = self._parent_id_basename_key(entry)
1708
1655
new_value = file_id
1709
# If the two keys are the same, the value will be unchanged
1710
# as its always the file id for this entry.
1711
1656
if old_key != new_key:
1712
# Transform a change into explicit delete/add preserving
1713
# a possible match on the key from a different file id.
1714
if old_key is not None:
1715
parent_id_basename_delta.setdefault(
1716
old_key, [None, None])[0] = old_key
1717
if new_key is not None:
1718
parent_id_basename_delta.setdefault(
1719
new_key, [None, None])[1] = new_value
1720
# validate that deletes are complete.
1721
for file_id in deletes:
1722
entry = self[file_id]
1723
if entry.kind != 'directory':
1725
# This loop could potentially be better by using the id_basename
1726
# map to just get the child file ids.
1727
for child in entry.children.values():
1728
if child.file_id not in altered:
1729
raise errors.InconsistentDelta(self.id2path(child.file_id),
1730
child.file_id, "Child not deleted or reparented when "
1657
# If the two keys are the same, the value will be unchanged
1658
# as it's always the file id.
1659
parent_id_basename_delta.append((old_key, new_key, new_value))
1732
1660
result.id_to_entry.apply_delta(id_to_entry_delta)
1733
1661
if parent_id_basename_delta:
1734
# Transform the parent_id_basename delta data into a linear delta
1735
# with only one record for a given key. Optimally this would allow
1736
# re-keying, but it's simpler to just output that as a delete+add
1737
# to spend less time calculating the delta.
1739
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1740
if value is not None:
1741
delta_list.append((old_key, key, value))
1743
delta_list.append((old_key, None, None))
1744
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1745
parents.discard(('', None))
1746
for parent_path, parent in parents:
1748
if result[parent].kind != 'directory':
1749
raise errors.InconsistentDelta(result.id2path(parent), parent,
1750
'Not a directory, but given children')
1751
except errors.NoSuchId:
1752
raise errors.InconsistentDelta("<unknown>", parent,
1753
"Parent is not present in resulting inventory.")
1754
if result.path2id(parent_path) != parent:
1755
raise errors.InconsistentDelta(parent_path, parent,
1756
"Parent has wrong path %r." % result.path2id(parent_path))
1662
result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
2019
1925
def path2id(self, name):
2020
1926
"""See CommonInventory.path2id()."""
2021
# TODO: perhaps support negative hits?
2022
1927
result = self._path_to_fileid_cache.get(name, None)
2023
if result is not None:
2025
if isinstance(name, basestring):
2026
names = osutils.splitpath(name)
2029
current_id = self.root_id
2030
if current_id is None:
2032
parent_id_index = self.parent_id_basename_to_file_id
2033
for basename in names:
2034
# TODO: Cache each path we figure out in this function.
2035
basename_utf8 = basename.encode('utf8')
2036
key_filter = [(current_id, basename_utf8)]
2038
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
2039
key_filter=key_filter):
2040
if parent_id != current_id or name_utf8 != basename_utf8:
2041
raise errors.BzrError("corrupt inventory lookup! "
2042
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2046
current_id = file_id
2047
self._path_to_fileid_cache[name] = current_id
1929
result = CommonInventory.path2id(self, name)
1930
self._path_to_fileid_cache[name] = result
2050
1933
def to_lines(self):
2051
1934
"""Serialise the inventory to lines."""
2185
2068
_NAME_RE = re.compile(r'^[^/\\]+$')
2187
2070
return bool(_NAME_RE.match(name))
2190
def _check_delta_unique_ids(delta):
2191
"""Decorate a delta and check that the file ids in it are unique.
2193
:return: A generator over delta.
2197
length = len(ids) + 1
2199
if len(ids) != length:
2200
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2205
def _check_delta_unique_new_paths(delta):
2206
"""Decorate a delta and check that the new paths in it are unique.
2208
:return: A generator over delta.
2212
length = len(paths) + 1
2214
if path is not None:
2216
if len(paths) != length:
2217
raise errors.InconsistentDelta(path, item[2], "repeated path")
2221
def _check_delta_unique_old_paths(delta):
2222
"""Decorate a delta and check that the old paths in it are unique.
2224
:return: A generator over delta.
2228
length = len(paths) + 1
2230
if path is not None:
2232
if len(paths) != length:
2233
raise errors.InconsistentDelta(path, item[2], "repeated path")
2237
def _check_delta_ids_are_valid(delta):
2238
"""Decorate a delta and check that the ids in it are valid.
2240
:return: A generator over delta.
2245
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2246
"entry with file_id None %r" % entry)
2247
if type(item[2]) != str:
2248
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2249
"entry with non bytes file_id %r" % entry)
2253
def _check_delta_ids_match_entry(delta):
2254
"""Decorate a delta and check that the ids in it match the entry.file_id.
2256
:return: A generator over delta.
2260
if entry is not None:
2261
if entry.file_id != item[2]:
2262
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2263
"mismatched id with %r" % entry)
2267
def _check_delta_new_path_entry_both_or_None(delta):
2268
"""Decorate a delta and check that the new_path and entry are paired.
2270
:return: A generator over delta.
2275
if new_path is None and entry is not None:
2276
raise errors.InconsistentDelta(item[0], item[1],
2277
"Entry with no new_path")
2278
if new_path is not None and entry is None:
2279
raise errors.InconsistentDelta(new_path, item[1],
2280
"new_path with no entry")