        return compatible


class Inventory(object):
    """Inventory of versioned files in a tree.

    This describes which file_id is present at each point in the tree,
    and possibly the SHA-1 or other information about the file.


class CommonInventory(object):
    """Basic inventory logic, defined in terms of primitives like has_id.

    An inventory is the metadata about the contents of a tree.

    This is broadly a map from file_id to entries such as directories, files,
    symlinks and tree references. Each entry maintains its own metadata like
    SHA1 and length for files, or children for a directory.

    Entries can be looked up either by path or by file_id.

    The inventory represents a typical unix file tree, with
    directories containing files and subdirectories. We never store
    the full path to a file, because renaming a directory implicitly
    moves all of its contents. This class internally maintains a
    lookup tree that allows the children under a directory to be

    InventoryEntry objects must not be modified after they are
    inserted, other than through the Inventory API.
    >>> inv = Inventory()
    >>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
    InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None)
    >>> inv['123-123'].name

    May be treated as an iterator or set to look up file ids:

    >>> bool(inv.path2id('hello.c'))

    May also look up by name:

    >>> [x[0] for x in inv.iter_entries()]

    >>> inv = Inventory('TREE_ROOT-12345678-12345678')
    >>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
    Traceback (most recent call last):
    BzrError: parent_id {TREE_ROOT} not in inventory
    >>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
    InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None)
    def __init__(self, root_id=ROOT_ID, revision_id=None):
        """Create or read an inventory.

        If a working directory is specified, the inventory is read
        from there. If the file is specified, read from that. If not,
        the inventory is created empty.

        The inventory is created with a default root directory, with
        """
        if root_id is not None:
            self._set_root(InventoryDirectory(root_id, u'', None))
        self.revision_id = revision_id

        return "<Inventory object at %x, contents=%r>" % (id(self), self._byid)

    def apply_delta(self, delta):
        """Apply a delta to this inventory.

        :param delta: A list of changes to apply. After all the changes are
            applied the final inventory must be internally consistent, but it
            is ok to supply changes which, if only half-applied, would have an
            invalid result - such as supplying two changes which rename two
            files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
            ('B', 'A', 'B-id', b_entry)].

        Each change is a tuple, of the form (old_path, new_path, file_id,
        new_entry).

        When new_path is None, the change indicates the removal of an entry
        from the inventory and new_entry will be ignored (using None is
        appropriate). If new_path is not None, then new_entry must be an
        InventoryEntry instance, which will be incorporated into the
        inventory (and replace any existing entry with the same file id).

        When old_path is None, the change indicates the addition of
        a new entry to the inventory.

        When neither new_path nor old_path are None, the change is a
        modification to an entry, such as a rename, reparent, kind change

        The children attribute of new_entry is ignored. This is because
        this method preserves children automatically across alterations to
        the parent of the children, and cases where the parent id of a
        child is changing require the child to be passed in as a separate
        change regardless. E.g. in the recursive deletion of a directory -
        the directory's children must be included in the delta, or the
        final inventory will be invalid.
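
        For illustration, assuming 'A', 'C' and 'D' are paths in the tree and
        a_entry/c_entry are InventoryEntry instances for 'A-id' and 'C-id', a
        delta combining an addition, a rename and a removal could look like::

            delta = [
                (None, 'C', 'C-id', c_entry),   # add 'C'
                ('A', 'B', 'A-id', a_entry),    # rename 'A' to 'B'
                ('D', None, 'D-id', None),      # remove 'D'
            ]
            inv.apply_delta(delta)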
        """
        # Remove all affected items which were in the original inventory,
        # starting with the longest paths, thus ensuring parents are examined
        # after their children, which means that everything we examine has no
        # modified children remaining by the time we examine it.
        for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
                                         if op is not None), reverse=True):
            if file_id not in self:
            # Preserve unaltered children of file_id for later reinsertion.
            file_id_children = getattr(self[file_id], 'children', {})
            if len(file_id_children):
                children[file_id] = file_id_children
            # Remove file_id and the unaltered children. If file_id is not
            # being deleted it will be reinserted back later.
            self.remove_recursive_id(file_id)
        # Insert all affected items which should be in the new inventory,
        # reattaching their children if they had any. This is done from
        # shortest path to longest, ensuring that items which were modified
        # and whose parents in the resulting inventory were also modified,
        # are inserted after their parents.
        for new_path, new_entry in sorted((np, e) for op, np, f, e in
                                          delta if np is not None):
            if new_entry.kind == 'directory':
                # Pop the children to allow detection of children whose
                # parents were deleted and which were not reattached to a new
                # parent.
                new_entry.children = children.pop(new_entry.file_id, {})
        # Get the parent id that was deleted
        parent_id, children = children.popitem()
        raise errors.InconsistentDelta("<deleted>", parent_id,
            "The file id was deleted but its children were not deleted.")
    def _set_root(self, ie):
        self._byid = {self.root.file_id: self.root}

        # TODO: jam 20051218 Should copy also copy the revision_id?
        entries = self.iter_entries()
        if self.root is None:
            return Inventory(root_id=None)
        other = Inventory(entries.next()[1].file_id)
        other.root.revision = self.root.revision
        # copy recursively so we know directories will be added before
        # their children. There are more efficient ways than this...
        for path, entry in entries:
            other.add(entry.copy())

        return iter(self._byid)

        """Returns number of entries."""
        return len(self._byid)

    def iter_entries(self, from_dir=None):
        """Return (path, entry) pairs, in order by name."""
    def __contains__(self, file_id):
        """True if this entry contains a file with given id.

        >>> inv = Inventory()
        >>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID))
        InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)

        Note that this method along with __iter__ are not encouraged for use as
        they are less clear than specific query methods - they may be removed
        """
        return self.has_id(file_id)

    def has_filename(self, filename):
        return bool(self.path2id(filename))

    def id2path(self, file_id):
        """Return as a string the path to file_id.

        >>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
        >>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
        >>> print i.id2path('foo-id')

        :raises NoSuchId: If file_id is not present in the inventory.
        """
        # get all names, skipping root
        return '/'.join(reversed(
            [parent.name for parent in
             self._iter_file_id_parents(file_id)][:-1]))

    def iter_entries(self, from_dir=None, recursive=True):
        """Return (path, entry) pairs, in order by name.

        :param from_dir: if None, start from the root,
            otherwise start from this directory (either file-id or entry)
        :param recursive: recurse into directories or not
        """
        if from_dir is None:
            if self.root is None:
            from_dir = self.root
            yield '', self.root
        elif isinstance(from_dir, basestring):
            from_dir = self._byid[from_dir]
            from_dir = self[from_dir]

        # unrolling the recursive call changed the time from
        # 440ms/663ms (inline/total) to 116ms/116ms
        children = from_dir.children.items()
        for name, ie in children:
        children = collections.deque(children)
        stack = [(u'', children)]

                descend(child_ie, child_path)
        descend(self.root, u'')
    def path2id(self, relpath):
        """Walk down through directories to return entry of last component.

        :param relpath: may be either a list of path components, or a single
            string, in which case it is automatically split.

        This returns the entry of the last component in the path,
        which may be either a file or a directory.

        Returns None IFF the path is not found.
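
        For example, assuming the tree contains a file 'foo.c' (file id
        'foo-id') inside a directory 'src', lookups might behave like::

            inv.path2id('src/foo.c')        # -> 'foo-id'
            inv.path2id(['src', 'foo.c'])   # same lookup, already split
            inv.path2id('src/missing.c')    # -> None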
        """
        if isinstance(relpath, basestring):
            names = osutils.splitpath(relpath)
            except errors.NoSuchId:
                # root doesn't exist yet so nothing else can
            children = getattr(parent, 'children', None)
        return parent.file_id

    def filter(self, specific_fileids):
        """Get an inventory view filtered against a set of file-ids.

        Children of directories and parents are included.

        The result may or may not reference the underlying inventory
        so it should be treated as immutable.
        """
        interesting_parents = set()
        for fileid in specific_fileids:
            interesting_parents.update(self.get_idpath(fileid))
            except errors.NoSuchId:
                # This fileid is not in the inventory - that's ok
        entries = self.iter_entries()
        if self.root is None:
            return Inventory(root_id=None)
        other = Inventory(entries.next()[1].file_id)
        other.root.revision = self.root.revision
        other.revision_id = self.revision_id
        directories_to_expand = set()
        for path, entry in entries:
            file_id = entry.file_id
            if (file_id in specific_fileids
                or entry.parent_id in directories_to_expand):
                if entry.kind == 'directory':
                    directories_to_expand.add(file_id)
            elif file_id not in interesting_parents:
            other.add(entry.copy())

    def get_idpath(self, file_id):
        """Return a list of file_ids for the path to an entry.

        The list contains one element for each directory followed by
        the id of the file itself. So the length of the returned list
        is equal to the depth of the file in the tree, counting the
        root directory as depth 1.
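
        For instance, if 'src/foo.c' is versioned with 'src' having file id
        'src-id' and 'foo.c' having file id 'foo-id', the returned list would
        plausibly be [ROOT_ID, 'src-id', 'foo-id'] - three elements for a
        file at depth 3.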
        """
        for parent in self._iter_file_id_parents(file_id):
            p.insert(0, parent.file_id)


class Inventory(CommonInventory):
    """Mutable dict based in-memory inventory.

    We never store the full path to a file, because renaming a directory
    implicitly moves all of its contents. This class internally maintains a
    lookup tree that allows the children under a directory to be

    >>> inv = Inventory()
    >>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
    InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
    >>> inv['123-123'].name

    Ids may be looked up from paths:

    >>> inv.path2id('hello.c')

    >>> '123-123' in inv

    There are iterators over the contents:

    >>> [entry[0] for entry in inv.iter_entries()]
    def __init__(self, root_id=ROOT_ID, revision_id=None):
        """Create or read an inventory.

        If a working directory is specified, the inventory is read
        from there. If the file is specified, read from that. If not,
        the inventory is created empty.

        The inventory is created with a default root directory, with
        """
        if root_id is not None:
            self._set_root(InventoryDirectory(root_id, u'', None))
        self.revision_id = revision_id

        # More than one page of output is not useful anymore to debug
        contents = repr(self._byid)
        if len(contents) > max_len:
            contents = contents[:(max_len-len(closing))] + closing
        return "<Inventory object at %x, contents=%r>" % (id(self), contents)

    def apply_delta(self, delta):
        """Apply a delta to this inventory.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        If delta application fails the inventory is left in an indeterminate
        state and must not be used.

        :param delta: A list of changes to apply. After all the changes are
            applied the final inventory must be internally consistent, but it
            is ok to supply changes which, if only half-applied, would have an
            invalid result - such as supplying two changes which rename two
            files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
            ('B', 'A', 'B-id', b_entry)].

        Each change is a tuple, of the form (old_path, new_path, file_id,
        new_entry).

        When new_path is None, the change indicates the removal of an entry
        from the inventory and new_entry will be ignored (using None is
        appropriate). If new_path is not None, then new_entry must be an
        InventoryEntry instance, which will be incorporated into the
        inventory (and replace any existing entry with the same file id).

        When old_path is None, the change indicates the addition of
        a new entry to the inventory.

        When neither new_path nor old_path are None, the change is a
        modification to an entry, such as a rename, reparent, kind change

        The children attribute of new_entry is ignored. This is because
        this method preserves children automatically across alterations to
        the parent of the children, and cases where the parent id of a
        child is changing require the child to be passed in as a separate
        change regardless. E.g. in the recursive deletion of a directory -
        the directory's children must be included in the delta, or the
        final inventory will be invalid.

        Note that a file_id must only appear once within a given delta.
        An AssertionError is raised otherwise.
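
        For example, a delta like [('a', 'b', 'x-id', entry_b),
        ('b', 'c', 'x-id', entry_c)] (entry_b and entry_c standing in for
        InventoryEntry instances) mentions 'x-id' twice and would therefore
        be rejected; such a chain of moves has to be collapsed into a single
        change for 'x-id'.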
        """
        # Check that the delta is legal. It would be nice if this could be
        # done within the loops below but it's safer to validate the delta
        # before starting to mutate the inventory, as there isn't a rollback
        list(_check_delta_unique_ids(_check_delta_unique_new_paths(
            _check_delta_unique_old_paths(_check_delta_ids_match_entry(
            _check_delta_ids_are_valid(
            _check_delta_new_path_entry_both_or_None(

        # Remove all affected items which were in the original inventory,
        # starting with the longest paths, thus ensuring parents are examined
        # after their children, which means that everything we examine has no
        # modified children remaining by the time we examine it.
        for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
                                         if op is not None), reverse=True):
            # Preserve unaltered children of file_id for later reinsertion.
            file_id_children = getattr(self[file_id], 'children', {})
            if len(file_id_children):
                children[file_id] = file_id_children
            if self.id2path(file_id) != old_path:
                raise errors.InconsistentDelta(old_path, file_id,
                    "Entry was at wrong other path %r." % self.id2path(file_id))
            # Remove file_id and the unaltered children. If file_id is not
            # being deleted it will be reinserted back later.
            self.remove_recursive_id(file_id)
        # Insert all affected items which should be in the new inventory,
        # reattaching their children if they had any. This is done from
        # shortest path to longest, ensuring that items which were modified
        # and whose parents in the resulting inventory were also modified,
        # are inserted after their parents.
        for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
                                             delta if np is not None):
            if new_entry.kind == 'directory':
                # Pop the children to allow detection of children whose
                # parents were deleted and which were not reattached to a new
                # parent.
                replacement = InventoryDirectory(new_entry.file_id,
                    new_entry.name, new_entry.parent_id)
                replacement.revision = new_entry.revision
                replacement.children = children.pop(replacement.file_id, {})
                new_entry = replacement
            except errors.DuplicateFileId:
                raise errors.InconsistentDelta(new_path, new_entry.file_id,
                    "New id is already present in target.")
            except AttributeError:
                raise errors.InconsistentDelta(new_path, new_entry.file_id,
                    "Parent is not a directory.")
            if self.id2path(new_entry.file_id) != new_path:
                raise errors.InconsistentDelta(new_path, new_entry.file_id,
                    "New path is not consistent with parent path.")
        # Get the parent id that was deleted
        parent_id, children = children.popitem()
        raise errors.InconsistentDelta("<deleted>", parent_id,
            "The file id was deleted but its children were not deleted.")
    def create_by_apply_delta(self, inventory_delta, new_revision_id,
                              propagate_caches=False):
        """See CHKInventory.create_by_apply_delta()"""
        new_inv = self.copy()
        new_inv.apply_delta(inventory_delta)
        new_inv.revision_id = new_revision_id

    def _set_root(self, ie):
        self._byid = {self.root.file_id: self.root}

        # TODO: jam 20051218 Should copy also copy the revision_id?
        entries = self.iter_entries()
        if self.root is None:
            return Inventory(root_id=None)
        other = Inventory(entries.next()[1].file_id)
        other.root.revision = self.root.revision
        # copy recursively so we know directories will be added before
        # their children. There are more efficient ways than this...
        for path, entry in entries:
            other.add(entry.copy())

    def _get_mutable_inventory(self):
        """See CommonInventory._get_mutable_inventory."""
        return copy.deepcopy(self)

        """Iterate over all file-ids."""
        return iter(self._byid)

    def iter_just_entries(self):
        """Iterate over all entries.

        Unlike iter_entries(), just the entries are returned (not (path, ie))
        and the order of entries is undefined.

        XXX: We may not want to merge this into bzr.dev.
        """
        if self.root is None:
        for _, ie in self._byid.iteritems():

    def __contains__(self, file_id):
        """True if this entry contains a file with given id.

        >>> inv = Inventory()
        >>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID))
        InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None)
        """
        return (file_id in self._byid)

        """Returns number of entries."""
        return len(self._byid)
    def __getitem__(self, file_id):
        """Return the entry for given file_id.

        >>> inv = Inventory()
        >>> inv.add(InventoryFile('123123', 'hello.c', ROOT_ID))
        InventoryFile('123123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
        >>> inv['123123'].name

        return self.root is not None and file_id == self.root.file_id

class CHKInventory(CommonInventory):
    """An inventory persisted in a CHK store.

    By design, a CHKInventory is immutable so many of the methods
    supported by Inventory - add, rename, apply_delta, etc - are *not*
    supported. To create a new CHKInventory, use create_by_apply_delta()
    or from_inventory(), say.

    Internally, a CHKInventory has one or two CHKMaps:

    * id_to_entry - a map from (file_id,) => InventoryEntry as bytes
    * parent_id_basename_to_file_id - a map from (parent_id, basename_utf8)
      => file_id

    The second map is optional and not present in early CHKRepositories.
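
    As an illustration, for a tree containing a file 'foo.c' (file id
    'foo-id') inside a directory 'src' (file id 'src-id'), the maps would
    hold roughly:

    * id_to_entry:  ('foo-id',) => serialised InventoryEntry bytes
    * parent_id_basename_to_file_id:  ('src-id', 'foo.c') => 'foo-id'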

    No caching is performed: every method call or item access will perform
    requests to the storage layer. As such, keep references to objects you
    """

    def __init__(self, search_key_name):
        CommonInventory.__init__(self)
        self._fileid_to_entry_cache = {}
        self._path_to_fileid_cache = {}
        self._search_key_name = search_key_name

    def __eq__(self, other):
        """Compare two sets by comparing their contents."""
        if not isinstance(other, CHKInventory):
            return NotImplemented

        this_key = self.id_to_entry.key()
        other_key = other.id_to_entry.key()
        this_pid_key = self.parent_id_basename_to_file_id.key()
        other_pid_key = other.parent_id_basename_to_file_id.key()
        if None in (this_key, this_pid_key, other_key, other_pid_key):
        return this_key == other_key and this_pid_key == other_pid_key

    def _entry_to_bytes(self, entry):
        """Serialise entry as a single bytestring.

        :param entry: An inventory entry.
        :return: A bytestring for the entry.

        ENTRY ::= FILE | DIR | SYMLINK | TREE
        FILE ::= "file: " COMMON SEP SHA SEP SIZE SEP EXECUTABLE
        DIR ::= "dir: " COMMON
        SYMLINK ::= "symlink: " COMMON SEP TARGET_UTF8
        TREE ::= "tree: " COMMON REFERENCE_REVISION
        COMMON ::= FILE_ID SEP PARENT_ID SEP NAME_UTF8 SEP REVISION
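
        For instance, an executable file entry would serialise along the
        lines of "file: foo-id\nsrc-id\nfoo.c\nrev-1\n<sha1 hex>\n1234\nY"
        (file id, parent id, name, revision, SHA-1, size, executable flag),
        assuming 'foo-id', 'src-id' and 'rev-1' as the ids involved.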
        """
        if entry.parent_id is not None:
            parent_str = entry.parent_id
        name_str = entry.name.encode("utf8")
        if entry.kind == 'file':
            if entry.executable:
            return "file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision,
                entry.text_sha1, entry.text_size, exec_str)
        elif entry.kind == 'directory':
            return "dir: %s\n%s\n%s\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision)
        elif entry.kind == 'symlink':
            return "symlink: %s\n%s\n%s\n%s\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision,
                entry.symlink_target.encode("utf8"))
        elif entry.kind == 'tree-reference':
            return "tree: %s\n%s\n%s\n%s\n%s" % (
                entry.file_id, parent_str, name_str, entry.revision,
                entry.reference_revision)
        raise ValueError("unknown kind %r" % entry.kind)

    def _expand_fileids_to_parents_and_children(self, file_ids):
        """Give a more holistic view starting with the given file_ids.

        For any file_id which maps to a directory, we will include all children
        of that directory. We will also include all directories which are
        parents of the given file_ids, but we will not include their children.

            fringle # fringle-id

        if given [foo-id] we will include
            TREE_ROOT as interesting parents
            foo-id, baz-id, frob-id, fringle-id
        """
        # TODO: Pre-pass over the list of fileids to see if anything is already
        #       deserialized in self._fileid_to_entry_cache
        directories_to_expand = set()
        children_of_parent_id = {}
        # It is okay if some of the fileids are missing
        for entry in self._getitems(file_ids):
            if entry.kind == 'directory':
                directories_to_expand.add(entry.file_id)
            interesting.add(entry.parent_id)
            children_of_parent_id.setdefault(entry.parent_id, []
                                             ).append(entry.file_id)

        # Now, interesting has all of the direct parents, but not the
        # parents of those parents. It also may have some duplicates with
        remaining_parents = interesting.difference(file_ids)
        # When we hit the TREE_ROOT, we'll get an interesting parent of None,
        # but we don't actually want to recurse into that
        interesting.add(None) # this will auto-filter it in the loop
        remaining_parents.discard(None)
        while remaining_parents:
            next_parents = set()
            for entry in self._getitems(remaining_parents):
                next_parents.add(entry.parent_id)
                children_of_parent_id.setdefault(entry.parent_id, []
                                                 ).append(entry.file_id)
            # Remove any search tips we've already processed
            remaining_parents = next_parents.difference(interesting)
            interesting.update(remaining_parents)
            # We should probably also .difference(directories_to_expand)
        interesting.update(file_ids)
        interesting.discard(None)
        while directories_to_expand:
            # Expand directories by looking in the
            # parent_id_basename_to_file_id map
            keys = [StaticTuple(f,).intern() for f in directories_to_expand]
            directories_to_expand = set()
            items = self.parent_id_basename_to_file_id.iteritems(keys)
            next_file_ids = set([item[1] for item in items])
            next_file_ids = next_file_ids.difference(interesting)
            interesting.update(next_file_ids)
            for entry in self._getitems(next_file_ids):
                if entry.kind == 'directory':
                    directories_to_expand.add(entry.file_id)
                children_of_parent_id.setdefault(entry.parent_id, []
                                                 ).append(entry.file_id)
        return interesting, children_of_parent_id

    def filter(self, specific_fileids):
        """Get an inventory view filtered against a set of file-ids.

        Children of directories and parents are included.

        The result may or may not reference the underlying inventory
        so it should be treated as immutable.
        """
         parent_to_children) = self._expand_fileids_to_parents_and_children(
        # There is some overlap here, but we assume that all interesting items
        # are in the _fileid_to_entry_cache because we had to read them to
        # determine if they were a dir we wanted to recurse, or just a file
        # This should give us all the entries we'll want to add, so start
        other = Inventory(self.root_id)
        other.root.revision = self.root.revision
        other.revision_id = self.revision_id
        if not interesting or not parent_to_children:
            # empty filter, or filtering entries that don't exist
            # (if even 1 existed, then we would have populated
            # parent_to_children with at least the tree root.)
        cache = self._fileid_to_entry_cache
        remaining_children = collections.deque(parent_to_children[self.root_id])
        while remaining_children:
            file_id = remaining_children.popleft()
            if ie.kind == 'directory':
                ie = ie.copy() # We create a copy to depopulate the .children attribute
            # TODO: depending on the uses of 'other' we should probably always
            #       '.copy()' to prevent someone from mutating other and
            #       invalidating our internal cache
            if file_id in parent_to_children:
                remaining_children.extend(parent_to_children[file_id])

    def _bytes_to_utf8name_key(bytes):
        """Get the file_id, revision_id key out of bytes."""
        # We don't normally care about name, except for times when we want
        # to filter out empty names because of non rich-root...
        sections = bytes.split('\n')
        kind, file_id = sections[0].split(': ')
        return (sections[2], intern(file_id), intern(sections[3]))
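        # As an illustrative example (ids assumed): for a serialised entry
        # such as "file: foo-id\nsrc-id\nfoo.c\nrev-1\n<sha1>\n1234\nY" this
        # returns ('foo.c', 'foo-id', 'rev-1'), i.e. the utf8 name plus the
        # (file_id, revision_id) pair.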

    def _bytes_to_entry(self, bytes):
        """Deserialise a serialised entry."""
        sections = bytes.split('\n')
        if sections[0].startswith("file: "):
            result = InventoryFile(sections[0][6:],
                sections[2].decode('utf8'),
            result.text_sha1 = sections[4]
            result.text_size = int(sections[5])
            result.executable = sections[6] == "Y"
        elif sections[0].startswith("dir: "):
            result = CHKInventoryDirectory(sections[0][5:],
                sections[2].decode('utf8'),
        elif sections[0].startswith("symlink: "):
            result = InventoryLink(sections[0][9:],
                sections[2].decode('utf8'),
            result.symlink_target = sections[4].decode('utf8')
        elif sections[0].startswith("tree: "):
            result = TreeReference(sections[0][6:],
                sections[2].decode('utf8'),
            result.reference_revision = sections[4]
        else:
            raise ValueError("Not a serialised entry %r" % bytes)
        result.file_id = intern(result.file_id)
        result.revision = intern(sections[3])
        if result.parent_id == '':
            result.parent_id = None
        self._fileid_to_entry_cache[result.file_id] = result

    def _get_mutable_inventory(self):
        """See CommonInventory._get_mutable_inventory."""
        entries = self.iter_entries()
        inv = Inventory(None, self.revision_id)
        for path, inv_entry in entries:
            inv.add(inv_entry.copy())

    def create_by_apply_delta(self, inventory_delta, new_revision_id,
                              propagate_caches=False):
        """Create a new CHKInventory by applying inventory_delta to this one.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        :param inventory_delta: The inventory delta to apply. See
            Inventory.apply_delta for details.
        :param new_revision_id: The revision id of the resulting CHKInventory.
        :param propagate_caches: If True, the caches for this inventory are
            copied to and updated for the result.
        :return: The new CHKInventory.
        """
        split = osutils.split
        result = CHKInventory(self._search_key_name)
        if propagate_caches:
            # Just propagate the path-to-fileid cache for now
            result._path_to_fileid_cache = dict(self._path_to_fileid_cache.iteritems())
        search_key_func = chk_map.search_key_registry.get(self._search_key_name)
        self.id_to_entry._ensure_root()
        maximum_size = self.id_to_entry._root_node.maximum_size
        result.revision_id = new_revision_id
        result.id_to_entry = chk_map.CHKMap(
            self.id_to_entry._store,
            self.id_to_entry.key(),
            search_key_func=search_key_func)
        result.id_to_entry._ensure_root()
        result.id_to_entry._root_node.set_maximum_size(maximum_size)
        # Change to apply to the parent_id_basename delta. The dict maps
        # (parent_id, basename) -> (old_key, new_value). We use a dict because
        # when a path has its id replaced (e.g. the root is changed, or someone
        # does bzr mv a b, bzr mv c a), we should output a single change to this
        # map rather than two.
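        # As a rough illustration (ids assumed): renaming foo.c to bar.c under
        # the same directory 'src-id' would leave two records of the shape
        #   ('src-id', 'foo.c') -> [old_key, None]      # old name removed
        #   ('src-id', 'bar.c') -> [None, new_value]    # new name added
        # where new_value is the serialised entry bytes for the file.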
        parent_id_basename_delta = {}
        if self.parent_id_basename_to_file_id is not None:
            result.parent_id_basename_to_file_id = chk_map.CHKMap(
                self.parent_id_basename_to_file_id._store,
                self.parent_id_basename_to_file_id.key(),
                search_key_func=search_key_func)
            result.parent_id_basename_to_file_id._ensure_root()
            self.parent_id_basename_to_file_id._ensure_root()
            result_p_id_root = result.parent_id_basename_to_file_id._root_node
            p_id_root = self.parent_id_basename_to_file_id._root_node
            result_p_id_root.set_maximum_size(p_id_root.maximum_size)
            result_p_id_root._key_width = p_id_root._key_width
        else:
            result.parent_id_basename_to_file_id = None
        result.root_id = self.root_id
        id_to_entry_delta = []
        # inventory_delta is only traversed once, so we just update the
        # Check for repeated file ids
        inventory_delta = _check_delta_unique_ids(inventory_delta)
        # Repeated old paths
        inventory_delta = _check_delta_unique_old_paths(inventory_delta)
        # Check for repeated new paths
        inventory_delta = _check_delta_unique_new_paths(inventory_delta)
        # Check for entries that don't match the fileid
        inventory_delta = _check_delta_ids_match_entry(inventory_delta)
        # Check for nonsense fileids
        inventory_delta = _check_delta_ids_are_valid(inventory_delta)
        # Check for new_path <-> entry consistency
        inventory_delta = _check_delta_new_path_entry_both_or_None(
        # All changed entries need to have their parents be directories and be
        # at the right path. This set contains (path, id) tuples.
        # When we delete an item, all the children of it must be either deleted
        # or altered in their own right. As we batch process the change via
        # CHKMap.apply_delta, we build a set of things to use to validate the
        for old_path, new_path, file_id, entry in inventory_delta:
            result.root_id = file_id
            if new_path is None:
                if propagate_caches:
                    del result._path_to_fileid_cache[old_path]
                deletes.add(file_id)
            else:
                new_key = StaticTuple(file_id,)
                new_value = result._entry_to_bytes(entry)
                # Update caches. It's worth doing this whether
                # we're propagating the old caches or not.
                result._path_to_fileid_cache[new_path] = file_id
                parents.add((split(new_path)[0], entry.parent_id))
            if old_path is None:
                old_key = StaticTuple(file_id,)
                if self.id2path(file_id) != old_path:
                    raise errors.InconsistentDelta(old_path, file_id,
                        "Entry was at wrong other path %r." %
                        self.id2path(file_id))
                altered.add(file_id)
            id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value))
            if result.parent_id_basename_to_file_id is not None:
                # parent_id, basename changes
                if old_path is None:
                    old_entry = self[file_id]
                    old_key = self._parent_id_basename_key(old_entry)
                if new_path is None:
                    new_key = self._parent_id_basename_key(entry)
                # If the two keys are the same, the value will be unchanged
                # as it's always the file id for this entry.
                if old_key != new_key:
                    # Transform a change into explicit delete/add preserving
                    # a possible match on the key from a different file id.
                    if old_key is not None:
                        parent_id_basename_delta.setdefault(
                            old_key, [None, None])[0] = old_key
                    if new_key is not None:
                        parent_id_basename_delta.setdefault(
                            new_key, [None, None])[1] = new_value
        # validate that deletes are complete.
        for file_id in deletes:
            entry = self[file_id]
            if entry.kind != 'directory':
            # This loop could potentially be better by using the id_basename
            # map to just get the child file ids.
            for child in entry.children.values():
                if child.file_id not in altered:
                    raise errors.InconsistentDelta(self.id2path(child.file_id),
                        child.file_id, "Child not deleted or reparented when "
        result.id_to_entry.apply_delta(id_to_entry_delta)
        if parent_id_basename_delta:
            # Transform the parent_id_basename delta data into a linear delta
            # with only one record for a given key. Optimally this would allow
            # re-keying, but it's simpler to just output that as a delete+add
            # to spend less time calculating the delta.
            for key, (old_key, value) in parent_id_basename_delta.iteritems():
                if value is not None:
                    delta_list.append((old_key, key, value))
                    delta_list.append((old_key, None, None))
            result.parent_id_basename_to_file_id.apply_delta(delta_list)
        parents.discard(('', None))
        for parent_path, parent in parents:
                if result[parent].kind != 'directory':
                    raise errors.InconsistentDelta(result.id2path(parent), parent,
                        'Not a directory, but given children')
            except errors.NoSuchId:
                raise errors.InconsistentDelta("<unknown>", parent,
                    "Parent is not present in resulting inventory.")
            if result.path2id(parent_path) != parent:
                raise errors.InconsistentDelta(parent_path, parent,
                    "Parent has wrong path %r." % result.path2id(parent_path))

    def deserialise(klass, chk_store, bytes, expected_revision_id):
        """Deserialise a CHKInventory.

        :param chk_store: A CHK capable VersionedFiles instance.
        :param bytes: The serialised bytes.
        :param expected_revision_id: The revision ID we think this inventory is
        :return: A CHKInventory
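
        The serialised form built by the companion serialiser in this class
        starts with a 'chkinventory:' line followed by key/value lines, for
        example something like::

            chkinventory:
            revision_id: rev-1
            root_id: TREE_ROOT
            parent_id_basename_to_file_id: sha1:...
            id_to_entry: sha1:...

        with the sha1: values naming the root keys of the two CHKMaps.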
        """
        lines = bytes.split('\n')
            raise AssertionError('bytes to deserialize must end with an eol')
        if lines[0] != 'chkinventory:':
            raise ValueError("not a serialised CHKInventory: %r" % bytes)
        allowed_keys = frozenset(['root_id', 'revision_id', 'search_key_name',
                                  'parent_id_basename_to_file_id',
        for line in lines[1:]:
            key, value = line.split(': ', 1)
            if key not in allowed_keys:
                raise errors.BzrError('Unknown key in inventory: %r\n%r'
                raise errors.BzrError('Duplicate key in inventory: %r\n%r'
        revision_id = intern(info['revision_id'])
        root_id = intern(info['root_id'])
        search_key_name = intern(info.get('search_key_name', 'plain'))
        parent_id_basename_to_file_id = intern(info.get(
            'parent_id_basename_to_file_id', None))
        if not parent_id_basename_to_file_id.startswith('sha1:'):
            raise ValueError('parent_id_basename_to_file_id should be a sha1'
                             ' key not %r' % (parent_id_basename_to_file_id,))
        id_to_entry = info['id_to_entry']
        if not id_to_entry.startswith('sha1:'):
            raise ValueError('id_to_entry should be a sha1'
                             ' key not %r' % (id_to_entry,))

        result = CHKInventory(search_key_name)
        result.revision_id = revision_id
        result.root_id = root_id
        search_key_func = chk_map.search_key_registry.get(
            result._search_key_name)
        if parent_id_basename_to_file_id is not None:
            result.parent_id_basename_to_file_id = chk_map.CHKMap(
                chk_store, StaticTuple(parent_id_basename_to_file_id,),
                search_key_func=search_key_func)
        else:
            result.parent_id_basename_to_file_id = None

        result.id_to_entry = chk_map.CHKMap(chk_store,
                                            StaticTuple(id_to_entry,),
                                            search_key_func=search_key_func)
        if (result.revision_id,) != expected_revision_id:
            raise ValueError("Mismatched revision id and expected: %r, %r" %
                             (result.revision_id, expected_revision_id))

    def from_inventory(klass, chk_store, inventory, maximum_size=0, search_key_name='plain'):
        """Create a CHKInventory from an existing inventory.

        The content of inventory is copied into the chk_store, and a
        CHKInventory referencing that is returned.

        :param chk_store: A CHK capable VersionedFiles instance.
        :param inventory: The inventory to copy.
        :param maximum_size: The CHKMap node size limit.
        :param search_key_name: The identifier for the search key function
        """
        result = klass(search_key_name)
        result.revision_id = inventory.revision_id
        result.root_id = inventory.root.file_id

        entry_to_bytes = result._entry_to_bytes
        parent_id_basename_key = result._parent_id_basename_key
        id_to_entry_dict = {}
        parent_id_basename_dict = {}
        for path, entry in inventory.iter_entries():
            key = StaticTuple(entry.file_id,).intern()
            id_to_entry_dict[key] = entry_to_bytes(entry)
            p_id_key = parent_id_basename_key(entry)
            parent_id_basename_dict[p_id_key] = entry.file_id

        result._populate_from_dicts(chk_store, id_to_entry_dict,
            parent_id_basename_dict, maximum_size=maximum_size)

    def _populate_from_dicts(self, chk_store, id_to_entry_dict,
                             parent_id_basename_dict, maximum_size):
        search_key_func = chk_map.search_key_registry.get(self._search_key_name)
        root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
            maximum_size=maximum_size, key_width=1,
            search_key_func=search_key_func)
        self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
        root_key = chk_map.CHKMap.from_dict(chk_store,
            parent_id_basename_dict,
            maximum_size=maximum_size, key_width=2,
            search_key_func=search_key_func)
        self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
            root_key, search_key_func)

    def _parent_id_basename_key(self, entry):
        """Create a key for an entry in a parent_id_basename_to_file_id index."""
        if entry.parent_id is not None:
            parent_id = entry.parent_id
        return StaticTuple(parent_id, entry.name.encode('utf8')).intern()
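        # As an illustration (ids assumed): an entry named 'foo.c' whose
        # parent directory has file id 'src-id' keys as
        # StaticTuple('src-id', 'foo.c'), with the name utf8-encoded.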

    def __getitem__(self, file_id):
        """map a single file_id -> InventoryEntry."""
            raise errors.NoSuchId(self, file_id)
        result = self._fileid_to_entry_cache.get(file_id, None)
        if result is not None:
            return self._bytes_to_entry(
                self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1])
        except StopIteration:
            # really we're passing an inventory, not a tree...
            raise errors.NoSuchId(self, file_id)

    def _getitems(self, file_ids):
        """Similar to __getitem__, but lets you query for multiple.

        The returned order is undefined. And currently if an item doesn't
        exist, it isn't included in the output.
        """
        for file_id in file_ids:
            entry = self._fileid_to_entry_cache.get(file_id, None)
                remaining.append(file_id)
                result.append(entry)
        file_keys = [StaticTuple(f,).intern() for f in remaining]
        for file_key, value in self.id_to_entry.iteritems(file_keys):
            entry = self._bytes_to_entry(value)
            result.append(entry)
            self._fileid_to_entry_cache[entry.file_id] = entry

    def has_id(self, file_id):
        # Perhaps have an explicit 'contains' method on CHKMap ?
        if self._fileid_to_entry_cache.get(file_id, None) is not None:
            self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1

    def is_root(self, file_id):
        return file_id == self.root_id

    def _iter_file_id_parents(self, file_id):
        """Yield the parents of file_id up to the root."""
        while file_id is not None:
                raise errors.NoSuchId(tree=self, file_id=file_id)
            file_id = ie.parent_id

        """Iterate over all file-ids."""
        for key, _ in self.id_to_entry.iteritems():

    def iter_just_entries(self):
        """Iterate over all entries.

        Unlike iter_entries(), just the entries are returned (not (path, ie))
        and the order of entries is undefined.

        XXX: We may not want to merge this into bzr.dev.
        """
        for key, entry in self.id_to_entry.iteritems():
            ie = self._fileid_to_entry_cache.get(file_id, None)
                ie = self._bytes_to_entry(entry)
                self._fileid_to_entry_cache[file_id] = ie

    def iter_changes(self, basis):
        """Generate a Tree.iter_changes change list between this and basis.

        :param basis: Another CHKInventory.
        :return: An iterator over the changes between self and basis, as per
            tree.iter_changes().
        """
        # We want: (file_id, (path_in_source, path_in_target),
        #           changed_content, versioned, parent, name, kind,
        for key, basis_value, self_value in \
            self.id_to_entry.iter_changes(basis.id_to_entry):
            if basis_value is not None:
                basis_entry = basis._bytes_to_entry(basis_value)
                path_in_source = basis.id2path(file_id)
                basis_parent = basis_entry.parent_id
                basis_name = basis_entry.name
                basis_executable = basis_entry.executable
            else:
                path_in_source = None
                basis_executable = None
            if self_value is not None:
                self_entry = self._bytes_to_entry(self_value)
                path_in_target = self.id2path(file_id)
                self_parent = self_entry.parent_id
                self_name = self_entry.name
                self_executable = self_entry.executable
            else:
                path_in_target = None
                self_executable = None
            if basis_value is None:
                kind = (None, self_entry.kind)
                versioned = (False, True)
            elif self_value is None:
                kind = (basis_entry.kind, None)
                versioned = (True, False)
            else:
                kind = (basis_entry.kind, self_entry.kind)
                versioned = (True, True)
            changed_content = False
            if kind[0] != kind[1]:
                changed_content = True
            elif kind[0] == 'file':
                if (self_entry.text_size != basis_entry.text_size or
                    self_entry.text_sha1 != basis_entry.text_sha1):
                    changed_content = True
            elif kind[0] == 'symlink':
                if self_entry.symlink_target != basis_entry.symlink_target:
                    changed_content = True
            elif kind[0] == 'tree-reference':
                if (self_entry.reference_revision !=
                    basis_entry.reference_revision):
                    changed_content = True
            parent = (basis_parent, self_parent)
            name = (basis_name, self_name)
            executable = (basis_executable, self_executable)
            if (not changed_content
                and parent[0] == parent[1]
                and name[0] == name[1]
                and executable[0] == executable[1]):
                # Could happen when only the revision changed for a directory
            yield (file_id, (path_in_source, path_in_target), changed_content,
                   versioned, parent, name, kind, executable)

        """Return the number of entries in the inventory."""
        return len(self.id_to_entry)

    def _make_delta(self, old):
        """Make an inventory delta from two inventories."""
        if type(old) != CHKInventory:
            return CommonInventory._make_delta(self, old)
        for key, old_value, self_value in \
            self.id_to_entry.iter_changes(old.id_to_entry):
            if old_value is not None:
                old_path = old.id2path(file_id)
            if self_value is not None:
                entry = self._bytes_to_entry(self_value)
                self._fileid_to_entry_cache[file_id] = entry
                new_path = self.id2path(file_id)
            delta.append((old_path, new_path, file_id, entry))

    def path2id(self, relpath):
        """See CommonInventory.path2id()."""
        # TODO: perhaps support negative hits?
        result = self._path_to_fileid_cache.get(relpath, None)
        if result is not None:
        if isinstance(relpath, basestring):
            names = osutils.splitpath(relpath)
        current_id = self.root_id
        if current_id is None:
        parent_id_index = self.parent_id_basename_to_file_id
        for basename in names:
            if cur_path is None:
                cur_path = cur_path + '/' + basename
            basename_utf8 = basename.encode('utf8')
            file_id = self._path_to_fileid_cache.get(cur_path, None)
                key_filter = [StaticTuple(current_id, basename_utf8)]
                items = parent_id_index.iteritems(key_filter)
                for (parent_id, name_utf8), file_id in items:
                    if parent_id != current_id or name_utf8 != basename_utf8:
                        raise errors.BzrError("corrupt inventory lookup! "
                            "%r %r %r %r" % (parent_id, current_id, name_utf8,
            self._path_to_fileid_cache[cur_path] = file_id
            current_id = file_id

        """Serialise the inventory to lines."""
        lines = ["chkinventory:\n"]
        if self._search_key_name != 'plain':
            # custom ordering grouping things that don't change together
            lines.append('search_key_name: %s\n' % (self._search_key_name,))
            lines.append("root_id: %s\n" % self.root_id)
            lines.append('parent_id_basename_to_file_id: %s\n' %
                (self.parent_id_basename_to_file_id.key()[0],))
            lines.append("revision_id: %s\n" % self.revision_id)
            lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
        else:
            lines.append("revision_id: %s\n" % self.revision_id)
            lines.append("root_id: %s\n" % self.root_id)
            if self.parent_id_basename_to_file_id is not None:
                lines.append('parent_id_basename_to_file_id: %s\n' %
                    (self.parent_id_basename_to_file_id.key()[0],))
            lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))

        """Get the root entry."""
        return self[self.root_id]

class CHKInventoryDirectory(InventoryDirectory):
    """A directory in an inventory."""

    __slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
                 'text_id', 'parent_id', '_children', 'executable',
                 'revision', 'symlink_target', 'reference_revision',

    def __init__(self, file_id, name, parent_id, chk_inventory):
        # Don't call InventoryDirectory.__init__ - it isn't right for this
        InventoryEntry.__init__(self, file_id, name, parent_id)
        self._children = None
        self.kind = 'directory'
        self._chk_inventory = chk_inventory

        """Access the list of children of this directory.

        With a parent_id_basename_to_file_id index, this loads all the
        children; without one it would have to load the entire index, which
        is bad. A more sophisticated proxy object might be nice, to allow
        partial loading of children as well when specific names are accessed.
        (So path traversal can be written in the obvious way but not examine
        siblings.)
        """
        if self._children is not None:
            return self._children
        # No longer supported
        if self._chk_inventory.parent_id_basename_to_file_id is None:
            raise AssertionError("Inventories without"
                " parent_id_basename_to_file_id are no longer supported")
        # XXX: Todo - use proxy objects for the children rather than loading
        # all when the attribute is referenced.
        parent_id_index = self._chk_inventory.parent_id_basename_to_file_id
        for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
            key_filter=[StaticTuple(self.file_id,)]):
            child_keys.add(StaticTuple(file_id,))
        for file_id_key in child_keys:
            entry = self._chk_inventory._fileid_to_entry_cache.get(
                file_id_key[0], None)
            if entry is not None:
                result[entry.name] = entry
                cached.add(file_id_key)
        child_keys.difference_update(cached)
        # populate; todo: do by name
        id_to_entry = self._chk_inventory.id_to_entry
        for file_id_key, bytes in id_to_entry.iteritems(child_keys):
            entry = self._chk_inventory._bytes_to_entry(bytes)
            result[entry.name] = entry
            self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry
        self._children = result

entry_factory = {
    'directory': InventoryDirectory,
    'file': InventoryFile,