616
816
return compatible
619
class CommonInventory(object):
620
"""Basic inventory logic, defined in terms of primitives like has_id.
622
An inventory is the metadata about the contents of a tree.
624
This is broadly a map from file_id to entries such as directories, files,
625
symlinks and tree references. Each entry maintains its own metadata like
626
SHA1 and length for files, or children for a directory.
819
class Inventory(object):
820
"""Inventory of versioned files in a tree.
822
This describes which file_id is present at each point in the tree,
823
and possibly the SHA-1 or other information about the file.
628
824
Entries can be looked up either by path or by file_id.
826
The inventory represents a typical unix file tree, with
827
directories containing files and subdirectories. We never store
828
the full path to a file, because renaming a directory implicitly
829
moves all of its contents. This class internally maintains a
830
lookup tree that allows the children under a directory to be
630
833
InventoryEntry objects must not be modified after they are
631
834
inserted, other than through the Inventory API.
836
>>> inv = Inventory()
837
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
838
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None)
839
>>> inv['123-123'].name
842
May be treated as an iterator or set to look up file ids:
844
>>> bool(inv.path2id('hello.c'))
849
May also look up by name:
851
>>> [x[0] for x in inv.iter_entries()]
853
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
854
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
855
Traceback (most recent call last):
856
BzrError: parent_id {TREE_ROOT} not in inventory
857
>>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
858
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None)
634
@deprecated_method(deprecated_in((2, 4, 0)))
635
def __contains__(self, file_id):
636
"""True if this entry contains a file with given id.
638
>>> inv = Inventory()
639
>>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID))
640
InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
641
>>> inv.has_id('123')
643
>>> inv.has_id('456')
646
Note that this method along with __iter__ are not encouraged for use as
647
they are less clear than specific query methods - they may be rmeoved
650
return self.has_id(file_id)
652
def has_filename(self, filename):
653
return bool(self.path2id(filename))
655
def id2path(self, file_id):
656
"""Return as a string the path to file_id.
659
>>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
660
>>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
661
>>> print i.id2path('foo-id')
664
:raises NoSuchId: If file_id is not present in the inventory.
666
# get all names, skipping root
667
return '/'.join(reversed(
668
[parent.name for parent in
669
self._iter_file_id_parents(file_id)][:-1]))
671
def iter_entries(self, from_dir=None, recursive=True):
672
"""Return (path, entry) pairs, in order by name.
674
:param from_dir: if None, start from the root,
675
otherwise start from this directory (either file-id or entry)
676
:param recursive: recurse into directories or not
860
def __init__(self, root_id=ROOT_ID, revision_id=None):
861
"""Create or read an inventory.
863
If a working directory is specified, the inventory is read
864
from there. If the file is specified, read from that. If not,
865
the inventory is created empty.
867
The inventory is created with a default root directory, with
870
if root_id is not None:
871
assert root_id.__class__ == str
872
self._set_root(InventoryDirectory(root_id, u'', None))
876
self.revision_id = revision_id
879
return "<Inventory object at %x, contents=%r>" % (id(self), self._byid)
881
def apply_delta(self, delta):
882
"""Apply a delta to this inventory.
884
:param delta: A list of changes to apply. After all the changes are
885
applied the final inventory must be internally consistent, but it
886
is ok to supply changes which, if only half-applied would have an
887
invalid result - such as supplying two changes which rename two
888
files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
889
('B', 'A', 'B-id', b_entry)].
891
Each change is a tuple, of the form (old_path, new_path, file_id,
894
When new_path is None, the change indicates the removal of an entry
895
from the inventory and new_entry will be ignored (using None is
896
appropriate). If new_path is not None, then new_entry must be an
897
InventoryEntry instance, which will be incorporated into the
898
inventory (and replace any existing entry with the same file id).
900
When old_path is None, the change indicates the addition of
901
a new entry to the inventory.
903
When neither new_path nor old_path are None, the change is a
904
modification to an entry, such as a rename, reparent, kind change
907
The children attribute of new_entry is ignored. This is because
908
this method preserves children automatically across alterations to
909
the parent of the children, and cases where the parent id of a
910
child is changing require the child to be passed in as a separate
911
change regardless. E.g. in the recursive deletion of a directory -
912
the directory's children must be included in the delta, or the
913
final inventory will be invalid.
916
# Remove all affected items which were in the original inventory,
917
# starting with the longest paths, thus ensuring parents are examined
918
# after their children, which means that everything we examine has no
919
# modified children remaining by the time we examine it.
920
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
921
if op is not None), reverse=True):
922
if file_id not in self:
925
# Preserve unaltered children of file_id for later reinsertion.
926
children[file_id] = getattr(self[file_id], 'children', {})
927
# Remove file_id and the unaltered children. If file_id is not
928
# being deleted it will be reinserted back later.
929
self.remove_recursive_id(file_id)
930
# Insert all affected which should be in the new inventory, reattaching
931
# their children if they had any. This is done from shortest path to
932
# longest, ensuring that items which were modified and whose parents in
933
# the resulting inventory were also modified, are inserted after their
935
for new_path, new_entry in sorted((np, e) for op, np, f, e in
936
delta if np is not None):
937
if new_entry.kind == 'directory':
938
new_entry.children = children.get(new_entry.file_id, {})
941
def _set_root(self, ie):
943
self._byid = {self.root.file_id: self.root}
946
# TODO: jam 20051218 Should copy also copy the revision_id?
947
entries = self.iter_entries()
948
if self.root is None:
949
return Inventory(root_id=None)
950
other = Inventory(entries.next()[1].file_id)
951
# copy recursively so we know directories will be added before
952
# their children. There are more efficient ways than this...
953
for path, entry in entries:
954
other.add(entry.copy())
958
return iter(self._byid)
961
"""Returns number of entries."""
962
return len(self._byid)
964
def iter_entries(self, from_dir=None):
965
"""Return (path, entry) pairs, in order by name."""
678
966
if from_dir is None:
679
967
if self.root is None:
681
969
from_dir = self.root
682
970
yield '', self.root
683
971
elif isinstance(from_dir, basestring):
684
from_dir = self[from_dir]
972
from_dir = self._byid[from_dir]
686
974
# unrolling the recursive called changed the time from
687
975
# 440ms/663ms (inline/total) to 116ms/116ms
688
976
children = from_dir.children.items()
691
for name, ie in children:
694
978
children = collections.deque(children)
695
979
stack = [(u'', children)]
863
1115
descend(child_ie, child_path)
864
1116
descend(self.root, u'')
867
def path2id(self, relpath):
868
"""Walk down through directories to return entry of last component.
870
:param relpath: may be either a list of path components, or a single
871
string, in which case it is automatically split.
873
This returns the entry of the last component in the path,
874
which may be either a file or a directory.
876
Returns None IFF the path is not found.
878
if isinstance(relpath, basestring):
879
names = osutils.splitpath(relpath)
885
except errors.NoSuchId:
886
# root doesn't exist yet so nothing else can
892
children = getattr(parent, 'children', None)
901
return parent.file_id
903
def filter(self, specific_fileids):
904
"""Get an inventory view filtered against a set of file-ids.
906
Children of directories and parents are included.
908
The result may or may not reference the underlying inventory
909
so it should be treated as immutable.
911
interesting_parents = set()
912
for fileid in specific_fileids:
914
interesting_parents.update(self.get_idpath(fileid))
915
except errors.NoSuchId:
916
# This fileid is not in the inventory - that's ok
918
entries = self.iter_entries()
919
if self.root is None:
920
return Inventory(root_id=None)
921
other = Inventory(entries.next()[1].file_id)
922
other.root.revision = self.root.revision
923
other.revision_id = self.revision_id
924
directories_to_expand = set()
925
for path, entry in entries:
926
file_id = entry.file_id
927
if (file_id in specific_fileids
928
or entry.parent_id in directories_to_expand):
929
if entry.kind == 'directory':
930
directories_to_expand.add(file_id)
931
elif file_id not in interesting_parents:
933
other.add(entry.copy())
936
def get_idpath(self, file_id):
937
"""Return a list of file_ids for the path to an entry.
939
The list contains one element for each directory followed by
940
the id of the file itself. So the length of the returned list
941
is equal to the depth of the file in the tree, counting the
942
root directory as depth 1.
945
for parent in self._iter_file_id_parents(file_id):
946
p.insert(0, parent.file_id)
950
class Inventory(CommonInventory):
951
"""Mutable dict based in-memory inventory.
953
We never store the full path to a file, because renaming a directory
954
implicitly moves all of its contents. This class internally maintains a
955
lookup tree that allows the children under a directory to be
958
>>> inv = Inventory()
959
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
960
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
961
>>> inv['123-123'].name
964
Id's may be looked up from paths:
966
>>> inv.path2id('hello.c')
968
>>> inv.has_id('123-123')
971
There are iterators over the contents:
973
>>> [entry[0] for entry in inv.iter_entries()]
977
def __init__(self, root_id=ROOT_ID, revision_id=None):
978
"""Create or read an inventory.
980
If a working directory is specified, the inventory is read
981
from there. If the file is specified, read from that. If not,
982
the inventory is created empty.
984
The inventory is created with a default root directory, with
987
if root_id is not None:
988
self._set_root(InventoryDirectory(root_id, u'', None))
992
self.revision_id = revision_id
995
# More than one page of ouput is not useful anymore to debug
998
contents = repr(self._byid)
999
if len(contents) > max_len:
1000
contents = contents[:(max_len-len(closing))] + closing
1001
return "<Inventory object at %x, contents=%r>" % (id(self), contents)
1003
def apply_delta(self, delta):
1004
"""Apply a delta to this inventory.
1006
See the inventory developers documentation for the theory behind
1009
If delta application fails the inventory is left in an indeterminate
1010
state and must not be used.
1012
:param delta: A list of changes to apply. After all the changes are
1013
applied the final inventory must be internally consistent, but it
1014
is ok to supply changes which, if only half-applied would have an
1015
invalid result - such as supplying two changes which rename two
1016
files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
1017
('B', 'A', 'B-id', b_entry)].
1019
Each change is a tuple, of the form (old_path, new_path, file_id,
1022
When new_path is None, the change indicates the removal of an entry
1023
from the inventory and new_entry will be ignored (using None is
1024
appropriate). If new_path is not None, then new_entry must be an
1025
InventoryEntry instance, which will be incorporated into the
1026
inventory (and replace any existing entry with the same file id).
1028
When old_path is None, the change indicates the addition of
1029
a new entry to the inventory.
1031
When neither new_path nor old_path are None, the change is a
1032
modification to an entry, such as a rename, reparent, kind change
1035
The children attribute of new_entry is ignored. This is because
1036
this method preserves children automatically across alterations to
1037
the parent of the children, and cases where the parent id of a
1038
child is changing require the child to be passed in as a separate
1039
change regardless. E.g. in the recursive deletion of a directory -
1040
the directory's children must be included in the delta, or the
1041
final inventory will be invalid.
1043
Note that a file_id must only appear once within a given delta.
1044
An AssertionError is raised otherwise.
1046
# Check that the delta is legal. It would be nice if this could be
1047
# done within the loops below but it's safer to validate the delta
1048
# before starting to mutate the inventory, as there isn't a rollback
1050
list(_check_delta_unique_ids(_check_delta_unique_new_paths(
1051
_check_delta_unique_old_paths(_check_delta_ids_match_entry(
1052
_check_delta_ids_are_valid(
1053
_check_delta_new_path_entry_both_or_None(
1057
# Remove all affected items which were in the original inventory,
1058
# starting with the longest paths, thus ensuring parents are examined
1059
# after their children, which means that everything we examine has no
1060
# modified children remaining by the time we examine it.
1061
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1062
if op is not None), reverse=True):
1063
# Preserve unaltered children of file_id for later reinsertion.
1064
file_id_children = getattr(self[file_id], 'children', {})
1065
if len(file_id_children):
1066
children[file_id] = file_id_children
1067
if self.id2path(file_id) != old_path:
1068
raise errors.InconsistentDelta(old_path, file_id,
1069
"Entry was at wrong other path %r." % self.id2path(file_id))
1070
# Remove file_id and the unaltered children. If file_id is not
1071
# being deleted it will be reinserted back later.
1072
self.remove_recursive_id(file_id)
1073
# Insert all affected which should be in the new inventory, reattaching
1074
# their children if they had any. This is done from shortest path to
1075
# longest, ensuring that items which were modified and whose parents in
1076
# the resulting inventory were also modified, are inserted after their
1078
for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
1079
delta if np is not None):
1080
if new_entry.kind == 'directory':
1081
# Pop the child which to allow detection of children whose
1082
# parents were deleted and which were not reattached to a new
1084
replacement = InventoryDirectory(new_entry.file_id,
1085
new_entry.name, new_entry.parent_id)
1086
replacement.revision = new_entry.revision
1087
replacement.children = children.pop(replacement.file_id, {})
1088
new_entry = replacement
1091
except errors.DuplicateFileId:
1092
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1093
"New id is already present in target.")
1094
except AttributeError:
1095
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1096
"Parent is not a directory.")
1097
if self.id2path(new_entry.file_id) != new_path:
1098
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1099
"New path is not consistent with parent path.")
1101
# Get the parent id that was deleted
1102
parent_id, children = children.popitem()
1103
raise errors.InconsistentDelta("<deleted>", parent_id,
1104
"The file id was deleted but its children were not deleted.")
1106
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1107
propagate_caches=False):
1108
"""See CHKInventory.create_by_apply_delta()"""
1109
new_inv = self.copy()
1110
new_inv.apply_delta(inventory_delta)
1111
new_inv.revision_id = new_revision_id
1114
def _set_root(self, ie):
1116
self._byid = {self.root.file_id: self.root}
1119
# TODO: jam 20051218 Should copy also copy the revision_id?
1120
entries = self.iter_entries()
1121
if self.root is None:
1122
return Inventory(root_id=None)
1123
other = Inventory(entries.next()[1].file_id)
1124
other.root.revision = self.root.revision
1125
# copy recursively so we know directories will be added before
1126
# their children. There are more efficient ways than this...
1127
for path, entry in entries:
1128
other.add(entry.copy())
1132
"""Iterate over all file-ids."""
1133
return iter(self._byid)
1135
def iter_just_entries(self):
1136
"""Iterate over all entries.
1138
Unlike iter_entries(), just the entries are returned (not (path, ie))
1139
and the order of entries is undefined.
1119
def __contains__(self, file_id):
1120
"""True if this entry contains a file with given id.
1141
XXX: We may not want to merge this into bzr.dev.
1122
>>> inv = Inventory()
1123
>>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID))
1124
InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None)
1143
if self.root is None:
1145
for _, ie in self._byid.iteritems():
1149
"""Returns number of entries."""
1150
return len(self._byid)
1130
return (file_id in self._byid)
1152
1132
def __getitem__(self, file_id):
1153
1133
"""Return the entry for given file_id.
1155
1135
>>> inv = Inventory()
1156
1136
>>> inv.add(InventoryFile('123123', 'hello.c', ROOT_ID))
1157
InventoryFile('123123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
1137
InventoryFile('123123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None)
1158
1138
>>> inv['123123'].name
1373
1392
return self.root is not None and file_id == self.root.file_id
1376
class CHKInventory(CommonInventory):
1377
"""An inventory persisted in a CHK store.
1379
By design, a CHKInventory is immutable so many of the methods
1380
supported by Inventory - add, rename, apply_delta, etc - are *not*
1381
supported. To create a new CHKInventory, use create_by_apply_delta()
1382
or from_inventory(), say.
1384
Internally, a CHKInventory has one or two CHKMaps:
1386
* id_to_entry - a map from (file_id,) => InventoryEntry as bytes
1387
* parent_id_basename_to_file_id - a map from (parent_id, basename_utf8)
1390
The second map is optional and not present in early CHkRepository's.
1392
No caching is performed: every method call or item access will perform
1393
requests to the storage layer. As such, keep references to objects you
1397
def __init__(self, search_key_name):
1398
CommonInventory.__init__(self)
1399
self._fileid_to_entry_cache = {}
1400
self._fully_cached = False
1401
self._path_to_fileid_cache = {}
1402
self._search_key_name = search_key_name
1405
def __eq__(self, other):
1406
"""Compare two sets by comparing their contents."""
1407
if not isinstance(other, CHKInventory):
1408
return NotImplemented
1410
this_key = self.id_to_entry.key()
1411
other_key = other.id_to_entry.key()
1412
this_pid_key = self.parent_id_basename_to_file_id.key()
1413
other_pid_key = other.parent_id_basename_to_file_id.key()
1414
if None in (this_key, this_pid_key, other_key, other_pid_key):
1416
return this_key == other_key and this_pid_key == other_pid_key
1418
def _entry_to_bytes(self, entry):
1419
"""Serialise entry as a single bytestring.
1421
:param Entry: An inventory entry.
1422
:return: A bytestring for the entry.
1425
ENTRY ::= FILE | DIR | SYMLINK | TREE
1426
FILE ::= "file: " COMMON SEP SHA SEP SIZE SEP EXECUTABLE
1427
DIR ::= "dir: " COMMON
1428
SYMLINK ::= "symlink: " COMMON SEP TARGET_UTF8
1429
TREE ::= "tree: " COMMON REFERENCE_REVISION
1430
COMMON ::= FILE_ID SEP PARENT_ID SEP NAME_UTF8 SEP REVISION
1433
if entry.parent_id is not None:
1434
parent_str = entry.parent_id
1437
name_str = entry.name.encode("utf8")
1438
if entry.kind == 'file':
1439
if entry.executable:
1443
return "file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % (
1444
entry.file_id, parent_str, name_str, entry.revision,
1445
entry.text_sha1, entry.text_size, exec_str)
1446
elif entry.kind == 'directory':
1447
return "dir: %s\n%s\n%s\n%s" % (
1448
entry.file_id, parent_str, name_str, entry.revision)
1449
elif entry.kind == 'symlink':
1450
return "symlink: %s\n%s\n%s\n%s\n%s" % (
1451
entry.file_id, parent_str, name_str, entry.revision,
1452
entry.symlink_target.encode("utf8"))
1453
elif entry.kind == 'tree-reference':
1454
return "tree: %s\n%s\n%s\n%s\n%s" % (
1455
entry.file_id, parent_str, name_str, entry.revision,
1456
entry.reference_revision)
1458
raise ValueError("unknown kind %r" % entry.kind)
1460
def _expand_fileids_to_parents_and_children(self, file_ids):
1461
"""Give a more wholistic view starting with the given file_ids.
1463
For any file_id which maps to a directory, we will include all children
1464
of that directory. We will also include all directories which are
1465
parents of the given file_ids, but we will not include their children.
1472
fringle # fringle-id
1476
if given [foo-id] we will include
1477
TREE_ROOT as interesting parents
1479
foo-id, baz-id, frob-id, fringle-id
1483
# TODO: Pre-pass over the list of fileids to see if anything is already
1484
# deserialized in self._fileid_to_entry_cache
1486
directories_to_expand = set()
1487
children_of_parent_id = {}
1488
# It is okay if some of the fileids are missing
1489
for entry in self._getitems(file_ids):
1490
if entry.kind == 'directory':
1491
directories_to_expand.add(entry.file_id)
1492
interesting.add(entry.parent_id)
1493
children_of_parent_id.setdefault(entry.parent_id, set()
1494
).add(entry.file_id)
1496
# Now, interesting has all of the direct parents, but not the
1497
# parents of those parents. It also may have some duplicates with
1499
remaining_parents = interesting.difference(file_ids)
1500
# When we hit the TREE_ROOT, we'll get an interesting parent of None,
1501
# but we don't actually want to recurse into that
1502
interesting.add(None) # this will auto-filter it in the loop
1503
remaining_parents.discard(None)
1504
while remaining_parents:
1505
next_parents = set()
1506
for entry in self._getitems(remaining_parents):
1507
next_parents.add(entry.parent_id)
1508
children_of_parent_id.setdefault(entry.parent_id, set()
1509
).add(entry.file_id)
1510
# Remove any search tips we've already processed
1511
remaining_parents = next_parents.difference(interesting)
1512
interesting.update(remaining_parents)
1513
# We should probably also .difference(directories_to_expand)
1514
interesting.update(file_ids)
1515
interesting.discard(None)
1516
while directories_to_expand:
1517
# Expand directories by looking in the
1518
# parent_id_basename_to_file_id map
1519
keys = [StaticTuple(f,).intern() for f in directories_to_expand]
1520
directories_to_expand = set()
1521
items = self.parent_id_basename_to_file_id.iteritems(keys)
1522
next_file_ids = set([item[1] for item in items])
1523
next_file_ids = next_file_ids.difference(interesting)
1524
interesting.update(next_file_ids)
1525
for entry in self._getitems(next_file_ids):
1526
if entry.kind == 'directory':
1527
directories_to_expand.add(entry.file_id)
1528
children_of_parent_id.setdefault(entry.parent_id, set()
1529
).add(entry.file_id)
1530
return interesting, children_of_parent_id
1532
def filter(self, specific_fileids):
1533
"""Get an inventory view filtered against a set of file-ids.
1535
Children of directories and parents are included.
1537
The result may or may not reference the underlying inventory
1538
so it should be treated as immutable.
1541
parent_to_children) = self._expand_fileids_to_parents_and_children(
1543
# There is some overlap here, but we assume that all interesting items
1544
# are in the _fileid_to_entry_cache because we had to read them to
1545
# determine if they were a dir we wanted to recurse, or just a file
1546
# This should give us all the entries we'll want to add, so start
1548
other = Inventory(self.root_id)
1549
other.root.revision = self.root.revision
1550
other.revision_id = self.revision_id
1551
if not interesting or not parent_to_children:
1552
# empty filter, or filtering entrys that don't exist
1553
# (if even 1 existed, then we would have populated
1554
# parent_to_children with at least the tree root.)
1556
cache = self._fileid_to_entry_cache
1557
remaining_children = collections.deque(parent_to_children[self.root_id])
1558
while remaining_children:
1559
file_id = remaining_children.popleft()
1561
if ie.kind == 'directory':
1562
ie = ie.copy() # We create a copy to depopulate the .children attribute
1563
# TODO: depending on the uses of 'other' we should probably alwyas
1564
# '.copy()' to prevent someone from mutating other and
1565
# invaliding our internal cache
1567
if file_id in parent_to_children:
1568
remaining_children.extend(parent_to_children[file_id])
1572
def _bytes_to_utf8name_key(bytes):
1573
"""Get the file_id, revision_id key out of bytes."""
1574
# We don't normally care about name, except for times when we want
1575
# to filter out empty names because of non rich-root...
1576
sections = bytes.split('\n')
1577
kind, file_id = sections[0].split(': ')
1578
return (sections[2], intern(file_id), intern(sections[3]))
1580
def _bytes_to_entry(self, bytes):
1581
"""Deserialise a serialised entry."""
1582
sections = bytes.split('\n')
1583
if sections[0].startswith("file: "):
1584
result = InventoryFile(sections[0][6:],
1585
sections[2].decode('utf8'),
1587
result.text_sha1 = sections[4]
1588
result.text_size = int(sections[5])
1589
result.executable = sections[6] == "Y"
1590
elif sections[0].startswith("dir: "):
1591
result = CHKInventoryDirectory(sections[0][5:],
1592
sections[2].decode('utf8'),
1594
elif sections[0].startswith("symlink: "):
1595
result = InventoryLink(sections[0][9:],
1596
sections[2].decode('utf8'),
1598
result.symlink_target = sections[4].decode('utf8')
1599
elif sections[0].startswith("tree: "):
1600
result = TreeReference(sections[0][6:],
1601
sections[2].decode('utf8'),
1603
result.reference_revision = sections[4]
1605
raise ValueError("Not a serialised entry %r" % bytes)
1606
result.file_id = intern(result.file_id)
1607
result.revision = intern(sections[3])
1608
if result.parent_id == '':
1609
result.parent_id = None
1610
self._fileid_to_entry_cache[result.file_id] = result
1613
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1614
propagate_caches=False):
1615
"""Create a new CHKInventory by applying inventory_delta to this one.
1617
See the inventory developers documentation for the theory behind
1620
:param inventory_delta: The inventory delta to apply. See
1621
Inventory.apply_delta for details.
1622
:param new_revision_id: The revision id of the resulting CHKInventory.
1623
:param propagate_caches: If True, the caches for this inventory are
1624
copied to and updated for the result.
1625
:return: The new CHKInventory.
1627
split = osutils.split
1628
result = CHKInventory(self._search_key_name)
1629
if propagate_caches:
1630
# Just propagate the path-to-fileid cache for now
1631
result._path_to_fileid_cache = dict(self._path_to_fileid_cache.iteritems())
1632
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1633
self.id_to_entry._ensure_root()
1634
maximum_size = self.id_to_entry._root_node.maximum_size
1635
result.revision_id = new_revision_id
1636
result.id_to_entry = chk_map.CHKMap(
1637
self.id_to_entry._store,
1638
self.id_to_entry.key(),
1639
search_key_func=search_key_func)
1640
result.id_to_entry._ensure_root()
1641
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1642
# Change to apply to the parent_id_basename delta. The dict maps
1643
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1644
# when a path has its id replaced (e.g. the root is changed, or someone
1645
# does bzr mv a b, bzr mv c a, we should output a single change to this
1646
# map rather than two.
1647
parent_id_basename_delta = {}
1648
if self.parent_id_basename_to_file_id is not None:
1649
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1650
self.parent_id_basename_to_file_id._store,
1651
self.parent_id_basename_to_file_id.key(),
1652
search_key_func=search_key_func)
1653
result.parent_id_basename_to_file_id._ensure_root()
1654
self.parent_id_basename_to_file_id._ensure_root()
1655
result_p_id_root = result.parent_id_basename_to_file_id._root_node
1656
p_id_root = self.parent_id_basename_to_file_id._root_node
1657
result_p_id_root.set_maximum_size(p_id_root.maximum_size)
1658
result_p_id_root._key_width = p_id_root._key_width
1660
result.parent_id_basename_to_file_id = None
1661
result.root_id = self.root_id
1662
id_to_entry_delta = []
1663
# inventory_delta is only traversed once, so we just update the
1665
# Check for repeated file ids
1666
inventory_delta = _check_delta_unique_ids(inventory_delta)
1667
# Repeated old paths
1668
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1669
# Check for repeated new paths
1670
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1671
# Check for entries that don't match the fileid
1672
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1673
# Check for nonsense fileids
1674
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1675
# Check for new_path <-> entry consistency
1676
inventory_delta = _check_delta_new_path_entry_both_or_None(
1678
# All changed entries need to have their parents be directories and be
1679
# at the right path. This set contains (path, id) tuples.
1681
# When we delete an item, all the children of it must be either deleted
1682
# or altered in their own right. As we batch process the change via
1683
# CHKMap.apply_delta, we build a set of things to use to validate the
1687
for old_path, new_path, file_id, entry in inventory_delta:
1690
result.root_id = file_id
1691
if new_path is None:
1696
if propagate_caches:
1698
del result._path_to_fileid_cache[old_path]
1701
deletes.add(file_id)
1703
new_key = StaticTuple(file_id,)
1704
new_value = result._entry_to_bytes(entry)
1705
# Update caches. It's worth doing this whether
1706
# we're propagating the old caches or not.
1707
result._path_to_fileid_cache[new_path] = file_id
1708
parents.add((split(new_path)[0], entry.parent_id))
1709
if old_path is None:
1712
old_key = StaticTuple(file_id,)
1713
if self.id2path(file_id) != old_path:
1714
raise errors.InconsistentDelta(old_path, file_id,
1715
"Entry was at wrong other path %r." %
1716
self.id2path(file_id))
1717
altered.add(file_id)
1718
id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value))
1719
if result.parent_id_basename_to_file_id is not None:
1720
# parent_id, basename changes
1721
if old_path is None:
1724
old_entry = self[file_id]
1725
old_key = self._parent_id_basename_key(old_entry)
1726
if new_path is None:
1730
new_key = self._parent_id_basename_key(entry)
1732
# If the two keys are the same, the value will be unchanged
1733
# as its always the file id for this entry.
1734
if old_key != new_key:
1735
# Transform a change into explicit delete/add preserving
1736
# a possible match on the key from a different file id.
1737
if old_key is not None:
1738
parent_id_basename_delta.setdefault(
1739
old_key, [None, None])[0] = old_key
1740
if new_key is not None:
1741
parent_id_basename_delta.setdefault(
1742
new_key, [None, None])[1] = new_value
1743
# validate that deletes are complete.
1744
for file_id in deletes:
1745
entry = self[file_id]
1746
if entry.kind != 'directory':
1748
# This loop could potentially be better by using the id_basename
1749
# map to just get the child file ids.
1750
for child in entry.children.values():
1751
if child.file_id not in altered:
1752
raise errors.InconsistentDelta(self.id2path(child.file_id),
1753
child.file_id, "Child not deleted or reparented when "
1755
result.id_to_entry.apply_delta(id_to_entry_delta)
1756
if parent_id_basename_delta:
1757
# Transform the parent_id_basename delta data into a linear delta
1758
# with only one record for a given key. Optimally this would allow
1759
# re-keying, but its simpler to just output that as a delete+add
1760
# to spend less time calculating the delta.
1762
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1763
if value is not None:
1764
delta_list.append((old_key, key, value))
1766
delta_list.append((old_key, None, None))
1767
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1768
parents.discard(('', None))
1769
for parent_path, parent in parents:
1771
if result[parent].kind != 'directory':
1772
raise errors.InconsistentDelta(result.id2path(parent), parent,
1773
'Not a directory, but given children')
1774
except errors.NoSuchId:
1775
raise errors.InconsistentDelta("<unknown>", parent,
1776
"Parent is not present in resulting inventory.")
1777
if result.path2id(parent_path) != parent:
1778
raise errors.InconsistentDelta(parent_path, parent,
1779
"Parent has wrong path %r." % result.path2id(parent_path))
1783
def deserialise(klass, chk_store, bytes, expected_revision_id):
1784
"""Deserialise a CHKInventory.
1786
:param chk_store: A CHK capable VersionedFiles instance.
1787
:param bytes: The serialised bytes.
1788
:param expected_revision_id: The revision ID we think this inventory is
1790
:return: A CHKInventory
1792
lines = bytes.split('\n')
1794
raise AssertionError('bytes to deserialize must end with an eol')
1796
if lines[0] != 'chkinventory:':
1797
raise ValueError("not a serialised CHKInventory: %r" % bytes)
1799
allowed_keys = frozenset(['root_id', 'revision_id', 'search_key_name',
1800
'parent_id_basename_to_file_id',
1802
for line in lines[1:]:
1803
key, value = line.split(': ', 1)
1804
if key not in allowed_keys:
1805
raise errors.BzrError('Unknown key in inventory: %r\n%r'
1808
raise errors.BzrError('Duplicate key in inventory: %r\n%r'
1811
revision_id = intern(info['revision_id'])
1812
root_id = intern(info['root_id'])
1813
search_key_name = intern(info.get('search_key_name', 'plain'))
1814
parent_id_basename_to_file_id = intern(info.get(
1815
'parent_id_basename_to_file_id', None))
1816
if not parent_id_basename_to_file_id.startswith('sha1:'):
1817
raise ValueError('parent_id_basename_to_file_id should be a sha1'
1818
' key not %r' % (parent_id_basename_to_file_id,))
1819
id_to_entry = info['id_to_entry']
1820
if not id_to_entry.startswith('sha1:'):
1821
raise ValueError('id_to_entry should be a sha1'
1822
' key not %r' % (id_to_entry,))
1824
result = CHKInventory(search_key_name)
1825
result.revision_id = revision_id
1826
result.root_id = root_id
1827
search_key_func = chk_map.search_key_registry.get(
1828
result._search_key_name)
1829
if parent_id_basename_to_file_id is not None:
1830
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1831
chk_store, StaticTuple(parent_id_basename_to_file_id,),
1832
search_key_func=search_key_func)
1834
result.parent_id_basename_to_file_id = None
1836
result.id_to_entry = chk_map.CHKMap(chk_store,
1837
StaticTuple(id_to_entry,),
1838
search_key_func=search_key_func)
1839
if (result.revision_id,) != expected_revision_id:
1840
raise ValueError("Mismatched revision id and expected: %r, %r" %
1841
(result.revision_id, expected_revision_id))
1845
def from_inventory(klass, chk_store, inventory, maximum_size=0, search_key_name='plain'):
1846
"""Create a CHKInventory from an existing inventory.
1848
The content of inventory is copied into the chk_store, and a
1849
CHKInventory referencing that is returned.
1851
:param chk_store: A CHK capable VersionedFiles instance.
1852
:param inventory: The inventory to copy.
1853
:param maximum_size: The CHKMap node size limit.
1854
:param search_key_name: The identifier for the search key function
1856
result = klass(search_key_name)
1857
result.revision_id = inventory.revision_id
1858
result.root_id = inventory.root.file_id
1860
entry_to_bytes = result._entry_to_bytes
1861
parent_id_basename_key = result._parent_id_basename_key
1862
id_to_entry_dict = {}
1863
parent_id_basename_dict = {}
1864
for path, entry in inventory.iter_entries():
1865
key = StaticTuple(entry.file_id,).intern()
1866
id_to_entry_dict[key] = entry_to_bytes(entry)
1867
p_id_key = parent_id_basename_key(entry)
1868
parent_id_basename_dict[p_id_key] = entry.file_id
1870
result._populate_from_dicts(chk_store, id_to_entry_dict,
1871
parent_id_basename_dict, maximum_size=maximum_size)
1874
def _populate_from_dicts(self, chk_store, id_to_entry_dict,
1875
parent_id_basename_dict, maximum_size):
1876
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1877
root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
1878
maximum_size=maximum_size, key_width=1,
1879
search_key_func=search_key_func)
1880
self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
1882
root_key = chk_map.CHKMap.from_dict(chk_store,
1883
parent_id_basename_dict,
1884
maximum_size=maximum_size, key_width=2,
1885
search_key_func=search_key_func)
1886
self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1887
root_key, search_key_func)
1889
def _parent_id_basename_key(self, entry):
1890
"""Create a key for a entry in a parent_id_basename_to_file_id index."""
1891
if entry.parent_id is not None:
1892
parent_id = entry.parent_id
1895
return StaticTuple(parent_id, entry.name.encode('utf8')).intern()
1897
def __getitem__(self, file_id):
1898
"""map a single file_id -> InventoryEntry."""
1900
raise errors.NoSuchId(self, file_id)
1901
result = self._fileid_to_entry_cache.get(file_id, None)
1902
if result is not None:
1905
return self._bytes_to_entry(
1906
self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1])
1907
except StopIteration:
1908
# really we're passing an inventory, not a tree...
1909
raise errors.NoSuchId(self, file_id)
1911
def _getitems(self, file_ids):
1912
"""Similar to __getitem__, but lets you query for multiple.
1914
The returned order is undefined. And currently if an item doesn't
1915
exist, it isn't included in the output.
1919
for file_id in file_ids:
1920
entry = self._fileid_to_entry_cache.get(file_id, None)
1922
remaining.append(file_id)
1924
result.append(entry)
1925
file_keys = [StaticTuple(f,).intern() for f in remaining]
1926
for file_key, value in self.id_to_entry.iteritems(file_keys):
1927
entry = self._bytes_to_entry(value)
1928
result.append(entry)
1929
self._fileid_to_entry_cache[entry.file_id] = entry
1932
def has_id(self, file_id):
1933
# Perhaps have an explicit 'contains' method on CHKMap ?
1934
if self._fileid_to_entry_cache.get(file_id, None) is not None:
1937
self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1
1939
def is_root(self, file_id):
1940
return file_id == self.root_id
1942
def _iter_file_id_parents(self, file_id):
1943
"""Yield the parents of file_id up to the root."""
1944
while file_id is not None:
1948
raise errors.NoSuchId(tree=self, file_id=file_id)
1950
file_id = ie.parent_id
1953
"""Iterate over all file-ids."""
1954
for key, _ in self.id_to_entry.iteritems():
1957
def iter_just_entries(self):
1958
"""Iterate over all entries.
1960
Unlike iter_entries(), just the entries are returned (not (path, ie))
1961
and the order of entries is undefined.
1963
XXX: We may not want to merge this into bzr.dev.
1965
for key, entry in self.id_to_entry.iteritems():
1967
ie = self._fileid_to_entry_cache.get(file_id, None)
1969
ie = self._bytes_to_entry(entry)
1970
self._fileid_to_entry_cache[file_id] = ie
1973
def _preload_cache(self):
1974
"""Make sure all file-ids are in _fileid_to_entry_cache"""
1975
if self._fully_cached:
1976
return # No need to do it again
1977
# The optimal sort order is to use iteritems() directly
1978
cache = self._fileid_to_entry_cache
1979
for key, entry in self.id_to_entry.iteritems():
1981
if file_id not in cache:
1982
ie = self._bytes_to_entry(entry)
1986
last_parent_id = last_parent_ie = None
1987
pid_items = self.parent_id_basename_to_file_id.iteritems()
1988
for key, child_file_id in pid_items:
1989
if key == ('', ''): # This is the root
1990
if child_file_id != self.root_id:
1991
raise ValueError('Data inconsistency detected.'
1992
' We expected data with key ("","") to match'
1993
' the root id, but %s != %s'
1994
% (child_file_id, self.root_id))
1996
parent_id, basename = key
1997
ie = cache[child_file_id]
1998
if parent_id == last_parent_id:
1999
parent_ie = last_parent_ie
2001
parent_ie = cache[parent_id]
2002
if parent_ie.kind != 'directory':
2003
raise ValueError('Data inconsistency detected.'
2004
' An entry in the parent_id_basename_to_file_id map'
2005
' has parent_id {%s} but the kind of that object'
2006
' is %r not "directory"' % (parent_id, parent_ie.kind))
2007
if parent_ie._children is None:
2008
parent_ie._children = {}
2009
basename = basename.decode('utf-8')
2010
if basename in parent_ie._children:
2011
existing_ie = parent_ie._children[basename]
2012
if existing_ie != ie:
2013
raise ValueError('Data inconsistency detected.'
2014
' Two entries with basename %r were found'
2015
' in the parent entry {%s}'
2016
% (basename, parent_id))
2017
if basename != ie.name:
2018
raise ValueError('Data inconsistency detected.'
2019
' In the parent_id_basename_to_file_id map, file_id'
2020
' {%s} is listed as having basename %r, but in the'
2021
' id_to_entry map it is %r'
2022
% (child_file_id, basename, ie.name))
2023
parent_ie._children[basename] = ie
2024
self._fully_cached = True
2026
def iter_changes(self, basis):
2027
"""Generate a Tree.iter_changes change list between this and basis.
2029
:param basis: Another CHKInventory.
2030
:return: An iterator over the changes between self and basis, as per
2031
tree.iter_changes().
2033
# We want: (file_id, (path_in_source, path_in_target),
2034
# changed_content, versioned, parent, name, kind,
2036
for key, basis_value, self_value in \
2037
self.id_to_entry.iter_changes(basis.id_to_entry):
2039
if basis_value is not None:
2040
basis_entry = basis._bytes_to_entry(basis_value)
2041
path_in_source = basis.id2path(file_id)
2042
basis_parent = basis_entry.parent_id
2043
basis_name = basis_entry.name
2044
basis_executable = basis_entry.executable
2046
path_in_source = None
2049
basis_executable = None
2050
if self_value is not None:
2051
self_entry = self._bytes_to_entry(self_value)
2052
path_in_target = self.id2path(file_id)
2053
self_parent = self_entry.parent_id
2054
self_name = self_entry.name
2055
self_executable = self_entry.executable
2057
path_in_target = None
2060
self_executable = None
2061
if basis_value is None:
2063
kind = (None, self_entry.kind)
2064
versioned = (False, True)
2065
elif self_value is None:
2067
kind = (basis_entry.kind, None)
2068
versioned = (True, False)
2070
kind = (basis_entry.kind, self_entry.kind)
2071
versioned = (True, True)
2072
changed_content = False
2073
if kind[0] != kind[1]:
2074
changed_content = True
2075
elif kind[0] == 'file':
2076
if (self_entry.text_size != basis_entry.text_size or
2077
self_entry.text_sha1 != basis_entry.text_sha1):
2078
changed_content = True
2079
elif kind[0] == 'symlink':
2080
if self_entry.symlink_target != basis_entry.symlink_target:
2081
changed_content = True
2082
elif kind[0] == 'tree-reference':
2083
if (self_entry.reference_revision !=
2084
basis_entry.reference_revision):
2085
changed_content = True
2086
parent = (basis_parent, self_parent)
2087
name = (basis_name, self_name)
2088
executable = (basis_executable, self_executable)
2089
if (not changed_content
2090
and parent[0] == parent[1]
2091
and name[0] == name[1]
2092
and executable[0] == executable[1]):
2093
# Could happen when only the revision changed for a directory
2096
yield (file_id, (path_in_source, path_in_target), changed_content,
2097
versioned, parent, name, kind, executable)
2100
"""Return the number of entries in the inventory."""
2101
return len(self.id_to_entry)
2103
def _make_delta(self, old):
2104
"""Make an inventory delta from two inventories."""
2105
if type(old) != CHKInventory:
2106
return CommonInventory._make_delta(self, old)
2108
for key, old_value, self_value in \
2109
self.id_to_entry.iter_changes(old.id_to_entry):
2111
if old_value is not None:
2112
old_path = old.id2path(file_id)
2115
if self_value is not None:
2116
entry = self._bytes_to_entry(self_value)
2117
self._fileid_to_entry_cache[file_id] = entry
2118
new_path = self.id2path(file_id)
2122
delta.append((old_path, new_path, file_id, entry))
2125
def path2id(self, relpath):
2126
"""See CommonInventory.path2id()."""
2127
# TODO: perhaps support negative hits?
2128
result = self._path_to_fileid_cache.get(relpath, None)
2129
if result is not None:
2131
if isinstance(relpath, basestring):
2132
names = osutils.splitpath(relpath)
2135
current_id = self.root_id
2136
if current_id is None:
2138
parent_id_index = self.parent_id_basename_to_file_id
2140
for basename in names:
2141
if cur_path is None:
2144
cur_path = cur_path + '/' + basename
2145
basename_utf8 = basename.encode('utf8')
2146
file_id = self._path_to_fileid_cache.get(cur_path, None)
2148
key_filter = [StaticTuple(current_id, basename_utf8)]
2149
items = parent_id_index.iteritems(key_filter)
2150
for (parent_id, name_utf8), file_id in items:
2151
if parent_id != current_id or name_utf8 != basename_utf8:
2152
raise errors.BzrError("corrupt inventory lookup! "
2153
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2158
self._path_to_fileid_cache[cur_path] = file_id
2159
current_id = file_id
2163
"""Serialise the inventory to lines."""
2164
lines = ["chkinventory:\n"]
2165
if self._search_key_name != 'plain':
2166
# custom ordering grouping things that don't change together
2167
lines.append('search_key_name: %s\n' % (self._search_key_name,))
2168
lines.append("root_id: %s\n" % self.root_id)
2169
lines.append('parent_id_basename_to_file_id: %s\n' %
2170
(self.parent_id_basename_to_file_id.key()[0],))
2171
lines.append("revision_id: %s\n" % self.revision_id)
2172
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
2174
lines.append("revision_id: %s\n" % self.revision_id)
2175
lines.append("root_id: %s\n" % self.root_id)
2176
if self.parent_id_basename_to_file_id is not None:
2177
lines.append('parent_id_basename_to_file_id: %s\n' %
2178
(self.parent_id_basename_to_file_id.key()[0],))
2179
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
2184
"""Get the root entry."""
2185
return self[self.root_id]
2188
class CHKInventoryDirectory(InventoryDirectory):
2189
"""A directory in an inventory."""
2191
__slots__ = ['_children', '_chk_inventory']
2193
def __init__(self, file_id, name, parent_id, chk_inventory):
2194
# Don't call InventoryDirectory.__init__ - it isn't right for this
2196
InventoryEntry.__init__(self, file_id, name, parent_id)
2197
self._children = None
2198
self._chk_inventory = chk_inventory
2202
"""Access the list of children of this directory.
2204
With a parent_id_basename_to_file_id index, loads all the children,
2205
without loads the entire index. Without is bad. A more sophisticated
2206
proxy object might be nice, to allow partial loading of children as
2207
well when specific names are accessed. (So path traversal can be
2208
written in the obvious way but not examine siblings.).
2210
if self._children is not None:
2211
return self._children
2212
# No longer supported
2213
if self._chk_inventory.parent_id_basename_to_file_id is None:
2214
raise AssertionError("Inventories without"
2215
" parent_id_basename_to_file_id are no longer supported")
2217
# XXX: Todo - use proxy objects for the children rather than loading
2218
# all when the attribute is referenced.
2219
parent_id_index = self._chk_inventory.parent_id_basename_to_file_id
2221
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
2222
key_filter=[StaticTuple(self.file_id,)]):
2223
child_keys.add(StaticTuple(file_id,))
2225
for file_id_key in child_keys:
2226
entry = self._chk_inventory._fileid_to_entry_cache.get(
2227
file_id_key[0], None)
2228
if entry is not None:
2229
result[entry.name] = entry
2230
cached.add(file_id_key)
2231
child_keys.difference_update(cached)
2232
# populate; todo: do by name
2233
id_to_entry = self._chk_inventory.id_to_entry
2234
for file_id_key, bytes in id_to_entry.iteritems(child_keys):
2235
entry = self._chk_inventory._bytes_to_entry(bytes)
2236
result[entry.name] = entry
2237
self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry
2238
self._children = result
2241
1395
entry_factory = {
2242
1396
'directory': InventoryDirectory,
2243
1397
'file': InventoryFile,