129
132
RENAMED = 'renamed'
130
133
MODIFIED_AND_RENAMED = 'modified and renamed'
132
__slots__ = ['file_id', 'revision', 'parent_id', 'name']
134
# Attributes that all InventoryEntry instances are expected to have, but
135
# that don't vary for all kinds of entry. (e.g. symlink_target is only
136
# relevant to InventoryLink, so there's no reason to make every
137
# InventoryFile instance allocate space to hold a value for it.)
138
# Attributes that only vary for files: executable, text_sha1, text_size,
144
# Attributes that only vary for symlinks: symlink_target
145
symlink_target = None
146
# Attributes that only vary for tree-references: reference_revision
147
reference_revision = None
150
137
def detect_changes(self, old_entry):
151
138
"""Return a (text_modified, meta_modified) from this to old_entry.
219
216
if '/' in name or '\\' in name:
220
217
raise errors.InvalidEntryName(name=name)
218
self.executable = False
220
self.text_sha1 = None
221
self.text_size = None
221
222
self.file_id = file_id
224
self.text_id = text_id
224
225
self.parent_id = parent_id
226
self.symlink_target = None
227
self.reference_revision = None
226
229
def kind_character(self):
227
230
"""Return a short kind indicator useful for appending to names."""
228
raise errors.BzrError('unknown kind %r' % self.kind)
231
raise BzrError('unknown kind %r' % self.kind)
230
233
known_kinds = ('file', 'directory', 'symlink')
235
def _put_in_tar(self, item, tree):
236
"""populate item for stashing in a tar, and return the content stream.
238
If no content is available, return None.
240
raise BzrError("don't know how to export {%s} of kind %r" %
241
(self.file_id, self.kind))
243
@deprecated_method(deprecated_in((1, 6, 0)))
244
def put_on_disk(self, dest, dp, tree):
245
"""Create a representation of self on disk in the prefix dest.
247
This is a template method - implement _put_on_disk in subclasses.
249
fullpath = osutils.pathjoin(dest, dp)
250
self._put_on_disk(fullpath, tree)
251
# mutter(" export {%s} kind %s to %s", self.file_id,
252
# self.kind, fullpath)
254
def _put_on_disk(self, fullpath, tree):
255
"""Put this entry onto disk at fullpath, from tree tree."""
256
raise BzrError("don't know how to export {%s} of kind %r" % (self.file_id, self.kind))
232
258
def sorted_children(self):
233
259
return sorted(self.children.items())
248
274
:param rev_id: Revision id from which this InventoryEntry was loaded.
249
275
Not necessarily the last-changed revision for this file.
250
276
:param inv: Inventory from which the entry was loaded.
277
:param tree: RevisionTree for this entry.
252
279
if self.parent_id is not None:
253
280
if not inv.has_id(self.parent_id):
254
raise errors.BzrCheckError(
255
'missing parent {%s} in inventory for revision {%s}' % (
256
self.parent_id, rev_id))
257
checker._add_entry_to_text_key_references(inv, self)
258
self._check(checker, rev_id)
281
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
282
% (self.parent_id, rev_id))
283
self._check(checker, rev_id, tree)
260
def _check(self, checker, rev_id):
285
def _check(self, checker, rev_id, tree):
261
286
"""Check this inventory entry for kind specific errors."""
262
checker._report_items.append(
263
'unknown entry kind %r in revision {%s}' % (self.kind, rev_id))
287
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
266
291
"""Clone this inventory entry."""
401
class RootEntry(InventoryEntry):
403
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
404
'text_id', 'parent_id', 'children', 'executable',
405
'revision', 'symlink_target', 'reference_revision']
407
def _check(self, checker, rev_id, tree):
408
"""See InventoryEntry._check"""
410
def __init__(self, file_id):
411
self.file_id = file_id
413
self.kind = 'directory'
414
self.parent_id = None
417
symbol_versioning.warn('RootEntry is deprecated as of bzr 0.10.'
418
' Please use InventoryDirectory instead.',
419
DeprecationWarning, stacklevel=2)
421
def __eq__(self, other):
422
if not isinstance(other, RootEntry):
423
return NotImplemented
425
return (self.file_id == other.file_id) \
426
and (self.children == other.children)
376
429
class InventoryDirectory(InventoryEntry):
377
430
"""A directory in an inventory."""
379
__slots__ = ['children']
383
def _check(self, checker, rev_id):
432
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
433
'text_id', 'parent_id', 'children', 'executable',
434
'revision', 'symlink_target', 'reference_revision']
436
def _check(self, checker, rev_id, tree):
384
437
"""See InventoryEntry._check"""
385
# In non rich root repositories we do not expect a file graph for the
387
if self.name == '' and not checker.rich_roots:
389
# Directories are stored as an empty file, but the file should exist
390
# to provide a per-fileid log. The hash of every directory content is
391
# "da..." below (the sha1sum of '').
392
checker.add_pending_item(rev_id,
393
('texts', self.file_id, self.revision), 'text',
394
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
438
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
439
raise BzrCheckError('directory {%s} has text in revision {%s}'
440
% (self.file_id, rev_id))
397
443
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
403
449
def __init__(self, file_id, name, parent_id):
404
450
super(InventoryDirectory, self).__init__(file_id, name, parent_id)
405
451
self.children = {}
452
self.kind = 'directory'
407
454
def kind_character(self):
408
455
"""See InventoryEntry.kind_character."""
458
def _put_in_tar(self, item, tree):
459
"""See InventoryEntry._put_in_tar."""
460
item.type = tarfile.DIRTYPE
467
def _put_on_disk(self, fullpath, tree):
468
"""See InventoryEntry._put_on_disk."""
412
472
class InventoryFile(InventoryEntry):
413
473
"""A file in an inventory."""
415
__slots__ = ['text_sha1', 'text_size', 'text_id', 'executable']
419
def __init__(self, file_id, name, parent_id):
420
super(InventoryFile, self).__init__(file_id, name, parent_id)
421
self.text_sha1 = None
422
self.text_size = None
424
self.executable = False
426
def _check(self, checker, tree_revision_id):
475
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
476
'text_id', 'parent_id', 'children', 'executable',
477
'revision', 'symlink_target', 'reference_revision']
479
def _check(self, checker, tree_revision_id, tree):
427
480
"""See InventoryEntry._check"""
428
# TODO: check size too.
429
checker.add_pending_item(tree_revision_id,
430
('texts', self.file_id, self.revision), 'text',
432
if self.text_size is None:
433
checker._report_items.append(
434
'fileid {%s} in {%s} has None for text_size' % (self.file_id,
481
key = (self.file_id, self.revision)
482
if key in checker.checked_texts:
483
prev_sha = checker.checked_texts[key]
484
if prev_sha != self.text_sha1:
486
'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
487
(self.file_id, tree_revision_id, prev_sha, self.text_sha1,
490
checker.repeated_text_cnt += 1
493
checker.checked_text_cnt += 1
494
# We can't check the length, because Weave doesn't store that
495
# information, and the whole point of looking at the weave's
496
# sha1sum is that we don't have to extract the text.
497
if (self.text_sha1 != tree._repository.texts.get_sha1s([key])[key]):
498
raise BzrCheckError('text {%s} version {%s} wrong sha1' % key)
499
checker.checked_texts[key] = self.text_sha1
438
502
other = InventoryFile(self.file_id, self.name, self.parent_id)
470
534
"""See InventoryEntry.has_text."""
537
def __init__(self, file_id, name, parent_id):
538
super(InventoryFile, self).__init__(file_id, name, parent_id)
473
541
def kind_character(self):
474
542
"""See InventoryEntry.kind_character."""
545
def _put_in_tar(self, item, tree):
546
"""See InventoryEntry._put_in_tar."""
547
item.type = tarfile.REGTYPE
548
fileobj = tree.get_file(self.file_id)
549
item.size = self.text_size
550
if tree.is_executable(self.file_id):
556
def _put_on_disk(self, fullpath, tree):
557
"""See InventoryEntry._put_on_disk."""
558
osutils.pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
559
if tree.is_executable(self.file_id):
560
os.chmod(fullpath, 0755)
477
562
def _read_tree_state(self, path, work_tree):
478
563
"""See InventoryEntry._read_tree_state."""
479
564
self.text_sha1 = work_tree.get_file_sha1(self.file_id, path=path)
511
596
class InventoryLink(InventoryEntry):
512
597
"""A file in an inventory."""
514
__slots__ = ['symlink_target']
518
def __init__(self, file_id, name, parent_id):
519
super(InventoryLink, self).__init__(file_id, name, parent_id)
520
self.symlink_target = None
522
def _check(self, checker, tree_revision_id):
599
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
600
'text_id', 'parent_id', 'children', 'executable',
601
'revision', 'symlink_target', 'reference_revision']
603
def _check(self, checker, rev_id, tree):
523
604
"""See InventoryEntry._check"""
605
if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
606
raise BzrCheckError('symlink {%s} has text in revision {%s}'
607
% (self.file_id, rev_id))
524
608
if self.symlink_target is None:
525
checker._report_items.append(
526
'symlink {%s} has no target in revision {%s}'
527
% (self.file_id, tree_revision_id))
528
# Symlinks are stored as ''
529
checker.add_pending_item(tree_revision_id,
530
('texts', self.file_id, self.revision), 'text',
531
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
609
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
610
% (self.file_id, rev_id))
534
613
other = InventoryLink(self.file_id, self.name, self.parent_id)
564
643
differ = DiffSymlink(old_tree, new_tree, output_to)
565
644
return differ.diff_symlink(old_target, new_target)
646
def __init__(self, file_id, name, parent_id):
647
super(InventoryLink, self).__init__(file_id, name, parent_id)
648
self.kind = 'symlink'
567
650
def kind_character(self):
568
651
"""See InventoryEntry.kind_character."""
654
def _put_in_tar(self, item, tree):
655
"""See InventoryEntry._put_in_tar."""
656
item.type = tarfile.SYMTYPE
660
item.linkname = self.symlink_target
663
def _put_on_disk(self, fullpath, tree):
664
"""See InventoryEntry._put_on_disk."""
666
os.symlink(self.symlink_target, fullpath)
668
raise BzrError("Failed to create symlink %r -> %r, error: %s" % (fullpath, self.symlink_target, e))
571
670
def _read_tree_state(self, path, work_tree):
572
671
"""See InventoryEntry._read_tree_state."""
573
672
self.symlink_target = work_tree.get_symlink_target(self.file_id)
619
716
class CommonInventory(object):
620
"""Basic inventory logic, defined in terms of primitives like has_id.
622
An inventory is the metadata about the contents of a tree.
624
This is broadly a map from file_id to entries such as directories, files,
625
symlinks and tree references. Each entry maintains its own metadata like
626
SHA1 and length for files, or children for a directory.
628
Entries can be looked up either by path or by file_id.
630
InventoryEntry objects must not be modified after they are
631
inserted, other than through the Inventory API.
634
@deprecated_method(deprecated_in((2, 4, 0)))
717
"""Basic inventory logic, defined in terms of primitives like has_id."""
635
719
def __contains__(self, file_id):
636
720
"""True if this entry contains a file with given id.
638
722
>>> inv = Inventory()
639
723
>>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID))
640
724
InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
641
>>> inv.has_id('123')
643
>>> inv.has_id('456')
646
730
Note that this method along with __iter__ are not encouraged for use as
660
741
>>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
661
742
>>> print i.id2path('foo-id')
664
:raises NoSuchId: If file_id is not present in the inventory.
666
745
# get all names, skipping root
667
746
return '/'.join(reversed(
668
747
[parent.name for parent in
669
748
self._iter_file_id_parents(file_id)][:-1]))
671
def iter_entries(self, from_dir=None, recursive=True):
672
"""Return (path, entry) pairs, in order by name.
674
:param from_dir: if None, start from the root,
675
otherwise start from this directory (either file-id or entry)
676
:param recursive: recurse into directories or not
750
def iter_entries(self, from_dir=None):
751
"""Return (path, entry) pairs, in order by name."""
678
752
if from_dir is None:
679
753
if self.root is None:
864
928
descend(self.root, u'')
867
def path2id(self, relpath):
931
def path2id(self, name):
868
932
"""Walk down through directories to return entry of last component.
870
:param relpath: may be either a list of path components, or a single
871
string, in which case it is automatically split.
934
names may be either a list of path components, or a single
935
string, in which case it is automatically split.
873
937
This returns the entry of the last component in the path,
874
938
which may be either a file or a directory.
876
940
Returns None IFF the path is not found.
878
if isinstance(relpath, basestring):
879
names = osutils.splitpath(relpath)
942
if isinstance(name, basestring):
943
name = osutils.splitpath(name)
945
# mutter("lookup path %r" % name)
884
948
parent = self.root
950
1014
class Inventory(CommonInventory):
951
"""Mutable dict based in-memory inventory.
953
We never store the full path to a file, because renaming a directory
954
implicitly moves all of its contents. This class internally maintains a
1015
"""Inventory of versioned files in a tree.
1017
This describes which file_id is present at each point in the tree,
1018
and possibly the SHA-1 or other information about the file.
1019
Entries can be looked up either by path or by file_id.
1021
The inventory represents a typical unix file tree, with
1022
directories containing files and subdirectories. We never store
1023
the full path to a file, because renaming a directory implicitly
1024
moves all of its contents. This class internally maintains a
955
1025
lookup tree that allows the children under a directory to be
956
1026
returned quickly.
1028
InventoryEntry objects must not be modified after they are
1029
inserted, other than through the Inventory API.
958
1031
>>> inv = Inventory()
959
1032
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
960
1033
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
961
1034
>>> inv['123-123'].name
964
Id's may be looked up from paths:
966
>>> inv.path2id('hello.c')
968
>>> inv.has_id('123-123')
971
There are iterators over the contents:
973
>>> [entry[0] for entry in inv.iter_entries()]
1037
May be treated as an iterator or set to look up file ids:
1039
>>> bool(inv.path2id('hello.c'))
1041
>>> '123-123' in inv
1044
May also look up by name:
1046
>>> [x[0] for x in inv.iter_entries()]
974
1047
['', u'hello.c']
1048
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
1049
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1050
Traceback (most recent call last):
1051
BzrError: parent_id {TREE_ROOT} not in inventory
1052
>>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
1053
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None, revision=None)
977
1055
def __init__(self, root_id=ROOT_ID, revision_id=None):
978
1056
"""Create or read an inventory.
1060
1131
# modified children remaining by the time we examine it.
1061
1132
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1062
1133
if op is not None), reverse=True):
1134
if file_id not in self:
1063
1137
# Preserve unaltered children of file_id for later reinsertion.
1064
1138
file_id_children = getattr(self[file_id], 'children', {})
1065
1139
if len(file_id_children):
1066
1140
children[file_id] = file_id_children
1067
if self.id2path(file_id) != old_path:
1068
raise errors.InconsistentDelta(old_path, file_id,
1069
"Entry was at wrong other path %r." % self.id2path(file_id))
1070
1141
# Remove file_id and the unaltered children. If file_id is not
1071
1142
# being deleted it will be reinserted back later.
1072
1143
self.remove_recursive_id(file_id)
1086
1157
replacement.revision = new_entry.revision
1087
1158
replacement.children = children.pop(replacement.file_id, {})
1088
1159
new_entry = replacement
1091
except errors.DuplicateFileId:
1092
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1093
"New id is already present in target.")
1094
except AttributeError:
1095
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1096
"Parent is not a directory.")
1097
if self.id2path(new_entry.file_id) != new_path:
1098
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1099
"New path is not consistent with parent path.")
1100
1161
if len(children):
1101
1162
# Get the parent id that was deleted
1102
1163
parent_id, children = children.popitem()
1103
1164
raise errors.InconsistentDelta("<deleted>", parent_id,
1104
1165
"The file id was deleted but its children were not deleted.")
1106
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1107
propagate_caches=False):
1108
"""See CHKInventory.create_by_apply_delta()"""
1109
new_inv = self.copy()
1110
new_inv.apply_delta(inventory_delta)
1111
new_inv.revision_id = new_revision_id
1114
1167
def _set_root(self, ie):
1116
1169
self._byid = {self.root.file_id: self.root}
1184
1240
def add(self, entry):
1185
1241
"""Add entry to inventory.
1243
To add a file to a branch ready to be committed, use Branch.add,
1246
Returns the new entry object.
1189
1248
if entry.file_id in self._byid:
1190
1249
raise errors.DuplicateFileId(entry.file_id,
1191
1250
self._byid[entry.file_id])
1192
1252
if entry.parent_id is None:
1193
1253
self.root = entry
1196
1256
parent = self._byid[entry.parent_id]
1197
1257
except KeyError:
1198
raise errors.InconsistentDelta("<unknown>", entry.parent_id,
1199
"Parent not in inventory.")
1258
raise BzrError("parent_id {%s} not in inventory" %
1200
1261
if entry.name in parent.children:
1201
raise errors.InconsistentDelta(
1202
self.id2path(parent.children[entry.name].file_id),
1204
"Path already versioned")
1262
raise BzrError("%s is already versioned" %
1263
osutils.pathjoin(self.id2path(parent.file_id),
1264
entry.name).encode('utf-8'))
1205
1265
parent.children[entry.name] = entry
1206
1266
return self._add_child(entry)
1346
1409
new_name = ensure_normalized_name(new_name)
1347
1410
if not is_valid_name(new_name):
1348
raise errors.BzrError("not an acceptable filename: %r" % new_name)
1411
raise BzrError("not an acceptable filename: %r" % new_name)
1350
1413
new_parent = self._byid[new_parent_id]
1351
1414
if new_name in new_parent.children:
1352
raise errors.BzrError("%r already exists in %r" %
1353
(new_name, self.id2path(new_parent_id)))
1415
raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
1355
1417
new_parent_idpath = self.get_idpath(new_parent_id)
1356
1418
if file_id in new_parent_idpath:
1357
raise errors.BzrError(
1358
"cannot move directory %r into a subdirectory of itself, %r"
1419
raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
1359
1420
% (self.id2path(file_id), self.id2path(new_parent_id)))
1361
1422
file_ie = self._byid[file_id]
1397
1458
def __init__(self, search_key_name):
1398
1459
CommonInventory.__init__(self)
1399
1460
self._fileid_to_entry_cache = {}
1400
self._fully_cached = False
1401
1461
self._path_to_fileid_cache = {}
1402
1462
self._search_key_name = search_key_name
1405
def __eq__(self, other):
1406
"""Compare two sets by comparing their contents."""
1407
if not isinstance(other, CHKInventory):
1408
return NotImplemented
1410
this_key = self.id_to_entry.key()
1411
other_key = other.id_to_entry.key()
1412
this_pid_key = self.parent_id_basename_to_file_id.key()
1413
other_pid_key = other.parent_id_basename_to_file_id.key()
1414
if None in (this_key, this_pid_key, other_key, other_pid_key):
1416
return this_key == other_key and this_pid_key == other_pid_key
1418
1464
def _entry_to_bytes(self, entry):
1419
1465
"""Serialise entry as a single bytestring.
1458
1504
raise ValueError("unknown kind %r" % entry.kind)
1460
def _expand_fileids_to_parents_and_children(self, file_ids):
1461
"""Give a more wholistic view starting with the given file_ids.
1463
For any file_id which maps to a directory, we will include all children
1464
of that directory. We will also include all directories which are
1465
parents of the given file_ids, but we will not include their children.
1472
fringle # fringle-id
1476
if given [foo-id] we will include
1477
TREE_ROOT as interesting parents
1479
foo-id, baz-id, frob-id, fringle-id
1483
# TODO: Pre-pass over the list of fileids to see if anything is already
1484
# deserialized in self._fileid_to_entry_cache
1486
directories_to_expand = set()
1487
children_of_parent_id = {}
1488
# It is okay if some of the fileids are missing
1489
for entry in self._getitems(file_ids):
1490
if entry.kind == 'directory':
1491
directories_to_expand.add(entry.file_id)
1492
interesting.add(entry.parent_id)
1493
children_of_parent_id.setdefault(entry.parent_id, set()
1494
).add(entry.file_id)
1496
# Now, interesting has all of the direct parents, but not the
1497
# parents of those parents. It also may have some duplicates with
1499
remaining_parents = interesting.difference(file_ids)
1500
# When we hit the TREE_ROOT, we'll get an interesting parent of None,
1501
# but we don't actually want to recurse into that
1502
interesting.add(None) # this will auto-filter it in the loop
1503
remaining_parents.discard(None)
1504
while remaining_parents:
1505
next_parents = set()
1506
for entry in self._getitems(remaining_parents):
1507
next_parents.add(entry.parent_id)
1508
children_of_parent_id.setdefault(entry.parent_id, set()
1509
).add(entry.file_id)
1510
# Remove any search tips we've already processed
1511
remaining_parents = next_parents.difference(interesting)
1512
interesting.update(remaining_parents)
1513
# We should probably also .difference(directories_to_expand)
1514
interesting.update(file_ids)
1515
interesting.discard(None)
1516
while directories_to_expand:
1517
# Expand directories by looking in the
1518
# parent_id_basename_to_file_id map
1519
keys = [StaticTuple(f,).intern() for f in directories_to_expand]
1520
directories_to_expand = set()
1521
items = self.parent_id_basename_to_file_id.iteritems(keys)
1522
next_file_ids = set([item[1] for item in items])
1523
next_file_ids = next_file_ids.difference(interesting)
1524
interesting.update(next_file_ids)
1525
for entry in self._getitems(next_file_ids):
1526
if entry.kind == 'directory':
1527
directories_to_expand.add(entry.file_id)
1528
children_of_parent_id.setdefault(entry.parent_id, set()
1529
).add(entry.file_id)
1530
return interesting, children_of_parent_id
1532
def filter(self, specific_fileids):
1533
"""Get an inventory view filtered against a set of file-ids.
1535
Children of directories and parents are included.
1537
The result may or may not reference the underlying inventory
1538
so it should be treated as immutable.
1541
parent_to_children) = self._expand_fileids_to_parents_and_children(
1543
# There is some overlap here, but we assume that all interesting items
1544
# are in the _fileid_to_entry_cache because we had to read them to
1545
# determine if they were a dir we wanted to recurse, or just a file
1546
# This should give us all the entries we'll want to add, so start
1548
other = Inventory(self.root_id)
1549
other.root.revision = self.root.revision
1550
other.revision_id = self.revision_id
1551
if not interesting or not parent_to_children:
1552
# empty filter, or filtering entrys that don't exist
1553
# (if even 1 existed, then we would have populated
1554
# parent_to_children with at least the tree root.)
1556
cache = self._fileid_to_entry_cache
1557
remaining_children = collections.deque(parent_to_children[self.root_id])
1558
while remaining_children:
1559
file_id = remaining_children.popleft()
1561
if ie.kind == 'directory':
1562
ie = ie.copy() # We create a copy to depopulate the .children attribute
1563
# TODO: depending on the uses of 'other' we should probably alwyas
1564
# '.copy()' to prevent someone from mutating other and
1565
# invaliding our internal cache
1567
if file_id in parent_to_children:
1568
remaining_children.extend(parent_to_children[file_id])
1572
1507
def _bytes_to_utf8name_key(bytes):
1573
1508
"""Get the file_id, revision_id key out of bytes."""
1603
1538
result.reference_revision = sections[4]
1605
1540
raise ValueError("Not a serialised entry %r" % bytes)
1606
result.file_id = intern(result.file_id)
1607
result.revision = intern(sections[3])
1541
result.revision = sections[3]
1608
1542
if result.parent_id == '':
1609
1543
result.parent_id = None
1610
1544
self._fileid_to_entry_cache[result.file_id] = result
1547
def _get_mutable_inventory(self):
1548
"""See CommonInventory._get_mutable_inventory."""
1549
entries = self.iter_entries()
1550
inv = Inventory(None, self.revision_id)
1551
for path, inv_entry in entries:
1552
inv.add(inv_entry.copy())
1613
1555
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1614
1556
propagate_caches=False):
1615
1557
"""Create a new CHKInventory by applying inventory_delta to this one.
1617
See the inventory developers documentation for the theory behind
1620
1559
:param inventory_delta: The inventory delta to apply. See
1621
1560
Inventory.apply_delta for details.
1622
1561
:param new_revision_id: The revision id of the resulting CHKInventory.
1639
1577
search_key_func=search_key_func)
1640
1578
result.id_to_entry._ensure_root()
1641
1579
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1642
# Change to apply to the parent_id_basename delta. The dict maps
1643
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1644
# when a path has its id replaced (e.g. the root is changed, or someone
1645
# does bzr mv a b, bzr mv c a, we should output a single change to this
1646
# map rather than two.
1647
parent_id_basename_delta = {}
1580
parent_id_basename_delta = []
1648
1581
if self.parent_id_basename_to_file_id is not None:
1649
1582
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1650
1583
self.parent_id_basename_to_file_id._store,
1660
1593
result.parent_id_basename_to_file_id = None
1661
1594
result.root_id = self.root_id
1662
1595
id_to_entry_delta = []
1663
# inventory_delta is only traversed once, so we just update the
1665
# Check for repeated file ids
1666
inventory_delta = _check_delta_unique_ids(inventory_delta)
1667
# Repeated old paths
1668
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1669
# Check for repeated new paths
1670
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1671
# Check for entries that don't match the fileid
1672
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1673
# Check for nonsense fileids
1674
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1675
# Check for new_path <-> entry consistency
1676
inventory_delta = _check_delta_new_path_entry_both_or_None(
1678
# All changed entries need to have their parents be directories and be
1679
# at the right path. This set contains (path, id) tuples.
1681
# When we delete an item, all the children of it must be either deleted
1682
# or altered in their own right. As we batch process the change via
1683
# CHKMap.apply_delta, we build a set of things to use to validate the
1687
1596
for old_path, new_path, file_id, entry in inventory_delta:
1688
1597
# file id changes
1689
1598
if new_path == '':
1698
1607
del result._path_to_fileid_cache[old_path]
1699
1608
except KeyError:
1701
deletes.add(file_id)
1703
new_key = StaticTuple(file_id,)
1611
new_key = (file_id,)
1704
1612
new_value = result._entry_to_bytes(entry)
1705
1613
# Update caches. It's worth doing this whether
1706
1614
# we're propagating the old caches or not.
1707
1615
result._path_to_fileid_cache[new_path] = file_id
1708
parents.add((split(new_path)[0], entry.parent_id))
1709
1616
if old_path is None:
1712
old_key = StaticTuple(file_id,)
1713
if self.id2path(file_id) != old_path:
1714
raise errors.InconsistentDelta(old_path, file_id,
1715
"Entry was at wrong other path %r." %
1716
self.id2path(file_id))
1717
altered.add(file_id)
1718
id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value))
1619
old_key = (file_id,)
1620
id_to_entry_delta.append((old_key, new_key, new_value))
1719
1621
if result.parent_id_basename_to_file_id is not None:
1720
1622
# parent_id, basename changes
1721
1623
if old_path is None:
1730
1632
new_key = self._parent_id_basename_key(entry)
1731
1633
new_value = file_id
1732
# If the two keys are the same, the value will be unchanged
1733
# as its always the file id for this entry.
1734
1634
if old_key != new_key:
1735
# Transform a change into explicit delete/add preserving
1736
# a possible match on the key from a different file id.
1737
if old_key is not None:
1738
parent_id_basename_delta.setdefault(
1739
old_key, [None, None])[0] = old_key
1740
if new_key is not None:
1741
parent_id_basename_delta.setdefault(
1742
new_key, [None, None])[1] = new_value
1743
# validate that deletes are complete.
1744
for file_id in deletes:
1745
entry = self[file_id]
1746
if entry.kind != 'directory':
1748
# This loop could potentially be better by using the id_basename
1749
# map to just get the child file ids.
1750
for child in entry.children.values():
1751
if child.file_id not in altered:
1752
raise errors.InconsistentDelta(self.id2path(child.file_id),
1753
child.file_id, "Child not deleted or reparented when "
1635
# If the two keys are the same, the value will be unchanged
1636
# as its always the file id.
1637
parent_id_basename_delta.append((old_key, new_key, new_value))
1755
1638
result.id_to_entry.apply_delta(id_to_entry_delta)
1756
1639
if parent_id_basename_delta:
1757
# Transform the parent_id_basename delta data into a linear delta
1758
# with only one record for a given key. Optimally this would allow
1759
# re-keying, but its simpler to just output that as a delete+add
1760
# to spend less time calculating the delta.
1762
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1763
if value is not None:
1764
delta_list.append((old_key, key, value))
1766
delta_list.append((old_key, None, None))
1767
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1768
parents.discard(('', None))
1769
for parent_path, parent in parents:
1771
if result[parent].kind != 'directory':
1772
raise errors.InconsistentDelta(result.id2path(parent), parent,
1773
'Not a directory, but given children')
1774
except errors.NoSuchId:
1775
raise errors.InconsistentDelta("<unknown>", parent,
1776
"Parent is not present in resulting inventory.")
1777
if result.path2id(parent_path) != parent:
1778
raise errors.InconsistentDelta(parent_path, parent,
1779
"Parent has wrong path %r." % result.path2id(parent_path))
1640
result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
1808
1669
raise errors.BzrError('Duplicate key in inventory: %r\n%r'
1809
1670
% (key, bytes))
1810
1671
info[key] = value
1811
revision_id = intern(info['revision_id'])
1812
root_id = intern(info['root_id'])
1813
search_key_name = intern(info.get('search_key_name', 'plain'))
1814
parent_id_basename_to_file_id = intern(info.get(
1815
'parent_id_basename_to_file_id', None))
1816
if not parent_id_basename_to_file_id.startswith('sha1:'):
1817
raise ValueError('parent_id_basename_to_file_id should be a sha1'
1818
' key not %r' % (parent_id_basename_to_file_id,))
1672
revision_id = info['revision_id']
1673
root_id = info['root_id']
1674
search_key_name = info.get('search_key_name', 'plain')
1675
parent_id_basename_to_file_id = info.get(
1676
'parent_id_basename_to_file_id', None)
1819
1677
id_to_entry = info['id_to_entry']
1820
if not id_to_entry.startswith('sha1:'):
1821
raise ValueError('id_to_entry should be a sha1'
1822
' key not %r' % (id_to_entry,))
1824
1679
result = CHKInventory(search_key_name)
1825
1680
result.revision_id = revision_id
1828
1683
result._search_key_name)
1829
1684
if parent_id_basename_to_file_id is not None:
1830
1685
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1831
chk_store, StaticTuple(parent_id_basename_to_file_id,),
1686
chk_store, (parent_id_basename_to_file_id,),
1832
1687
search_key_func=search_key_func)
1834
1689
result.parent_id_basename_to_file_id = None
1836
result.id_to_entry = chk_map.CHKMap(chk_store,
1837
StaticTuple(id_to_entry,),
1691
result.id_to_entry = chk_map.CHKMap(chk_store, (id_to_entry,),
1838
1692
search_key_func=search_key_func)
1839
1693
if (result.revision_id,) != expected_revision_id:
1840
1694
raise ValueError("Mismatched revision id and expected: %r, %r" %
1853
1707
:param maximum_size: The CHKMap node size limit.
1854
1708
:param search_key_name: The identifier for the search key function
1856
result = klass(search_key_name)
1710
result = CHKInventory(search_key_name)
1857
1711
result.revision_id = inventory.revision_id
1858
1712
result.root_id = inventory.root.file_id
1860
entry_to_bytes = result._entry_to_bytes
1861
parent_id_basename_key = result._parent_id_basename_key
1862
id_to_entry_dict = {}
1863
parent_id_basename_dict = {}
1713
search_key_func = chk_map.search_key_registry.get(search_key_name)
1714
result.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
1715
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1717
result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1718
None, search_key_func)
1719
result.parent_id_basename_to_file_id._root_node.set_maximum_size(
1721
result.parent_id_basename_to_file_id._root_node._key_width = 2
1722
parent_id_delta = []
1864
1723
for path, entry in inventory.iter_entries():
1865
key = StaticTuple(entry.file_id,).intern()
1866
id_to_entry_dict[key] = entry_to_bytes(entry)
1867
p_id_key = parent_id_basename_key(entry)
1868
parent_id_basename_dict[p_id_key] = entry.file_id
1870
result._populate_from_dicts(chk_store, id_to_entry_dict,
1871
parent_id_basename_dict, maximum_size=maximum_size)
1724
file_id_delta.append((None, (entry.file_id,),
1725
result._entry_to_bytes(entry)))
1726
parent_id_delta.append(
1727
(None, result._parent_id_basename_key(entry),
1729
result.id_to_entry.apply_delta(file_id_delta)
1730
result.parent_id_basename_to_file_id.apply_delta(parent_id_delta)
1874
def _populate_from_dicts(self, chk_store, id_to_entry_dict,
1875
parent_id_basename_dict, maximum_size):
1876
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1877
root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
1878
maximum_size=maximum_size, key_width=1,
1879
search_key_func=search_key_func)
1880
self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
1882
root_key = chk_map.CHKMap.from_dict(chk_store,
1883
parent_id_basename_dict,
1884
maximum_size=maximum_size, key_width=2,
1885
search_key_func=search_key_func)
1886
self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1887
root_key, search_key_func)
1889
1733
def _parent_id_basename_key(self, entry):
1890
1734
"""Create a key for a entry in a parent_id_basename_to_file_id index."""
1891
1735
if entry.parent_id is not None:
1892
1736
parent_id = entry.parent_id
1895
return StaticTuple(parent_id, entry.name.encode('utf8')).intern()
1739
return parent_id, entry.name.encode('utf8')
1897
1741
def __getitem__(self, file_id):
1898
1742
"""map a single file_id -> InventoryEntry."""
1905
1749
return self._bytes_to_entry(
1906
self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1])
1750
self.id_to_entry.iteritems([(file_id,)]).next()[1])
1907
1751
except StopIteration:
1908
1752
# really we're passing an inventory, not a tree...
1909
1753
raise errors.NoSuchId(self, file_id)
1911
def _getitems(self, file_ids):
1912
"""Similar to __getitem__, but lets you query for multiple.
1914
The returned order is undefined. And currently if an item doesn't
1915
exist, it isn't included in the output.
1919
for file_id in file_ids:
1920
entry = self._fileid_to_entry_cache.get(file_id, None)
1922
remaining.append(file_id)
1924
result.append(entry)
1925
file_keys = [StaticTuple(f,).intern() for f in remaining]
1926
for file_key, value in self.id_to_entry.iteritems(file_keys):
1927
entry = self._bytes_to_entry(value)
1928
result.append(entry)
1929
self._fileid_to_entry_cache[entry.file_id] = entry
1932
1755
def has_id(self, file_id):
1933
1756
# Perhaps have an explicit 'contains' method on CHKMap ?
1934
1757
if self._fileid_to_entry_cache.get(file_id, None) is not None:
1937
self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1
1759
return len(list(self.id_to_entry.iteritems([(file_id,)]))) == 1
1939
1761
def is_root(self, file_id):
1940
1762
return file_id == self.root_id
1970
1792
self._fileid_to_entry_cache[file_id] = ie
1973
def _preload_cache(self):
1974
"""Make sure all file-ids are in _fileid_to_entry_cache"""
1975
if self._fully_cached:
1976
return # No need to do it again
1977
# The optimal sort order is to use iteritems() directly
1978
cache = self._fileid_to_entry_cache
1979
for key, entry in self.id_to_entry.iteritems():
1981
if file_id not in cache:
1982
ie = self._bytes_to_entry(entry)
1986
last_parent_id = last_parent_ie = None
1987
pid_items = self.parent_id_basename_to_file_id.iteritems()
1988
for key, child_file_id in pid_items:
1989
if key == ('', ''): # This is the root
1990
if child_file_id != self.root_id:
1991
raise ValueError('Data inconsistency detected.'
1992
' We expected data with key ("","") to match'
1993
' the root id, but %s != %s'
1994
% (child_file_id, self.root_id))
1996
parent_id, basename = key
1997
ie = cache[child_file_id]
1998
if parent_id == last_parent_id:
1999
parent_ie = last_parent_ie
2001
parent_ie = cache[parent_id]
2002
if parent_ie.kind != 'directory':
2003
raise ValueError('Data inconsistency detected.'
2004
' An entry in the parent_id_basename_to_file_id map'
2005
' has parent_id {%s} but the kind of that object'
2006
' is %r not "directory"' % (parent_id, parent_ie.kind))
2007
if parent_ie._children is None:
2008
parent_ie._children = {}
2009
basename = basename.decode('utf-8')
2010
if basename in parent_ie._children:
2011
existing_ie = parent_ie._children[basename]
2012
if existing_ie != ie:
2013
raise ValueError('Data inconsistency detected.'
2014
' Two entries with basename %r were found'
2015
' in the parent entry {%s}'
2016
% (basename, parent_id))
2017
if basename != ie.name:
2018
raise ValueError('Data inconsistency detected.'
2019
' In the parent_id_basename_to_file_id map, file_id'
2020
' {%s} is listed as having basename %r, but in the'
2021
' id_to_entry map it is %r'
2022
% (child_file_id, basename, ie.name))
2023
parent_ie._children[basename] = ie
2024
self._fully_cached = True
2026
1795
def iter_changes(self, basis):
2027
1796
"""Generate a Tree.iter_changes change list between this and basis.
2122
1891
delta.append((old_path, new_path, file_id, entry))
2125
def path2id(self, relpath):
1894
def path2id(self, name):
2126
1895
"""See CommonInventory.path2id()."""
2127
# TODO: perhaps support negative hits?
2128
result = self._path_to_fileid_cache.get(relpath, None)
2129
if result is not None:
2131
if isinstance(relpath, basestring):
2132
names = osutils.splitpath(relpath)
2135
current_id = self.root_id
2136
if current_id is None:
2138
parent_id_index = self.parent_id_basename_to_file_id
2140
for basename in names:
2141
if cur_path is None:
2144
cur_path = cur_path + '/' + basename
2145
basename_utf8 = basename.encode('utf8')
2146
file_id = self._path_to_fileid_cache.get(cur_path, None)
2148
key_filter = [StaticTuple(current_id, basename_utf8)]
2149
items = parent_id_index.iteritems(key_filter)
2150
for (parent_id, name_utf8), file_id in items:
2151
if parent_id != current_id or name_utf8 != basename_utf8:
2152
raise errors.BzrError("corrupt inventory lookup! "
2153
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2158
self._path_to_fileid_cache[cur_path] = file_id
2159
current_id = file_id
1896
result = self._path_to_fileid_cache.get(name, None)
1898
result = CommonInventory.path2id(self, name)
1899
self._path_to_fileid_cache[name] = result
2162
1902
def to_lines(self):
2163
1903
"""Serialise the inventory to lines."""
2167
1907
lines.append('search_key_name: %s\n' % (self._search_key_name,))
2168
1908
lines.append("root_id: %s\n" % self.root_id)
2169
1909
lines.append('parent_id_basename_to_file_id: %s\n' %
2170
(self.parent_id_basename_to_file_id.key()[0],))
1910
self.parent_id_basename_to_file_id.key())
2171
1911
lines.append("revision_id: %s\n" % self.revision_id)
2172
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
1912
lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
2174
1914
lines.append("revision_id: %s\n" % self.revision_id)
2175
1915
lines.append("root_id: %s\n" % self.root_id)
2176
1916
if self.parent_id_basename_to_file_id is not None:
2177
1917
lines.append('parent_id_basename_to_file_id: %s\n' %
2178
(self.parent_id_basename_to_file_id.key()[0],))
2179
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
1918
self.parent_id_basename_to_file_id.key())
1919
lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
2188
1928
class CHKInventoryDirectory(InventoryDirectory):
2189
1929
"""A directory in an inventory."""
2191
__slots__ = ['_children', '_chk_inventory']
1931
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
1932
'text_id', 'parent_id', '_children', 'executable',
1933
'revision', 'symlink_target', 'reference_revision',
2193
1936
def __init__(self, file_id, name, parent_id, chk_inventory):
2194
1937
# Don't call InventoryDirectory.__init__ - it isn't right for this
2196
1939
InventoryEntry.__init__(self, file_id, name, parent_id)
2197
1940
self._children = None
1941
self.kind = 'directory'
2198
1942
self._chk_inventory = chk_inventory
2288
_NAME_RE = lazy_regex.lazy_compile(r'^[^/\\]+$')
2290
2034
def is_valid_name(name):
2036
if _NAME_RE is None:
2037
_NAME_RE = re.compile(r'^[^/\\]+$')
2291
2039
return bool(_NAME_RE.match(name))
2294
def _check_delta_unique_ids(delta):
2295
"""Decorate a delta and check that the file ids in it are unique.
2297
:return: A generator over delta.
2301
length = len(ids) + 1
2303
if len(ids) != length:
2304
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2309
def _check_delta_unique_new_paths(delta):
2310
"""Decorate a delta and check that the new paths in it are unique.
2312
:return: A generator over delta.
2316
length = len(paths) + 1
2318
if path is not None:
2320
if len(paths) != length:
2321
raise errors.InconsistentDelta(path, item[2], "repeated path")
2325
def _check_delta_unique_old_paths(delta):
2326
"""Decorate a delta and check that the old paths in it are unique.
2328
:return: A generator over delta.
2332
length = len(paths) + 1
2334
if path is not None:
2336
if len(paths) != length:
2337
raise errors.InconsistentDelta(path, item[2], "repeated path")
2341
def _check_delta_ids_are_valid(delta):
2342
"""Decorate a delta and check that the ids in it are valid.
2344
:return: A generator over delta.
2349
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2350
"entry with file_id None %r" % entry)
2351
if type(item[2]) != str:
2352
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2353
"entry with non bytes file_id %r" % entry)
2357
def _check_delta_ids_match_entry(delta):
2358
"""Decorate a delta and check that the ids in it match the entry.file_id.
2360
:return: A generator over delta.
2364
if entry is not None:
2365
if entry.file_id != item[2]:
2366
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2367
"mismatched id with %r" % entry)
2371
def _check_delta_new_path_entry_both_or_None(delta):
2372
"""Decorate a delta and check that the new_path and entry are paired.
2374
:return: A generator over delta.
2379
if new_path is None and entry is not None:
2380
raise errors.InconsistentDelta(item[0], item[1],
2381
"Entry with no new_path")
2382
if new_path is not None and entry is None:
2383
raise errors.InconsistentDelta(new_path, item[1],
2384
"new_path with no entry")
2388
def mutable_inventory_from_tree(tree):
2389
"""Create a new inventory that has the same contents as a specified tree.
2391
:param tree: Revision tree to create inventory from
2393
entries = tree.iter_entries_by_dir()
2394
inv = Inventory(None, tree.get_revision_id())
2395
for path, inv_entry in entries:
2396
inv.add(inv_entry.copy())