83
119
src/wibble/wibble.c
84
120
>>> i.id2path('2326')
85
121
'src/wibble/wibble.c'
87
TODO: Maybe also keep the full path of the entry, and the children?
88
But those depend on its position within a particular inventory, and
89
it would be nice not to need to hold the backpointer here.
92
# TODO: split InventoryEntry into subclasses for files,
93
# directories, etc etc.
95
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
96
'text_id', 'parent_id', 'children', ]
98
def __init__(self, file_id, name, kind, parent_id, text_id=None):
124
# Constants returned by describe_change()
126
# TODO: These should probably move to some kind of FileChangeDescription
127
# class; that's like what's inside a TreeDelta but we want to be able to
128
# generate them just for one file at a time.
130
MODIFIED_AND_RENAMED = 'modified and renamed'
132
__slots__ = ['file_id', 'revision', 'parent_id', 'name']
134
# Attributes that all InventoryEntry instances are expected to have, but
135
# that don't vary for all kinds of entry. (e.g. symlink_target is only
136
# relevant to InventoryLink, so there's no reason to make every
137
# InventoryFile instance allocate space to hold a value for it.)
138
# Attributes that only vary for files: executable, text_sha1, text_size,
144
# Attributes that only vary for symlinks: symlink_target
145
symlink_target = None
146
# Attributes that only vary for tree-references: reference_revision
147
reference_revision = None
150
def detect_changes(self, old_entry):
151
"""Return a (text_modified, meta_modified) from this to old_entry.
153
_read_tree_state must have been called on self and old_entry prior to
154
calling detect_changes.
158
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
159
output_to, reverse=False):
160
"""Perform a diff between two entries of the same kind."""
162
def parent_candidates(self, previous_inventories):
163
"""Find possible per-file graph parents.
165
This is currently defined by:
166
- Select the last changed revision in the parent inventory.
167
- Do deal with a short lived bug in bzr 0.8's development two entries
168
that have the same last changed but different 'x' bit settings are
171
# revision:ie mapping for each ie found in previous_inventories.
173
# identify candidate head revision ids.
174
for inv in previous_inventories:
175
if inv.has_id(self.file_id):
176
ie = inv[self.file_id]
177
if ie.revision in candidates:
178
# same revision value in two different inventories:
179
# correct possible inconsistencies:
180
# * there was a bug in revision updates with 'x' bit
183
if candidates[ie.revision].executable != ie.executable:
184
candidates[ie.revision].executable = False
185
ie.executable = False
186
except AttributeError:
189
# add this revision as a candidate.
190
candidates[ie.revision] = ie
194
"""Return true if the object this entry represents has textual data.
196
Note that textual data includes binary content.
198
Also note that all entries get weave files created for them.
199
This attribute is primarily used when upgrading from old trees that
200
did not have the weave index for all inventory entries.
204
def __init__(self, file_id, name, parent_id):
99
205
"""Create an InventoryEntry
101
207
The filename must be a single component, relative to the
102
208
parent directory; it cannot be a whole path or relative name.
104
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
210
>>> e = InventoryFile('123', 'hello.c', ROOT_ID)
109
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
215
>>> e = InventoryFile('123', 'src/hello.c', ROOT_ID)
110
216
Traceback (most recent call last):
111
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
217
InvalidEntryName: Invalid entry name: src/hello.c
113
219
if '/' in name or '\\' in name:
114
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
116
self.text_sha1 = None
117
self.text_size = None
220
raise errors.InvalidEntryName(name=name)
119
221
self.file_id = file_id
122
self.text_id = text_id
123
224
self.parent_id = parent_id
124
if kind == 'directory':
129
raise BzrError("unhandled entry kind %r" % kind)
226
def kind_character(self):
227
"""Return a short kind indicator useful for appending to names."""
228
raise errors.BzrError('unknown kind %r' % self.kind)
230
known_kinds = ('file', 'directory', 'symlink')
133
232
def sorted_children(self):
134
l = self.children.items()
140
other = InventoryEntry(self.file_id, self.name, self.kind,
141
self.parent_id, text_id=self.text_id)
142
other.text_sha1 = self.text_sha1
143
other.text_size = self.text_size
233
return sorted(self.children.items())
236
def versionable_kind(kind):
237
return (kind in ('file', 'directory', 'symlink', 'tree-reference'))
239
def check(self, checker, rev_id, inv):
240
"""Check this inventory entry is intact.
242
This is a template method, override _check for kind specific
245
:param checker: Check object providing context for the checks;
246
can be used to find out what parts of the repository have already
248
:param rev_id: Revision id from which this InventoryEntry was loaded.
249
Not necessarily the last-changed revision for this file.
250
:param inv: Inventory from which the entry was loaded.
252
if self.parent_id is not None:
253
if not inv.has_id(self.parent_id):
254
raise errors.BzrCheckError(
255
'missing parent {%s} in inventory for revision {%s}' % (
256
self.parent_id, rev_id))
257
checker._add_entry_to_text_key_references(inv, self)
258
self._check(checker, rev_id)
260
def _check(self, checker, rev_id):
261
"""Check this inventory entry for kind specific errors."""
262
checker._report_items.append(
263
'unknown entry kind %r in revision {%s}' % (self.kind, rev_id))
266
"""Clone this inventory entry."""
267
raise NotImplementedError
270
def describe_change(old_entry, new_entry):
271
"""Describe the change between old_entry and this.
273
This smells of being an InterInventoryEntry situation, but as its
274
the first one, we're making it a static method for now.
276
An entry with a different parent, or different name is considered
277
to be renamed. Reparenting is an internal detail.
278
Note that renaming the parent does not trigger a rename for the
281
# TODO: Perhaps return an object rather than just a string
282
if old_entry is new_entry:
283
# also the case of both being None
285
elif old_entry is None:
287
elif new_entry is None:
289
if old_entry.kind != new_entry.kind:
291
text_modified, meta_modified = new_entry.detect_changes(old_entry)
292
if text_modified or meta_modified:
296
# TODO 20060511 (mbp, rbc) factor out 'detect_rename' here.
297
if old_entry.parent_id != new_entry.parent_id:
299
elif old_entry.name != new_entry.name:
303
if renamed and not modified:
304
return InventoryEntry.RENAMED
305
if modified and not renamed:
307
if modified and renamed:
308
return InventoryEntry.MODIFIED_AND_RENAMED
312
return ("%s(%r, %r, parent_id=%r, revision=%r)"
313
% (self.__class__.__name__,
319
def __eq__(self, other):
321
# For the case when objects are cached
323
if not isinstance(other, InventoryEntry):
324
return NotImplemented
326
return ((self.file_id == other.file_id)
327
and (self.name == other.name)
328
and (other.symlink_target == self.symlink_target)
329
and (self.text_sha1 == other.text_sha1)
330
and (self.text_size == other.text_size)
331
and (self.text_id == other.text_id)
332
and (self.parent_id == other.parent_id)
333
and (self.kind == other.kind)
334
and (self.revision == other.revision)
335
and (self.executable == other.executable)
336
and (self.reference_revision == other.reference_revision)
339
def __ne__(self, other):
340
return not (self == other)
343
raise ValueError('not hashable')
345
def _unchanged(self, previous_ie):
346
"""Has this entry changed relative to previous_ie.
348
This method should be overridden in child classes.
351
# different inv parent
352
if previous_ie.parent_id != self.parent_id:
355
elif previous_ie.name != self.name:
357
elif previous_ie.kind != self.kind:
361
def _read_tree_state(self, path, work_tree):
362
"""Populate fields in the inventory entry from the given tree.
364
Note that this should be modified to be a noop on virtual trees
365
as all entries created there are prepopulated.
367
# TODO: Rather than running this manually, we should check the
368
# working sha1 and other expensive properties when they're
369
# first requested, or preload them if they're already known
370
pass # nothing to do by default
372
def _forget_tree_state(self):
376
class InventoryDirectory(InventoryEntry):
377
"""A directory in an inventory."""
379
__slots__ = ['children']
383
def _check(self, checker, rev_id):
384
"""See InventoryEntry._check"""
385
# In non rich root repositories we do not expect a file graph for the
387
if self.name == '' and not checker.rich_roots:
389
# Directories are stored as an empty file, but the file should exist
390
# to provide a per-fileid log. The hash of every directory content is
391
# "da..." below (the sha1sum of '').
392
checker.add_pending_item(rev_id,
393
('texts', self.file_id, self.revision), 'text',
394
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
397
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
398
other.revision = self.revision
144
399
# note that children are *not* copied; they're pulled across when
145
400
# others are added
150
return ("%s(%r, %r, kind=%r, parent_id=%r)"
151
% (self.__class__.__name__,
158
def to_element(self):
159
"""Convert to XML element"""
160
from bzrlib.xml import Element
164
e.set('name', self.name)
165
e.set('file_id', self.file_id)
166
e.set('kind', self.kind)
168
if self.text_size != None:
169
e.set('text_size', '%d' % self.text_size)
171
for f in ['text_id', 'text_sha1']:
176
# to be conservative, we don't externalize the root pointers
177
# for now, leaving them as null in the xml form. in a future
178
# version it will be implied by nested elements.
179
if self.parent_id != ROOT_ID:
180
assert isinstance(self.parent_id, basestring)
181
e.set('parent_id', self.parent_id)
188
def from_element(cls, elt):
189
assert elt.tag == 'entry'
191
## original format inventories don't have a parent_id for
192
## nodes in the root directory, but it's cleaner to use one
194
parent_id = elt.get('parent_id')
195
if parent_id == None:
198
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
199
self.text_id = elt.get('text_id')
200
self.text_sha1 = elt.get('text_sha1')
202
## mutter("read inventoryentry: %r" % (elt.attrib))
204
v = elt.get('text_size')
205
self.text_size = v and int(v)
210
from_element = classmethod(from_element)
212
def __eq__(self, other):
213
if not isinstance(other, InventoryEntry):
214
return NotImplemented
216
return (self.file_id == other.file_id) \
217
and (self.name == other.name) \
218
and (self.text_sha1 == other.text_sha1) \
219
and (self.text_size == other.text_size) \
220
and (self.text_id == other.text_id) \
221
and (self.parent_id == other.parent_id) \
222
and (self.kind == other.kind)
225
def __ne__(self, other):
226
return not (self == other)
229
raise ValueError('not hashable')
233
class RootEntry(InventoryEntry):
234
def __init__(self, file_id):
235
self.file_id = file_id
403
def __init__(self, file_id, name, parent_id):
404
super(InventoryDirectory, self).__init__(file_id, name, parent_id)
236
405
self.children = {}
237
self.kind = 'root_directory'
238
self.parent_id = None
241
def __eq__(self, other):
242
if not isinstance(other, RootEntry):
243
return NotImplemented
245
return (self.file_id == other.file_id) \
246
and (self.children == other.children)
250
class Inventory(object):
251
"""Inventory of versioned files in a tree.
253
This describes which file_id is present at each point in the tree,
254
and possibly the SHA-1 or other information about the file.
407
def kind_character(self):
408
"""See InventoryEntry.kind_character."""
412
class InventoryFile(InventoryEntry):
413
"""A file in an inventory."""
415
__slots__ = ['text_sha1', 'text_size', 'text_id', 'executable']
419
def __init__(self, file_id, name, parent_id):
420
super(InventoryFile, self).__init__(file_id, name, parent_id)
421
self.text_sha1 = None
422
self.text_size = None
424
self.executable = False
426
def _check(self, checker, tree_revision_id):
427
"""See InventoryEntry._check"""
428
# TODO: check size too.
429
checker.add_pending_item(tree_revision_id,
430
('texts', self.file_id, self.revision), 'text',
432
if self.text_size is None:
433
checker._report_items.append(
434
'fileid {%s} in {%s} has None for text_size' % (self.file_id,
438
other = InventoryFile(self.file_id, self.name, self.parent_id)
439
other.executable = self.executable
440
other.text_id = self.text_id
441
other.text_sha1 = self.text_sha1
442
other.text_size = self.text_size
443
other.revision = self.revision
446
def detect_changes(self, old_entry):
447
"""See InventoryEntry.detect_changes."""
448
text_modified = (self.text_sha1 != old_entry.text_sha1)
449
meta_modified = (self.executable != old_entry.executable)
450
return text_modified, meta_modified
452
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
453
output_to, reverse=False):
454
"""See InventoryEntry._diff."""
455
from bzrlib.diff import DiffText
456
from_file_id = self.file_id
458
to_file_id = to_entry.file_id
462
to_file_id, from_file_id = from_file_id, to_file_id
463
tree, to_tree = to_tree, tree
464
from_label, to_label = to_label, from_label
465
differ = DiffText(tree, to_tree, output_to, 'utf-8', '', '',
467
return differ.diff_text(from_file_id, to_file_id, from_label, to_label)
470
"""See InventoryEntry.has_text."""
473
def kind_character(self):
474
"""See InventoryEntry.kind_character."""
477
def _read_tree_state(self, path, work_tree):
478
"""See InventoryEntry._read_tree_state."""
479
self.text_sha1 = work_tree.get_file_sha1(self.file_id, path=path)
480
# FIXME: 20050930 probe for the text size when getting sha1
481
# in _read_tree_state
482
self.executable = work_tree.is_executable(self.file_id, path=path)
485
return ("%s(%r, %r, parent_id=%r, sha1=%r, len=%s, revision=%s)"
486
% (self.__class__.__name__,
494
def _forget_tree_state(self):
495
self.text_sha1 = None
497
def _unchanged(self, previous_ie):
498
"""See InventoryEntry._unchanged."""
499
compatible = super(InventoryFile, self)._unchanged(previous_ie)
500
if self.text_sha1 != previous_ie.text_sha1:
503
# FIXME: 20050930 probe for the text size when getting sha1
504
# in _read_tree_state
505
self.text_size = previous_ie.text_size
506
if self.executable != previous_ie.executable:
511
class InventoryLink(InventoryEntry):
512
"""A file in an inventory."""
514
__slots__ = ['symlink_target']
518
def __init__(self, file_id, name, parent_id):
519
super(InventoryLink, self).__init__(file_id, name, parent_id)
520
self.symlink_target = None
522
def _check(self, checker, tree_revision_id):
523
"""See InventoryEntry._check"""
524
if self.symlink_target is None:
525
checker._report_items.append(
526
'symlink {%s} has no target in revision {%s}'
527
% (self.file_id, tree_revision_id))
528
# Symlinks are stored as ''
529
checker.add_pending_item(tree_revision_id,
530
('texts', self.file_id, self.revision), 'text',
531
'da39a3ee5e6b4b0d3255bfef95601890afd80709')
534
other = InventoryLink(self.file_id, self.name, self.parent_id)
535
other.symlink_target = self.symlink_target
536
other.revision = self.revision
539
def detect_changes(self, old_entry):
540
"""See InventoryEntry.detect_changes."""
541
# FIXME: which _modified field should we use ? RBC 20051003
542
text_modified = (self.symlink_target != old_entry.symlink_target)
544
trace.mutter(" symlink target changed")
545
meta_modified = False
546
return text_modified, meta_modified
548
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
549
output_to, reverse=False):
550
"""See InventoryEntry._diff."""
551
from bzrlib.diff import DiffSymlink
552
old_target = self.symlink_target
553
if to_entry is not None:
554
new_target = to_entry.symlink_target
563
new_target, old_target = old_target, new_target
564
differ = DiffSymlink(old_tree, new_tree, output_to)
565
return differ.diff_symlink(old_target, new_target)
567
def kind_character(self):
568
"""See InventoryEntry.kind_character."""
571
def _read_tree_state(self, path, work_tree):
572
"""See InventoryEntry._read_tree_state."""
573
self.symlink_target = work_tree.get_symlink_target(self.file_id)
575
def _forget_tree_state(self):
576
self.symlink_target = None
578
def _unchanged(self, previous_ie):
579
"""See InventoryEntry._unchanged."""
580
compatible = super(InventoryLink, self)._unchanged(previous_ie)
581
if self.symlink_target != previous_ie.symlink_target:
586
class TreeReference(InventoryEntry):
588
__slots__ = ['reference_revision']
590
kind = 'tree-reference'
592
def __init__(self, file_id, name, parent_id, revision=None,
593
reference_revision=None):
594
InventoryEntry.__init__(self, file_id, name, parent_id)
595
self.revision = revision
596
self.reference_revision = reference_revision
599
return TreeReference(self.file_id, self.name, self.parent_id,
600
self.revision, self.reference_revision)
602
def _read_tree_state(self, path, work_tree):
603
"""Populate fields in the inventory entry from the given tree.
605
self.reference_revision = work_tree.get_reference_revision(
608
def _forget_tree_state(self):
609
self.reference_revision = None
611
def _unchanged(self, previous_ie):
612
"""See InventoryEntry._unchanged."""
613
compatible = super(TreeReference, self)._unchanged(previous_ie)
614
if self.reference_revision != previous_ie.reference_revision:
619
class CommonInventory(object):
620
"""Basic inventory logic, defined in terms of primitives like has_id.
622
An inventory is the metadata about the contents of a tree.
624
This is broadly a map from file_id to entries such as directories, files,
625
symlinks and tree references. Each entry maintains its own metadata like
626
SHA1 and length for files, or children for a directory.
255
628
Entries can be looked up either by path or by file_id.
257
The inventory represents a typical unix file tree, with
258
directories containing files and subdirectories. We never store
259
the full path to a file, because renaming a directory implicitly
260
moves all of its contents. This class internally maintains a
261
lookup tree that allows the children under a directory to be
264
630
InventoryEntry objects must not be modified after they are
265
631
inserted, other than through the Inventory API.
267
>>> inv = Inventory()
268
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
269
>>> inv['123-123'].name
272
May be treated as an iterator or set to look up file ids:
634
@deprecated_method(deprecated_in((2, 4, 0)))
635
def __contains__(self, file_id):
636
"""True if this entry contains a file with given id.
638
>>> inv = Inventory()
639
>>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID))
640
InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
641
>>> inv.has_id('123')
643
>>> inv.has_id('456')
646
Note that this method along with __iter__ are not encouraged for use as
647
they are less clear than specific query methods - they may be rmeoved
650
return self.has_id(file_id)
652
def has_filename(self, filename):
653
return bool(self.path2id(filename))
655
def id2path(self, file_id):
656
"""Return as a string the path to file_id.
659
>>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
660
>>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
661
>>> print i.id2path('foo-id')
664
:raises NoSuchId: If file_id is not present in the inventory.
666
# get all names, skipping root
667
return '/'.join(reversed(
668
[parent.name for parent in
669
self._iter_file_id_parents(file_id)][:-1]))
671
def iter_entries(self, from_dir=None, recursive=True):
672
"""Return (path, entry) pairs, in order by name.
674
:param from_dir: if None, start from the root,
675
otherwise start from this directory (either file-id or entry)
676
:param recursive: recurse into directories or not
679
if self.root is None:
683
elif isinstance(from_dir, basestring):
684
from_dir = self[from_dir]
686
# unrolling the recursive called changed the time from
687
# 440ms/663ms (inline/total) to 116ms/116ms
688
children = from_dir.children.items()
691
for name, ie in children:
694
children = collections.deque(children)
695
stack = [(u'', children)]
697
from_dir_relpath, children = stack[-1]
700
name, ie = children.popleft()
702
# we know that from_dir_relpath never ends in a slash
703
# and 'f' doesn't begin with one, we can do a string op, rather
704
# than the checks of pathjoin(), though this means that all paths
706
path = from_dir_relpath + '/' + name
710
if ie.kind != 'directory':
713
# But do this child first
714
new_children = ie.children.items()
716
new_children = collections.deque(new_children)
717
stack.append((path, new_children))
718
# Break out of inner loop, so that we start outer loop with child
721
# if we finished all children, pop it off the stack
724
def _preload_cache(self):
725
"""Populate any caches, we are about to access all items.
727
The default implementation does nothing, because CommonInventory doesn't
274
>>> bool(inv.path2id('hello.c'))
279
May also look up by name:
281
>>> [x[0] for x in inv.iter_entries()]
283
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
284
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
286
def __init__(self, root_id=ROOT_ID):
287
"""Create or read an inventory.
289
If a working directory is specified, the inventory is read
290
from there. If the file is specified, read from that. If not,
291
the inventory is created empty.
293
The inventory is created with a default root directory, with
732
def iter_entries_by_dir(self, from_dir=None, specific_file_ids=None,
733
yield_parents=False):
734
"""Iterate over the entries in a directory first order.
736
This returns all entries for a directory before returning
737
the entries for children of a directory. This is not
738
lexicographically sorted order, and is a hybrid between
739
depth-first and breadth-first.
741
:param yield_parents: If True, yield the parents from the root leading
742
down to specific_file_ids that have been requested. This has no
743
impact if specific_file_ids is None.
744
:return: This yields (path, entry) pairs
296
# We are letting Branch(init=True) create a unique inventory
297
# root id. Rather than generating a random one here.
299
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
300
self.root = RootEntry(root_id)
301
self._byid = {self.root.file_id: self.root}
305
return iter(self._byid)
309
"""Returns number of entries."""
310
return len(self._byid)
313
def iter_entries(self, from_dir=None):
314
"""Return (path, entry) pairs, in order by name."""
746
if specific_file_ids and not isinstance(specific_file_ids, set):
747
specific_file_ids = set(specific_file_ids)
748
# TODO? Perhaps this should return the from_dir so that the root is
749
# yielded? or maybe an option?
750
if from_dir is None and specific_file_ids is None:
751
# They are iterating from the root, and have not specified any
752
# specific entries to look at. All current callers fully consume the
753
# iterator, so we can safely assume we are accessing all entries
754
self._preload_cache()
756
if self.root is None:
758
# Optimize a common case
759
if (not yield_parents and specific_file_ids is not None and
760
len(specific_file_ids) == 1):
761
file_id = list(specific_file_ids)[0]
762
if self.has_id(file_id):
763
yield self.id2path(file_id), self[file_id]
317
765
from_dir = self.root
766
if (specific_file_ids is None or yield_parents or
767
self.root.file_id in specific_file_ids):
318
769
elif isinstance(from_dir, basestring):
319
from_dir = self._byid[from_dir]
321
kids = from_dir.children.items()
323
for name, ie in kids:
325
if ie.kind == 'directory':
326
for cn, cie in self.iter_entries(from_dir=ie.file_id):
327
yield os.path.join(name, cn), cie
770
from_dir = self[from_dir]
772
if specific_file_ids is not None:
773
# TODO: jam 20070302 This could really be done as a loop rather
774
# than a bunch of recursive calls.
777
def add_ancestors(file_id):
778
if not byid.has_id(file_id):
780
parent_id = byid[file_id].parent_id
781
if parent_id is None:
783
if parent_id not in parents:
784
parents.add(parent_id)
785
add_ancestors(parent_id)
786
for file_id in specific_file_ids:
787
add_ancestors(file_id)
791
stack = [(u'', from_dir)]
793
cur_relpath, cur_dir = stack.pop()
796
for child_name, child_ie in sorted(cur_dir.children.iteritems()):
798
child_relpath = cur_relpath + child_name
800
if (specific_file_ids is None or
801
child_ie.file_id in specific_file_ids or
802
(yield_parents and child_ie.file_id in parents)):
803
yield child_relpath, child_ie
805
if child_ie.kind == 'directory':
806
if parents is None or child_ie.file_id in parents:
807
child_dirs.append((child_relpath+'/', child_ie))
808
stack.extend(reversed(child_dirs))
810
def _make_delta(self, old):
811
"""Make an inventory delta from two inventories."""
814
adds = new_ids - old_ids
815
deletes = old_ids - new_ids
816
common = old_ids.intersection(new_ids)
818
for file_id in deletes:
819
delta.append((old.id2path(file_id), None, file_id, None))
821
delta.append((None, self.id2path(file_id), file_id, self[file_id]))
822
for file_id in common:
823
if old[file_id] != self[file_id]:
824
delta.append((old.id2path(file_id), self.id2path(file_id),
825
file_id, self[file_id]))
828
def make_entry(self, kind, name, parent_id, file_id=None):
829
"""Simple thunk to bzrlib.inventory.make_entry."""
830
return make_entry(kind, name, parent_id, file_id)
330
832
def entries(self):
331
833
"""Return list of (path, ie) for all entries except the root.
337
839
kids = dir_ie.children.items()
339
841
for name, ie in kids:
340
child_path = os.path.join(dir_path, name)
842
child_path = osutils.pathjoin(dir_path, name)
341
843
accum.append((child_path, ie))
342
844
if ie.kind == 'directory':
343
845
descend(ie, child_path)
345
descend(self.root, '')
847
if self.root is not None:
848
descend(self.root, u'')
349
851
def directories(self):
350
852
"""Return (path, entry) pairs for all directories, including the root.
353
855
def descend(parent_ie, parent_path):
354
856
accum.append((parent_path, parent_ie))
356
858
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
359
861
for name, child_ie in kids:
360
child_path = os.path.join(parent_path, name)
862
child_path = osutils.pathjoin(parent_path, name)
361
863
descend(child_ie, child_path)
362
descend(self.root, '')
864
descend(self.root, u'')
867
def path2id(self, relpath):
868
"""Walk down through directories to return entry of last component.
870
:param relpath: may be either a list of path components, or a single
871
string, in which case it is automatically split.
873
This returns the entry of the last component in the path,
874
which may be either a file or a directory.
876
Returns None IFF the path is not found.
878
if isinstance(relpath, basestring):
879
names = osutils.splitpath(relpath)
885
except errors.NoSuchId:
886
# root doesn't exist yet so nothing else can
892
children = getattr(parent, 'children', None)
901
return parent.file_id
903
def filter(self, specific_fileids):
904
"""Get an inventory view filtered against a set of file-ids.
906
Children of directories and parents are included.
908
The result may or may not reference the underlying inventory
909
so it should be treated as immutable.
911
interesting_parents = set()
912
for fileid in specific_fileids:
914
interesting_parents.update(self.get_idpath(fileid))
915
except errors.NoSuchId:
916
# This fileid is not in the inventory - that's ok
918
entries = self.iter_entries()
919
if self.root is None:
920
return Inventory(root_id=None)
921
other = Inventory(entries.next()[1].file_id)
922
other.root.revision = self.root.revision
923
other.revision_id = self.revision_id
924
directories_to_expand = set()
925
for path, entry in entries:
926
file_id = entry.file_id
927
if (file_id in specific_fileids
928
or entry.parent_id in directories_to_expand):
929
if entry.kind == 'directory':
930
directories_to_expand.add(file_id)
931
elif file_id not in interesting_parents:
933
other.add(entry.copy())
936
def get_idpath(self, file_id):
937
"""Return a list of file_ids for the path to an entry.
939
The list contains one element for each directory followed by
940
the id of the file itself. So the length of the returned list
941
is equal to the depth of the file in the tree, counting the
942
root directory as depth 1.
945
for parent in self._iter_file_id_parents(file_id):
946
p.insert(0, parent.file_id)
950
class Inventory(CommonInventory):
951
"""Mutable dict based in-memory inventory.
953
We never store the full path to a file, because renaming a directory
954
implicitly moves all of its contents. This class internally maintains a
955
lookup tree that allows the children under a directory to be
958
>>> inv = Inventory()
959
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
960
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
961
>>> inv['123-123'].name
964
Id's may be looked up from paths:
966
>>> inv.path2id('hello.c')
968
>>> inv.has_id('123-123')
971
There are iterators over the contents:
973
>>> [entry[0] for entry in inv.iter_entries()]
977
def __init__(self, root_id=ROOT_ID, revision_id=None):
978
"""Create or read an inventory.
980
If a working directory is specified, the inventory is read
981
from there. If the file is specified, read from that. If not,
982
the inventory is created empty.
984
The inventory is created with a default root directory, with
987
if root_id is not None:
988
self._set_root(InventoryDirectory(root_id, u'', None))
992
self.revision_id = revision_id
995
# More than one page of ouput is not useful anymore to debug
998
contents = repr(self._byid)
999
if len(contents) > max_len:
1000
contents = contents[:(max_len-len(closing))] + closing
1001
return "<Inventory object at %x, contents=%r>" % (id(self), contents)
1003
def apply_delta(self, delta):
1004
"""Apply a delta to this inventory.
1006
See the inventory developers documentation for the theory behind
1009
If delta application fails the inventory is left in an indeterminate
1010
state and must not be used.
1012
:param delta: A list of changes to apply. After all the changes are
1013
applied the final inventory must be internally consistent, but it
1014
is ok to supply changes which, if only half-applied would have an
1015
invalid result - such as supplying two changes which rename two
1016
files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry),
1017
('B', 'A', 'B-id', b_entry)].
1019
Each change is a tuple, of the form (old_path, new_path, file_id,
1022
When new_path is None, the change indicates the removal of an entry
1023
from the inventory and new_entry will be ignored (using None is
1024
appropriate). If new_path is not None, then new_entry must be an
1025
InventoryEntry instance, which will be incorporated into the
1026
inventory (and replace any existing entry with the same file id).
1028
When old_path is None, the change indicates the addition of
1029
a new entry to the inventory.
1031
When neither new_path nor old_path are None, the change is a
1032
modification to an entry, such as a rename, reparent, kind change
1035
The children attribute of new_entry is ignored. This is because
1036
this method preserves children automatically across alterations to
1037
the parent of the children, and cases where the parent id of a
1038
child is changing require the child to be passed in as a separate
1039
change regardless. E.g. in the recursive deletion of a directory -
1040
the directory's children must be included in the delta, or the
1041
final inventory will be invalid.
1043
Note that a file_id must only appear once within a given delta.
1044
An AssertionError is raised otherwise.
1046
# Check that the delta is legal. It would be nice if this could be
1047
# done within the loops below but it's safer to validate the delta
1048
# before starting to mutate the inventory, as there isn't a rollback
1050
list(_check_delta_unique_ids(_check_delta_unique_new_paths(
1051
_check_delta_unique_old_paths(_check_delta_ids_match_entry(
1052
_check_delta_ids_are_valid(
1053
_check_delta_new_path_entry_both_or_None(
1057
# Remove all affected items which were in the original inventory,
1058
# starting with the longest paths, thus ensuring parents are examined
1059
# after their children, which means that everything we examine has no
1060
# modified children remaining by the time we examine it.
1061
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1062
if op is not None), reverse=True):
1063
# Preserve unaltered children of file_id for later reinsertion.
1064
file_id_children = getattr(self[file_id], 'children', {})
1065
if len(file_id_children):
1066
children[file_id] = file_id_children
1067
if self.id2path(file_id) != old_path:
1068
raise errors.InconsistentDelta(old_path, file_id,
1069
"Entry was at wrong other path %r." % self.id2path(file_id))
1070
# Remove file_id and the unaltered children. If file_id is not
1071
# being deleted it will be reinserted back later.
1072
self.remove_recursive_id(file_id)
1073
# Insert all affected which should be in the new inventory, reattaching
1074
# their children if they had any. This is done from shortest path to
1075
# longest, ensuring that items which were modified and whose parents in
1076
# the resulting inventory were also modified, are inserted after their
1078
for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
1079
delta if np is not None):
1080
if new_entry.kind == 'directory':
1081
# Pop the child which to allow detection of children whose
1082
# parents were deleted and which were not reattached to a new
1084
replacement = InventoryDirectory(new_entry.file_id,
1085
new_entry.name, new_entry.parent_id)
1086
replacement.revision = new_entry.revision
1087
replacement.children = children.pop(replacement.file_id, {})
1088
new_entry = replacement
1091
except errors.DuplicateFileId:
1092
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1093
"New id is already present in target.")
1094
except AttributeError:
1095
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1096
"Parent is not a directory.")
1097
if self.id2path(new_entry.file_id) != new_path:
1098
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1099
"New path is not consistent with parent path.")
1101
# Get the parent id that was deleted
1102
parent_id, children = children.popitem()
1103
raise errors.InconsistentDelta("<deleted>", parent_id,
1104
"The file id was deleted but its children were not deleted.")
1106
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1107
propagate_caches=False):
1108
"""See CHKInventory.create_by_apply_delta()"""
1109
new_inv = self.copy()
1110
new_inv.apply_delta(inventory_delta)
1111
new_inv.revision_id = new_revision_id
1114
def _set_root(self, ie):
1116
self._byid = {self.root.file_id: self.root}
1119
# TODO: jam 20051218 Should copy also copy the revision_id?
1120
entries = self.iter_entries()
1121
if self.root is None:
1122
return Inventory(root_id=None)
1123
other = Inventory(entries.next()[1].file_id)
1124
other.root.revision = self.root.revision
1125
# copy recursively so we know directories will be added before
1126
# their children. There are more efficient ways than this...
1127
for path, entry in entries:
1128
other.add(entry.copy())
1132
"""Iterate over all file-ids."""
1133
return iter(self._byid)
1135
def iter_just_entries(self):
1136
"""Iterate over all entries.
367
def __contains__(self, file_id):
368
"""True if this entry contains a file with given id.
370
>>> inv = Inventory()
371
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
1138
Unlike iter_entries(), just the entries are returned (not (path, ie))
1139
and the order of entries is undefined.
1141
XXX: We may not want to merge this into bzr.dev.
377
return file_id in self._byid
1143
if self.root is None:
1145
for _, ie in self._byid.iteritems():
1149
"""Returns number of entries."""
1150
return len(self._byid)
380
1152
def __getitem__(self, file_id):
381
1153
"""Return the entry for given file_id.
383
1155
>>> inv = Inventory()
384
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
1156
>>> inv.add(InventoryFile('123123', 'hello.c', ROOT_ID))
1157
InventoryFile('123123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
385
1158
>>> inv['123123'].name
389
1162
return self._byid[file_id]
390
1163
except KeyError:
392
raise BzrError("can't look up file_id None")
394
raise BzrError("file_id {%s} not in inventory" % file_id)
1164
# really we're passing an inventory, not a tree...
1165
raise errors.NoSuchId(self, file_id)
397
1167
def get_file_kind(self, file_id):
398
1168
return self._byid[file_id].kind
637
1366
del old_parent.children[file_ie.name]
638
1367
new_parent.children[new_name] = file_ie
640
1369
file_ie.name = new_name
641
1370
file_ie.parent_id = new_parent_id
646
_NAME_RE = re.compile(r'^[^/\\]+$')
1372
def is_root(self, file_id):
1373
return self.root is not None and file_id == self.root.file_id
1376
class CHKInventory(CommonInventory):
1377
"""An inventory persisted in a CHK store.
1379
By design, a CHKInventory is immutable so many of the methods
1380
supported by Inventory - add, rename, apply_delta, etc - are *not*
1381
supported. To create a new CHKInventory, use create_by_apply_delta()
1382
or from_inventory(), say.
1384
Internally, a CHKInventory has one or two CHKMaps:
1386
* id_to_entry - a map from (file_id,) => InventoryEntry as bytes
1387
* parent_id_basename_to_file_id - a map from (parent_id, basename_utf8)
1390
The second map is optional and not present in early CHkRepository's.
1392
No caching is performed: every method call or item access will perform
1393
requests to the storage layer. As such, keep references to objects you
1397
def __init__(self, search_key_name):
1398
CommonInventory.__init__(self)
1399
self._fileid_to_entry_cache = {}
1400
self._fully_cached = False
1401
self._path_to_fileid_cache = {}
1402
self._search_key_name = search_key_name
1405
def __eq__(self, other):
1406
"""Compare two sets by comparing their contents."""
1407
if not isinstance(other, CHKInventory):
1408
return NotImplemented
1410
this_key = self.id_to_entry.key()
1411
other_key = other.id_to_entry.key()
1412
this_pid_key = self.parent_id_basename_to_file_id.key()
1413
other_pid_key = other.parent_id_basename_to_file_id.key()
1414
if None in (this_key, this_pid_key, other_key, other_pid_key):
1416
return this_key == other_key and this_pid_key == other_pid_key
1418
def _entry_to_bytes(self, entry):
1419
"""Serialise entry as a single bytestring.
1421
:param Entry: An inventory entry.
1422
:return: A bytestring for the entry.
1425
ENTRY ::= FILE | DIR | SYMLINK | TREE
1426
FILE ::= "file: " COMMON SEP SHA SEP SIZE SEP EXECUTABLE
1427
DIR ::= "dir: " COMMON
1428
SYMLINK ::= "symlink: " COMMON SEP TARGET_UTF8
1429
TREE ::= "tree: " COMMON REFERENCE_REVISION
1430
COMMON ::= FILE_ID SEP PARENT_ID SEP NAME_UTF8 SEP REVISION
1433
if entry.parent_id is not None:
1434
parent_str = entry.parent_id
1437
name_str = entry.name.encode("utf8")
1438
if entry.kind == 'file':
1439
if entry.executable:
1443
return "file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % (
1444
entry.file_id, parent_str, name_str, entry.revision,
1445
entry.text_sha1, entry.text_size, exec_str)
1446
elif entry.kind == 'directory':
1447
return "dir: %s\n%s\n%s\n%s" % (
1448
entry.file_id, parent_str, name_str, entry.revision)
1449
elif entry.kind == 'symlink':
1450
return "symlink: %s\n%s\n%s\n%s\n%s" % (
1451
entry.file_id, parent_str, name_str, entry.revision,
1452
entry.symlink_target.encode("utf8"))
1453
elif entry.kind == 'tree-reference':
1454
return "tree: %s\n%s\n%s\n%s\n%s" % (
1455
entry.file_id, parent_str, name_str, entry.revision,
1456
entry.reference_revision)
1458
raise ValueError("unknown kind %r" % entry.kind)
1460
def _expand_fileids_to_parents_and_children(self, file_ids):
1461
"""Give a more wholistic view starting with the given file_ids.
1463
For any file_id which maps to a directory, we will include all children
1464
of that directory. We will also include all directories which are
1465
parents of the given file_ids, but we will not include their children.
1472
fringle # fringle-id
1476
if given [foo-id] we will include
1477
TREE_ROOT as interesting parents
1479
foo-id, baz-id, frob-id, fringle-id
1483
# TODO: Pre-pass over the list of fileids to see if anything is already
1484
# deserialized in self._fileid_to_entry_cache
1486
directories_to_expand = set()
1487
children_of_parent_id = {}
1488
# It is okay if some of the fileids are missing
1489
for entry in self._getitems(file_ids):
1490
if entry.kind == 'directory':
1491
directories_to_expand.add(entry.file_id)
1492
interesting.add(entry.parent_id)
1493
children_of_parent_id.setdefault(entry.parent_id, set()
1494
).add(entry.file_id)
1496
# Now, interesting has all of the direct parents, but not the
1497
# parents of those parents. It also may have some duplicates with
1499
remaining_parents = interesting.difference(file_ids)
1500
# When we hit the TREE_ROOT, we'll get an interesting parent of None,
1501
# but we don't actually want to recurse into that
1502
interesting.add(None) # this will auto-filter it in the loop
1503
remaining_parents.discard(None)
1504
while remaining_parents:
1505
next_parents = set()
1506
for entry in self._getitems(remaining_parents):
1507
next_parents.add(entry.parent_id)
1508
children_of_parent_id.setdefault(entry.parent_id, set()
1509
).add(entry.file_id)
1510
# Remove any search tips we've already processed
1511
remaining_parents = next_parents.difference(interesting)
1512
interesting.update(remaining_parents)
1513
# We should probably also .difference(directories_to_expand)
1514
interesting.update(file_ids)
1515
interesting.discard(None)
1516
while directories_to_expand:
1517
# Expand directories by looking in the
1518
# parent_id_basename_to_file_id map
1519
keys = [StaticTuple(f,).intern() for f in directories_to_expand]
1520
directories_to_expand = set()
1521
items = self.parent_id_basename_to_file_id.iteritems(keys)
1522
next_file_ids = set([item[1] for item in items])
1523
next_file_ids = next_file_ids.difference(interesting)
1524
interesting.update(next_file_ids)
1525
for entry in self._getitems(next_file_ids):
1526
if entry.kind == 'directory':
1527
directories_to_expand.add(entry.file_id)
1528
children_of_parent_id.setdefault(entry.parent_id, set()
1529
).add(entry.file_id)
1530
return interesting, children_of_parent_id
1532
def filter(self, specific_fileids):
1533
"""Get an inventory view filtered against a set of file-ids.
1535
Children of directories and parents are included.
1537
The result may or may not reference the underlying inventory
1538
so it should be treated as immutable.
1541
parent_to_children) = self._expand_fileids_to_parents_and_children(
1543
# There is some overlap here, but we assume that all interesting items
1544
# are in the _fileid_to_entry_cache because we had to read them to
1545
# determine if they were a dir we wanted to recurse, or just a file
1546
# This should give us all the entries we'll want to add, so start
1548
other = Inventory(self.root_id)
1549
other.root.revision = self.root.revision
1550
other.revision_id = self.revision_id
1551
if not interesting or not parent_to_children:
1552
# empty filter, or filtering entrys that don't exist
1553
# (if even 1 existed, then we would have populated
1554
# parent_to_children with at least the tree root.)
1556
cache = self._fileid_to_entry_cache
1557
remaining_children = collections.deque(parent_to_children[self.root_id])
1558
while remaining_children:
1559
file_id = remaining_children.popleft()
1561
if ie.kind == 'directory':
1562
ie = ie.copy() # We create a copy to depopulate the .children attribute
1563
# TODO: depending on the uses of 'other' we should probably alwyas
1564
# '.copy()' to prevent someone from mutating other and
1565
# invaliding our internal cache
1567
if file_id in parent_to_children:
1568
remaining_children.extend(parent_to_children[file_id])
1572
def _bytes_to_utf8name_key(bytes):
1573
"""Get the file_id, revision_id key out of bytes."""
1574
# We don't normally care about name, except for times when we want
1575
# to filter out empty names because of non rich-root...
1576
sections = bytes.split('\n')
1577
kind, file_id = sections[0].split(': ')
1578
return (sections[2], intern(file_id), intern(sections[3]))
1580
def _bytes_to_entry(self, bytes):
1581
"""Deserialise a serialised entry."""
1582
sections = bytes.split('\n')
1583
if sections[0].startswith("file: "):
1584
result = InventoryFile(sections[0][6:],
1585
sections[2].decode('utf8'),
1587
result.text_sha1 = sections[4]
1588
result.text_size = int(sections[5])
1589
result.executable = sections[6] == "Y"
1590
elif sections[0].startswith("dir: "):
1591
result = CHKInventoryDirectory(sections[0][5:],
1592
sections[2].decode('utf8'),
1594
elif sections[0].startswith("symlink: "):
1595
result = InventoryLink(sections[0][9:],
1596
sections[2].decode('utf8'),
1598
result.symlink_target = sections[4].decode('utf8')
1599
elif sections[0].startswith("tree: "):
1600
result = TreeReference(sections[0][6:],
1601
sections[2].decode('utf8'),
1603
result.reference_revision = sections[4]
1605
raise ValueError("Not a serialised entry %r" % bytes)
1606
result.file_id = intern(result.file_id)
1607
result.revision = intern(sections[3])
1608
if result.parent_id == '':
1609
result.parent_id = None
1610
self._fileid_to_entry_cache[result.file_id] = result
1613
def create_by_apply_delta(self, inventory_delta, new_revision_id,
1614
propagate_caches=False):
1615
"""Create a new CHKInventory by applying inventory_delta to this one.
1617
See the inventory developers documentation for the theory behind
1620
:param inventory_delta: The inventory delta to apply. See
1621
Inventory.apply_delta for details.
1622
:param new_revision_id: The revision id of the resulting CHKInventory.
1623
:param propagate_caches: If True, the caches for this inventory are
1624
copied to and updated for the result.
1625
:return: The new CHKInventory.
1627
split = osutils.split
1628
result = CHKInventory(self._search_key_name)
1629
if propagate_caches:
1630
# Just propagate the path-to-fileid cache for now
1631
result._path_to_fileid_cache = dict(self._path_to_fileid_cache.iteritems())
1632
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1633
self.id_to_entry._ensure_root()
1634
maximum_size = self.id_to_entry._root_node.maximum_size
1635
result.revision_id = new_revision_id
1636
result.id_to_entry = chk_map.CHKMap(
1637
self.id_to_entry._store,
1638
self.id_to_entry.key(),
1639
search_key_func=search_key_func)
1640
result.id_to_entry._ensure_root()
1641
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1642
# Change to apply to the parent_id_basename delta. The dict maps
1643
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1644
# when a path has its id replaced (e.g. the root is changed, or someone
1645
# does bzr mv a b, bzr mv c a, we should output a single change to this
1646
# map rather than two.
1647
parent_id_basename_delta = {}
1648
if self.parent_id_basename_to_file_id is not None:
1649
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1650
self.parent_id_basename_to_file_id._store,
1651
self.parent_id_basename_to_file_id.key(),
1652
search_key_func=search_key_func)
1653
result.parent_id_basename_to_file_id._ensure_root()
1654
self.parent_id_basename_to_file_id._ensure_root()
1655
result_p_id_root = result.parent_id_basename_to_file_id._root_node
1656
p_id_root = self.parent_id_basename_to_file_id._root_node
1657
result_p_id_root.set_maximum_size(p_id_root.maximum_size)
1658
result_p_id_root._key_width = p_id_root._key_width
1660
result.parent_id_basename_to_file_id = None
1661
result.root_id = self.root_id
1662
id_to_entry_delta = []
1663
# inventory_delta is only traversed once, so we just update the
1665
# Check for repeated file ids
1666
inventory_delta = _check_delta_unique_ids(inventory_delta)
1667
# Repeated old paths
1668
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1669
# Check for repeated new paths
1670
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1671
# Check for entries that don't match the fileid
1672
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1673
# Check for nonsense fileids
1674
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1675
# Check for new_path <-> entry consistency
1676
inventory_delta = _check_delta_new_path_entry_both_or_None(
1678
# All changed entries need to have their parents be directories and be
1679
# at the right path. This set contains (path, id) tuples.
1681
# When we delete an item, all the children of it must be either deleted
1682
# or altered in their own right. As we batch process the change via
1683
# CHKMap.apply_delta, we build a set of things to use to validate the
1687
for old_path, new_path, file_id, entry in inventory_delta:
1690
result.root_id = file_id
1691
if new_path is None:
1696
if propagate_caches:
1698
del result._path_to_fileid_cache[old_path]
1701
deletes.add(file_id)
1703
new_key = StaticTuple(file_id,)
1704
new_value = result._entry_to_bytes(entry)
1705
# Update caches. It's worth doing this whether
1706
# we're propagating the old caches or not.
1707
result._path_to_fileid_cache[new_path] = file_id
1708
parents.add((split(new_path)[0], entry.parent_id))
1709
if old_path is None:
1712
old_key = StaticTuple(file_id,)
1713
if self.id2path(file_id) != old_path:
1714
raise errors.InconsistentDelta(old_path, file_id,
1715
"Entry was at wrong other path %r." %
1716
self.id2path(file_id))
1717
altered.add(file_id)
1718
id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value))
1719
if result.parent_id_basename_to_file_id is not None:
1720
# parent_id, basename changes
1721
if old_path is None:
1724
old_entry = self[file_id]
1725
old_key = self._parent_id_basename_key(old_entry)
1726
if new_path is None:
1730
new_key = self._parent_id_basename_key(entry)
1732
# If the two keys are the same, the value will be unchanged
1733
# as its always the file id for this entry.
1734
if old_key != new_key:
1735
# Transform a change into explicit delete/add preserving
1736
# a possible match on the key from a different file id.
1737
if old_key is not None:
1738
parent_id_basename_delta.setdefault(
1739
old_key, [None, None])[0] = old_key
1740
if new_key is not None:
1741
parent_id_basename_delta.setdefault(
1742
new_key, [None, None])[1] = new_value
1743
# validate that deletes are complete.
1744
for file_id in deletes:
1745
entry = self[file_id]
1746
if entry.kind != 'directory':
1748
# This loop could potentially be better by using the id_basename
1749
# map to just get the child file ids.
1750
for child in entry.children.values():
1751
if child.file_id not in altered:
1752
raise errors.InconsistentDelta(self.id2path(child.file_id),
1753
child.file_id, "Child not deleted or reparented when "
1755
result.id_to_entry.apply_delta(id_to_entry_delta)
1756
if parent_id_basename_delta:
1757
# Transform the parent_id_basename delta data into a linear delta
1758
# with only one record for a given key. Optimally this would allow
1759
# re-keying, but its simpler to just output that as a delete+add
1760
# to spend less time calculating the delta.
1762
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1763
if value is not None:
1764
delta_list.append((old_key, key, value))
1766
delta_list.append((old_key, None, None))
1767
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1768
parents.discard(('', None))
1769
for parent_path, parent in parents:
1771
if result[parent].kind != 'directory':
1772
raise errors.InconsistentDelta(result.id2path(parent), parent,
1773
'Not a directory, but given children')
1774
except errors.NoSuchId:
1775
raise errors.InconsistentDelta("<unknown>", parent,
1776
"Parent is not present in resulting inventory.")
1777
if result.path2id(parent_path) != parent:
1778
raise errors.InconsistentDelta(parent_path, parent,
1779
"Parent has wrong path %r." % result.path2id(parent_path))
1783
def deserialise(klass, chk_store, bytes, expected_revision_id):
1784
"""Deserialise a CHKInventory.
1786
:param chk_store: A CHK capable VersionedFiles instance.
1787
:param bytes: The serialised bytes.
1788
:param expected_revision_id: The revision ID we think this inventory is
1790
:return: A CHKInventory
1792
lines = bytes.split('\n')
1794
raise AssertionError('bytes to deserialize must end with an eol')
1796
if lines[0] != 'chkinventory:':
1797
raise ValueError("not a serialised CHKInventory: %r" % bytes)
1799
allowed_keys = frozenset(['root_id', 'revision_id', 'search_key_name',
1800
'parent_id_basename_to_file_id',
1802
for line in lines[1:]:
1803
key, value = line.split(': ', 1)
1804
if key not in allowed_keys:
1805
raise errors.BzrError('Unknown key in inventory: %r\n%r'
1808
raise errors.BzrError('Duplicate key in inventory: %r\n%r'
1811
revision_id = intern(info['revision_id'])
1812
root_id = intern(info['root_id'])
1813
search_key_name = intern(info.get('search_key_name', 'plain'))
1814
parent_id_basename_to_file_id = intern(info.get(
1815
'parent_id_basename_to_file_id', None))
1816
if not parent_id_basename_to_file_id.startswith('sha1:'):
1817
raise ValueError('parent_id_basename_to_file_id should be a sha1'
1818
' key not %r' % (parent_id_basename_to_file_id,))
1819
id_to_entry = info['id_to_entry']
1820
if not id_to_entry.startswith('sha1:'):
1821
raise ValueError('id_to_entry should be a sha1'
1822
' key not %r' % (id_to_entry,))
1824
result = CHKInventory(search_key_name)
1825
result.revision_id = revision_id
1826
result.root_id = root_id
1827
search_key_func = chk_map.search_key_registry.get(
1828
result._search_key_name)
1829
if parent_id_basename_to_file_id is not None:
1830
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1831
chk_store, StaticTuple(parent_id_basename_to_file_id,),
1832
search_key_func=search_key_func)
1834
result.parent_id_basename_to_file_id = None
1836
result.id_to_entry = chk_map.CHKMap(chk_store,
1837
StaticTuple(id_to_entry,),
1838
search_key_func=search_key_func)
1839
if (result.revision_id,) != expected_revision_id:
1840
raise ValueError("Mismatched revision id and expected: %r, %r" %
1841
(result.revision_id, expected_revision_id))
1845
def from_inventory(klass, chk_store, inventory, maximum_size=0, search_key_name='plain'):
1846
"""Create a CHKInventory from an existing inventory.
1848
The content of inventory is copied into the chk_store, and a
1849
CHKInventory referencing that is returned.
1851
:param chk_store: A CHK capable VersionedFiles instance.
1852
:param inventory: The inventory to copy.
1853
:param maximum_size: The CHKMap node size limit.
1854
:param search_key_name: The identifier for the search key function
1856
result = klass(search_key_name)
1857
result.revision_id = inventory.revision_id
1858
result.root_id = inventory.root.file_id
1860
entry_to_bytes = result._entry_to_bytes
1861
parent_id_basename_key = result._parent_id_basename_key
1862
id_to_entry_dict = {}
1863
parent_id_basename_dict = {}
1864
for path, entry in inventory.iter_entries():
1865
key = StaticTuple(entry.file_id,).intern()
1866
id_to_entry_dict[key] = entry_to_bytes(entry)
1867
p_id_key = parent_id_basename_key(entry)
1868
parent_id_basename_dict[p_id_key] = entry.file_id
1870
result._populate_from_dicts(chk_store, id_to_entry_dict,
1871
parent_id_basename_dict, maximum_size=maximum_size)
1874
def _populate_from_dicts(self, chk_store, id_to_entry_dict,
1875
parent_id_basename_dict, maximum_size):
1876
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1877
root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
1878
maximum_size=maximum_size, key_width=1,
1879
search_key_func=search_key_func)
1880
self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
1882
root_key = chk_map.CHKMap.from_dict(chk_store,
1883
parent_id_basename_dict,
1884
maximum_size=maximum_size, key_width=2,
1885
search_key_func=search_key_func)
1886
self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1887
root_key, search_key_func)
1889
def _parent_id_basename_key(self, entry):
1890
"""Create a key for a entry in a parent_id_basename_to_file_id index."""
1891
if entry.parent_id is not None:
1892
parent_id = entry.parent_id
1895
return StaticTuple(parent_id, entry.name.encode('utf8')).intern()
1897
def __getitem__(self, file_id):
1898
"""map a single file_id -> InventoryEntry."""
1900
raise errors.NoSuchId(self, file_id)
1901
result = self._fileid_to_entry_cache.get(file_id, None)
1902
if result is not None:
1905
return self._bytes_to_entry(
1906
self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1])
1907
except StopIteration:
1908
# really we're passing an inventory, not a tree...
1909
raise errors.NoSuchId(self, file_id)
1911
def _getitems(self, file_ids):
1912
"""Similar to __getitem__, but lets you query for multiple.
1914
The returned order is undefined. And currently if an item doesn't
1915
exist, it isn't included in the output.
1919
for file_id in file_ids:
1920
entry = self._fileid_to_entry_cache.get(file_id, None)
1922
remaining.append(file_id)
1924
result.append(entry)
1925
file_keys = [StaticTuple(f,).intern() for f in remaining]
1926
for file_key, value in self.id_to_entry.iteritems(file_keys):
1927
entry = self._bytes_to_entry(value)
1928
result.append(entry)
1929
self._fileid_to_entry_cache[entry.file_id] = entry
1932
def has_id(self, file_id):
1933
# Perhaps have an explicit 'contains' method on CHKMap ?
1934
if self._fileid_to_entry_cache.get(file_id, None) is not None:
1937
self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1
1939
def is_root(self, file_id):
1940
return file_id == self.root_id
1942
def _iter_file_id_parents(self, file_id):
1943
"""Yield the parents of file_id up to the root."""
1944
while file_id is not None:
1948
raise errors.NoSuchId(tree=self, file_id=file_id)
1950
file_id = ie.parent_id
1953
"""Iterate over all file-ids."""
1954
for key, _ in self.id_to_entry.iteritems():
1957
def iter_just_entries(self):
1958
"""Iterate over all entries.
1960
Unlike iter_entries(), just the entries are returned (not (path, ie))
1961
and the order of entries is undefined.
1963
XXX: We may not want to merge this into bzr.dev.
1965
for key, entry in self.id_to_entry.iteritems():
1967
ie = self._fileid_to_entry_cache.get(file_id, None)
1969
ie = self._bytes_to_entry(entry)
1970
self._fileid_to_entry_cache[file_id] = ie
1973
def _preload_cache(self):
1974
"""Make sure all file-ids are in _fileid_to_entry_cache"""
1975
if self._fully_cached:
1976
return # No need to do it again
1977
# The optimal sort order is to use iteritems() directly
1978
cache = self._fileid_to_entry_cache
1979
for key, entry in self.id_to_entry.iteritems():
1981
if file_id not in cache:
1982
ie = self._bytes_to_entry(entry)
1986
last_parent_id = last_parent_ie = None
1987
pid_items = self.parent_id_basename_to_file_id.iteritems()
1988
for key, child_file_id in pid_items:
1989
if key == ('', ''): # This is the root
1990
if child_file_id != self.root_id:
1991
raise ValueError('Data inconsistency detected.'
1992
' We expected data with key ("","") to match'
1993
' the root id, but %s != %s'
1994
% (child_file_id, self.root_id))
1996
parent_id, basename = key
1997
ie = cache[child_file_id]
1998
if parent_id == last_parent_id:
1999
parent_ie = last_parent_ie
2001
parent_ie = cache[parent_id]
2002
if parent_ie.kind != 'directory':
2003
raise ValueError('Data inconsistency detected.'
2004
' An entry in the parent_id_basename_to_file_id map'
2005
' has parent_id {%s} but the kind of that object'
2006
' is %r not "directory"' % (parent_id, parent_ie.kind))
2007
if parent_ie._children is None:
2008
parent_ie._children = {}
2009
basename = basename.decode('utf-8')
2010
if basename in parent_ie._children:
2011
existing_ie = parent_ie._children[basename]
2012
if existing_ie != ie:
2013
raise ValueError('Data inconsistency detected.'
2014
' Two entries with basename %r were found'
2015
' in the parent entry {%s}'
2016
% (basename, parent_id))
2017
if basename != ie.name:
2018
raise ValueError('Data inconsistency detected.'
2019
' In the parent_id_basename_to_file_id map, file_id'
2020
' {%s} is listed as having basename %r, but in the'
2021
' id_to_entry map it is %r'
2022
% (child_file_id, basename, ie.name))
2023
parent_ie._children[basename] = ie
2024
self._fully_cached = True
2026
def iter_changes(self, basis):
2027
"""Generate a Tree.iter_changes change list between this and basis.
2029
:param basis: Another CHKInventory.
2030
:return: An iterator over the changes between self and basis, as per
2031
tree.iter_changes().
2033
# We want: (file_id, (path_in_source, path_in_target),
2034
# changed_content, versioned, parent, name, kind,
2036
for key, basis_value, self_value in \
2037
self.id_to_entry.iter_changes(basis.id_to_entry):
2039
if basis_value is not None:
2040
basis_entry = basis._bytes_to_entry(basis_value)
2041
path_in_source = basis.id2path(file_id)
2042
basis_parent = basis_entry.parent_id
2043
basis_name = basis_entry.name
2044
basis_executable = basis_entry.executable
2046
path_in_source = None
2049
basis_executable = None
2050
if self_value is not None:
2051
self_entry = self._bytes_to_entry(self_value)
2052
path_in_target = self.id2path(file_id)
2053
self_parent = self_entry.parent_id
2054
self_name = self_entry.name
2055
self_executable = self_entry.executable
2057
path_in_target = None
2060
self_executable = None
2061
if basis_value is None:
2063
kind = (None, self_entry.kind)
2064
versioned = (False, True)
2065
elif self_value is None:
2067
kind = (basis_entry.kind, None)
2068
versioned = (True, False)
2070
kind = (basis_entry.kind, self_entry.kind)
2071
versioned = (True, True)
2072
changed_content = False
2073
if kind[0] != kind[1]:
2074
changed_content = True
2075
elif kind[0] == 'file':
2076
if (self_entry.text_size != basis_entry.text_size or
2077
self_entry.text_sha1 != basis_entry.text_sha1):
2078
changed_content = True
2079
elif kind[0] == 'symlink':
2080
if self_entry.symlink_target != basis_entry.symlink_target:
2081
changed_content = True
2082
elif kind[0] == 'tree-reference':
2083
if (self_entry.reference_revision !=
2084
basis_entry.reference_revision):
2085
changed_content = True
2086
parent = (basis_parent, self_parent)
2087
name = (basis_name, self_name)
2088
executable = (basis_executable, self_executable)
2089
if (not changed_content
2090
and parent[0] == parent[1]
2091
and name[0] == name[1]
2092
and executable[0] == executable[1]):
2093
# Could happen when only the revision changed for a directory
2096
yield (file_id, (path_in_source, path_in_target), changed_content,
2097
versioned, parent, name, kind, executable)
2100
"""Return the number of entries in the inventory."""
2101
return len(self.id_to_entry)
2103
def _make_delta(self, old):
2104
"""Make an inventory delta from two inventories."""
2105
if type(old) != CHKInventory:
2106
return CommonInventory._make_delta(self, old)
2108
for key, old_value, self_value in \
2109
self.id_to_entry.iter_changes(old.id_to_entry):
2111
if old_value is not None:
2112
old_path = old.id2path(file_id)
2115
if self_value is not None:
2116
entry = self._bytes_to_entry(self_value)
2117
self._fileid_to_entry_cache[file_id] = entry
2118
new_path = self.id2path(file_id)
2122
delta.append((old_path, new_path, file_id, entry))
2125
def path2id(self, relpath):
2126
"""See CommonInventory.path2id()."""
2127
# TODO: perhaps support negative hits?
2128
result = self._path_to_fileid_cache.get(relpath, None)
2129
if result is not None:
2131
if isinstance(relpath, basestring):
2132
names = osutils.splitpath(relpath)
2135
current_id = self.root_id
2136
if current_id is None:
2138
parent_id_index = self.parent_id_basename_to_file_id
2140
for basename in names:
2141
if cur_path is None:
2144
cur_path = cur_path + '/' + basename
2145
basename_utf8 = basename.encode('utf8')
2146
file_id = self._path_to_fileid_cache.get(cur_path, None)
2148
key_filter = [StaticTuple(current_id, basename_utf8)]
2149
items = parent_id_index.iteritems(key_filter)
2150
for (parent_id, name_utf8), file_id in items:
2151
if parent_id != current_id or name_utf8 != basename_utf8:
2152
raise errors.BzrError("corrupt inventory lookup! "
2153
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2158
self._path_to_fileid_cache[cur_path] = file_id
2159
current_id = file_id
2163
"""Serialise the inventory to lines."""
2164
lines = ["chkinventory:\n"]
2165
if self._search_key_name != 'plain':
2166
# custom ordering grouping things that don't change together
2167
lines.append('search_key_name: %s\n' % (self._search_key_name,))
2168
lines.append("root_id: %s\n" % self.root_id)
2169
lines.append('parent_id_basename_to_file_id: %s\n' %
2170
(self.parent_id_basename_to_file_id.key()[0],))
2171
lines.append("revision_id: %s\n" % self.revision_id)
2172
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
2174
lines.append("revision_id: %s\n" % self.revision_id)
2175
lines.append("root_id: %s\n" % self.root_id)
2176
if self.parent_id_basename_to_file_id is not None:
2177
lines.append('parent_id_basename_to_file_id: %s\n' %
2178
(self.parent_id_basename_to_file_id.key()[0],))
2179
lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],))
2184
"""Get the root entry."""
2185
return self[self.root_id]
2188
class CHKInventoryDirectory(InventoryDirectory):
2189
"""A directory in an inventory."""
2191
__slots__ = ['_children', '_chk_inventory']
2193
def __init__(self, file_id, name, parent_id, chk_inventory):
2194
# Don't call InventoryDirectory.__init__ - it isn't right for this
2196
InventoryEntry.__init__(self, file_id, name, parent_id)
2197
self._children = None
2198
self._chk_inventory = chk_inventory
2202
"""Access the list of children of this directory.
2204
With a parent_id_basename_to_file_id index, loads all the children,
2205
without loads the entire index. Without is bad. A more sophisticated
2206
proxy object might be nice, to allow partial loading of children as
2207
well when specific names are accessed. (So path traversal can be
2208
written in the obvious way but not examine siblings.).
2210
if self._children is not None:
2211
return self._children
2212
# No longer supported
2213
if self._chk_inventory.parent_id_basename_to_file_id is None:
2214
raise AssertionError("Inventories without"
2215
" parent_id_basename_to_file_id are no longer supported")
2217
# XXX: Todo - use proxy objects for the children rather than loading
2218
# all when the attribute is referenced.
2219
parent_id_index = self._chk_inventory.parent_id_basename_to_file_id
2221
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
2222
key_filter=[StaticTuple(self.file_id,)]):
2223
child_keys.add(StaticTuple(file_id,))
2225
for file_id_key in child_keys:
2226
entry = self._chk_inventory._fileid_to_entry_cache.get(
2227
file_id_key[0], None)
2228
if entry is not None:
2229
result[entry.name] = entry
2230
cached.add(file_id_key)
2231
child_keys.difference_update(cached)
2232
# populate; todo: do by name
2233
id_to_entry = self._chk_inventory.id_to_entry
2234
for file_id_key, bytes in id_to_entry.iteritems(child_keys):
2235
entry = self._chk_inventory._bytes_to_entry(bytes)
2236
result[entry.name] = entry
2237
self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry
2238
self._children = result
2242
'directory': InventoryDirectory,
2243
'file': InventoryFile,
2244
'symlink': InventoryLink,
2245
'tree-reference': TreeReference
2248
def make_entry(kind, name, parent_id, file_id=None):
2249
"""Create an inventory entry.
2251
:param kind: the type of inventory entry to create.
2252
:param name: the basename of the entry.
2253
:param parent_id: the parent_id of the entry.
2254
:param file_id: the file_id to use. if None, one will be created.
2257
file_id = generate_ids.gen_file_id(name)
2258
name = ensure_normalized_name(name)
2260
factory = entry_factory[kind]
2262
raise errors.BadFileKindError(name, kind)
2263
return factory(file_id, name, parent_id)
2266
def ensure_normalized_name(name):
2269
:raises InvalidNormalization: When name is not normalized, and cannot be
2270
accessed on this platform by the normalized path.
2271
:return: The NFC normalised version of name.
2273
#------- This has been copied to bzrlib.dirstate.DirState.add, please
2274
# keep them synchronised.
2275
# we dont import normalized_filename directly because we want to be
2276
# able to change the implementation at runtime for tests.
2277
norm_name, can_access = osutils.normalized_filename(name)
2278
if norm_name != name:
2282
# TODO: jam 20060701 This would probably be more useful
2283
# if the error was raised with the full path
2284
raise errors.InvalidNormalization(name)
2288
_NAME_RE = lazy_regex.lazy_compile(r'^[^/\\]+$')
648
2290
def is_valid_name(name):
649
2291
return bool(_NAME_RE.match(name))
2294
def _check_delta_unique_ids(delta):
2295
"""Decorate a delta and check that the file ids in it are unique.
2297
:return: A generator over delta.
2301
length = len(ids) + 1
2303
if len(ids) != length:
2304
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2309
def _check_delta_unique_new_paths(delta):
2310
"""Decorate a delta and check that the new paths in it are unique.
2312
:return: A generator over delta.
2316
length = len(paths) + 1
2318
if path is not None:
2320
if len(paths) != length:
2321
raise errors.InconsistentDelta(path, item[2], "repeated path")
2325
def _check_delta_unique_old_paths(delta):
2326
"""Decorate a delta and check that the old paths in it are unique.
2328
:return: A generator over delta.
2332
length = len(paths) + 1
2334
if path is not None:
2336
if len(paths) != length:
2337
raise errors.InconsistentDelta(path, item[2], "repeated path")
2341
def _check_delta_ids_are_valid(delta):
2342
"""Decorate a delta and check that the ids in it are valid.
2344
:return: A generator over delta.
2349
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2350
"entry with file_id None %r" % entry)
2351
if type(item[2]) != str:
2352
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2353
"entry with non bytes file_id %r" % entry)
2357
def _check_delta_ids_match_entry(delta):
2358
"""Decorate a delta and check that the ids in it match the entry.file_id.
2360
:return: A generator over delta.
2364
if entry is not None:
2365
if entry.file_id != item[2]:
2366
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2367
"mismatched id with %r" % entry)
2371
def _check_delta_new_path_entry_both_or_None(delta):
2372
"""Decorate a delta and check that the new_path and entry are paired.
2374
:return: A generator over delta.
2379
if new_path is None and entry is not None:
2380
raise errors.InconsistentDelta(item[0], item[1],
2381
"Entry with no new_path")
2382
if new_path is not None and entry is None:
2383
raise errors.InconsistentDelta(new_path, item[1],
2384
"new_path with no entry")
2388
def mutable_inventory_from_tree(tree):
2389
"""Create a new inventory that has the same contents as a specified tree.
2391
:param tree: Revision tree to create inventory from
2393
entries = tree.iter_entries_by_dir()
2394
inv = Inventory(None, tree.get_revision_id())
2395
for path, inv_entry in entries:
2396
inv.add(inv_entry.copy())