2
# -*- coding: UTF-8 -*-
1
# (C) 2005 Canonical Ltd
4
3
# This program is free software; you can redistribute it and/or modify
5
4
# it under the terms of the GNU General Public License as published by
6
5
# the Free Software Foundation; either version 2 of the License, or
7
6
# (at your option) any later version.
9
8
# This program is distributed in the hope that it will be useful,
10
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
11
# GNU General Public License for more details.
14
13
# You should have received a copy of the GNU General Public License
15
14
# along with this program; if not, write to the Free Software
16
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
"""Inventories map files to their name in a revision."""
21
__copyright__ = "Copyright (C) 2005 Canonical Ltd."
22
__author__ = "Martin Pool <mbp@canonical.com>"
24
import sys, os.path, types
28
from cElementTree import Element, ElementTree, SubElement
30
from elementtree.ElementTree import Element, ElementTree, SubElement
32
from xml import XMLMixin
33
from errors import bailout
34
from osutils import uuid, quotefn, splitpath, joinpath, appendpath
35
from trace import mutter
37
class InventoryEntry(XMLMixin):
18
# TODO: Maybe also keep the full path of the entry, and the children?
19
# But those depend on its position within a particular inventory, and
20
# it would be nice not to need to hold the backpointer here.
22
# This should really be an id randomly assigned when the tree is
23
# created, but it's not for now.
34
from bzrlib.errors import BzrError, BzrCheckError
36
from bzrlib.osutils import (pumpfile, quotefn, splitpath, joinpath,
37
appendpath, sha_strings)
38
from bzrlib.trace import mutter
39
from bzrlib.errors import NotVersionedError
42
class InventoryEntry(object):
38
43
"""Description of a versioned file.
40
45
An InventoryEntry has the following fields, which are also
41
46
present in the XML inventory-entry element:
44
* *name*: (only the basename within the directory, must not
46
* *kind*: "directory" or "file"
47
* *directory_id*: (if absent/null means the branch root directory)
48
* *text_sha1*: only for files
49
* *text_size*: in bytes, only for files
50
* *text_id*: identifier for the text version, only for files
52
InventoryEntries can also exist inside a WorkingTree
53
inventory, in which case they are not yet bound to a
54
particular revision of the file. In that case the text_sha1,
55
text_size and text_id are absent.
51
(within the parent directory)
54
file_id of the parent directory, or ROOT_ID
57
the revision_id in which this variation of this file was
61
Indicates that this file should be executable on systems
65
sha-1 of the text of the file
68
size in bytes of the text of the file
70
(reading a version 4 tree created a text_id field.)
58
72
>>> i = Inventory()
60
>>> i.add(InventoryEntry('123', 'src', kind='directory'))
61
>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))
75
>>> i.add(InventoryDirectory('123', 'src', ROOT_ID))
76
InventoryDirectory('123', 'src', parent_id='TREE_ROOT')
77
>>> i.add(InventoryFile('2323', 'hello.c', parent_id='123'))
78
InventoryFile('2323', 'hello.c', parent_id='123')
62
79
>>> for j in i.iter_entries():
65
('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))
66
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
67
>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))
82
('src', InventoryDirectory('123', 'src', parent_id='TREE_ROOT'))
83
('src/hello.c', InventoryFile('2323', 'hello.c', parent_id='123'))
84
>>> i.add(InventoryFile('2323', 'bye.c', '123'))
68
85
Traceback (most recent call last):
70
BzrError: ('inventory already contains entry with id {2323}', [])
71
>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))
72
>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))
87
BzrError: inventory already contains entry with id {2323}
88
>>> i.add(InventoryFile('2324', 'bye.c', '123'))
89
InventoryFile('2324', 'bye.c', parent_id='123')
90
>>> i.add(InventoryDirectory('2325', 'wibble', '123'))
91
InventoryDirectory('2325', 'wibble', parent_id='123')
73
92
>>> i.path2id('src/wibble')
77
>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))
96
>>> i.add(InventoryFile('2326', 'wibble.c', '2325'))
97
InventoryFile('2326', 'wibble.c', parent_id='2325')
79
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
80
>>> for j in i.iter_entries():
82
... assert i.path2id(j[0])
99
InventoryFile('2326', 'wibble.c', parent_id='2325')
100
>>> for path, entry in i.iter_entries():
101
... print path.replace('\\\\', '/') # for win32 os.sep
102
... assert i.path2id(path)
88
108
src/wibble/wibble.c
109
>>> i.id2path('2326').replace('\\\\', '/')
90
110
'src/wibble/wibble.c'
92
:todo: Maybe also keep the full path of the entry, and the children?
93
But those depend on its position within a particular inventory, and
94
it would be nice not to need to hold the backpointer here.
96
def __init__(self, file_id, name, kind='file', text_id=None,
113
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
114
'text_id', 'parent_id', 'children', 'executable',
117
def _add_text_to_weave(self, new_lines, parents, weave_store, transaction):
118
weave_store.add_text(self.file_id, self.revision, new_lines, parents,
121
def detect_changes(self, old_entry):
122
"""Return a (text_modified, meta_modified) from this to old_entry.
124
_read_tree_state must have been called on self and old_entry prior to
125
calling detect_changes.
129
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
130
output_to, reverse=False):
131
"""Perform a diff from this to to_entry.
133
text_diff will be used for textual difference calculation.
134
This is a template method, override _diff in child classes.
136
self._read_tree_state(tree)
138
# cannot diff from one kind to another - you must do a removal
139
# and an add if they do not match.
140
assert self.kind == to_entry.kind
141
to_entry._read_tree_state(to_tree)
142
self._diff(text_diff, from_label, tree, to_label, to_entry, to_tree,
145
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
          output_to, reverse=False):
    """Diff this entry against to_entry, which is of the same kind.

    The base implementation does nothing; kind specific subclasses
    provide the real comparison.
    """
    return None
149
def find_previous_heads(self, previous_inventories, entry_weave):
150
"""Return the revisions and entries that directly precede this.
152
Returned as a map from revision to inventory entry.
154
This is a map containing the file revisions in all parents
155
for which the file exists, and its revision is not a parent of
156
any other. If the file is new, the set will be empty.
158
def get_ancestors(weave, entry):
159
return set(map(weave.idx_to_name,
160
weave.inclusions([weave.lookup(entry.revision)])))
163
for inv in previous_inventories:
164
if self.file_id in inv:
165
ie = inv[self.file_id]
166
assert ie.file_id == self.file_id
167
if ie.revision in heads:
168
# fixup logic, there was a bug in revision updates.
169
# with x bit support.
171
if heads[ie.revision].executable != ie.executable:
172
heads[ie.revision].executable = False
173
ie.executable = False
174
except AttributeError:
176
assert heads[ie.revision] == ie
178
# may want to add it.
179
# may already be covered:
180
already_present = 0 != len(
181
[head for head in heads
182
if ie.revision in head_ancestors[head]])
184
# an ancestor of a known head.
187
ancestors = get_ancestors(entry_weave, ie)
188
# may knock something else out:
189
check_heads = list(heads.keys())
190
for head in check_heads:
191
if head in ancestors:
192
# this head is not really a head
194
head_ancestors[ie.revision] = ancestors
195
heads[ie.revision] = ie
198
def get_tar_item(self, root, dp, now, tree):
199
"""Get a tarfile item and a file stream for its content."""
200
item = tarfile.TarInfo(os.path.join(root, dp))
201
# TODO: would be cool to actually set it to the timestamp of the
202
# revision it was last changed
204
fileobj = self._put_in_tar(item, tree)
208
"""Return true if the object this entry represents has textual data.
210
Note that textual data includes binary content.
212
Also note that all entries get weave files created for them.
213
This attribute is primarily used when upgrading from old trees that
214
did not have the weave index for all inventory entries.
218
def __init__(self, file_id, name, parent_id, text_id=None):
98
219
"""Create an InventoryEntry
100
221
The filename must be a single component, relative to the
101
222
parent directory; it cannot be a whole path or relative name.
103
>>> e = InventoryEntry('123', 'hello.c')
224
>>> e = InventoryFile('123', 'hello.c', ROOT_ID)
108
>>> e = InventoryEntry('123', 'src/hello.c')
229
>>> e = InventoryFile('123', 'src/hello.c', ROOT_ID)
109
230
Traceback (most recent call last):
110
BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
231
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
113
if len(splitpath(name)) != 1:
114
bailout('InventoryEntry name is not a simple filename: %r'
233
assert isinstance(name, basestring), name
234
if '/' in name or '\\' in name:
235
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
237
self.executable = False
239
self.text_sha1 = None
240
self.text_size = None
117
241
self.file_id = file_id
119
assert kind in ['file', 'directory']
121
243
self.text_id = text_id
122
244
self.parent_id = parent_id
123
self.text_sha1 = None
124
self.text_size = None
245
self.symlink_target = None
247
def kind_character(self):
    """Give the short kind indicator that can be appended to names.

    The base class has no indicator of its own: it always raises
    BzrError reporting self.kind.  Subclasses override this method.
    """
    message = 'unknown kind %r' % self.kind
    raise BzrError(message)
251
known_kinds = ('file', 'directory', 'symlink', 'root_directory')
253
def _put_in_tar(self, item, tree):
    """Populate item for stashing in a tar and return the content stream.

    None is returned when no content is available.  This base
    implementation does not know how to export any kind and always
    raises BzrError naming the file id and kind.
    """
    details = (self.file_id, self.kind)
    raise BzrError("don't know how to export {%s} of kind %r" % details)
261
def put_on_disk(self, dest, dp, tree):
    """Write a representation of this entry below the prefix dest.

    Template method: the target path is built from dest and dp, the
    kind specific work is delegated to _put_on_disk (implement that in
    subclasses), and the export is then logged through mutter.
    """
    target = appendpath(dest, dp)
    self._put_on_disk(target, tree)
    exported = (self.file_id, self.kind, target)
    mutter(" export {%s} kind %s to %s" % exported)
270
def _put_on_disk(self, fullpath, tree):
    """Put this entry onto disk at fullpath, taking content from tree.

    The base implementation cannot export anything and always raises
    BzrError naming the file id and kind.
    """
    details = (self.file_id, self.kind)
    raise BzrError("don't know how to export {%s} of kind %r" % details)
274
def sorted_children(self):
275
l = self.children.items()
280
def versionable_kind(kind):
    """Return True when kind is 'file', 'directory' or 'symlink'."""
    accepted = ('file', 'directory', 'symlink')
    return kind in accepted
283
def check(self, checker, rev_id, inv, tree):
    """Check this inventory entry is intact.

    This is a template method: the check shared by every kind (the
    parent must be present in inv) is done here, then _check is called
    for the kind specific checks.

    checker and tree are handed to _check untouched; rev_id is used in
    the error message and also handed to _check.

    Raises BzrCheckError when parent_id is set but inv has no entry
    with that id.
    """
    # Identity test: None is the one value that means "no parent".
    if self.parent_id is not None and not inv.has_id(self.parent_id):
        raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
                            % (self.parent_id, rev_id))
    self._check(checker, rev_id, tree)
295
def _check(self, checker, rev_id, tree):
296
"""Check this inventory entry for kind specific errors."""
297
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
128
other = InventoryEntry(self.file_id, self.name, self.kind,
129
self.text_id, self.parent_id)
130
other.text_sha1 = self.text_sha1
131
other.text_size = self.text_size
302
"""Clone this inventory entry."""
303
raise NotImplementedError
305
def _get_snapshot_change(self, previous_entries):
306
if len(previous_entries) > 1:
308
elif len(previous_entries) == 0:
311
return 'modified/renamed/reparented'
135
313
def __repr__(self):
136
return ("%s(%r, %r, kind=%r, parent_id=%r)"
314
return ("%s(%r, %r, parent_id=%r)"
137
315
% (self.__class__.__name__,
144
def to_element(self):
145
"""Convert to XML element"""
148
e.set('name', self.name)
149
e.set('file_id', self.file_id)
150
e.set('kind', self.kind)
152
if self.text_size is not None:
153
e.set('text_size', '%d' % self.text_size)
155
for f in ['text_id', 'text_sha1', 'parent_id']:
165
def from_element(cls, elt):
166
assert elt.tag == 'entry'
167
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))
168
self.text_id = elt.get('text_id')
169
self.text_sha1 = elt.get('text_sha1')
170
self.parent_id = elt.get('parent_id')
172
## mutter("read inventoryentry: %r" % (elt.attrib))
174
v = elt.get('text_size')
175
self.text_size = v and int(v)
180
from_element = classmethod(from_element)
182
def __cmp__(self, other):
320
def snapshot(self, revision, path, previous_entries,
321
work_tree, weave_store, transaction):
322
"""Make a snapshot of this entry which may or may not have changed.
324
This means that all its fields are populated, that it has its
325
text stored in the text store or weave.
327
mutter('new parents of %s are %r', path, previous_entries)
328
self._read_tree_state(work_tree)
329
if len(previous_entries) == 1:
330
# cannot be unchanged unless there is only one parent file rev.
331
parent_ie = previous_entries.values()[0]
332
if self._unchanged(parent_ie):
333
mutter("found unchanged entry")
334
self.revision = parent_ie.revision
336
return self.snapshot_revision(revision, previous_entries,
337
work_tree, weave_store, transaction)
339
def snapshot_revision(self, revision, previous_entries, work_tree,
340
weave_store, transaction):
341
"""Record this revision unconditionally."""
342
mutter('new revision for {%s}', self.file_id)
343
self.revision = revision
344
change = self._get_snapshot_change(previous_entries)
345
self._snapshot_text(previous_entries, work_tree, weave_store,
349
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
    """Record the 'text' of this entry, whatever form that takes.

    The default implementation logs the store and then adds an empty
    text through _add_text_to_weave; work_tree is not consulted.
    """
    mutter('storing file {%s} in revision {%s}', self.file_id, self.revision)
    empty_text = []
    self._add_text_to_weave(empty_text, file_parents, weave_store,
                            transaction)
358
def __eq__(self, other):
185
359
if not isinstance(other, InventoryEntry):
186
360
return NotImplemented
188
return cmp(self.file_id, other.file_id) \
189
or cmp(self.name, other.name) \
190
or cmp(self.text_sha1, other.text_sha1) \
191
or cmp(self.text_size, other.text_size) \
192
or cmp(self.text_id, other.text_id) \
193
or cmp(self.parent_id, other.parent_id) \
194
or cmp(self.kind, other.kind)
198
class Inventory(XMLMixin):
362
return ((self.file_id == other.file_id)
363
and (self.name == other.name)
364
and (other.symlink_target == self.symlink_target)
365
and (self.text_sha1 == other.text_sha1)
366
and (self.text_size == other.text_size)
367
and (self.text_id == other.text_id)
368
and (self.parent_id == other.parent_id)
369
and (self.kind == other.kind)
370
and (self.revision == other.revision)
371
and (self.executable == other.executable)
374
def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    if self == other:
        return False
    return True
378
raise ValueError('not hashable')
380
def _unchanged(self, previous_ie):
381
"""Has this entry changed relative to previous_ie.
383
This method should be overridden in child classes.
386
# different inv parent
387
if previous_ie.parent_id != self.parent_id:
390
elif previous_ie.name != self.name:
394
def _read_tree_state(self, work_tree):
    """Populate the fields of this entry from work_tree.

    The base implementation does nothing.  Note that this should become
    a noop on virtual trees, because every entry created there is
    already populated.
    """
    return None
402
class RootEntry(InventoryEntry):
404
def _check(self, checker, rev_id, tree):
405
"""See InventoryEntry._check"""
407
def __init__(self, file_id):
408
self.file_id = file_id
410
self.kind = 'root_directory'
411
self.parent_id = None
414
def __eq__(self, other):
415
if not isinstance(other, RootEntry):
416
return NotImplemented
418
return (self.file_id == other.file_id) \
419
and (self.children == other.children)
422
class InventoryDirectory(InventoryEntry):
423
"""A directory in an inventory."""
425
def _check(self, checker, rev_id, tree):
    """See InventoryEntry._check

    A directory must not carry any text information: BzrCheckError is
    raised when text_sha1, text_size or text_id is set.  checker and
    tree are not used here.
    """
    # Identity tests: None is the marker for "not set".
    has_text = (self.text_sha1 is not None
                or self.text_size is not None
                or self.text_id is not None)
    if has_text:
        raise BzrCheckError('directory {%s} has text in revision {%s}'
                            % (self.file_id, rev_id))
432
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
433
other.revision = self.revision
434
# note that children are *not* copied; they're pulled across when
438
def __init__(self, file_id, name, parent_id):
439
super(InventoryDirectory, self).__init__(file_id, name, parent_id)
441
self.kind = 'directory'
443
def kind_character(self):
444
"""See InventoryEntry.kind_character."""
447
def _put_in_tar(self, item, tree):
448
"""See InventoryEntry._put_in_tar."""
449
item.type = tarfile.DIRTYPE
456
def _put_on_disk(self, fullpath, tree):
457
"""See InventoryEntry._put_on_disk."""
461
class InventoryFile(InventoryEntry):
462
"""A file in an inventory."""
464
def _check(self, checker, rev_id, tree):
465
"""See InventoryEntry._check"""
466
revision = self.revision
467
t = (self.file_id, revision)
468
if t in checker.checked_texts:
469
prev_sha = checker.checked_texts[t]
470
if prev_sha != self.text_sha1:
471
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
472
(self.file_id, rev_id))
474
checker.repeated_text_cnt += 1
476
mutter('check version {%s} of {%s}', rev_id, self.file_id)
477
file_lines = tree.get_file_lines(self.file_id)
478
checker.checked_text_cnt += 1
479
if self.text_size != sum(map(len, file_lines)):
480
raise BzrCheckError('text {%s} wrong size' % self.text_id)
481
if self.text_sha1 != sha_strings(file_lines):
482
raise BzrCheckError('text {%s} wrong sha1' % self.text_id)
483
checker.checked_texts[t] = self.text_sha1
486
other = InventoryFile(self.file_id, self.name, self.parent_id)
487
other.executable = self.executable
488
other.text_id = self.text_id
489
other.text_sha1 = self.text_sha1
490
other.text_size = self.text_size
491
other.revision = self.revision
494
def detect_changes(self, old_entry):
    """See InventoryEntry.detect_changes.

    Returns a (text_modified, meta_modified) pair: the text is modified
    when the two text_sha1 values differ, the metadata when the two
    executable flags differ.  Both entries must already have their
    text_sha1 set.
    """
    # Identity tests: None is the marker for "sha1 not known".
    assert self.text_sha1 is not None
    assert old_entry.text_sha1 is not None
    text_modified = self.text_sha1 != old_entry.text_sha1
    meta_modified = self.executable != old_entry.executable
    return text_modified, meta_modified
502
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
503
output_to, reverse=False):
504
"""See InventoryEntry._diff."""
505
from_text = tree.get_file(self.file_id).readlines()
507
to_text = to_tree.get_file(to_entry.file_id).readlines()
511
text_diff(from_label, from_text,
512
to_label, to_text, output_to)
514
text_diff(to_label, to_text,
515
from_label, from_text, output_to)
518
"""See InventoryEntry.has_text."""
521
def __init__(self, file_id, name, parent_id):
522
super(InventoryFile, self).__init__(file_id, name, parent_id)
525
def kind_character(self):
526
"""See InventoryEntry.kind_character."""
529
def _put_in_tar(self, item, tree):
530
"""See InventoryEntry._put_in_tar."""
531
item.type = tarfile.REGTYPE
532
fileobj = tree.get_file(self.file_id)
533
item.size = self.text_size
534
if tree.is_executable(self.file_id):
540
def _put_on_disk(self, fullpath, tree):
541
"""See InventoryEntry._put_on_disk."""
542
pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
543
if tree.is_executable(self.file_id):
544
os.chmod(fullpath, 0755)
546
def _read_tree_state(self, work_tree):
    """See InventoryEntry._read_tree_state.

    Refreshes text_sha1 and executable with what work_tree reports for
    this file id.
    """
    file_id = self.file_id
    self.text_sha1 = work_tree.get_file_sha1(file_id)
    self.executable = work_tree.is_executable(file_id)
551
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
552
"""See InventoryEntry._snapshot_text."""
553
mutter('storing file {%s} in revision {%s}',
554
self.file_id, self.revision)
555
# special case to avoid diffing on renames or
557
if (len(file_parents) == 1
558
and self.text_sha1 == file_parents.values()[0].text_sha1
559
and self.text_size == file_parents.values()[0].text_size):
560
previous_ie = file_parents.values()[0]
561
weave_store.add_identical_text(
562
self.file_id, previous_ie.revision,
563
self.revision, file_parents, transaction)
565
new_lines = work_tree.get_file(self.file_id).readlines()
566
self._add_text_to_weave(new_lines, file_parents, weave_store,
568
self.text_sha1 = sha_strings(new_lines)
569
self.text_size = sum(map(len, new_lines))
572
def _unchanged(self, previous_ie):
573
"""See InventoryEntry._unchanged."""
574
compatible = super(InventoryFile, self)._unchanged(previous_ie)
575
if self.text_sha1 != previous_ie.text_sha1:
578
# FIXME: 20050930 probe for the text size when getting sha1
579
# in _read_tree_state
580
self.text_size = previous_ie.text_size
581
if self.executable != previous_ie.executable:
586
class InventoryLink(InventoryEntry):
587
"""A symbolic link in an inventory."""
589
__slots__ = ['symlink_target']
591
def _check(self, checker, rev_id, tree):
    """See InventoryEntry._check

    A symlink must not carry any text information and must have a
    target: BzrCheckError is raised when text_sha1, text_size or
    text_id is set, or when symlink_target is None.  checker and tree
    are not used here.
    """
    # Identity tests: None is the marker for "not set".
    has_text = (self.text_sha1 is not None
                or self.text_size is not None
                or self.text_id is not None)
    if has_text:
        raise BzrCheckError('symlink {%s} has text in revision {%s}'
                            % (self.file_id, rev_id))
    if self.symlink_target is None:
        raise BzrCheckError('symlink {%s} has no target in revision {%s}'
                            % (self.file_id, rev_id))
601
other = InventoryLink(self.file_id, self.name, self.parent_id)
602
other.symlink_target = self.symlink_target
603
other.revision = self.revision
606
def detect_changes(self, old_entry):
607
"""See InventoryEntry.detect_changes."""
608
# FIXME: which _modified field should we use ? RBC 20051003
609
text_modified = (self.symlink_target != old_entry.symlink_target)
611
mutter(" symlink target changed")
612
meta_modified = False
613
return text_modified, meta_modified
615
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
616
output_to, reverse=False):
617
"""See InventoryEntry._diff."""
618
from_text = self.symlink_target
619
if to_entry is not None:
620
to_text = to_entry.symlink_target
625
print >>output_to, '=== target changed %r => %r' % (from_text, to_text)
628
print >>output_to, '=== target was %r' % self.symlink_target
630
print >>output_to, '=== target is %r' % self.symlink_target
632
def __init__(self, file_id, name, parent_id):
    """Create a symlink entry.

    file_id, name and parent_id are handed to the InventoryEntry
    constructor; the kind is then fixed to 'symlink'.
    """
    super(InventoryLink, self).__init__(file_id, name, parent_id)
    # Set after the base constructor has run.
    self.kind = 'symlink'
636
def kind_character(self):
637
"""See InventoryEntry.kind_character."""
640
def _put_in_tar(self, item, tree):
    """See InventoryEntry._put_in_tar.

    Marks item as a symbolic link whose link name is this entry's
    symlink_target.
    """
    # Was "iterm.type", an undefined name that raised NameError on every
    # call; the tar item being populated is "item".
    item.type = tarfile.SYMTYPE
    item.linkname = self.symlink_target
649
def _put_on_disk(self, fullpath, tree):
650
"""See InventoryEntry._put_on_disk."""
652
os.symlink(self.symlink_target, fullpath)
654
raise BzrError("Failed to create symlink %r -> %r, error: %s" % (fullpath, self.symlink_target, e))
656
def _read_tree_state(self, work_tree):
    """See InventoryEntry._read_tree_state.

    Refreshes symlink_target with what work_tree reports for this
    file id.
    """
    target = work_tree.get_symlink_target(self.file_id)
    self.symlink_target = target
660
def _unchanged(self, previous_ie):
661
"""See InventoryEntry._unchanged."""
662
compatible = super(InventoryLink, self)._unchanged(previous_ie)
663
if self.symlink_target != previous_ie.symlink_target:
668
class Inventory(object):
199
669
"""Inventory of versioned files in a tree.
201
An Inventory acts like a set of InventoryEntry items. You can
202
also look files up by their file_id or name.
204
May be read from and written to a metadata file in a tree. To
205
manipulate the inventory (for example to add a file), it is read
206
in, modified, and then written back out.
671
This describes which file_id is present at each point in the tree,
672
and possibly the SHA-1 or other information about the file.
673
Entries can be looked up either by path or by file_id.
208
675
The inventory represents a typical unix file tree, with
209
676
directories containing files and subdirectories. We never store