1
1
# (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
# FIXME: This refactoring of the workingtree code doesn't seem to keep
18
# the WorkingTree's copy of the inventory in sync with the branch. The
19
# branch modifies its working inventory when it does a commit to make
20
# missing files permanently removed.
22
# TODO: Maybe also keep the full path of the entry, and the children?
23
# But those depend on its position within a particular inventory, and
24
# it would be nice not to need to hold the backpointer here.
26
18
# This should really be an id randomly assigned when the tree is
27
19
# created, but it's not for now.
28
20
ROOT_ID = "TREE_ROOT"
23
import sys, os.path, types, re
26
from cElementTree import Element, ElementTree, SubElement
28
from elementtree.ElementTree import Element, ElementTree, SubElement
30
from bzrlib.xml import XMLMixin
31
from bzrlib.errors import BzrError, BzrCheckError
38
from bzrlib.osutils import (pumpfile, quotefn, splitpath, joinpath,
39
appendpath, sha_strings)
34
from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath
40
35
from bzrlib.trace import mutter
41
from bzrlib.errors import (NotVersionedError, InvalidEntryName,
42
BzrError, BzrCheckError)
45
class InventoryEntry(object):
37
class InventoryEntry(XMLMixin):
46
38
"""Description of a versioned file.
48
40
An InventoryEntry has the following fields, which are also
49
41
present in the XML inventory-entry element:
54
(within the parent directory)
57
file_id of the parent directory, or ROOT_ID
60
the revision_id in which this variation of this file was
64
Indicates that this file should be executable on systems
68
sha-1 of the text of the file
71
size in bytes of the text of the file
73
(reading a version 4 tree created a text_id field.)
44
* *name*: (only the basename within the directory, must not
46
* *kind*: "directory" or "file"
47
* *directory_id*: (if absent/null means the branch root directory)
48
* *text_sha1*: only for files
49
* *text_size*: in bytes, only for files
50
* *text_id*: identifier for the text version, only for files
52
InventoryEntries can also exist inside a WorkingTree
53
inventory, in which case they are not yet bound to a
54
particular revision of the file. In that case the text_sha1,
55
text_size and text_id are absent.
75
58
>>> i = Inventory()
78
>>> i.add(InventoryDirectory('123', 'src', ROOT_ID))
79
InventoryDirectory('123', 'src', parent_id='TREE_ROOT')
80
>>> i.add(InventoryFile('2323', 'hello.c', parent_id='123'))
81
InventoryFile('2323', 'hello.c', parent_id='123')
61
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
62
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
82
63
>>> for j in i.iter_entries():
85
('src', InventoryDirectory('123', 'src', parent_id='TREE_ROOT'))
86
('src/hello.c', InventoryFile('2323', 'hello.c', parent_id='123'))
87
>>> i.add(InventoryFile('2323', 'bye.c', '123'))
66
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
67
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
68
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
88
69
Traceback (most recent call last):
90
71
BzrError: inventory already contains entry with id {2323}
91
>>> i.add(InventoryFile('2324', 'bye.c', '123'))
92
InventoryFile('2324', 'bye.c', parent_id='123')
93
>>> i.add(InventoryDirectory('2325', 'wibble', '123'))
94
InventoryDirectory('2325', 'wibble', parent_id='123')
72
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
73
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
95
74
>>> i.path2id('src/wibble')
99
>>> i.add(InventoryFile('2326', 'wibble.c', '2325'))
100
InventoryFile('2326', 'wibble.c', parent_id='2325')
78
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
102
InventoryFile('2326', 'wibble.c', parent_id='2325')
103
>>> for path, entry in i.iter_entries():
104
... print path.replace('\\\\', '/') # for win32 os.sep
105
... assert i.path2id(path)
80
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
81
>>> for j in i.iter_entries():
83
... assert i.path2id(j[0])
111
89
src/wibble/wibble.c
112
>>> i.id2path('2326').replace('\\\\', '/')
113
91
'src/wibble/wibble.c'
93
TODO: Maybe also keep the full path of the entry, and the children?
94
But those depend on its position within a particular inventory, and
95
it would be nice not to need to hold the backpointer here.
98
# TODO: split InventoryEntry into subclasses for files,
99
# directories, etc etc.
116
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
117
'text_id', 'parent_id', 'children', 'executable',
120
def _add_text_to_weave(self, new_lines, parents, weave_store, transaction):
121
weave_store.add_text(self.file_id, self.revision, new_lines, parents,
124
def detect_changes(self, old_entry):
125
"""Return a (text_modified, meta_modified) from this to old_entry.
127
_read_tree_state must have been called on self and old_entry prior to
128
calling detect_changes.
132
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
133
output_to, reverse=False):
134
"""Perform a diff from this to to_entry.
136
text_diff will be used for textual difference calculation.
137
This is a template method, override _diff in child classes.
139
self._read_tree_state(tree.id2path(self.file_id), tree)
141
# cannot diff from one kind to another - you must do a removal
142
# and an add if they do not match.
143
assert self.kind == to_entry.kind
144
to_entry._read_tree_state(to_tree.id2path(to_entry.file_id),
146
self._diff(text_diff, from_label, tree, to_label, to_entry, to_tree,
149
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
150
output_to, reverse=False):
151
"""Perform a diff between two entries of the same kind."""
153
def find_previous_heads(self, previous_inventories, entry_weave):
154
"""Return the revisions and entries that directly precede this.
156
Returned as a map from revision to inventory entry.
158
This is a map containing the file revisions in all parents
159
for which the file exists, and its revision is not a parent of
160
any other. If the file is new, the set will be empty.
162
def get_ancestors(weave, entry):
163
return set(map(weave.idx_to_name,
164
weave.inclusions([weave.lookup(entry.revision)])))
167
for inv in previous_inventories:
168
if self.file_id in inv:
169
ie = inv[self.file_id]
170
assert ie.file_id == self.file_id
171
if ie.revision in heads:
172
# fixup logic, there was a bug in revision updates.
173
# with x bit support.
175
if heads[ie.revision].executable != ie.executable:
176
heads[ie.revision].executable = False
177
ie.executable = False
178
except AttributeError:
180
assert heads[ie.revision] == ie
182
# may want to add it.
183
# may already be covered:
184
already_present = 0 != len(
185
[head for head in heads
186
if ie.revision in head_ancestors[head]])
188
# an ancestor of a known head.
191
ancestors = get_ancestors(entry_weave, ie)
192
# may knock something else out:
193
check_heads = list(heads.keys())
194
for head in check_heads:
195
if head in ancestors:
196
# this head is not really a head
198
head_ancestors[ie.revision] = ancestors
199
heads[ie.revision] = ie
202
def get_tar_item(self, root, dp, now, tree):
203
"""Get a tarfile item and a file stream for its content."""
204
item = tarfile.TarInfo(os.path.join(root, dp))
205
# TODO: would be cool to actually set it to the timestamp of the
206
# revision it was last changed
208
fileobj = self._put_in_tar(item, tree)
212
"""Return true if the object this entry represents has textual data.
214
Note that textual data includes binary content.
216
Also note that all entries get weave files created for them.
217
This attribute is primarily used when upgrading from old trees that
218
did not have the weave index for all inventory entries.
222
def __init__(self, file_id, name, parent_id, text_id=None):
104
def __init__(self, file_id, name, kind, parent_id, text_id=None):
223
105
"""Create an InventoryEntry
225
107
The filename must be a single component, relative to the
226
108
parent directory; it cannot be a whole path or relative name.
228
>>> e = InventoryFile('123', 'hello.c', ROOT_ID)
110
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
233
>>> e = InventoryFile('123', 'src/hello.c', ROOT_ID)
115
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
234
116
Traceback (most recent call last):
235
InvalidEntryName: Invalid entry name: src/hello.c
117
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
237
assert isinstance(name, basestring), name
238
119
if '/' in name or '\\' in name:
239
raise InvalidEntryName(name=name)
240
self.executable = False
242
self.text_sha1 = None
243
self.text_size = None
120
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
244
122
self.file_id = file_id
246
125
self.text_id = text_id
247
126
self.parent_id = parent_id
248
self.symlink_target = None
250
def kind_character(self):
    """Return a short kind indicator useful for appending to names.

    The base implementation has no indicator to offer and always raises
    BzrError naming self.kind.
    """
    # Wrap the value in a tuple so that an unexpected tuple-valued kind is
    # still reported with %r instead of breaking the string formatting.
    raise BzrError('unknown kind %r' % (self.kind,))
254
known_kinds = ('file', 'directory', 'symlink', 'root_directory')
256
def _put_in_tar(self, item, tree):
257
"""populate item for stashing in a tar, and return the content stream.
259
If no content is available, return None.
261
raise BzrError("don't know how to export {%s} of kind %r" %
262
(self.file_id, self.kind))
264
def put_on_disk(self, dest, dp, tree):
265
"""Create a representation of self on disk in the prefix dest.
267
This is a template method - implement _put_on_disk in subclasses.
269
fullpath = appendpath(dest, dp)
270
self._put_on_disk(fullpath, tree)
271
mutter(" export {%s} kind %s to %s" % (self.file_id, self.kind, fullpath))
273
def _put_on_disk(self, fullpath, tree):
    """Put this entry onto disk at fullpath, from tree tree.

    The base implementation cannot export anything and always raises
    BzrError naming the entry's file_id and kind.
    """
    raise BzrError("don't know how to export {%s} of kind %r"
                   % (self.file_id, self.kind))
127
if kind == 'directory':
132
raise BzrError("unhandled entry kind %r" % kind)
277
136
def sorted_children(self):
278
137
l = self.children.items()
283
def versionable_kind(kind):
    """Return True if kind is one of 'file', 'directory' or 'symlink'."""
    return kind in ('file', 'directory', 'symlink')
286
def check(self, checker, rev_id, inv, tree):
287
"""Check this inventory entry is intact.
289
This is a template method, override _check for kind specific
292
if self.parent_id != None:
293
if not inv.has_id(self.parent_id):
294
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
295
% (self.parent_id, rev_id))
296
self._check(checker, rev_id, tree)
298
def _check(self, checker, rev_id, tree):
299
"""Check this inventory entry for kind specific errors."""
300
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
305
"""Clone this inventory entry."""
306
raise NotImplementedError
143
other = InventoryEntry(self.file_id, self.name, self.kind,
144
self.parent_id, text_id=self.text_id)
145
other.text_sha1 = self.text_sha1
146
other.text_size = self.text_size
147
# note that children are *not* copied; they're pulled across when
308
def _get_snapshot_change(self, previous_entries):
309
if len(previous_entries) > 1:
311
elif len(previous_entries) == 0:
314
return 'modified/renamed/reparented'
316
152
def __repr__(self):
317
return ("%s(%r, %r, parent_id=%r)"
153
return ("%s(%r, %r, kind=%r, parent_id=%r)"
318
154
% (self.__class__.__name__,
323
def snapshot(self, revision, path, previous_entries,
324
work_tree, weave_store, transaction):
325
"""Make a snapshot of this entry which may or may not have changed.
327
This means that all its fields are populated, that it has its
328
text stored in the text store or weave.
330
mutter('new parents of %s are %r', path, previous_entries)
331
self._read_tree_state(path, work_tree)
332
if len(previous_entries) == 1:
333
# cannot be unchanged unless there is only one parent file rev.
334
parent_ie = previous_entries.values()[0]
335
if self._unchanged(parent_ie):
336
mutter("found unchanged entry")
337
self.revision = parent_ie.revision
339
return self.snapshot_revision(revision, previous_entries,
340
work_tree, weave_store, transaction)
342
def snapshot_revision(self, revision, previous_entries, work_tree,
343
weave_store, transaction):
344
"""Record this revision unconditionally."""
345
mutter('new revision for {%s}', self.file_id)
346
self.revision = revision
347
change = self._get_snapshot_change(previous_entries)
348
self._snapshot_text(previous_entries, work_tree, weave_store,
352
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
353
"""Record the 'text' of this entry, whatever form that takes.
355
This default implementation simply adds an empty text.
357
mutter('storing file {%s} in revision {%s}',
358
self.file_id, self.revision)
359
self._add_text_to_weave([], file_parents, weave_store, transaction)
161
def to_element(self):
162
"""Convert to XML element"""
165
e.set('name', self.name)
166
e.set('file_id', self.file_id)
167
e.set('kind', self.kind)
169
if self.text_size != None:
170
e.set('text_size', '%d' % self.text_size)
172
for f in ['text_id', 'text_sha1']:
177
# to be conservative, we don't externalize the root pointers
178
# for now, leaving them as null in the xml form. in a future
179
# version it will be implied by nested elements.
180
if self.parent_id != ROOT_ID:
181
assert isinstance(self.parent_id, basestring)
182
e.set('parent_id', self.parent_id)
189
def from_element(cls, elt):
190
assert elt.tag == 'entry'
192
## original format inventories don't have a parent_id for
193
## nodes in the root directory, but it's cleaner to use one
195
parent_id = elt.get('parent_id')
196
if parent_id == None:
199
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
200
self.text_id = elt.get('text_id')
201
self.text_sha1 = elt.get('text_sha1')
203
## mutter("read inventoryentry: %r" % (elt.attrib))
205
v = elt.get('text_size')
206
self.text_size = v and int(v)
211
from_element = classmethod(from_element)
361
213
def __eq__(self, other):
362
214
if not isinstance(other, InventoryEntry):
363
215
return NotImplemented
365
return ((self.file_id == other.file_id)
366
and (self.name == other.name)
367
and (other.symlink_target == self.symlink_target)
368
and (self.text_sha1 == other.text_sha1)
369
and (self.text_size == other.text_size)
370
and (self.text_id == other.text_id)
371
and (self.parent_id == other.parent_id)
372
and (self.kind == other.kind)
373
and (self.revision == other.revision)
374
and (self.executable == other.executable)
217
return (self.file_id == other.file_id) \
218
and (self.name == other.name) \
219
and (self.text_sha1 == other.text_sha1) \
220
and (self.text_size == other.text_size) \
221
and (self.text_id == other.text_id) \
222
and (self.parent_id == other.parent_id) \
223
and (self.kind == other.kind)
377
226
def __ne__(self, other):
378
227
return not (self == other)
426
247
and (self.children == other.children)
429
class InventoryDirectory(InventoryEntry):
430
"""A directory in an inventory."""
432
def _check(self, checker, rev_id, tree):
433
"""See InventoryEntry._check"""
434
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
435
raise BzrCheckError('directory {%s} has text in revision {%s}'
436
% (self.file_id, rev_id))
439
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
440
other.revision = self.revision
441
# note that children are *not* copied; they're pulled across when
445
def __init__(self, file_id, name, parent_id):
446
super(InventoryDirectory, self).__init__(file_id, name, parent_id)
448
self.kind = 'directory'
450
def kind_character(self):
451
"""See InventoryEntry.kind_character."""
454
def _put_in_tar(self, item, tree):
455
"""See InventoryEntry._put_in_tar."""
456
item.type = tarfile.DIRTYPE
463
def _put_on_disk(self, fullpath, tree):
464
"""See InventoryEntry._put_on_disk."""
468
class InventoryFile(InventoryEntry):
469
"""A file in an inventory."""
471
def _check(self, checker, rev_id, tree):
472
"""See InventoryEntry._check"""
473
revision = self.revision
474
t = (self.file_id, revision)
475
if t in checker.checked_texts:
476
prev_sha = checker.checked_texts[t]
477
if prev_sha != self.text_sha1:
478
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
479
(self.file_id, rev_id))
481
checker.repeated_text_cnt += 1
483
mutter('check version {%s} of {%s}', rev_id, self.file_id)
484
file_lines = tree.get_file_lines(self.file_id)
485
checker.checked_text_cnt += 1
486
if self.text_size != sum(map(len, file_lines)):
487
raise BzrCheckError('text {%s} wrong size' % self.text_id)
488
if self.text_sha1 != sha_strings(file_lines):
489
raise BzrCheckError('text {%s} wrong sha1' % self.text_id)
490
checker.checked_texts[t] = self.text_sha1
493
other = InventoryFile(self.file_id, self.name, self.parent_id)
494
other.executable = self.executable
495
other.text_id = self.text_id
496
other.text_sha1 = self.text_sha1
497
other.text_size = self.text_size
498
other.revision = self.revision
501
def detect_changes(self, old_entry):
    """See InventoryEntry.detect_changes.

    Returns a (text_modified, meta_modified) pair: text_modified is True
    when the two text_sha1 values differ, meta_modified when the
    executable flags differ.  Both entries must already have a text_sha1.
    """
    # Identity comparison is the idiomatic test against None.
    assert self.text_sha1 is not None
    assert old_entry.text_sha1 is not None
    text_modified = (self.text_sha1 != old_entry.text_sha1)
    meta_modified = (self.executable != old_entry.executable)
    return text_modified, meta_modified
509
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
510
output_to, reverse=False):
511
"""See InventoryEntry._diff."""
512
from_text = tree.get_file(self.file_id).readlines()
514
to_text = to_tree.get_file(to_entry.file_id).readlines()
518
text_diff(from_label, from_text,
519
to_label, to_text, output_to)
521
text_diff(to_label, to_text,
522
from_label, from_text, output_to)
525
"""See InventoryEntry.has_text."""
528
def __init__(self, file_id, name, parent_id):
529
super(InventoryFile, self).__init__(file_id, name, parent_id)
532
def kind_character(self):
533
"""See InventoryEntry.kind_character."""
536
def _put_in_tar(self, item, tree):
537
"""See InventoryEntry._put_in_tar."""
538
item.type = tarfile.REGTYPE
539
fileobj = tree.get_file(self.file_id)
540
item.size = self.text_size
541
if tree.is_executable(self.file_id):
547
def _put_on_disk(self, fullpath, tree):
548
"""See InventoryEntry._put_on_disk."""
549
pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
550
if tree.is_executable(self.file_id):
551
os.chmod(fullpath, 0755)
553
def _read_tree_state(self, path, work_tree):
554
"""See InventoryEntry._read_tree_state."""
555
self.text_sha1 = work_tree.get_file_sha1(self.file_id)
556
self.executable = work_tree.is_executable(self.file_id)
558
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
559
"""See InventoryEntry._snapshot_text."""
560
mutter('storing file {%s} in revision {%s}',
561
self.file_id, self.revision)
562
# special case to avoid diffing on renames or
564
if (len(file_parents) == 1
565
and self.text_sha1 == file_parents.values()[0].text_sha1
566
and self.text_size == file_parents.values()[0].text_size):
567
previous_ie = file_parents.values()[0]
568
weave_store.add_identical_text(
569
self.file_id, previous_ie.revision,
570
self.revision, file_parents, transaction)
572
new_lines = work_tree.get_file(self.file_id).readlines()
573
self._add_text_to_weave(new_lines, file_parents, weave_store,
575
self.text_sha1 = sha_strings(new_lines)
576
self.text_size = sum(map(len, new_lines))
579
def _unchanged(self, previous_ie):
580
"""See InventoryEntry._unchanged."""
581
compatible = super(InventoryFile, self)._unchanged(previous_ie)
582
if self.text_sha1 != previous_ie.text_sha1:
585
# FIXME: 20050930 probe for the text size when getting sha1
586
# in _read_tree_state
587
self.text_size = previous_ie.text_size
588
if self.executable != previous_ie.executable:
593
class InventoryLink(InventoryEntry):
594
"""A symbolic link in an inventory."""
596
__slots__ = ['symlink_target']
598
def _check(self, checker, rev_id, tree):
599
"""See InventoryEntry._check"""
600
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
601
raise BzrCheckError('symlink {%s} has text in revision {%s}'
602
% (self.file_id, rev_id))
603
if self.symlink_target == None:
604
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
605
% (self.file_id, rev_id))
608
other = InventoryLink(self.file_id, self.name, self.parent_id)
609
other.symlink_target = self.symlink_target
610
other.revision = self.revision
613
def detect_changes(self, old_entry):
614
"""See InventoryEntry.detect_changes."""
615
# FIXME: which _modified field should we use ? RBC 20051003
616
text_modified = (self.symlink_target != old_entry.symlink_target)
618
mutter(" symlink target changed")
619
meta_modified = False
620
return text_modified, meta_modified
622
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
623
output_to, reverse=False):
624
"""See InventoryEntry._diff."""
625
from_text = self.symlink_target
626
if to_entry is not None:
627
to_text = to_entry.symlink_target
632
print >>output_to, '=== target changed %r => %r' % (from_text, to_text)
635
print >>output_to, '=== target was %r' % self.symlink_target
637
print >>output_to, '=== target is %r' % self.symlink_target
639
def __init__(self, file_id, name, parent_id):
    """Create a symlink entry.

    The arguments are passed unchanged to the base-class initializer,
    after which the entry's kind is set to 'symlink'.
    """
    super(InventoryLink, self).__init__(file_id, name, parent_id)
    self.kind = 'symlink'
643
def kind_character(self):
644
"""See InventoryEntry.kind_character."""
647
def _put_in_tar(self, item, tree):
648
"""See InventoryEntry._put_in_tar."""
649
iterm.type = tarfile.SYMTYPE
653
item.linkname = self.symlink_target
656
def _put_on_disk(self, fullpath, tree):
657
"""See InventoryEntry._put_on_disk."""
659
os.symlink(self.symlink_target, fullpath)
661
raise BzrError("Failed to create symlink %r -> %r, error: %s" % (fullpath, self.symlink_target, e))
663
def _read_tree_state(self, path, work_tree):
664
"""See InventoryEntry._read_tree_state."""
665
self.symlink_target = work_tree.get_symlink_target(self.file_id)
667
def _unchanged(self, previous_ie):
668
"""See InventoryEntry._unchanged."""
669
compatible = super(InventoryLink, self)._unchanged(previous_ie)
670
if self.symlink_target != previous_ie.symlink_target:
675
class Inventory(object):
251
class Inventory(XMLMixin):
676
252
"""Inventory of versioned files in a tree.
678
254
This describes which file_id is present at each point in the tree,