15
14
# along with this program; if not, write to the Free Software
16
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
"""Inventories map files to their name in a revision."""
20
# TODO: Maybe store inventory_id in the file? Not really needed.
22
__copyright__ = "Copyright (C) 2005 Canonical Ltd."
23
__author__ = "Martin Pool <mbp@canonical.com>"
25
import sys, os.path, types
29
from cElementTree import Element, ElementTree, SubElement
31
from elementtree.ElementTree import Element, ElementTree, SubElement
33
from xml import XMLMixin
34
from errors import bailout
18
# TODO: Maybe also keep the full path of the entry, and the children?
19
# But those depend on its position within a particular inventory, and
20
# it would be nice not to need to hold the backpointer here.
22
# TODO: Perhaps split InventoryEntry into subclasses for files,
23
# directories, etc etc.
26
# This should really be an id randomly assigned when the tree is
27
# created, but it's not for now.
31
import sys, os.path, types, re
34
from bzrlib.errors import BzrError, BzrCheckError
37
36
from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath
38
37
from bzrlib.trace import mutter
38
from bzrlib.errors import NotVersionedError
40
class InventoryEntry(XMLMixin):
41
class InventoryEntry(object):
41
42
"""Description of a versioned file.
43
44
An InventoryEntry has the following fields, which are also
44
45
present in the XML inventory-entry element:
47
* *name*: (only the basename within the directory, must not
49
* *kind*: "directory" or "file"
50
* *directory_id*: (if absent/null means the branch root directory)
51
* *text_sha1*: only for files
52
* *text_size*: in bytes, only for files
53
* *text_id*: identifier for the text version, only for files
55
InventoryEntries can also exist inside a WorkingTree
56
inventory, in which case they are not yet bound to a
57
particular revision of the file. In that case the text_sha1,
58
text_size and text_id are absent.
50
(within the parent directory)
56
file_id of the parent directory, or ROOT_ID
59
the revision_id in which the name or parent of this file was
63
sha-1 of the text of the file
66
size in bytes of the text of the file
69
the revision_id in which the text of this file was introduced
71
(reading a version 4 tree created a text_id field.)
61
73
>>> i = Inventory()
63
>>> i.add(InventoryEntry('123', 'src', kind='directory'))
64
>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))
76
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
77
InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')
78
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
79
InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')
65
80
>>> for j in i.iter_entries():
68
('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))
83
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
69
84
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
70
>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))
85
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
71
86
Traceback (most recent call last):
73
BzrError: ('inventory already contains entry with id {2323}', [])
74
>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))
75
>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))
88
BzrError: inventory already contains entry with id {2323}
89
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
90
InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')
91
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
92
InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')
76
93
>>> i.path2id('src/wibble')
80
>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))
97
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
98
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
82
100
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
83
101
>>> for j in i.iter_entries():
91
109
src/wibble/wibble.c
92
110
>>> i.id2path('2326')
93
111
'src/wibble/wibble.c'
95
:todo: Maybe also keep the full path of the entry, and the children?
96
But those depend on its position within a particular inventory, and
97
it would be nice not to need to hold the backpointer here.
99
def __init__(self, file_id, name, kind='file', text_id=None,
114
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
115
'text_id', 'parent_id', 'children',
116
'text_version', 'entry_version', ]
119
def __init__(self, file_id, name, kind, parent_id, text_id=None):
101
120
"""Create an InventoryEntry
103
122
The filename must be a single component, relative to the
104
123
parent directory; it cannot be a whole path or relative name.
106
>>> e = InventoryEntry('123', 'hello.c')
125
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
111
>>> e = InventoryEntry('123', 'src/hello.c')
130
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
112
131
Traceback (most recent call last):
113
BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
132
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
116
if len(splitpath(name)) != 1:
117
bailout('InventoryEntry name is not a simple filename: %r'
134
assert isinstance(name, basestring), name
135
if '/' in name or '\\' in name:
136
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
138
self.text_version = None
139
self.entry_version = None
140
self.text_sha1 = None
141
self.text_size = None
120
142
self.file_id = file_id
122
assert kind in ['file', 'directory']
124
145
self.text_id = text_id
125
146
self.parent_id = parent_id
126
self.text_sha1 = None
127
self.text_size = None
147
if kind == 'directory':
152
raise BzrError("unhandled entry kind %r" % kind)
156
def sorted_children(self):
157
l = self.children.items()
131
163
other = InventoryEntry(self.file_id, self.name, self.kind,
132
self.text_id, self.parent_id)
165
other.text_id = self.text_id
133
166
other.text_sha1 = self.text_sha1
134
167
other.text_size = self.text_size
168
# note that children are *not* copied; they're pulled across when
147
def to_element(self):
148
"""Convert to XML element"""
151
e.set('name', self.name)
152
e.set('file_id', self.file_id)
153
e.set('kind', self.kind)
155
if self.text_size is not None:
156
e.set('text_size', '%d' % self.text_size)
158
for f in ['text_id', 'text_sha1', 'parent_id']:
168
def from_element(cls, elt):
169
assert elt.tag == 'entry'
170
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))
171
self.text_id = elt.get('text_id')
172
self.text_sha1 = elt.get('text_sha1')
173
self.parent_id = elt.get('parent_id')
175
## mutter("read inventoryentry: %r" % (elt.attrib))
177
v = elt.get('text_size')
178
self.text_size = v and int(v)
183
from_element = classmethod(from_element)
185
def __cmp__(self, other):
182
def __eq__(self, other):
188
183
if not isinstance(other, InventoryEntry):
189
184
return NotImplemented
191
return cmp(self.file_id, other.file_id) \
192
or cmp(self.name, other.name) \
193
or cmp(self.text_sha1, other.text_sha1) \
194
or cmp(self.text_size, other.text_size) \
195
or cmp(self.text_id, other.text_id) \
196
or cmp(self.parent_id, other.parent_id) \
197
or cmp(self.kind, other.kind)
201
class Inventory(XMLMixin):
186
return (self.file_id == other.file_id) \
187
and (self.name == other.name) \
188
and (self.text_sha1 == other.text_sha1) \
189
and (self.text_size == other.text_size) \
190
and (self.text_id == other.text_id) \
191
and (self.parent_id == other.parent_id) \
192
and (self.kind == other.kind) \
193
and (self.text_version == other.text_version) \
194
and (self.entry_version == other.entry_version)
197
def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    if self == other:
        return False
    return True
201
raise ValueError('not hashable')
205
class RootEntry(InventoryEntry):
206
def __init__(self, file_id):
207
self.file_id = file_id
209
self.kind = 'root_directory'
210
self.parent_id = None
213
def __eq__(self, other):
    """Compare two root entries.

    Root entries are equal when both their file_id and their children
    compare equal.  Comparison against anything that is not a RootEntry
    is declined by returning NotImplemented.
    """
    if isinstance(other, RootEntry):
        same_id = (self.file_id == other.file_id)
        return same_id and (self.children == other.children)
    return NotImplemented
222
class Inventory(object):
202
223
"""Inventory of versioned files in a tree.
204
An Inventory acts like a set of InventoryEntry items. You can
205
also look files up by their file_id or name.
207
May be read from and written to a metadata file in a tree. To
208
manipulate the inventory (for example to add a file), it is read
209
in, modified, and then written back out.
225
This describes which file_id is present at each point in the tree,
226
and possibly the SHA-1 or other information about the file.
227
Entries can be looked up either by path or by file_id.
211
229
The inventory represents a typical unix file tree, with
212
230
directories containing files and subdirectories. We never store
241
254
>>> [x[0] for x in inv.iter_entries()]
244
>>> inv.write_xml(sys.stdout)
246
<entry file_id="123-123" kind="file" name="hello.c" />
256
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
257
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
258
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')
251
## TODO: Clear up handling of files in subdirectories; we probably
252
## do want to be able to just look them up by name but this
253
## probably means gradually walking down the path, looking up as we go.
255
## TODO: Make sure only canonical filenames are stored.
257
## TODO: Do something sensible about the possible collisions on
258
## case-losing filesystems. Perhaps we should just always forbid
261
## _tree should probably just be stored as
262
## InventoryEntry._children on each directory.
260
def __init__(self, root_id=ROOT_ID):
265
261
"""Create or read an inventory.
267
263
If a working directory is specified, the inventory is read
268
264
from there. If the file is specified, read from that. If not,
269
265
the inventory is created empty.
267
The inventory is created with a default root directory, with
273
# _tree is indexed by parent_id; at each level a map from name
274
# to ie. The None entry is the root.
275
self._tree = {None: {}}
270
# We are letting Branch(init=True) create a unique inventory
271
# root id. Rather than generating a random one here.
273
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
274
self.root = RootEntry(root_id)
275
self._byid = {self.root.file_id: self.root}
279
other = Inventory(self.root.file_id)
280
for entry in self._byid.itervalues():
281
if entry == self.root:
283
other.add(entry.copy())
278
287
def __iter__(self):
284
293
return len(self._byid)
287
def iter_entries(self, parent_id=None):
296
def iter_entries(self, from_dir=None):
288
297
"""Return (path, entry) pairs, in order by name."""
289
kids = self._tree[parent_id].items()
301
elif isinstance(from_dir, basestring):
302
from_dir = self._byid[from_dir]
304
kids = from_dir.children.items()
291
306
for name, ie in kids:
293
308
if ie.kind == 'directory':
294
for cn, cie in self.iter_entries(parent_id=ie.file_id):
295
yield joinpath([name, cn]), cie
298
def directories(self, include_root=True):
299
"""Return (path, entry) pairs for all directories.
303
for path, entry in self.iter_entries():
304
if entry.kind == 'directory':
309
for cn, cie in self.iter_entries(from_dir=ie.file_id):
310
yield os.path.join(name, cn), cie
314
"""Return list of (path, ie) for all entries except the root.
316
This may be faster than iter_entries.
319
def descend(dir_ie, dir_path):
320
kids = dir_ie.children.items()
322
for name, ie in kids:
323
child_path = os.path.join(dir_path, name)
324
accum.append((child_path, ie))
325
if ie.kind == 'directory':
326
descend(ie, child_path)
328
descend(self.root, '')
332
def directories(self):
333
"""Return (path, entry) pairs for all directories, including the root.
336
def descend(parent_ie, parent_path):
337
accum.append((parent_path, parent_ie))
339
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
342
for name, child_ie in kids:
343
child_path = os.path.join(parent_path, name)
344
descend(child_ie, child_path)
345
descend(self.root, '')
309
def children(self, parent_id):
310
"""Return entries that are direct children of parent_id."""
311
return self._tree[parent_id]
315
# TODO: return all paths and entries
318
350
def __contains__(self, file_id):
319
351
"""True if this entry contains a file with given id.
321
353
>>> inv = Inventory()
322
>>> inv.add(InventoryEntry('123', 'foo.c'))
354
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
355
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
332
365
"""Return the entry for given file_id.
334
367
>>> inv = Inventory()
335
>>> inv.add(InventoryEntry('123123', 'hello.c'))
368
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
369
InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')
336
370
>>> inv['123123'].name
339
return self._byid[file_id]
374
return self._byid[file_id]
377
raise BzrError("can't look up file_id None")
379
raise BzrError("file_id {%s} not in inventory" % file_id)
382
def get_file_kind(self, file_id):
    """Return the ``kind`` attribute of the entry stored under file_id.

    The lookup goes straight to the ``_byid`` mapping, so an unknown
    file_id propagates that mapping's lookup error.
    """
    entry = self._byid[file_id]
    return entry.kind
385
def get_child(self, parent_id, filename):
    """Return the child called filename of the entry parent_id.

    The parent is looked up with ``self[parent_id]``; the result is
    whatever its ``children`` mapping holds under filename, or None
    when there is no such child.
    """
    parent_entry = self[parent_id]
    siblings = parent_entry.children
    return siblings.get(filename)
342
389
def add(self, entry):
343
390
"""Add entry to inventory.
345
392
To add a file to a branch ready to be committed, use Branch.add,
347
if entry.file_id in self:
348
bailout("inventory already contains entry with id {%s}" % entry.file_id)
350
if entry.parent_id != None:
351
if entry.parent_id not in self:
352
bailout("parent_id %s of new entry not found in inventory"
355
if self._tree[entry.parent_id].has_key(entry.name):
356
bailout("%s is already versioned"
357
% appendpath(self.id2path(entry.parent_id), entry.name))
395
Returns the new entry object.
397
if entry.file_id in self._byid:
398
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
400
if entry.parent_id == ROOT_ID or entry.parent_id is None:
401
entry.parent_id = self.root.file_id
404
parent = self._byid[entry.parent_id]
406
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
408
if parent.children.has_key(entry.name):
409
raise BzrError("%s is already versioned" %
410
appendpath(self.id2path(parent.file_id), entry.name))
359
412
self._byid[entry.file_id] = entry
360
self._tree[entry.parent_id][entry.name] = entry
362
if entry.kind == 'directory':
363
self._tree[entry.file_id] = {}
413
parent.children[entry.name] = entry
366
417
def add_path(self, relpath, kind, file_id=None):
367
418
"""Add entry from a path.
369
The immediate parent must already be versioned"""
420
The immediate parent must already be versioned.
422
Returns the new entry object."""
423
from bzrlib.branch import gen_file_id
370
425
parts = bzrlib.osutils.splitpath(relpath)
371
426
if len(parts) == 0:
372
bailout("cannot re-add root of inventory")
375
file_id = bzrlib.branch.gen_file_id(relpath)
377
parent_id = self.path2id(parts[:-1])
427
raise BzrError("cannot re-add root of inventory")
430
file_id = gen_file_id(relpath)
432
parent_path = parts[:-1]
433
parent_id = self.path2id(parent_path)
434
if parent_id == None:
435
raise NotVersionedError(parent_path)
378
437
ie = InventoryEntry(file_id, parts[-1],
379
438
kind=kind, parent_id=parent_id)
380
439
return self.add(ie)
394
454
ie = self[file_id]
396
assert self._tree[ie.parent_id][ie.name] == ie
456
assert self[ie.parent_id].children[ie.name] == ie
398
458
# TODO: Test deleting all children; maybe hoist to a separate
399
459
# deltree method?
400
460
if ie.kind == 'directory':
401
for cie in self._tree[file_id].values():
461
for cie in ie.children.values():
402
462
del self[cie.file_id]
403
del self._tree[file_id]
405
465
del self._byid[file_id]
406
del self._tree[ie.parent_id][ie.name]
410
return Set(self._byid)
413
def to_element(self):
414
"""Convert to XML Element"""
415
e = Element('inventory')
417
for path, ie in self.iter_entries():
418
e.append(ie.to_element())
422
def from_element(cls, elt):
423
"""Construct from XML Element
425
>>> inv = Inventory()
426
>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))
427
>>> elt = inv.to_element()
428
>>> inv2 = Inventory.from_element(elt)
432
assert elt.tag == 'inventory'
435
o.add(InventoryEntry.from_element(e))
438
from_element = classmethod(from_element)
441
def __cmp__(self, other):
466
del self[ie.parent_id].children[ie.name]
469
def __eq__(self, other):
442
470
"""Compare two sets by comparing their contents.
444
472
>>> i1 = Inventory()
445
473
>>> i2 = Inventory()
448
>>> i1.add(InventoryEntry('123', 'foo'))
476
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
477
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
451
>>> i2.add(InventoryEntry('123', 'foo'))
480
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
481
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
458
485
if not isinstance(other, Inventory):
459
486
return NotImplemented
461
if self.id_set() ^ other.id_set():
464
for file_id in self._byid:
465
c = cmp(self[file_id], other[file_id])
471
def id2path(self, file_id):
472
"""Return as a list the path to file_id."""
488
if len(self._byid) != len(other._byid):
489
# shortcut: obviously not the same
492
return self._byid == other._byid
495
def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    if self == other:
        return False
    return True
500
raise ValueError('not hashable')
503
def get_idpath(self, file_id):
504
"""Return a list of file_ids for the path to an entry.
506
The list contains one element for each directory followed by
507
the id of the file itself. So the length of the returned list
508
is equal to the depth of the file in the tree, counting the
509
root directory as depth 1.
474
512
while file_id != None:
514
ie = self._byid[file_id]
516
raise BzrError("file_id {%s} not found in inventory" % file_id)
517
p.insert(0, ie.file_id)
477
518
file_id = ie.parent_id
522
def id2path(self, file_id):
    """Return the path to file_id as a single string.

    The names of the entry and of each of its ancestors below the
    inventory root are joined with ``os.sep``.  (This is a string, not
    a list; use get_idpath() for the list of ids.)
    """
    # get_idpath() runs from the root down to file_id; the root itself
    # contributes no path component, so skip the first element.
    ids_below_root = self.get_idpath(file_id)[1:]
    names = [self._byid[fid].name for fid in ids_below_root]
    return os.sep.join(names)
515
565
def has_id(self, file_id):
    """Return True if the inventory holds an entry with this file_id.

    No type check is made on file_id: an id that is not present
    (whatever its type) simply yields False.
    """
    # ``in`` replaces dict.has_key(), which no longer exists on
    # Python 3 and is equivalent on Python 2.
    return file_id in self._byid
523
if __name__ == '__main__':
524
import doctest, inventory
525
doctest.testmod(inventory)
569
def rename(self, file_id, new_parent_id, new_name):
    """Rename and/or re-parent an entry within the inventory.

    Either the name, the parent directory, or both may change.  Only
    the inventory is updated; the working file is not moved.

    Raises BzrError if new_name is not a valid name, if the new parent
    already has a child called new_name, or if the new parent is the
    entry itself or lies underneath it.
    """
    if not is_valid_name(new_name):
        raise BzrError("not an acceptable filename: %r" % new_name)

    target_dir = self._byid[new_parent_id]
    if new_name in target_dir.children:
        target_path = self.id2path(new_parent_id)
        raise BzrError("%r already exists in %r" % (new_name, target_path))

    # The id path of the destination lists every ancestor of it (and the
    # destination itself); finding file_id there means the move would
    # place the entry inside itself.
    if file_id in self.get_idpath(new_parent_id):
        moved_path = self.id2path(file_id)
        target_path = self.id2path(new_parent_id)
        raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
                       % (moved_path, target_path))

    entry = self._byid[file_id]
    source_dir = self._byid[entry.parent_id]

    # TODO: Don't leave things messed up if this fails

    # Unlink under the old name first, then link under the new one.
    del source_dir.children[entry.name]
    target_dir.children[new_name] = entry

    entry.name = new_name
    entry.parent_id = new_parent_id
603
# Compiled once at import time instead of inside every call.
_NAME_RE = re.compile(r'^[^/\\]+$')


def is_valid_name(name):
    """Return True if name is usable as a single path component.

    A name is accepted when it is non-empty and contains neither a
    forward slash nor a backslash.
    """
    return bool(_NAME_RE.match(name))