14
15
# along with this program; if not, write to the Free Software
15
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# This should really be an id randomly assigned when the tree is
19
# created, but it's not for now.
23
import sys, os.path, types, re
18
"""Inventories map files to their name in a revision."""
20
# TODO: Maybe store inventory_id in the file? Not really needed.
22
__copyright__ = "Copyright (C) 2005 Canonical Ltd."
23
__author__ = "Martin Pool <mbp@canonical.com>"
25
import sys, os.path, types
29
from cElementTree import Element, ElementTree, SubElement
31
from elementtree.ElementTree import Element, ElementTree, SubElement
33
from xml import XMLMixin
34
from errors import bailout
26
from bzrlib.errors import BzrError, BzrCheckError
28
37
from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath
29
38
from bzrlib.trace import mutter
30
from bzrlib.errors import NotVersionedError
33
class InventoryEntry(object):
40
class InventoryEntry(XMLMixin):
34
41
"""Description of a versioned file.
36
43
An InventoryEntry has the following fields, which are also
54
61
>>> i = Inventory()
57
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
58
InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')
59
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
60
InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')
63
>>> i.add(InventoryEntry('123', 'src', kind='directory'))
64
>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))
61
65
>>> for j in i.iter_entries():
64
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
68
('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))
65
69
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
66
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
70
>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))
67
71
Traceback (most recent call last):
69
BzrError: inventory already contains entry with id {2323}
70
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
71
InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')
72
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
73
InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')
73
BzrError: ('inventory already contains entry with id {2323}', [])
74
>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))
75
>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))
74
76
>>> i.path2id('src/wibble')
78
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
79
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
80
>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))
81
82
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
82
83
>>> for j in i.iter_entries():
91
92
>>> i.id2path('2326')
92
93
'src/wibble/wibble.c'
94
TODO: Maybe also keep the full path of the entry, and the children?
95
:todo: Maybe also keep the full path of the entry, and the children?
95
96
But those depend on its position within a particular inventory, and
96
97
it would be nice not to need to hold the backpointer here.
99
# TODO: split InventoryEntry into subclasses for files,
100
# directories, etc etc.
102
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
103
'text_id', 'parent_id', 'children', ]
105
def __init__(self, file_id, name, kind, parent_id, text_id=None):
99
def __init__(self, file_id, name, kind='file', text_id=None,
106
101
"""Create an InventoryEntry
108
103
The filename must be a single component, relative to the
109
104
parent directory; it cannot be a whole path or relative name.
111
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
106
>>> e = InventoryEntry('123', 'hello.c')
116
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
111
>>> e = InventoryEntry('123', 'src/hello.c')
117
112
Traceback (most recent call last):
118
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
113
BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
120
if '/' in name or '\\' in name:
121
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
123
self.text_sha1 = None
124
self.text_size = None
116
if len(splitpath(name)) != 1:
117
bailout('InventoryEntry name is not a simple filename: %r'
126
120
self.file_id = file_id
122
assert kind in ['file', 'directory']
129
124
self.text_id = text_id
130
125
self.parent_id = parent_id
131
if kind == 'directory':
136
raise BzrError("unhandled entry kind %r" % kind)
140
def sorted_children(self):
141
l = self.children.items()
126
self.text_sha1 = None
127
self.text_size = None
147
131
other = InventoryEntry(self.file_id, self.name, self.kind,
148
self.parent_id, text_id=self.text_id)
132
self.text_id, self.parent_id)
149
133
other.text_sha1 = self.text_sha1
150
134
other.text_size = self.text_size
151
# note that children are *not* copied; they're pulled across when
165
147
def to_element(self):
166
148
"""Convert to XML element"""
167
from bzrlib.xml import Element
169
149
e = Element('entry')
171
151
e.set('name', self.name)
172
152
e.set('file_id', self.file_id)
173
153
e.set('kind', self.kind)
175
if self.text_size != None:
155
if self.text_size is not None:
176
156
e.set('text_size', '%d' % self.text_size)
178
for f in ['text_id', 'text_sha1']:
158
for f in ['text_id', 'text_sha1', 'parent_id']:
179
159
v = getattr(self, f)
183
# to be conservative, we don't externalize the root pointers
184
# for now, leaving them as null in the xml form. in a future
185
# version it will be implied by nested elements.
186
if self.parent_id != ROOT_ID:
187
assert isinstance(self.parent_id, basestring)
188
e.set('parent_id', self.parent_id)
195
168
def from_element(cls, elt):
196
169
assert elt.tag == 'entry'
198
## original format inventories don't have a parent_id for
199
## nodes in the root directory, but it's cleaner to use one
201
parent_id = elt.get('parent_id')
202
if parent_id == None:
205
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
170
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))
206
171
self.text_id = elt.get('text_id')
207
172
self.text_sha1 = elt.get('text_sha1')
173
self.parent_id = elt.get('parent_id')
209
175
## mutter("read inventoryentry: %r" % (elt.attrib))
217
183
from_element = classmethod(from_element)
219
def __eq__(self, other):
185
def __cmp__(self, other):
220
188
if not isinstance(other, InventoryEntry):
221
189
return NotImplemented
223
return (self.file_id == other.file_id) \
224
and (self.name == other.name) \
225
and (self.text_sha1 == other.text_sha1) \
226
and (self.text_size == other.text_size) \
227
and (self.text_id == other.text_id) \
228
and (self.parent_id == other.parent_id) \
229
and (self.kind == other.kind)
232
def __ne__(self, other):
233
return not (self == other)
236
raise ValueError('not hashable')
240
class RootEntry(InventoryEntry):
241
def __init__(self, file_id):
242
self.file_id = file_id
244
self.kind = 'root_directory'
245
self.parent_id = None
248
def __eq__(self, other):
249
if not isinstance(other, RootEntry):
250
return NotImplemented
252
return (self.file_id == other.file_id) \
253
and (self.children == other.children)
257
class Inventory(object):
191
return cmp(self.file_id, other.file_id) \
192
or cmp(self.name, other.name) \
193
or cmp(self.text_sha1, other.text_sha1) \
194
or cmp(self.text_size, other.text_size) \
195
or cmp(self.text_id, other.text_id) \
196
or cmp(self.parent_id, other.parent_id) \
197
or cmp(self.kind, other.kind)
201
class Inventory(XMLMixin):
258
202
"""Inventory of versioned files in a tree.
260
This describes which file_id is present at each point in the tree,
261
and possibly the SHA-1 or other information about the file.
262
Entries can be looked up either by path or by file_id.
204
An Inventory acts like a set of InventoryEntry items. You can
205
also look files up by their file_id or name.
207
May be read from and written to a metadata file in a tree. To
208
manipulate the inventory (for example to add a file), it is read
209
in, modified, and then written back out.
264
211
The inventory represents a typical unix file tree, with
265
212
directories containing files and subdirectories. We never store
269
216
returned quickly.
271
218
InventoryEntry objects must not be modified after they are
272
inserted, other than through the Inventory API.
274
221
>>> inv = Inventory()
275
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
276
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT')
222
>>> inv.write_xml(sys.stdout)
225
>>> inv.add(InventoryEntry('123-123', 'hello.c'))
277
226
>>> inv['123-123'].name
228
>>> for file_id in inv: print file_id
280
232
May be treated as an iterator or set to look up file ids:
289
241
>>> [x[0] for x in inv.iter_entries()]
291
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
292
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
293
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')
244
>>> inv.write_xml(sys.stdout)
246
<entry file_id="123-123" kind="file" name="hello.c" />
295
def __init__(self, root_id=ROOT_ID):
251
## TODO: Clear up handling of files in subdirectories; we probably
252
## do want to be able to just look them up by name but this
253
## probably means gradually walking down the path, looking up as we go.
255
## TODO: Make sure only canonical filenames are stored.
257
## TODO: Do something sensible about the possible collisions on
258
## case-losing filesystems. Perhaps we should just always forbid
261
## _tree should probably just be stored as
262
## InventoryEntry._children on each directory.
296
265
"""Create or read an inventory.
298
267
If a working directory is specified, the inventory is read
299
268
from there. If the file is specified, read from that. If not,
300
269
the inventory is created empty.
302
The inventory is created with a default root directory, with
305
# We are letting Branch(init=True) create a unique inventory
306
# root id. Rather than generating a random one here.
308
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
309
self.root = RootEntry(root_id)
310
self._byid = {self.root.file_id: self.root}
273
# _tree is indexed by parent_id; at each level a map from name
274
# to ie. The None entry is the root.
275
self._tree = {None: {}}
313
278
def __iter__(self):
319
284
return len(self._byid)
322
def iter_entries(self, from_dir=None):
287
def iter_entries(self, parent_id=None):
323
288
"""Return (path, entry) pairs, in order by name."""
327
elif isinstance(from_dir, basestring):
328
from_dir = self._byid[from_dir]
330
kids = from_dir.children.items()
289
kids = self._tree[parent_id].items()
332
291
for name, ie in kids:
334
293
if ie.kind == 'directory':
335
for cn, cie in self.iter_entries(from_dir=ie.file_id):
336
yield os.path.join(name, cn), cie
340
"""Return list of (path, ie) for all entries except the root.
342
This may be faster than iter_entries.
345
def descend(dir_ie, dir_path):
346
kids = dir_ie.children.items()
348
for name, ie in kids:
349
child_path = os.path.join(dir_path, name)
350
accum.append((child_path, ie))
351
if ie.kind == 'directory':
352
descend(ie, child_path)
354
descend(self.root, '')
358
def directories(self):
359
"""Return (path, entry) pairs for all directories, including the root.
362
def descend(parent_ie, parent_path):
363
accum.append((parent_path, parent_ie))
365
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
368
for name, child_ie in kids:
369
child_path = os.path.join(parent_path, name)
370
descend(child_ie, child_path)
371
descend(self.root, '')
294
for cn, cie in self.iter_entries(parent_id=ie.file_id):
295
yield joinpath([name, cn]), cie
298
def directories(self, include_root=True):
299
"""Return (path, entry) pairs for all directories.
303
for path, entry in self.iter_entries():
304
if entry.kind == 'directory':
309
def children(self, parent_id):
310
"""Return entries that are direct children of parent_id."""
311
return self._tree[parent_id]
315
# TODO: return all paths and entries
376
318
def __contains__(self, file_id):
377
319
"""True if this entry contains a file with given id.
379
321
>>> inv = Inventory()
380
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
381
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
322
>>> inv.add(InventoryEntry('123', 'foo.c'))
391
332
"""Return the entry for given file_id.
393
334
>>> inv = Inventory()
394
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
395
InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')
335
>>> inv.add(InventoryEntry('123123', 'hello.c'))
396
336
>>> inv['123123'].name
400
return self._byid[file_id]
403
raise BzrError("can't look up file_id None")
405
raise BzrError("file_id {%s} not in inventory" % file_id)
408
def get_file_kind(self, file_id):
409
return self._byid[file_id].kind
411
def get_child(self, parent_id, filename):
412
return self[parent_id].children.get(filename)
339
return self._byid[file_id]
415
342
def add(self, entry):
418
345
To add a file to a branch ready to be committed, use Branch.add,
419
346
which calls this."""
420
if entry.file_id in self._byid:
421
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
423
if entry.parent_id == ROOT_ID or entry.parent_id is None:
424
entry.parent_id = self.root.file_id
427
parent = self._byid[entry.parent_id]
429
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
431
if parent.children.has_key(entry.name):
432
raise BzrError("%s is already versioned" %
433
appendpath(self.id2path(parent.file_id), entry.name))
347
if entry.file_id in self:
348
bailout("inventory already contains entry with id {%s}" % entry.file_id)
350
if entry.parent_id != None:
351
if entry.parent_id not in self:
352
bailout("parent_id %s of new entry not found in inventory"
355
if self._tree[entry.parent_id].has_key(entry.name):
356
bailout("%s is already versioned"
357
% appendpath(self.id2path(entry.parent_id), entry.name))
435
359
self._byid[entry.file_id] = entry
436
parent.children[entry.name] = entry
360
self._tree[entry.parent_id][entry.name] = entry
362
if entry.kind == 'directory':
363
self._tree[entry.file_id] = {}
440
366
def add_path(self, relpath, kind, file_id=None):
441
367
"""Add entry from a path.
443
369
The immediate parent must already be versioned"""
444
from bzrlib.branch import gen_file_id
446
370
parts = bzrlib.osutils.splitpath(relpath)
447
371
if len(parts) == 0:
448
raise BzrError("cannot re-add root of inventory")
451
file_id = gen_file_id(relpath)
453
parent_path = parts[:-1]
454
parent_id = self.path2id(parent_path)
455
if parent_id == None:
456
raise NotVersionedError(parent_path)
372
bailout("cannot re-add root of inventory")
375
file_id = bzrlib.branch.gen_file_id(relpath)
377
parent_id = self.path2id(parts[:-1])
458
378
ie = InventoryEntry(file_id, parts[-1],
459
379
kind=kind, parent_id=parent_id)
460
380
return self.add(ie)
475
394
ie = self[file_id]
477
assert self[ie.parent_id].children[ie.name] == ie
396
assert self._tree[ie.parent_id][ie.name] == ie
479
398
# TODO: Test deleting all children; maybe hoist to a separate
480
399
# deltree method?
481
400
if ie.kind == 'directory':
482
for cie in ie.children.values():
401
for cie in self._tree[file_id].values():
483
402
del self[cie.file_id]
403
del self._tree[file_id]
486
405
del self._byid[file_id]
487
del self[ie.parent_id].children[ie.name]
406
del self._tree[ie.parent_id][ie.name]
410
return Set(self._byid)
490
413
def to_element(self):
491
414
"""Convert to XML Element"""
492
from bzrlib.xml import Element
494
415
e = Element('inventory')
496
if self.root.file_id not in (None, ROOT_ID):
497
e.set('file_id', self.root.file_id)
498
417
for path, ie in self.iter_entries():
499
418
e.append(ie.to_element())
503
422
def from_element(cls, elt):
504
423
"""Construct from XML Element
506
425
>>> inv = Inventory()
507
>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))
508
InventoryEntry('foo.c-123981239', 'foo.c', kind='file', parent_id='TREE_ROOT')
426
>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))
509
427
>>> elt = inv.to_element()
510
428
>>> inv2 = Inventory.from_element(elt)
514
# XXXX: doctest doesn't run this properly under python2.3
515
432
assert elt.tag == 'inventory'
516
root_id = elt.get('file_id') or ROOT_ID
519
ie = InventoryEntry.from_element(e)
520
if ie.parent_id == ROOT_ID:
521
ie.parent_id = root_id
435
o.add(InventoryEntry.from_element(e))
525
438
from_element = classmethod(from_element)
528
def __eq__(self, other):
441
def __cmp__(self, other):
529
442
"""Compare two sets by comparing their contents.
531
444
>>> i1 = Inventory()
532
445
>>> i2 = Inventory()
535
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
536
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
448
>>> i1.add(InventoryEntry('123', 'foo'))
539
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
540
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
451
>>> i2.add(InventoryEntry('123', 'foo'))
544
458
if not isinstance(other, Inventory):
545
459
return NotImplemented
547
if len(self._byid) != len(other._byid):
548
# shortcut: obviously not the same
551
return self._byid == other._byid
554
def __ne__(self, other):
555
return not (self == other)
559
raise ValueError('not hashable')
563
def get_idpath(self, file_id):
564
"""Return a list of file_ids for the path to an entry.
566
The list contains one element for each directory followed by
567
the id of the file itself. So the length of the returned list
568
is equal to the depth of the file in the tree, counting the
569
root directory as depth 1.
461
if self.id_set() ^ other.id_set():
464
for file_id in self._byid:
465
c = cmp(self[file_id], other[file_id])
471
def id2path(self, file_id):
472
"""Return as a list the path to file_id."""
572
474
while file_id != None:
574
ie = self._byid[file_id]
576
raise BzrError("file_id {%s} not found in inventory" % file_id)
577
p.insert(0, ie.file_id)
578
477
file_id = ie.parent_id
582
def id2path(self, file_id):
583
"""Return as a list the path to file_id."""
585
# get all names, skipping root
586
p = [self._byid[fid].name for fid in self.get_idpath(file_id)[1:]]
587
return os.sep.join(p)
597
488
This returns the entry of the last component in the path,
598
489
which may be either a file or a directory.
600
Returns None iff the path is not found.
602
491
if isinstance(name, types.StringTypes):
603
492
name = splitpath(name)
605
mutter("lookup path %r" % name)
610
cie = parent.children[f]
497
cie = self._tree[parent_id][f]
611
498
assert cie.name == f
612
assert cie.parent_id == parent.file_id
499
parent_id = cie.file_id
615
501
# or raise an error?
618
return parent.file_id
507
def get_child(self, parent_id, child_name):
508
return self._tree[parent_id].get(child_name)
621
511
def has_filename(self, names):
625
515
def has_id(self, file_id):
516
assert isinstance(file_id, str)
626
517
return self._byid.has_key(file_id)
629
def rename(self, file_id, new_parent_id, new_name):
630
"""Move a file within the inventory.
632
This can change either the name, or the parent, or both.
634
This does not move the working file."""
635
if not is_valid_name(new_name):
636
raise BzrError("not an acceptable filename: %r" % new_name)
638
new_parent = self._byid[new_parent_id]
639
if new_name in new_parent.children:
640
raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
642
new_parent_idpath = self.get_idpath(new_parent_id)
643
if file_id in new_parent_idpath:
644
raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
645
% (self.id2path(file_id), self.id2path(new_parent_id)))
647
file_ie = self._byid[file_id]
648
old_parent = self._byid[file_ie.parent_id]
650
# TODO: Don't leave things messed up if this fails
652
del old_parent.children[file_ie.name]
653
new_parent.children[new_name] = file_ie
655
file_ie.name = new_name
656
file_ie.parent_id = new_parent_id
663
def is_valid_name(name):
666
_NAME_RE = re.compile(r'^[^/\\]+$')
668
return bool(_NAME_RE.match(name))
523
if __name__ == '__main__':
524
import doctest, inventory
525
doctest.testmod(inventory)