14
15
# along with this program; if not, write to the Free Software
15
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Maybe store inventory_id in the file? Not really needed.
21
# This should really be an id randomly assigned when the tree is
22
# created, but it's not for now.
26
import sys, os.path, types, re
18
"""Inventories map files to their name in a revision."""
21
__copyright__ = "Copyright (C) 2005 Canonical Ltd."
22
__author__ = "Martin Pool <mbp@canonical.com>"
24
import sys, os.path, types
27
25
from sets import Set
32
30
from elementtree.ElementTree import Element, ElementTree, SubElement
34
32
from xml import XMLMixin
35
from errors import bailout, BzrError
33
from errors import bailout
38
36
from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath
62
60
>>> i = Inventory()
65
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
66
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
62
>>> i.add(InventoryEntry('123', 'src', kind='directory'))
63
>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))
67
64
>>> for j in i.iter_entries():
70
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
67
('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))
71
68
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
72
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
69
>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))
73
70
Traceback (most recent call last):
75
72
BzrError: ('inventory already contains entry with id {2323}', [])
76
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
77
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
73
>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))
74
>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))
78
75
>>> i.path2id('src/wibble')
82
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
79
>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))
84
81
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
85
82
>>> for j in i.iter_entries():
94
91
>>> i.id2path('2326')
95
92
'src/wibble/wibble.c'
97
TODO: Maybe also keep the full path of the entry, and the children?
94
:todo: Maybe also keep the full path of the entry, and the children?
98
95
But those depend on its position within a particular inventory, and
99
96
it would be nice not to need to hold the backpointer here.
102
# TODO: split InventoryEntry into subclasses for files,
103
# directories, etc etc.
105
def __init__(self, file_id, name, kind, parent_id, text_id=None):
98
def __init__(self, file_id, name, kind='file', text_id=None,
106
100
"""Create an InventoryEntry
108
102
The filename must be a single component, relative to the
109
103
parent directory; it cannot be a whole path or relative name.
111
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
105
>>> e = InventoryEntry('123', 'hello.c')
116
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
110
>>> e = InventoryEntry('123', 'src/hello.c')
117
111
Traceback (most recent call last):
118
112
BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
125
119
self.file_id = file_id
121
assert kind in ['file', 'directory']
128
123
self.text_id = text_id
129
124
self.parent_id = parent_id
130
125
self.text_sha1 = None
131
126
self.text_size = None
132
if kind == 'directory':
137
raise BzrError("unhandled entry kind %r" % kind)
141
def sorted_children(self):
142
l = self.children.items()
148
130
other = InventoryEntry(self.file_id, self.name, self.kind,
149
self.parent_id, text_id=self.text_id)
131
self.text_id, self.parent_id)
150
132
other.text_sha1 = self.text_sha1
151
133
other.text_size = self.text_size
169
151
e.set('file_id', self.file_id)
170
152
e.set('kind', self.kind)
172
if self.text_size != None:
154
if self.text_size is not None:
173
155
e.set('text_size', '%d' % self.text_size)
175
for f in ['text_id', 'text_sha1']:
157
for f in ['text_id', 'text_sha1', 'parent_id']:
176
158
v = getattr(self, f)
180
# to be conservative, we don't externalize the root pointers
181
# for now, leaving them as null in the xml form. in a future
182
# version it will be implied by nested elements.
183
if self.parent_id != ROOT_ID:
184
assert isinstance(self.parent_id, basestring)
185
e.set('parent_id', self.parent_id)
192
167
def from_element(cls, elt):
193
168
assert elt.tag == 'entry'
195
## original format inventories don't have a parent_id for
196
## nodes in the root directory, but it's cleaner to use one
198
parent_id = elt.get('parent_id')
199
if parent_id == None:
202
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
169
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))
203
170
self.text_id = elt.get('text_id')
204
171
self.text_sha1 = elt.get('text_sha1')
172
self.parent_id = elt.get('parent_id')
206
174
## mutter("read inventoryentry: %r" % (elt.attrib))
232
class RootEntry(InventoryEntry):
233
def __init__(self, file_id):
234
self.file_id = file_id
236
self.kind = 'root_directory'
237
self.parent_id = None
240
def __cmp__(self, other):
243
if not isinstance(other, RootEntry):
244
return NotImplemented
245
return cmp(self.file_id, other.file_id) \
246
or cmp(self.children, other.children)
250
200
class Inventory(XMLMixin):
251
201
"""Inventory of versioned files in a tree.
253
This describes which file_id is present at each point in the tree,
254
and possibly the SHA-1 or other information about the file.
255
Entries can be looked up either by path or by file_id.
203
An Inventory acts like a set of InventoryEntry items. You can
204
also look files up by their file_id or name.
206
May be read from and written to a metadata file in a tree. To
207
manipulate the inventory (for example to add a file), it is read
208
in, modified, and then written back out.
257
210
The inventory represents a typical unix file tree, with
258
211
directories containing files and subdirectories. We never store
262
215
returned quickly.
264
217
InventoryEntry objects must not be modified after they are
265
inserted, other than through the Inventory API.
267
220
>>> inv = Inventory()
268
221
>>> inv.write_xml(sys.stdout)
271
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
224
>>> inv.add(InventoryEntry('123-123', 'hello.c'))
272
225
>>> inv['123-123'].name
227
>>> for file_id in inv: print file_id
275
231
May be treated as an iterator or set to look up file ids:
250
## TODO: Clear up handling of files in subdirectories; we probably
251
## do want to be able to just look them up by name but this
252
## probably means gradually walking down the path, looking up as we go.
294
254
## TODO: Make sure only canonical filenames are stored.
296
256
## TODO: Do something sensible about the possible collisions on
297
257
## case-losing filesystems. Perhaps we should just always forbid
298
258
## such collisions.
300
## TODO: No special cases for root, rather just give it a file id
301
## like everything else.
303
## TODO: Probably change XML serialization to use nesting rather
304
## than parent_id pointers.
306
## TODO: Perhaps hold the ElementTree in memory and work directly
307
## on that rather than converting into Python objects every time?
260
## _tree should probably just be stored as
261
## InventoryEntry._children on each directory.
309
263
def __init__(self):
310
264
"""Create or read an inventory.
312
266
If a working directory is specified, the inventory is read
313
267
from there. If the file is specified, read from that. If not,
314
268
the inventory is created empty.
316
The inventory is created with a default root directory, with
319
self.root = RootEntry(ROOT_ID)
320
self._byid = {self.root.file_id: self.root}
272
# _tree is indexed by parent_id; at each level a map from name
273
# to ie. The None entry is the root.
274
self._tree = {None: {}}
323
277
def __iter__(self):
329
283
return len(self._byid)
332
def iter_entries(self, from_dir=None):
286
def iter_entries(self, parent_id=None):
333
287
"""Return (path, entry) pairs, in order by name."""
337
elif isinstance(from_dir, basestring):
338
from_dir = self._byid[from_dir]
340
kids = from_dir.children.items()
288
kids = self._tree[parent_id].items()
342
290
for name, ie in kids:
344
292
if ie.kind == 'directory':
345
for cn, cie in self.iter_entries(from_dir=ie.file_id):
346
yield os.path.join(name, cn), cie
293
for cn, cie in self.iter_entries(parent_id=ie.file_id):
294
yield joinpath([name, cn]), cie
297
def directories(self, include_root=True):
298
"""Return (path, entry) pairs for all directories.
302
for path, entry in self.iter_entries():
303
if entry.kind == 'directory':
308
def children(self, parent_id):
309
"""Return entries that are direct children of parent_id."""
310
return self._tree[parent_id]
350
def directories(self):
351
"""Return (path, entry) pairs for all directories.
353
def descend(parent_ie):
354
parent_name = parent_ie.name
355
yield parent_name, parent_ie
357
# directory children in sorted order
359
for ie in parent_ie.children.itervalues():
360
if ie.kind == 'directory':
361
dn.append((ie.name, ie))
364
for name, child_ie in dn:
365
for sub_name, sub_ie in descend(child_ie):
366
yield appendpath(parent_name, sub_name), sub_ie
368
for name, ie in descend(self.root):
314
# TODO: return all paths and entries
373
317
def __contains__(self, file_id):
374
318
"""True if this entry contains a file with given id.
376
320
>>> inv = Inventory()
377
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
321
>>> inv.add(InventoryEntry('123', 'foo.c'))
387
331
"""Return the entry for given file_id.
389
333
>>> inv = Inventory()
390
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
334
>>> inv.add(InventoryEntry('123123', 'hello.c'))
391
335
>>> inv['123123'].name
395
raise BzrError("can't look up file_id None")
398
return self._byid[file_id]
400
raise BzrError("file_id {%s} not in inventory" % file_id)
403
def get_child(self, parent_id, filename):
404
return self[parent_id].children.get(filename)
338
return self._byid[file_id]
407
341
def add(self, entry):
410
344
To add a file to a branch ready to be committed, use Branch.add,
411
345
which calls this."""
412
if entry.file_id in self._byid:
346
if entry.file_id in self:
413
347
bailout("inventory already contains entry with id {%s}" % entry.file_id)
416
parent = self._byid[entry.parent_id]
418
bailout("parent_id {%s} not in inventory" % entry.parent_id)
420
if parent.children.has_key(entry.name):
421
bailout("%s is already versioned" %
422
appendpath(self.id2path(parent.file_id), entry.name))
349
if entry.parent_id != None:
350
if entry.parent_id not in self:
351
bailout("parent_id %s of new entry not found in inventory"
354
if self._tree[entry.parent_id].has_key(entry.name):
355
bailout("%s is already versioned"
356
% appendpath(self.id2path(entry.parent_id), entry.name))
424
358
self._byid[entry.file_id] = entry
425
parent.children[entry.name] = entry
359
self._tree[entry.parent_id][entry.name] = entry
361
if entry.kind == 'directory':
362
self._tree[entry.file_id] = {}
428
365
def add_path(self, relpath, kind, file_id=None):
433
370
if len(parts) == 0:
434
371
bailout("cannot re-add root of inventory")
437
374
file_id = bzrlib.branch.gen_file_id(relpath)
439
376
parent_id = self.path2id(parts[:-1])
440
assert parent_id != None
441
377
ie = InventoryEntry(file_id, parts[-1],
442
378
kind=kind, parent_id=parent_id)
443
379
return self.add(ie)
457
393
ie = self[file_id]
459
assert self[ie.parent_id].children[ie.name] == ie
395
assert self._tree[ie.parent_id][ie.name] == ie
461
397
# TODO: Test deleting all children; maybe hoist to a separate
462
398
# deltree method?
463
399
if ie.kind == 'directory':
464
for cie in ie.children.values():
400
for cie in self._tree[file_id].values():
465
401
del self[cie.file_id]
402
del self._tree[file_id]
468
404
del self._byid[file_id]
469
del self[ie.parent_id].children[ie.name]
405
del self._tree[ie.parent_id][ie.name]
472
408
def id_set(self):
486
422
"""Construct from XML Element
488
424
>>> inv = Inventory()
489
>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))
425
>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))
490
426
>>> elt = inv.to_element()
491
427
>>> inv2 = Inventory.from_element(elt)
508
444
>>> i2 = Inventory()
511
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
447
>>> i1.add(InventoryEntry('123', 'foo'))
514
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
450
>>> i2.add(InventoryEntry('123', 'foo'))
534
def get_idpath(self, file_id):
535
"""Return a list of file_ids for the path to an entry.
537
The list contains one element for each directory followed by
538
the id of the file itself. So the length of the returned list
539
is equal to the depth of the file in the tree, counting the
540
root directory as depth 1.
470
def id2path(self, file_id):
471
"""Return as a list the path to file_id."""
543
473
while file_id != None:
545
ie = self._byid[file_id]
547
bailout("file_id {%s} not found in inventory" % file_id)
548
p.insert(0, ie.file_id)
549
476
file_id = ie.parent_id
553
def id2path(self, file_id):
554
"""Return as a list the path to file_id."""
556
# get all names, skipping root
557
p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]
558
return os.sep.join(p)
568
487
This returns the entry of the last component in the path,
569
488
which may be either a file or a directory.
571
Returns None iff the path is not found.
573
490
if isinstance(name, types.StringTypes):
574
491
name = splitpath(name)
576
mutter("lookup path %r" % name)
581
cie = parent.children[f]
496
cie = self._tree[parent_id][f]
582
497
assert cie.name == f
583
assert cie.parent_id == parent.file_id
498
parent_id = cie.file_id
586
500
# or raise an error?
589
return parent.file_id
506
def get_child(self, parent_id, child_name):
507
return self._tree[parent_id].get(child_name)
592
510
def has_filename(self, names):
596
514
def has_id(self, file_id):
515
assert isinstance(file_id, str)
597
516
return self._byid.has_key(file_id)
600
def rename(self, file_id, new_parent_id, new_name):
601
"""Move a file within the inventory.
603
This can change either the name, or the parent, or both.
605
This does not move the working file."""
606
if not is_valid_name(new_name):
607
bailout("not an acceptable filename: %r" % new_name)
609
new_parent = self._byid[new_parent_id]
610
if new_name in new_parent.children:
611
bailout("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
613
new_parent_idpath = self.get_idpath(new_parent_id)
614
if file_id in new_parent_idpath:
615
bailout("cannot move directory %r into a subdirectory of itself, %r"
616
% (self.id2path(file_id), self.id2path(new_parent_id)))
618
file_ie = self._byid[file_id]
619
old_parent = self._byid[file_ie.parent_id]
621
# TODO: Don't leave things messed up if this fails
623
del old_parent.children[file_ie.name]
624
new_parent.children[new_name] = file_ie
626
file_ie.name = new_name
627
file_ie.parent_id = new_parent_id
632
_NAME_RE = re.compile(r'^[^/\\]+$')
634
def is_valid_name(name):
635
return bool(_NAME_RE.match(name))
522
if __name__ == '__main__':
523
import doctest, inventory
524
doctest.testmod(inventory)