15
14
# along with this program; if not, write to the Free Software
16
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
"""Inventories map files to their name in a revision."""
21
__copyright__ = "Copyright (C) 2005 Canonical Ltd."
22
__author__ = "Martin Pool <mbp@canonical.com>"
24
import sys, os.path, types
28
from cElementTree import Element, ElementTree, SubElement
30
from elementtree.ElementTree import Element, ElementTree, SubElement
32
from xml import XMLMixin
33
from errors import bailout
18
# This should really be an id randomly assigned when the tree is
19
# created, but it's not for now.
23
import sys, os.path, types, re
26
from bzrlib.errors import BzrError, BzrCheckError
36
28
from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath
37
29
from bzrlib.trace import mutter
39
class InventoryEntry(XMLMixin):
31
class InventoryEntry(object):
40
32
"""Description of a versioned file.
42
34
An InventoryEntry has the following fields, which are also
60
52
>>> i = Inventory()
62
>>> i.add(InventoryEntry('123', 'src', kind='directory'))
63
>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))
55
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
56
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
64
57
>>> for j in i.iter_entries():
67
('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))
60
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
68
61
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
69
>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))
62
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
70
63
Traceback (most recent call last):
72
BzrError: ('inventory already contains entry with id {2323}', [])
73
>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))
74
>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))
65
BzrError: inventory already contains entry with id {2323}
66
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
67
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
75
68
>>> i.path2id('src/wibble')
79
>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))
72
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
81
74
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
82
75
>>> for j in i.iter_entries():
91
84
>>> i.id2path('2326')
92
85
'src/wibble/wibble.c'
94
:todo: Maybe also keep the full path of the entry, and the children?
87
TODO: Maybe also keep the full path of the entry, and the children?
95
88
But those depend on its position within a particular inventory, and
96
89
it would be nice not to need to hold the backpointer here.
98
def __init__(self, file_id, name, kind='file', text_id=None,
92
# TODO: split InventoryEntry into subclasses for files,
93
# directories, etc etc.
98
def __init__(self, file_id, name, kind, parent_id, text_id=None):
100
99
"""Create an InventoryEntry
102
101
The filename must be a single component, relative to the
103
102
parent directory; it cannot be a whole path or relative name.
105
>>> e = InventoryEntry('123', 'hello.c')
104
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
110
>>> e = InventoryEntry('123', 'src/hello.c')
109
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
111
110
Traceback (most recent call last):
112
BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
111
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
115
if len(splitpath(name)) != 1:
116
bailout('InventoryEntry name is not a simple filename: %r'
113
if '/' in name or '\\' in name:
114
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
119
116
self.file_id = file_id
121
assert kind in ['file', 'directory']
123
119
self.text_id = text_id
124
120
self.parent_id = parent_id
125
self.text_sha1 = None
126
self.text_size = None
121
if kind == 'directory':
126
raise BzrError("unhandled entry kind %r" % kind)
130
def sorted_children(self):
131
l = self.children.items()
130
137
other = InventoryEntry(self.file_id, self.name, self.kind,
131
self.text_id, self.parent_id)
138
self.parent_id, text_id=self.text_id)
132
139
other.text_sha1 = self.text_sha1
133
140
other.text_size = self.text_size
141
# note that children are *not* copied; they're pulled across when
146
155
def to_element(self):
147
156
"""Convert to XML element"""
157
from bzrlib.xml import Element
148
159
e = Element('entry')
150
161
e.set('name', self.name)
151
162
e.set('file_id', self.file_id)
152
163
e.set('kind', self.kind)
154
if self.text_size is not None:
165
if self.text_size != None:
155
166
e.set('text_size', '%d' % self.text_size)
157
for f in ['text_id', 'text_sha1', 'parent_id']:
168
for f in ['text_id', 'text_sha1']:
158
169
v = getattr(self, f)
173
# to be conservative, we don't externalize the root pointers
174
# for now, leaving them as null in the xml form. in a future
175
# version it will be implied by nested elements.
176
if self.parent_id != ROOT_ID:
177
assert isinstance(self.parent_id, basestring)
178
e.set('parent_id', self.parent_id)
167
185
def from_element(cls, elt):
168
186
assert elt.tag == 'entry'
169
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))
188
## original format inventories don't have a parent_id for
189
## nodes in the root directory, but it's cleaner to use one
191
parent_id = elt.get('parent_id')
192
if parent_id == None:
195
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
170
196
self.text_id = elt.get('text_id')
171
197
self.text_sha1 = elt.get('text_sha1')
172
self.parent_id = elt.get('parent_id')
174
199
## mutter("read inventoryentry: %r" % (elt.attrib))
182
207
from_element = classmethod(from_element)
184
def __cmp__(self, other):
209
def __eq__(self, other):
187
210
if not isinstance(other, InventoryEntry):
188
211
return NotImplemented
190
return cmp(self.file_id, other.file_id) \
191
or cmp(self.name, other.name) \
192
or cmp(self.text_sha1, other.text_sha1) \
193
or cmp(self.text_size, other.text_size) \
194
or cmp(self.text_id, other.text_id) \
195
or cmp(self.parent_id, other.parent_id) \
196
or cmp(self.kind, other.kind)
200
class Inventory(XMLMixin):
213
return (self.file_id == other.file_id) \
214
and (self.name == other.name) \
215
and (self.text_sha1 == other.text_sha1) \
216
and (self.text_size == other.text_size) \
217
and (self.text_id == other.text_id) \
218
and (self.parent_id == other.parent_id) \
219
and (self.kind == other.kind)
222
def __ne__(self, other):
223
return not (self == other)
226
raise ValueError('not hashable')
230
class RootEntry(InventoryEntry):
231
def __init__(self, file_id):
232
self.file_id = file_id
234
self.kind = 'root_directory'
235
self.parent_id = None
238
def __eq__(self, other):
239
if not isinstance(other, RootEntry):
240
return NotImplemented
242
return (self.file_id == other.file_id) \
243
and (self.children == other.children)
247
class Inventory(object):
201
248
"""Inventory of versioned files in a tree.
203
An Inventory acts like a set of InventoryEntry items. You can
204
also look files up by their file_id or name.
206
May be read from and written to a metadata file in a tree. To
207
manipulate the inventory (for example to add a file), it is read
208
in, modified, and then written back out.
250
This describes which file_id is present at each point in the tree,
251
and possibly the SHA-1 or other information about the file.
252
Entries can be looked up either by path or by file_id.
210
254
The inventory represents a typical unix file tree, with
211
255
directories containing files and subdirectories. We never store
215
259
returned quickly.
217
261
InventoryEntry objects must not be modified after they are
262
inserted, other than through the Inventory API.
220
264
>>> inv = Inventory()
221
>>> inv.write_xml(sys.stdout)
224
>>> inv.add(InventoryEntry('123-123', 'hello.c'))
265
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
225
266
>>> inv['123-123'].name
227
>>> for file_id in inv: print file_id
231
269
May be treated as an iterator or set to look up file ids:
240
278
>>> [x[0] for x in inv.iter_entries()]
243
>>> inv.write_xml(sys.stdout)
245
<entry file_id="123-123" kind="file" name="hello.c" />
250
## TODO: Clear up handling of files in subdirectories; we probably
251
## do want to be able to just look them up by name but this
252
## probably means gradually walking down the path, looking up as we go.
254
## TODO: Make sure only canonical filenames are stored.
256
## TODO: Do something sensible about the possible collisions on
257
## case-losing filesystems. Perhaps we should just always forbid
260
## _tree should probably just be stored as
261
## InventoryEntry._children on each directory.
263
281
def __init__(self):
264
282
"""Create or read an inventory.
266
284
If a working directory is specified, the inventory is read
267
285
from there. If the file is specified, read from that. If not,
268
286
the inventory is created empty.
288
The inventory is created with a default root directory, with
272
# _tree is indexed by parent_id; at each level a map from name
273
# to ie. The None entry is the root.
274
self._tree = {None: {}}
291
self.root = RootEntry(ROOT_ID)
292
self._byid = {self.root.file_id: self.root}
277
295
def __iter__(self):
283
301
return len(self._byid)
286
def iter_entries(self, parent_id=None):
304
def iter_entries(self, from_dir=None):
287
305
"""Return (path, entry) pairs, in order by name."""
288
kids = self._tree[parent_id].items()
309
elif isinstance(from_dir, basestring):
310
from_dir = self._byid[from_dir]
312
kids = from_dir.children.items()
290
314
for name, ie in kids:
292
316
if ie.kind == 'directory':
293
for cn, cie in self.iter_entries(parent_id=ie.file_id):
294
yield joinpath([name, cn]), cie
297
def directories(self, include_root=True):
298
"""Return (path, entry) pairs for all directories.
302
for path, entry in self.iter_entries():
303
if entry.kind == 'directory':
317
for cn, cie in self.iter_entries(from_dir=ie.file_id):
318
yield os.path.join(name, cn), cie
322
"""Return list of (path, ie) for all entries except the root.
324
This may be faster than iter_entries.
327
def descend(dir_ie, dir_path):
328
kids = dir_ie.children.items()
330
for name, ie in kids:
331
child_path = os.path.join(dir_path, name)
332
accum.append((child_path, ie))
333
if ie.kind == 'directory':
334
descend(ie, child_path)
336
descend(self.root, '')
340
def directories(self):
341
"""Return (path, entry) pairs for all directories, including the root.
344
def descend(parent_ie, parent_path):
345
accum.append((parent_path, parent_ie))
347
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
350
for name, child_ie in kids:
351
child_path = os.path.join(parent_path, name)
352
descend(child_ie, child_path)
353
descend(self.root, '')
308
def children(self, parent_id):
309
"""Return entries that are direct children of parent_id."""
310
return self._tree[parent_id]
314
# TODO: return all paths and entries
317
358
def __contains__(self, file_id):
318
359
"""True if this entry contains a file with given id.
320
361
>>> inv = Inventory()
321
>>> inv.add(InventoryEntry('123', 'foo.c'))
362
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
331
372
"""Return the entry for given file_id.
333
374
>>> inv = Inventory()
334
>>> inv.add(InventoryEntry('123123', 'hello.c'))
375
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
335
376
>>> inv['123123'].name
338
return self._byid[file_id]
380
return self._byid[file_id]
383
raise BzrError("can't look up file_id None")
385
raise BzrError("file_id {%s} not in inventory" % file_id)
388
def get_file_kind(self, file_id):
389
return self._byid[file_id].kind
391
def get_child(self, parent_id, filename):
392
return self[parent_id].children.get(filename)
341
395
def add(self, entry):
344
398
To add a file to a branch ready to be committed, use Branch.add,
345
399
which calls this."""
346
if entry.file_id in self:
347
bailout("inventory already contains entry with id {%s}" % entry.file_id)
349
if entry.parent_id != None:
350
if entry.parent_id not in self:
351
bailout("parent_id %s of new entry not found in inventory"
354
if self._tree[entry.parent_id].has_key(entry.name):
355
bailout("%s is already versioned"
356
% appendpath(self.id2path(entry.parent_id), entry.name))
400
if entry.file_id in self._byid:
401
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
404
parent = self._byid[entry.parent_id]
406
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
408
if parent.children.has_key(entry.name):
409
raise BzrError("%s is already versioned" %
410
appendpath(self.id2path(parent.file_id), entry.name))
358
412
self._byid[entry.file_id] = entry
359
self._tree[entry.parent_id][entry.name] = entry
361
if entry.kind == 'directory':
362
self._tree[entry.file_id] = {}
413
parent.children[entry.name] = entry
365
416
def add_path(self, relpath, kind, file_id=None):
366
417
"""Add entry from a path.
368
419
The immediate parent must already be versioned"""
420
from bzrlib.errors import NotVersionedError
369
422
parts = bzrlib.osutils.splitpath(relpath)
370
423
if len(parts) == 0:
371
bailout("cannot re-add root of inventory")
374
file_id = bzrlib.branch.gen_file_id(relpath)
376
parent_id = self.path2id(parts[:-1])
424
raise BzrError("cannot re-add root of inventory")
427
from bzrlib.branch import gen_file_id
428
file_id = gen_file_id(relpath)
430
parent_path = parts[:-1]
431
parent_id = self.path2id(parent_path)
432
if parent_id == None:
433
raise NotVersionedError(parent_path)
377
435
ie = InventoryEntry(file_id, parts[-1],
378
436
kind=kind, parent_id=parent_id)
379
437
return self.add(ie)
393
451
ie = self[file_id]
395
assert self._tree[ie.parent_id][ie.name] == ie
453
assert self[ie.parent_id].children[ie.name] == ie
397
455
# TODO: Test deleting all children; maybe hoist to a separate
398
456
# deltree method?
399
457
if ie.kind == 'directory':
400
for cie in self._tree[file_id].values():
458
for cie in ie.children.values():
401
459
del self[cie.file_id]
402
del self._tree[file_id]
404
462
del self._byid[file_id]
405
del self._tree[ie.parent_id][ie.name]
409
return Set(self._byid)
463
del self[ie.parent_id].children[ie.name]
412
466
def to_element(self):
413
467
"""Convert to XML Element"""
468
from bzrlib.xml import Element
414
470
e = Element('inventory')
416
472
for path, ie in self.iter_entries():
437
493
from_element = classmethod(from_element)
440
def __cmp__(self, other):
496
def __eq__(self, other):
441
497
"""Compare two sets by comparing their contents.
443
499
>>> i1 = Inventory()
444
500
>>> i2 = Inventory()
447
>>> i1.add(InventoryEntry('123', 'foo'))
503
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
450
>>> i2.add(InventoryEntry('123', 'foo'))
506
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
457
510
if not isinstance(other, Inventory):
458
511
return NotImplemented
460
if self.id_set() ^ other.id_set():
463
for file_id in self._byid:
464
c = cmp(self[file_id], other[file_id])
470
def id2path(self, file_id):
471
"""Return as a list the path to file_id."""
513
if len(self._byid) != len(other._byid):
514
# shortcut: obviously not the same
517
return self._byid == other._byid
520
def __ne__(self, other):
521
return not (self == other)
525
raise ValueError('not hashable')
529
def get_idpath(self, file_id):
530
"""Return a list of file_ids for the path to an entry.
532
The list contains one element for each directory followed by
533
the id of the file itself. So the length of the returned list
534
is equal to the depth of the file in the tree, counting the
535
root directory as depth 1.
473
538
while file_id != None:
540
ie = self._byid[file_id]
542
raise BzrError("file_id {%s} not found in inventory" % file_id)
543
p.insert(0, ie.file_id)
476
544
file_id = ie.parent_id
548
def id2path(self, file_id):
549
"""Return as a list the path to file_id."""
551
# get all names, skipping root
552
p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]
553
return os.sep.join(p)
487
563
This returns the entry of the last component in the path,
488
564
which may be either a file or a directory.
566
Returns None iff the path is not found.
490
568
if isinstance(name, types.StringTypes):
491
569
name = splitpath(name)
571
mutter("lookup path %r" % name)
496
cie = self._tree[parent_id][f]
576
cie = parent.children[f]
497
577
assert cie.name == f
498
parent_id = cie.file_id
578
assert cie.parent_id == parent.file_id
500
581
# or raise an error?
506
def get_child(self, parent_id, child_name):
507
return self._tree[parent_id].get(child_name)
584
return parent.file_id
510
587
def has_filename(self, names):
514
591
def has_id(self, file_id):
515
assert isinstance(file_id, str)
516
592
return self._byid.has_key(file_id)
522
if __name__ == '__main__':
523
import doctest, inventory
524
doctest.testmod(inventory)
595
def rename(self, file_id, new_parent_id, new_name):
596
"""Move a file within the inventory.
598
This can change either the name, or the parent, or both.
600
This does not move the working file."""
601
if not is_valid_name(new_name):
602
raise BzrError("not an acceptable filename: %r" % new_name)
604
new_parent = self._byid[new_parent_id]
605
if new_name in new_parent.children:
606
raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
608
new_parent_idpath = self.get_idpath(new_parent_id)
609
if file_id in new_parent_idpath:
610
raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
611
% (self.id2path(file_id), self.id2path(new_parent_id)))
613
file_ie = self._byid[file_id]
614
old_parent = self._byid[file_ie.parent_id]
616
# TODO: Don't leave things messed up if this fails
618
del old_parent.children[file_ie.name]
619
new_parent.children[new_name] = file_ie
621
file_ie.name = new_name
622
file_ie.parent_id = new_parent_id
627
_NAME_RE = re.compile(r'^[^/\\]+$')
629
def is_valid_name(name):
630
return bool(_NAME_RE.match(name))