# ~bzr-pqm/bzr/bzr.dev
#
# 1 by mbp at sourcefrog
# import from baz patch-364
#! /usr/bin/env python
# -*- coding: UTF-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Inventories map files to their name in a revision."""
19
20
21
__copyright__ = "Copyright (C) 2005 Canonical Ltd."
22
__author__ = "Martin Pool <mbp@canonical.com>"
23
24
import sys, os.path, types
25
from sets import Set
26
27
from xml import XMLMixin
28
from ElementTree import ElementTree, Element
29
from errors import bailout
30
from osutils import uuid, quotefn, splitpath, joinpath, appendpath
31
from trace import mutter
32
33
class InventoryEntry(XMLMixin):
    """Description of a versioned file.

    An InventoryEntry has the following fields, which are also
    present in the XML inventory-entry element:

    * *file_id*
    * *name*: (only the basename within the directory, must not
      contain slashes)
    * *kind*: "directory" or "file"
    * *parent_id*: (if absent/null means the branch root directory)
    * *text_sha1*: only for files
    * *text_size*: in bytes, only for files
    * *text_id*: identifier for the text version, only for files

    InventoryEntries can also exist inside a WorkingTree
    inventory, in which case they are not yet bound to a
    particular revision of the file.  In that case the text_sha1,
    text_size and text_id are absent.


    >>> i = Inventory()
    >>> i.path2id('')
    >>> i.add(InventoryEntry('123', 'src', kind='directory'))
    >>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))
    >>> for j in i.iter_entries():
    ...   print j
    ... 
    ('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))
    ('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
    >>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))
    Traceback (most recent call last):
    ...
    BzrError: ('inventory already contains entry with id {2323}', [])
    >>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))
    >>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))
    >>> i.path2id('src/wibble')
    '2325'
    >>> '2325' in i
    True
    >>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))
    >>> i['2326']
    InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
    >>> for j in i.iter_entries():
    ...     print j[0]
    ...     assert i.path2id(j[0])
    ... 
    src
    src/bye.c
    src/hello.c
    src/wibble
    src/wibble/wibble.c
    >>> i.id2path('2326')
    'src/wibble/wibble.c'

    :todo: Maybe also keep the full path of the entry, and the children?
           But those depend on its position within a particular inventory, and
           it would be nice not to need to hold the backpointer here.
    """

    def __init__(self, file_id, name, kind='file', text_id=None,
                 parent_id=None):
        """Create an InventoryEntry

        The filename must be a single component, relative to the
        parent directory; it cannot be a whole path or relative name.

        text_sha1 and text_size always start out as None; callers set
        them afterwards when the values are known.

        >>> e = InventoryEntry('123', 'hello.c')
        >>> e.name
        'hello.c'
        >>> e.file_id
        '123'
        >>> e = InventoryEntry('123', 'src/hello.c')
        Traceback (most recent call last):
        BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
        """
        if len(splitpath(name)) != 1:
            bailout('InventoryEntry name is not a simple filename: %r'
                    % name)

        assert kind in ['file', 'directory'], \
               'unsupported inventory entry kind %r' % (kind,)

        self.file_id = file_id
        self.name = name
        self.kind = kind
        self.text_id = text_id
        self.parent_id = parent_id
        self.text_sha1 = None
        self.text_size = None

    def copy(self):
        """Return a new InventoryEntry carrying the same field values."""
        other = InventoryEntry(self.file_id, self.name, self.kind,
                               self.text_id, self.parent_id)
        other.text_sha1 = self.text_sha1
        other.text_size = self.text_size
        return other

    def __repr__(self):
        # The text_* fields are deliberately left out; the doctests in
        # this module depend on this exact format.
        return ("%s(%r, %r, kind=%r, parent_id=%r)"
                % (self.__class__.__name__,
                   self.file_id,
                   self.name,
                   self.kind,
                   self.parent_id))

    def to_element(self):
        """Convert to XML element"""
        e = Element('entry')

        e.set('name', self.name)
        e.set('file_id', self.file_id)
        e.set('kind', self.kind)

        if self.text_size is not None:
            e.set('text_size', '%d' % self.text_size)

        # Optional attributes are omitted from the element when unset.
        for f in ['text_id', 'text_sha1', 'parent_id']:
            v = getattr(self, f)
            if v is not None:
                e.set(f, v)

        # One entry per line in the serialized inventory.
        e.tail = '\n'

        return e

    def from_element(cls, elt):
        """Construct an entry from an 'entry' XML element.

        This is the inverse of to_element(): attributes missing from
        the element are left as None.
        """
        assert elt.tag == 'entry'
        self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))
        self.text_id = elt.get('text_id')
        self.text_sha1 = elt.get('text_sha1')
        self.parent_id = elt.get('parent_id')

        # text_size is stored as a decimal string; a missing or empty
        # attribute means the size is unknown, never a string value.
        v = elt.get('text_size')
        if v:
            self.text_size = int(v)
        else:
            self.text_size = None

        return self

    from_element = classmethod(from_element)

    def __cmp__(self, other):
        """Order entries field by field, starting with file_id.

        Returns 0 only when every field of the two entries is equal.
        """
        if self is other:
            return 0
        if not isinstance(other, InventoryEntry):
            return NotImplemented

        # The first field that differs decides the result.
        return cmp(self.file_id, other.file_id) \
               or cmp(self.name, other.name) \
               or cmp(self.text_sha1, other.text_sha1) \
               or cmp(self.text_size, other.text_size) \
               or cmp(self.text_id, other.text_id) \
               or cmp(self.parent_id, other.parent_id) \
               or cmp(self.kind, other.kind)
191
192
193
194
class Inventory(XMLMixin):
    """Inventory of versioned files in a tree.

    An Inventory acts like a set of InventoryEntry items.  You can
    also look files up by their file_id or name.

    May be read from and written to a metadata file in a tree.  To
    manipulate the inventory (for example to add a file), it is read
    in, modified, and then written back out.

    The inventory represents a typical unix file tree, with
    directories containing files and subdirectories.  We never store
    the full path to a file, because renaming a directory implicitly
    moves all of its contents.  This class internally maintains a
    lookup tree that allows the children under a directory to be
    returned quickly.

    InventoryEntry objects must not be modified after they are
    inserted.

    >>> inv = Inventory()
    >>> inv.write_xml(sys.stdout)
    <inventory>
    </inventory>
    >>> inv.add(InventoryEntry('123-123', 'hello.c'))
    >>> inv['123-123'].name
    'hello.c'
    >>> for file_id in inv: print file_id
    ...
    123-123

    May be treated as an iterator or set to look up file ids:

    >>> bool(inv.path2id('hello.c'))
    True
    >>> '123-123' in inv
    True

    May also look up by name:

    >>> [x[0] for x in inv.iter_entries()]
    ['hello.c']

    >>> inv.write_xml(sys.stdout)
    <inventory>
    <entry file_id="123-123" kind="file" name="hello.c" />
    </inventory>

    """

    ## TODO: Clear up handling of files in subdirectories; we probably
    ## do want to be able to just look them up by name but this
    ## probably means gradually walking down the path, looking up as we go.

    ## TODO: Make sure only canonical filenames are stored.

    ## TODO: Do something sensible about the possible collisions on
    ## case-losing filesystems.  Perhaps we should just always forbid
    ## such collisions.

    ## _tree should probably just be stored as
    ## InventoryEntry._children on each directory.

    def __init__(self):
        """Create an empty inventory."""
        # Map from file_id to InventoryEntry.
        self._byid = dict()

        # _tree is indexed by parent_id; at each level a map from name
        # to ie.  The None entry is the root.
        self._tree = {None: {}}

    def __iter__(self):
        """Iterate over the file_ids in the inventory."""
        return iter(self._byid)

    def __len__(self):
        """Returns number of entries."""
        return len(self._byid)

    def iter_entries(self, parent_id=None):
        """Return (path, entry) pairs, in order by name.

        Paths are relative to the directory identified by parent_id
        (the root when it is None).  Each directory is yielded before
        its contents.
        """
        kids = list(self._tree[parent_id].items())
        kids.sort()
        for name, ie in kids:
            yield name, ie
            if ie.kind == 'directory':
                # Recurse, prefixing this directory's name to the
                # relative paths of everything below it.
                for cn, cie in self.iter_entries(parent_id=ie.file_id):
                    yield joinpath([name, cn]), cie

    def directories(self, include_root=True):
        """Return (path, entry) pairs for all directories.

        If include_root is true the root is yielded first as
        ('', None), since the root has no InventoryEntry.
        """
        if include_root:
            yield '', None
        for path, entry in self.iter_entries():
            if entry.kind == 'directory':
                yield path, entry

    def children(self, parent_id):
        """Return entries that are direct children of parent_id.

        The result is the internal name -> entry map for that
        directory, not a copy.  Raises KeyError if parent_id is not a
        directory in this inventory.
        """
        return self._tree[parent_id]

    # TODO: return all paths and entries

    def __contains__(self, file_id):
        """True if this entry contains a file with given id.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c'))
        >>> '123' in inv
        True
        >>> '456' in inv
        False
        """
        return file_id in self._byid

    def __getitem__(self, file_id):
        """Return the entry for given file_id.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123123', 'hello.c'))
        >>> inv['123123'].name
        'hello.c'
        """
        return self._byid[file_id]

    def add(self, entry):
        """Add entry to inventory.

        To add a file to a branch ready to be committed, use Branch.add,
        which calls this.

        bailout() is called if the file_id is already present, if the
        parent_id is unknown or does not refer to a directory, or if
        the parent directory already has a child with the same name.
        """
        if entry.file_id in self:
            bailout("inventory already contains entry with id {%s}" % entry.file_id)

        parent_id = entry.parent_id
        if parent_id is not None and parent_id not in self:
            bailout("parent_id %s of new entry not found in inventory"
                    % parent_id)

        # Only the root (None) and directory entries own a slot in
        # _tree, so a parent_id that names a plain file has none.
        # Report that explicitly rather than failing with a KeyError.
        if parent_id not in self._tree:
            bailout("parent_id %s of new entry is not a directory"
                    % parent_id)

        siblings = self._tree[parent_id]
        if entry.name in siblings:
            bailout("%s is already versioned"
                    % appendpath(self.id2path(parent_id), entry.name))

        self._byid[entry.file_id] = entry
        siblings[entry.name] = entry

        if entry.kind == 'directory':
            self._tree[entry.file_id] = {}

    def __delitem__(self, file_id):
        """Remove entry by id.

        Deleting a directory also removes everything below it.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c'))
        >>> '123' in inv
        True
        >>> del inv['123']
        >>> '123' in inv
        False
        """
        ie = self[file_id]

        assert self._tree[ie.parent_id][ie.name] == ie

        # TODO: Test deleting all children; maybe hoist to a separate
        # deltree method?
        if ie.kind == 'directory':
            # Iterate over a copy: each recursive delete removes the
            # child from the very map being walked.
            for cie in list(self._tree[file_id].values()):
                del self[cie.file_id]
            del self._tree[file_id]

        del self._byid[file_id]
        del self._tree[ie.parent_id][ie.name]

    def id_set(self):
        """Return a Set of all file_ids in the inventory."""
        return Set(self._byid)

    def to_element(self):
        """Convert to XML Element"""
        e = Element('inventory')
        e.text = '\n'
        # iter_entries yields a directory before its children, so
        # from_element can add the entries back in document order.
        for path, ie in self.iter_entries():
            e.append(ie.to_element())
        return e

    def from_element(cls, elt):
        """Construct from XML Element

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))
        >>> elt = inv.to_element()
        >>> inv2 = Inventory.from_element(elt)
        >>> inv2 == inv
        True
        """
        assert elt.tag == 'inventory'
        o = cls()
        for e in elt:
            o.add(InventoryEntry.from_element(e))
        return o

    from_element = classmethod(from_element)

    def __cmp__(self, other):
        """Compare two sets by comparing their contents.

        >>> i1 = Inventory()
        >>> i2 = Inventory()
        >>> i1 == i2
        True
        >>> i1.add(InventoryEntry('123', 'foo'))
        >>> i1 == i2
        False
        >>> i2.add(InventoryEntry('123', 'foo'))
        >>> i1 == i2
        True
        """
        if self is other:
            return 0

        if not isinstance(other, Inventory):
            return NotImplemented

        # Inventories holding different sets of ids are simply
        # unequal; no meaningful ordering is attempted between them.
        if self.id_set() ^ other.id_set():
            return 1

        for file_id in self._byid:
            c = cmp(self[file_id], other[file_id])
            if c:
                return c

        return 0

    def id2path(self, file_id):
        """Return the path to file_id, joined into a single string.

        The path is built by following parent_id links up to the
        root.  A file_id of None denotes the root and produces the
        join of an empty component list.
        """
        p = []
        while file_id is not None:
            ie = self[file_id]
            p.insert(0, ie.name)
            file_id = ie.parent_id
        return joinpath(p)

    def path2id(self, name):
        """Walk down through directories to return the id of the last component.

        name must be a string; it is split into components with
        splitpath.

        This returns the file_id of the last component in the path,
        which may be either a file or a directory, or None if any
        component cannot be found.  A path with no components also
        gives None, which stands for the root.
        """
        assert isinstance(name, types.StringTypes)

        parent_id = None
        for f in splitpath(name):
            try:
                cie = self._tree[parent_id][f]
            except KeyError:
                # Either f is missing from this directory, or the
                # previous component was a file and has no children.
                # or raise an error?
                return None
            assert cie.name == f
            parent_id = cie.file_id

        return parent_id

    def get_child(self, parent_id, child_name):
        """Return the entry called child_name in directory parent_id.

        Returns None if the directory has no such child.  Raises
        KeyError if parent_id is not a directory in this inventory.
        """
        return self._tree[parent_id].get(child_name)

    def has_filename(self, names):
        """True if path2id finds a file_id for the given path."""
        return bool(self.path2id(names))

    def has_id(self, file_id):
        """True if the inventory contains an entry with this file_id."""
        assert isinstance(file_id, str)
        return file_id in self._byid
493
494
495
496
if __name__ == '__main__':
    # Run the doctests embedded in this module's docstrings.  The
    # module is imported by name (presumably this file is
    # inventory.py -- confirm) and handed to doctest.
    import doctest
    import inventory
    doctest.testmod(inventory)