~bzr-pqm/bzr/bzr.dev

70 by mbp at sourcefrog
Prepare for smart recursive add.
1
# Copyright (C) 2005 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tree classes, representing directory at point in time.
18
"""
19
20
from sets import Set
21
import os.path, os, fnmatch
22
157 by mbp at sourcefrog
fix test case breakage
23
from osutils import pumpfile, compare_files, filesize, quotefn, sha_file, \
24
     joinpath, splitpath, appendpath, isdir, isfile, file_kind, fingerprint_file
25
import errno
26
from stat import S_ISREG, S_ISDIR, ST_MODE, ST_SIZE
27
1 by mbp at sourcefrog
import from baz patch-364
28
from inventory import Inventory
29
from trace import mutter, note
30
from errors import bailout
31
import branch
32
33
import bzrlib
34
35
class Tree:
    """Abstract file tree.

    There are several subclasses:

    * `WorkingTree` exists as files on disk editable by the user.

    * `RevisionTree` is a tree as recorded at some point in the past.

    * `EmptyTree`

    Trees contain an `Inventory` object, and also know how to retrieve
    file texts mentioned in the inventory, either from a working
    directory or from a store.

    It is possible for trees to contain files that are not described
    in their inventory or vice versa; for this use `filenames()`.

    Trees can be compared, etc, regardless of whether they are working
    trees or versioned trees.

    Subclasses are expected to set `self._inventory` and to provide
    `get_file(file_id)`; both are relied on by the methods below.
    """

    def has_filename(self, filename):
        """True if the tree has given filename."""
        raise NotImplementedError()

    def has_id(self, file_id):
        """True if the inventory of this tree contains `file_id`."""
        return self.inventory.has_id(file_id)

    def id_set(self):
        """Return set of all ids in this tree."""
        return self.inventory.id_set()

    def id2path(self, file_id):
        """Return the inventory path recorded for `file_id`."""
        return self.inventory.id2path(file_id)

    def _get_inventory(self):
        return self._inventory

    inventory = property(_get_inventory,
                         doc="Inventory of this Tree")

    def _check_retrieved(self, ie, f):
        """Check a retrieved file against its inventory entry.

        `ie` is the inventory entry and `f` the open file holding the
        retrieved text.  The file is fingerprinted and then rewound to
        the start so that the caller can still read it.  A size
        mismatch (only checked when the entry records a size) or a
        SHA-1 mismatch is reported through `bailout`.

        The messages mention `self._store`, so this is only usable on
        trees that have one (`RevisionTree` sets it in its constructor).
        """
        fp = fingerprint_file(f)
        f.seek(0)

        if ie.text_size is not None:
            if ie.text_size != fp['size']:
                bailout("mismatched size for file %r in %r" % (ie.file_id, self._store),
                        ["inventory expects %d bytes" % ie.text_size,
                         "file is actually %d bytes" % fp['size'],
                         "store is probably damaged/corrupt"])

        if ie.text_sha1 != fp['sha1']:
            bailout("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),
                    ["inventory expects %s" % ie.text_sha1,
                     "file is actually %s" % fp['sha1'],
                     "store is probably damaged/corrupt"])

    def print_file(self, fileid):
        """Print file with id `fileid` to stdout."""
        import sys
        pumpfile(self.get_file(fileid), sys.stdout)

    def export(self, dest):
        """Export this tree to a new directory.

        `dest` should not exist, and will be created holding the
        contents of this tree.

        Entries of kind 'directory' are created with `os.mkdir` and
        entries of kind 'file' are copied out of the tree; any other
        kind is reported through `bailout`.

        TODO: To handle subdirectories we need to create the
               directories first.

        :note: If the export fails, the destination directory will be
               left in a half-assed state.
        """
        os.mkdir(dest)
        mutter('export version %r' % self)
        inv = self.inventory
        for dp, ie in inv.iter_entries():
            kind = ie.kind
            fullpath = appendpath(dest, dp)
            if kind == 'directory':
                os.mkdir(fullpath)
            elif kind == 'file':
                # Fetch the source first, as before, so that a failed
                # retrieval does not leave an empty file behind.
                source = self.get_file(ie.file_id)
                target = open(fullpath, 'wb')
                try:
                    pumpfile(source, target)
                finally:
                    # we opened the target, so make sure it is flushed
                    # and closed even if the copy fails
                    target.close()
            else:
                # was formatted with the undefined name `fid`, which
                # raised NameError instead of reporting the problem
                bailout("don't know how to export {%s} of kind %r" % (ie.file_id, kind))
            mutter("  export {%s} kind %s to %s" % (ie.file_id, kind, fullpath))
126
127
128
129
class WorkingTree(Tree):
    """Working copy tree.

    The inventory is held in the `Branch` working-inventory, and the
    files are in a directory on disk.

    It is possible for a `WorkingTree` to have a filename which is
    not listed in the Inventory and vice versa.
    """
    def __init__(self, basedir, inv):
        """Create a working tree rooted at `basedir` with inventory `inv`."""
        self._inventory = inv
        self.basedir = basedir
        self.path2id = inv.path2id

    def __repr__(self):
        return "<%s of %s>" % (self.__class__.__name__,
                               self.basedir)

    def abspath(self, filename):
        """Return `filename` joined onto the base directory of the tree."""
        return os.path.join(self.basedir, filename)

    def has_filename(self, filename):
        """True if `filename` exists on disk below the base directory."""
        return os.path.exists(self.abspath(filename))

    def get_file(self, file_id):
        """Open the working file for `file_id` for binary reading."""
        return self.get_file_byname(self.id2path(file_id))

    def get_file_byname(self, filename):
        """Open `filename` (relative to the tree) for binary reading.

        The caller is responsible for closing the returned file.
        """
        return open(self.abspath(filename), 'rb')

    def _get_store_filename(self, file_id):
        ## XXX: badly named; this isn't in the store at all
        return self.abspath(self.id2path(file_id))

    def has_id(self, file_id):
        """True if `file_id` is in the inventory and its file exists."""
        # files that have been deleted are excluded
        if not self.inventory.has_id(file_id):
            return False
        return os.access(self.abspath(self.inventory.id2path(file_id)), os.F_OK)

    def get_file_size(self, file_id):
        """Return the on-disk size of the file with id `file_id`."""
        return os.stat(self._get_store_filename(file_id))[ST_SIZE]

    def get_file_sha1(self, file_id):
        """Return `sha_file()` of the working file for `file_id`."""
        f = self.get_file(file_id)
        try:
            return sha_file(f)
        finally:
            # the file was opened only for hashing; don't leak it
            f.close()

    def file_class(self, filename):
        """Classify `filename` as 'V' (versioned), 'I' (ignored) or '?'."""
        if self.path2id(filename):
            return 'V'
        elif self.is_ignored(filename):
            return 'I'
        else:
            return '?'

    def list_files(self):
        """Recursively list all files as (path, class, kind, id).

        Lists, but does not descend into unversioned directories.

        This does not include files that have been deleted in this
        tree.

        Skips the control directory.
        """
        inv = self.inventory

        def descend(from_dir_relpath, from_dir_id, dp):
            ls = os.listdir(dp)
            ls.sort()
            for f in ls:
                ## TODO: If we find a subdirectory with its own .bzr
                ## directory, then that is a separate tree and we
                ## should exclude it.
                if bzrlib.BZRDIR == f:
                    continue

                # path within tree
                fp = appendpath(from_dir_relpath, f)

                # absolute path
                fap = appendpath(dp, f)

                f_ie = inv.get_child(from_dir_id, f)
                if f_ie:
                    c = 'V'
                elif self.is_ignored(fp):
                    c = 'I'
                else:
                    c = '?'

                fk = file_kind(fap)

                if f_ie:
                    if f_ie.kind != fk:
                        bailout("file %r entered as kind %r id %r, now of kind %r"
                                % (fap, f_ie.kind, f_ie.file_id, fk))

                # the id slot carries whatever falsy value get_child
                # returned when the file is not versioned
                yield fp, c, fk, (f_ie and f_ie.file_id)

                if fk != 'directory':
                    continue

                if c != 'V':
                    # don't descend unversioned directories
                    continue

                for ff in descend(fp, f_ie.file_id, fap):
                    yield ff

        for f in descend('', inv.root.file_id, self.basedir):
            yield f

    def unknowns(self):
        """Yield the paths from `extras()` that match no ignore pattern."""
        for subp in self.extras():
            if not self.is_ignored(subp):
                yield subp

    def extras(self):
        """Yield all unknown files in this WorkingTree.

        If there are any unknown directories then only the directory is
        returned, not all its children.  But if there are unknown files
        under a versioned subdirectory, they are returned.

        Currently returned depth-first, sorted by name within directories.
        """
        ## TODO: Work from given directory downwards

        for path, dir_entry in self.inventory.directories():
            mutter("search for unknowns in %r" % path)
            dirabs = self.abspath(path)
            if not isdir(dirabs):
                # e.g. directory deleted
                continue

            # Skip the control directory using the same constant as
            # list_files(), rather than a second hard-coded name.
            fl = [subf for subf in os.listdir(dirabs)
                  if (subf != bzrlib.BZRDIR
                      and (subf not in dir_entry.children))]

            fl.sort()
            for subf in fl:
                subp = appendpath(path, subf)
                yield subp

    def ignored_files(self):
        """Yield list of PATH, IGNORE_PATTERN"""
        for subp in self.extras():
            pat = self.is_ignored(subp)
            if pat is not None:
                yield subp, pat

    def get_ignore_list(self):
        """Return list of ignore patterns.

        The list is a copy of `bzrlib.DEFAULT_IGNORE`, followed by one
        pattern per line of the tree's ignore file if that file exists.

        Cached in the Tree object after the first call.
        """
        if hasattr(self, '_ignorelist'):
            return self._ignorelist

        l = bzrlib.DEFAULT_IGNORE[:]
        if self.has_filename(bzrlib.IGNORE_FILENAME):
            f = self.get_file_byname(bzrlib.IGNORE_FILENAME)
            try:
                l.extend([line.rstrip("\n\r") for line in f.readlines()])
            finally:
                # the ignore file is read once and cached; close it
                f.close()
        self._ignorelist = l
        return l

    def is_ignored(self, filename):
        r"""Check whether the filename matches an ignore pattern.

        Patterns containing '/' or '\' need to match the whole path;
        others match against only the last component.

        If the file is ignored, returns the pattern which caused it to
        be ignored, otherwise None.  So this can simply be used as a
        boolean if desired."""

        # TODO: Use '**' to match directories, and other extended
        # globbing stuff from cvs/rsync.

        # XXX: fnmatch is actually not quite what we want: it's only
        # approximately the same as real Unix fnmatch, and doesn't
        # treat dotfiles correctly and allows * to match /.
        # Eventually it should be replaced with something more
        # accurate.

        for pat in self.get_ignore_list():
            if '/' in pat or '\\' in pat:

                # as a special case, you can put ./ at the start of a
                # pattern; this is good to match in the top-level
                # only;

                if (pat[:2] == './') or (pat[:2] == '.\\'):
                    newpat = pat[2:]
                else:
                    newpat = pat
                if fnmatch.fnmatchcase(filename, newpat):
                    return pat
            else:
                if fnmatch.fnmatchcase(splitpath(filename)[-1], pat):
                    return pat
        return None
1 by mbp at sourcefrog
import from baz patch-364
342
        
343
344
        
345
        
346
347
class RevisionTree(Tree):
    """Tree viewing a previous revision.

    File text can be retrieved from the text store.

    TODO: Some kind of `__repr__` method, but a good one
           probably means knowing the branch and revision number,
           or at least passing a description to the constructor.
    """

    def __init__(self, store, inv):
        """Remember the text `store` and the inventory `inv` to view."""
        self._inventory = inv
        self._store = store

    def get_file(self, file_id):
        """Fetch the text for `file_id` from the store.

        The text is looked up by the `text_id` of the inventory entry
        and passed through `_check_retrieved` before being returned.
        """
        entry = self._inventory[file_id]
        text = self._store[entry.text_id]
        mutter("  get fileid{%s} from %r" % (file_id, self))
        self._check_retrieved(entry, text)
        return text

    def get_file_size(self, file_id):
        """Return the `text_size` recorded in the inventory entry."""
        entry = self._inventory[file_id]
        return entry.text_size

    def get_file_sha1(self, file_id):
        """Return the `text_sha1` recorded in the inventory entry."""
        return self._inventory[file_id].text_sha1

    def has_filename(self, filename):
        """True if the inventory maps `filename` to a (truthy) file id."""
        found_id = self.inventory.path2id(filename)
        return bool(found_id)

    def list_files(self):
        """Yield (path, 'V', kind, file_id) for every inventory entry."""
        # Only versioned files can appear here, hence the constant 'V'.
        for relpath, ie in self.inventory.iter_entries():
            yield relpath, 'V', ie.kind, ie.file_id
382
383
384
class EmptyTree(Tree):
    """Tree backed by a freshly constructed, default `Inventory`."""

    def __init__(self):
        self._inventory = Inventory()

    def has_filename(self, filename):
        """Always False: this tree reports no filenames."""
        return False

    def list_files(self):
        """Generator that never yields anything."""
        # Looping over an empty tuple keeps this a generator function
        # while producing no items.
        for nothing in ():
            yield nothing
394
    
395
396
397
######################################################################
# diff

# TODO: Merge these two functions into a single one that can operate
# on either a whole tree or a set of files.

# TODO: Return the diff in order by filename, not by category or in
# random order.  Can probably be done by lock-stepping through the
# filenames from both trees.
406
407
408
def file_status(filename, old_tree, new_tree):
    """Return single-letter status, old and new names for a file.

    The complexity here is in deciding how to represent renames;
    many complex cases are possible.

    Results produced, by case:

    * name in neither inventory: ('I', None, None) if the new tree
      ignores it, otherwise ('?', None, None)
    * name only in the old inventory: ('X', old_path, new_path) if the
      id still exists in the new inventory, else ('D', old_path, None)
    * name in the new inventory: the bare strings 'A', 'D' or 'wtf?'

    NOTE(review): the last group returns plain strings rather than
    3-tuples, which does not match the summary line above -- confirm
    what callers expect before relying on the shape.
    """
    old_inv = old_tree.inventory
    new_inv = new_tree.inventory
    new_id = new_inv.path2id(filename)
    old_id = old_inv.path2id(filename)

    if not new_id:
        if not old_id:
            # easy: doesn't exist in either; not versioned at all
            if new_tree.is_ignored(filename):
                return 'I', None, None
            return '?', None, None

        # There is no longer a file of this name, but we can describe
        # what happened to the file that used to have this name:
        # either it was renamed, or it was deleted entirely.
        assert old_id
        if new_inv.has_id(old_id):
            return 'X', old_inv.id2path(old_id), new_inv.id2path(old_id)
        return 'D', old_inv.id2path(old_id), None

    # From here on there is a file of this name in the new inventory.

    # if the file_id is new in this revision, it is added
    if not old_inv.has_id(new_id):
        return 'A'

    # if there used to be a file of this name, but that ID has now
    # disappeared, it is deleted
    if old_id and not new_inv.has_id(old_id):
        return 'D'

    return 'wtf?'
449
450
    
451
164 by mbp at sourcefrog
new 'renames' command
452
def find_renames(old_inv, new_inv):
    """Yield (old_path, new_path) for ids whose path differs.

    Only file ids present in both inventories are considered; ids
    missing from `new_inv` are skipped.
    """
    for file_id in old_inv:
        if file_id in new_inv:
            before = old_inv.id2path(file_id)
            after = new_inv.id2path(file_id)
            if before != after:
                yield (before, after)
460