~bzr-pqm/bzr/bzr.dev

70 by mbp at sourcefrog
Prepare for smart recursive add.
1
# Copyright (C) 2005 Canonical Ltd
1 by mbp at sourcefrog
import from baz patch-364
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tree classes, representing directory at point in time.
18
"""
19
20
from sets import Set
21
import os.path, os, fnmatch
22
157 by mbp at sourcefrog
fix test case breakage
23
from osutils import pumpfile, compare_files, filesize, quotefn, sha_file, \
24
     joinpath, splitpath, appendpath, isdir, isfile, file_kind, fingerprint_file
25
import errno
26
from stat import S_ISREG, S_ISDIR, ST_MODE, ST_SIZE
27
1 by mbp at sourcefrog
import from baz patch-364
28
from inventory import Inventory
29
from trace import mutter, note
30
from errors import bailout
31
import branch
32
33
import bzrlib
34
35
class Tree:
    """Abstract file tree.

    There are several subclasses:

    * `WorkingTree` exists as files on disk editable by the user.

    * `RevisionTree` is a tree as recorded at some point in the past.

    * `EmptyTree`

    Trees contain an `Inventory` object, and also know how to retrieve
    file texts mentioned in the inventory, either from a working
    directory or from a store.

    It is possible for trees to contain files that are not described
    in their inventory or vice versa; for this use `filenames()`.

    Trees can be compared, etc, regardless of whether they are working
    trees or versioned trees.

    Subclasses are expected to set `self._inventory` and to provide
    `get_file(file_id)`, which `print_file` and `export` rely on.
    """

    def has_filename(self, filename):
        """True if the tree has given filename."""
        raise NotImplementedError()

    def has_id(self, file_id):
        """Return whether the inventory contains `file_id`."""
        return self.inventory.has_id(file_id)

    def id_set(self):
        """Return set of all ids in this tree."""
        return self.inventory.id_set()

    def id2path(self, file_id):
        """Return the inventory's path for `file_id`."""
        return self.inventory.id2path(file_id)

    def _get_inventory(self):
        return self._inventory

    inventory = property(_get_inventory,
                         doc="Inventory of this Tree")

    def _check_retrieved(self, ie, f):
        """Check a retrieved file against its inventory entry.

        `ie` is the inventory entry supplying the expected `text_size`
        and `text_sha1`; `f` is the file object that was retrieved.  The
        file is fingerprinted and then rewound to the start so that the
        caller can still read it.

        The size is only checked when the entry records one; the SHA-1
        is always checked.  Any mismatch is reported through `bailout`.

        The error messages read `self._store`, so this can only be used
        on trees that define that attribute.
        """
        fp = fingerprint_file(f)
        f.seek(0)

        if ie.text_size is not None:
            if ie.text_size != fp['size']:
                bailout("mismatched size for file %r in %r" % (ie.file_id, self._store),
                        ["inventory expects %d bytes" % ie.text_size,
                         "file is actually %d bytes" % fp['size'],
                         "store is probably damaged/corrupt"])

        if ie.text_sha1 != fp['sha1']:
            bailout("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),
                    ["inventory expects %s" % ie.text_sha1,
                     "file is actually %s" % fp['sha1'],
                     "store is probably damaged/corrupt"])

    def print_file(self, fileid):
        """Print file with id `fileid` to stdout."""
        import sys
        pumpfile(self.get_file(fileid), sys.stdout)

    def export(self, dest):
        """Export this tree to a new directory.

        `dest` should not exist, and will be created holding the
        contents of this tree.

        :todo: To handle subdirectories we need to create the
               directories first.

        :note: If the export fails, the destination directory will be
               left in a half-assed state.
        """
        os.mkdir(dest)
        mutter('export version %r' % self)
        inv = self.inventory
        for dp, ie in inv.iter_entries():
            kind = ie.kind
            fullpath = appendpath(dest, dp)
            if kind == 'directory':
                os.mkdir(fullpath)
            elif kind == 'file':
                # We open the destination ourselves, so make sure it is
                # flushed and closed even if the copy fails part-way.
                outf = open(fullpath, 'wb')
                try:
                    pumpfile(self.get_file(ie.file_id), outf)
                finally:
                    outf.close()
            else:
                # Report the entry's own id; no other id is in scope here.
                bailout("don't know how to export {%s} of kind %r"
                        % (ie.file_id, kind))
            mutter("  export {%s} kind %s to %s" % (ie.file_id, kind, fullpath))
126
127
128
129
class WorkingTree(Tree):
    """Working copy tree.

    The inventory is held in the `Branch` working-inventory, and the
    files are in a directory on disk.

    It is possible for a `WorkingTree` to have a filename which is
    not listed in the Inventory and vice versa.
    """
    def __init__(self, basedir, inv):
        """Create a tree over directory `basedir` described by inventory `inv`."""
        self._inventory = inv
        self.basedir = basedir
        # Expose the inventory's path lookup directly on the tree.
        self.path2id = inv.path2id

    def __repr__(self):
        return "<%s of %s>" % (self.__class__.__name__,
                               self.basedir)

    def abspath(self, filename):
        """Return `filename` joined onto the tree's base directory."""
        return os.path.join(self.basedir, filename)

    def has_filename(self, filename):
        """True if `filename` exists on disk under the base directory."""
        return os.path.exists(self.abspath(filename))

    def get_file(self, file_id):
        """Open the working file for `file_id` for binary reading."""
        return self.get_file_byname(self.id2path(file_id))

    def get_file_byname(self, filename):
        """Open tree-relative `filename` for binary reading.

        The caller is responsible for closing the returned file.
        """
        return open(self.abspath(filename), 'rb')

    def _get_store_filename(self, file_id):
        ## XXX: badly named; this isn't in the store at all
        return self.abspath(self.id2path(file_id))

    def has_id(self, file_id):
        """True if `file_id` is in the inventory and its file is on disk."""
        # files that have been deleted are excluded
        if not self.inventory.has_id(file_id):
            return False
        return os.access(self.abspath(self.inventory.id2path(file_id)), os.F_OK)

    def get_file_size(self, file_id):
        """Return the on-disk size of the file with id `file_id`."""
        return os.stat(self._get_store_filename(file_id))[ST_SIZE]

    def get_file_sha1(self, file_id):
        """Return `sha_file` of the working file with id `file_id`."""
        f = self.get_file(file_id)
        try:
            return sha_file(f)
        finally:
            # The file was opened only for hashing; don't leak the handle.
            f.close()

    def file_class(self, filename):
        """Classify `filename`: 'V' versioned, 'I' ignored, '?' otherwise."""
        if self.path2id(filename):
            return 'V'
        elif self.is_ignored(filename):
            return 'I'
        else:
            return '?'

    def list_files(self):
        """Recursively list all files as (path, class, kind, id).

        Lists, but does not descend into unversioned directories.

        This does not include files that have been deleted in this
        tree.

        Skips the control directory.

        For entries with no inventory record, the id element is the
        falsy value returned by the inventory's `get_child`.
        """
        inv = self.inventory

        def descend(from_dir_relpath, from_dir_id, dp):
            ls = os.listdir(dp)
            ls.sort()
            for f in ls:
                ## TODO: If we find a subdirectory with its own .bzr
                ## directory, then that is a separate tree and we
                ## should exclude it.
                if bzrlib.BZRDIR == f:
                    continue

                # path within tree
                fp = appendpath(from_dir_relpath, f)

                # absolute path
                fap = appendpath(dp, f)

                f_ie = inv.get_child(from_dir_id, f)
                if f_ie:
                    c = 'V'
                elif self.is_ignored(fp):
                    c = 'I'
                else:
                    c = '?'

                fk = file_kind(fap)

                if f_ie:
                    # A versioned entry must still be of the recorded kind.
                    if f_ie.kind != fk:
                        bailout("file %r entered as kind %r id %r, now of kind %r"
                                % (fap, f_ie.kind, f_ie.file_id, fk))

                yield fp, c, fk, (f_ie and f_ie.file_id)

                if fk != 'directory':
                    continue

                if c != 'V':
                    # don't descend unversioned directories
                    continue

                for ff in descend(fp, f_ie.file_id, fap):
                    yield ff

        for f in descend('', inv.root.file_id, self.basedir):
            yield f

    def unknowns(self):
        """Yield the paths from `extras()` that match no ignore pattern."""
        for subp in self.extras():
            if not self.is_ignored(subp):
                yield subp

    def extras(self):
        """Yield all unknown files in this WorkingTree.

        If there are any unknown directories then only the directory is
        returned, not all its children.  But if there are unknown files
        under a versioned subdirectory, they are returned.

        Currently returned depth-first, sorted by name within directories.
        """
        ## TODO: Work from given directory downwards

        for path, dir_entry in self.inventory.directories():
            mutter("search for unknowns in %r" % path)
            dirabs = self.abspath(path)
            if not isdir(dirabs):
                # e.g. directory deleted
                continue

            fl = []
            for subf in os.listdir(dirabs):
                # Skip the control directory, using the same constant as
                # list_files() rather than a hard-coded name.
                if (subf != bzrlib.BZRDIR
                    and (subf not in dir_entry.children)):
                    fl.append(subf)

            fl.sort()
            for subf in fl:
                subp = appendpath(path, subf)
                yield subp

    def ignored_files(self):
        """Yield list of PATH, IGNORE_PATTERN"""
        for subp in self.extras():
            pat = self.is_ignored(subp)
            if pat is not None:
                yield subp, pat

    def get_ignore_list(self):
        """Return list of ignore patterns.

        The list is a copy of `bzrlib.DEFAULT_IGNORE` followed by the
        lines of the tree's ignore file, if that file exists.

        Cached in the Tree object after the first call.
        """
        if hasattr(self, '_ignorelist'):
            return self._ignorelist

        patterns = bzrlib.DEFAULT_IGNORE[:]
        if self.has_filename(bzrlib.IGNORE_FILENAME):
            f = self.get_file_byname(bzrlib.IGNORE_FILENAME)
            try:
                patterns.extend([line.rstrip("\n\r") for line in f.readlines()])
            finally:
                f.close()
        self._ignorelist = patterns
        return patterns

    def is_ignored(self, filename):
        """Check whether the filename matches an ignore pattern.

        Patterns containing '/' need to match the whole path; others
        match against only the last component.

        If the file is ignored, returns the pattern which caused it to
        be ignored, otherwise None.  So this can simply be used as a
        boolean if desired."""

        ## TODO: Use '**' to match directories, and other extended globbing stuff from cvs/rsync.

        for pat in self.get_ignore_list():
            if '/' in pat:
                # as a special case, you can put ./ at the start of a pattern;
                # this is good to match in the top-level only;
                if pat[:2] == './':
                    newpat = pat[2:]
                else:
                    newpat = pat
                if fnmatch.fnmatchcase(filename, newpat):
                    return pat
            else:
                if fnmatch.fnmatchcase(splitpath(filename)[-1], pat):
                    return pat
        return None
1 by mbp at sourcefrog
import from baz patch-364
332
        
333
334
        
335
        
336
337
class RevisionTree(Tree):
    """Tree viewing a previous revision.

    File text can be retrieved from the text store.

    :todo: Some kind of `__repr__` method, but a good one
           probably means knowing the branch and revision number,
           or at least passing a description to the constructor.
    """

    def __init__(self, store, inv):
        """Create a tree reading texts from `store` as described by `inv`.

        `store` is indexed by an inventory entry's `text_id` to obtain
        the file for that entry.
        """
        self._store = store
        self._inventory = inv

    def get_file(self, file_id):
        """Return the stored file for `file_id`, after checking it.

        The file is passed through `_check_retrieved` against its
        inventory entry before being returned.
        """
        ie = self._inventory[file_id]
        f = self._store[ie.text_id]
        mutter("  get fileid{%s} from %r" % (file_id, self))
        self._check_retrieved(ie, f)
        return f

    def get_file_size(self, file_id):
        """Return the `text_size` recorded in the inventory for `file_id`."""
        return self._inventory[file_id].text_size

    def get_file_sha1(self, file_id):
        """Return the `text_sha1` recorded in the inventory for `file_id`."""
        ie = self._inventory[file_id]
        return ie.text_sha1

    def has_filename(self, filename):
        """True if the inventory maps `filename` to a file id."""
        return bool(self.inventory.path2id(filename))

    def list_files(self):
        """Yield (path, 'V', kind, file_id) for every inventory entry."""
        # The only files returned by this are those from the version
        for path, entry in self.inventory.iter_entries():
            yield path, 'V', entry.kind, entry.file_id
372
373
374
class EmptyTree(Tree):
    """Tree backed by a freshly constructed `Inventory`, reporting no files."""

    def __init__(self):
        self._inventory = Inventory()

    def has_filename(self, filename):
        """Always False: this tree reports no filenames."""
        return False

    def list_files(self):
        """Return a generator that yields nothing."""
        if False:  # just to make it a generator
            yield None
384
    
385
386
387
######################################################################
# diff

# TODO: Merge these two functions into a single one that can operate
# on either a whole tree or a set of files.

# TODO: Return the diff in order by filename, not by category or in
# random order.  Can probably be done by lock-stepping through the
# filenames from both trees.
396
397
398
def file_status(filename, old_tree, new_tree):
    """Return single-letter status, old and new names for a file.

    The complexity here is in deciding how to represent renames;
    many complex cases are possible.

    `filename` is looked up by path in the inventories of `old_tree`
    and `new_tree`.  The results are:

    * ``('I', None, None)`` -- in neither inventory, and ignored
      according to `new_tree.is_ignored`.
    * ``('?', None, None)`` -- in neither inventory and not ignored.
    * ``('X', old_path, new_path)`` -- the name is gone from the new
      inventory but the id that used to have it is still present.
    * ``('D', old_path, None)`` -- the name is gone and so is its id.

    NOTE: when the name exists in the new inventory the function
    returns a bare string rather than a 3-tuple: ``'A'`` if that id is
    not in the old inventory, ``'D'`` if the id that formerly had this
    name is no longer in the new inventory, and ``'wtf?'`` for anything
    else (which includes a file whose id and name are unchanged).  This
    is kept as-is because callers may depend on it.
    """
    old_inv = old_tree.inventory
    new_inv = new_tree.inventory
    new_id = new_inv.path2id(filename)
    old_id = old_inv.path2id(filename)

    if not new_id and not old_id:
        # easy: doesn't exist in either; not versioned at all
        if new_tree.is_ignored(filename):
            return 'I', None, None
        else:
            return '?', None, None
    elif new_id:
        # There is now a file of this name, great.
        pass
    else:
        # There is no longer a file of this name, but we can describe
        # what happened to the file that used to have
        # this name.  There are two possibilities: either it was
        # deleted entirely, or renamed.
        assert old_id
        if new_inv.has_id(old_id):
            return 'X', old_inv.id2path(old_id), new_inv.id2path(old_id)
        else:
            return 'D', old_inv.id2path(old_id), None

    # if the file_id is new in this revision, it is added
    if new_id and not old_inv.has_id(new_id):
        return 'A'

    # if there used to be a file of this name, but that ID has now
    # disappeared, it is deleted
    if old_id and not new_inv.has_id(old_id):
        return 'D'

    return 'wtf?'
439
440
    
441
164 by mbp at sourcefrog
new 'renames' command
442
def find_renames(old_inv, new_inv):
    """Yield (old_path, new_path) for ids whose path differs between inventories.

    Iterates the file ids of `old_inv`; ids that are not in `new_inv`
    are skipped.  For the rest, the paths given by each inventory's
    `id2path` are compared and the pair is yielded when they differ.
    """
    for file_id in old_inv:
        if file_id not in new_inv:
            continue
        old_name = old_inv.id2path(file_id)
        new_name = new_inv.id2path(file_id)
        if old_name != new_name:
            yield (old_name, new_name)
450