~bzr-pqm/bzr/bzr.dev

1 by mbp at sourcefrog
import from baz patch-364
1
#! /usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18
from sets import Set
19
20
from trace import mutter
356 by Martin Pool
- pychecker fixes in bzrlib.diff
21
from errors import BzrError
1 by mbp at sourcefrog
import from baz patch-364
22
23
24
def diff_trees(old_tree, new_tree):
25
    """Compute diff between two trees.
26
27
    They may be in different branches and may be working or historical
28
    trees.
29
460 by Martin Pool
- new testing command compare-trees
30
    This only compares the versioned files, paying no attention to
31
    files which are ignored or unknown.  Those can only be present in
32
    working trees and can be reported on separately.
33
1 by mbp at sourcefrog
import from baz patch-364
34
    Yields a sequence of (state, id, old_name, new_name, kind).
35
    Each filename and each id is listed only once.
36
    """
37
    ## TODO: Allow specifying a list of files to compare, rather than
38
    ## doing the whole tree?  (Not urgent.)
39
40
    ## TODO: Allow diffing any two inventories, not just the
41
    ## current one against one.  We mgiht need to specify two
42
    ## stores to look for the files if diffing two branches.  That
43
    ## might imply this shouldn't be primarily a Branch method.
44
459 by Martin Pool
- diff now uses stat-cache -- much faster
45
    sha_match_cnt = modified_cnt = 0
46
1 by mbp at sourcefrog
import from baz patch-364
47
    old_it = old_tree.list_files()
48
    new_it = new_tree.list_files()
49
50
    def next(it):
51
        try:
52
            return it.next()
53
        except StopIteration:
54
            return None
55
56
    old_item = next(old_it)
57
    new_item = next(new_it)
58
59
    # We step through the two sorted iterators in parallel, trying to
60
    # keep them lined up.
61
62
    while (old_item != None) or (new_item != None):
63
        # OK, we still have some remaining on both, but they may be
64
        # out of step.        
65
        if old_item != None:
66
            old_name, old_class, old_kind, old_id = old_item
67
        else:
68
            old_name = None
69
            
70
        if new_item != None:
71
            new_name, new_class, new_kind, new_id = new_item
72
        else:
73
            new_name = None
74
75
        if old_item:
76
            # can't handle the old tree being a WorkingTree
77
            assert old_class == 'V'
78
79
        if new_item and (new_class != 'V'):
80
            yield new_class, None, None, new_name, new_kind
81
            new_item = next(new_it)
82
        elif (not new_item) or (old_item and (old_name < new_name)):
83
            if new_tree.has_id(old_id):
84
                # will be mentioned as renamed under new name
85
                pass
86
            else:
87
                yield 'D', old_id, old_name, None, old_kind
88
            old_item = next(old_it)
89
        elif (not old_item) or (new_item and (new_name < old_name)):
90
            if old_tree.has_id(new_id):
91
                yield 'R', new_id, old_tree.id2path(new_id), new_name, new_kind
92
            else:
93
                yield 'A', new_id, None, new_name, new_kind
94
            new_item = next(new_it)
95
        elif old_id != new_id:
96
            assert old_name == new_name
97
            # both trees have a file of this name, but it is not the
98
            # same file.  in other words, the old filename has been
99
            # overwritten by either a newly-added or a renamed file.
100
            # (should we return something about the overwritten file?)
101
            if old_tree.has_id(new_id):
102
                # renaming, overlying a deleted file
103
                yield 'R', new_id, old_tree.id2path(new_id), new_name, new_kind
104
            else:
105
                yield 'A', new_id, None, new_name, new_kind
106
107
            new_item = next(new_it)
108
            old_item = next(old_it)
109
        else:
110
            assert old_id == new_id
178 by mbp at sourcefrog
- Use a non-null file_id for the branch root directory. At the moment
111
            assert old_id != None
1 by mbp at sourcefrog
import from baz patch-364
112
            assert old_name == new_name
113
            assert old_kind == new_kind
114
115
            if old_kind == 'directory':
116
                yield '.', new_id, old_name, new_name, new_kind
117
            elif old_tree.get_file_sha1(old_id) == new_tree.get_file_sha1(old_id):
459 by Martin Pool
- diff now uses stat-cache -- much faster
118
                sha_match_cnt += 1
1 by mbp at sourcefrog
import from baz patch-364
119
                yield '.', new_id, old_name, new_name, new_kind
120
            else:
459 by Martin Pool
- diff now uses stat-cache -- much faster
121
                modified_cnt += 1
1 by mbp at sourcefrog
import from baz patch-364
122
                yield 'M', new_id, old_name, new_name, new_kind
123
124
            new_item = next(new_it)
125
            old_item = next(old_it)
126
127
459 by Martin Pool
- diff now uses stat-cache -- much faster
128
    mutter("diff finished: %d SHA matches, %d modified"
129
           % (sha_match_cnt, modified_cnt))
130
131
329 by Martin Pool
- refactor command functions into command classes
132
133
def show_diff(b, revision, file_list):
356 by Martin Pool
- pychecker fixes in bzrlib.diff
134
    import difflib, sys, types
329 by Martin Pool
- refactor command functions into command classes
135
    
136
    if revision == None:
137
        old_tree = b.basis_tree()
138
    else:
139
        old_tree = b.revision_tree(b.lookup_revision(revision))
140
        
141
    new_tree = b.working_tree()
142
143
    # TODO: Options to control putting on a prefix or suffix, perhaps as a format string
144
    old_label = ''
145
    new_label = ''
146
147
    DEVNULL = '/dev/null'
148
    # Windows users, don't panic about this filename -- it is a
149
    # special signal to GNU patch that the file should be created or
150
    # deleted respectively.
151
152
    # TODO: Generation of pseudo-diffs for added/deleted files could
153
    # be usefully made into a much faster special case.
154
155
    # TODO: Better to return them in sorted order I think.
156
157
    if file_list:
158
        file_list = [b.relpath(f) for f in file_list]
159
160
    # FIXME: If given a file list, compare only those files rather
161
    # than comparing everything and then throwing stuff away.
162
    
163
    for file_state, fid, old_name, new_name, kind in diff_trees(old_tree, new_tree):
164
165
        if file_list and (new_name not in file_list):
166
            continue
167
        
168
        # Don't show this by default; maybe do it if an option is passed
169
        # idlabel = '      {%s}' % fid
170
        idlabel = ''
171
172
        def diffit(oldlines, newlines, **kw):
173
            
174
            # FIXME: difflib is wrong if there is no trailing newline.
175
            # The syntax used by patch seems to be "\ No newline at
176
            # end of file" following the last diff line from that
177
            # file.  This is not trivial to insert into the
178
            # unified_diff output and it might be better to just fix
179
            # or replace that function.
180
181
            # In the meantime we at least make sure the patch isn't
182
            # mangled.
183
            
184
185
            # Special workaround for Python2.3, where difflib fails if
186
            # both sequences are empty.
187
            if not oldlines and not newlines:
188
                return
189
190
            nonl = False
191
192
            if oldlines and (oldlines[-1][-1] != '\n'):
193
                oldlines[-1] += '\n'
194
                nonl = True
195
            if newlines and (newlines[-1][-1] != '\n'):
196
                newlines[-1] += '\n'
197
                nonl = True
198
199
            ud = difflib.unified_diff(oldlines, newlines, **kw)
441 by Martin Pool
- Fix from Lalo for unidiff output of newly added
200
201
            # work-around for difflib being too smart for its own good
202
            # if /dev/null is "1,0", patch won't recognize it as /dev/null
203
            if not oldlines:
204
                ud = list(ud)
205
                ud[2] = ud[2].replace('-1,0', '-0,0')
206
            elif not newlines:
207
                ud = list(ud)
208
                ud[2] = ud[2].replace('+1,0', '+0,0')
209
            
329 by Martin Pool
- refactor command functions into command classes
210
            sys.stdout.writelines(ud)
211
            if nonl:
212
                print "\\ No newline at end of file"
213
            sys.stdout.write('\n')
214
        
215
        if file_state in ['.', '?', 'I']:
216
            continue
217
        elif file_state == 'A':
218
            print '*** added %s %r' % (kind, new_name)
219
            if kind == 'file':
220
                diffit([],
221
                       new_tree.get_file(fid).readlines(),
222
                       fromfile=DEVNULL,
223
                       tofile=new_label + new_name + idlabel)
224
        elif file_state == 'D':
225
            assert isinstance(old_name, types.StringTypes)
226
            print '*** deleted %s %r' % (kind, old_name)
227
            if kind == 'file':
228
                diffit(old_tree.get_file(fid).readlines(), [],
229
                       fromfile=old_label + old_name + idlabel,
230
                       tofile=DEVNULL)
231
        elif file_state in ['M', 'R']:
232
            if file_state == 'M':
233
                assert kind == 'file'
234
                assert old_name == new_name
235
                print '*** modified %s %r' % (kind, new_name)
236
            elif file_state == 'R':
237
                print '*** renamed %s %r => %r' % (kind, old_name, new_name)
238
239
            if kind == 'file':
240
                diffit(old_tree.get_file(fid).readlines(),
241
                       new_tree.get_file(fid).readlines(),
242
                       fromfile=old_label + old_name + idlabel,
243
                       tofile=new_label + new_name)
244
        else:
356 by Martin Pool
- pychecker fixes in bzrlib.diff
245
            raise BzrError("can't represent state %s {%s}" % (file_state, fid))
329 by Martin Pool
- refactor command functions into command classes
246
247
379 by Martin Pool
- Simpler compare_inventories() to possibly replace diff_trees
248
249
class TreeDelta:
250
    """Describes changes from one tree to another.
251
252
    Contains four lists:
253
254
    added
255
        (path, id)
256
    removed
257
        (path, id)
258
    renamed
460 by Martin Pool
- new testing command compare-trees
259
        (oldpath, newpath, id, text_modified)
379 by Martin Pool
- Simpler compare_inventories() to possibly replace diff_trees
260
    modified
261
        (path, id)
463 by Martin Pool
- compare_trees() also reports unchanged files
262
    unchanged
263
        (path, id)
379 by Martin Pool
- Simpler compare_inventories() to possibly replace diff_trees
264
460 by Martin Pool
- new testing command compare-trees
265
    Each id is listed only once.
379 by Martin Pool
- Simpler compare_inventories() to possibly replace diff_trees
266
460 by Martin Pool
- new testing command compare-trees
267
    Files that are both modified and renamed are listed only in
268
    renamed, with the text_modified flag true.
463 by Martin Pool
- compare_trees() also reports unchanged files
269
270
    The lists are normally sorted when the delta is created.
379 by Martin Pool
- Simpler compare_inventories() to possibly replace diff_trees
271
    """
272
    def __init__(self):
273
        self.added = []
274
        self.removed = []
275
        self.renamed = []
276
        self.modified = []
463 by Martin Pool
- compare_trees() also reports unchanged files
277
        self.unchanged = []
379 by Martin Pool
- Simpler compare_inventories() to possibly replace diff_trees
278
465 by Martin Pool
- Move show_status() out of Branch into a new function in
279
    def show(self, to_file, show_ids=False, show_unchanged=False):
280
        def show_list(files):
281
            for path, fid in files:
282
                if show_ids:
283
                    print >>to_file, '  %-30s %s' % (path, fid)
284
                else:
285
                    print >>to_file, ' ', path
286
            
460 by Martin Pool
- new testing command compare-trees
287
        if self.removed:
288
            print >>to_file, 'removed files:'
465 by Martin Pool
- Move show_status() out of Branch into a new function in
289
            show_list(self.removed)
290
                
460 by Martin Pool
- new testing command compare-trees
291
        if self.added:
292
            print >>to_file, 'added files:'
465 by Martin Pool
- Move show_status() out of Branch into a new function in
293
            show_list(self.added)
294
460 by Martin Pool
- new testing command compare-trees
295
        if self.renamed:
296
            print >>to_file, 'renamed files:'
297
            for oldpath, newpath, fid, text_modified in self.renamed:
298
                if show_ids:
299
                    print >>to_file, '  %s => %s %s' % (oldpath, newpath, fid)
300
                else:
301
                    print >>to_file, '  %s => %s' % (oldpath, newpath)
465 by Martin Pool
- Move show_status() out of Branch into a new function in
302
                    
460 by Martin Pool
- new testing command compare-trees
303
        if self.modified:
304
            print >>to_file, 'modified files:'
465 by Martin Pool
- Move show_status() out of Branch into a new function in
305
            show_list(self.modified)
306
            
307
        if show_unchanged and self.unchanged:
308
            print >>to_file, 'unchanged files:'
309
            show_list(self.unchanged)
460 by Martin Pool
- new testing command compare-trees
310
311
312
470 by Martin Pool
- remove dead code for cmd_compare_trees
313
def compare_trees(old_tree, new_tree, want_unchanged):
460 by Martin Pool
- new testing command compare-trees
314
    old_inv = old_tree.inventory
315
    new_inv = new_tree.inventory
316
    delta = TreeDelta()
462 by Martin Pool
- New form 'file_id in tree' to check if the file is present
317
    for file_id in old_tree:
318
        if file_id in new_tree:
460 by Martin Pool
- new testing command compare-trees
319
            old_path = old_inv.id2path(file_id)
320
            new_path = new_inv.id2path(file_id)
321
322
            kind = old_inv.get_file_kind(file_id)
323
            assert kind in ('file', 'directory', 'symlink', 'root_directory'), \
324
                   'invalid file kind %r' % kind
325
            if kind == 'file':
326
                old_sha1 = old_tree.get_file_sha1(file_id)
327
                new_sha1 = new_tree.get_file_sha1(file_id)
328
                text_modified = (old_sha1 != new_sha1)
329
            else:
330
                ## mutter("no text to check for %r %r" % (file_id, kind))
331
                text_modified = False
471 by Martin Pool
- actually avoid reporting unchanged files if not required
332
333
            # TODO: Can possibly avoid calculating path strings if the
334
            # two files are unchanged and their names and parents are
335
            # the same and the parents are unchanged all the way up.
336
            # May not be worthwhile.
460 by Martin Pool
- new testing command compare-trees
337
            
338
            if old_path != new_path:
339
                delta.renamed.append((old_path, new_path, file_id, text_modified))
340
            elif text_modified:
341
                delta.modified.append((new_path, file_id))
471 by Martin Pool
- actually avoid reporting unchanged files if not required
342
            elif want_unchanged:
463 by Martin Pool
- compare_trees() also reports unchanged files
343
                delta.unchanged.append((new_path, file_id))
460 by Martin Pool
- new testing command compare-trees
344
        else:
345
            delta.removed.append((old_inv.id2path(file_id), file_id))
346
    for file_id in new_inv:
347
        if file_id in old_inv:
348
            continue
349
        delta.added.append((new_inv.id2path(file_id), file_id))
350
            
351
    delta.removed.sort()
352
    delta.added.sort()
353
    delta.renamed.sort()
354
    delta.modified.sort()
355
356
    return delta