~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/diff.py

  • Committer: John Arbash Meinel
  • Date: 2006-10-11 23:08:27 UTC
  • mto: This revision was merged to the branch mainline in revision 2080.
  • Revision ID: john@arbash-meinel.com-20061011230827-2bdfc45020695281
Change Copyright .. by Canonical to Copyright ... Canonical

Show diffs side-by-side

added added

removed removed

Lines of Context:
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
 
17
 
import difflib
 
17
import errno
18
18
import os
19
19
import re
20
 
import shutil
 
20
import subprocess
21
21
import sys
22
 
 
23
 
from bzrlib.lazy_import import lazy_import
24
 
lazy_import(globals(), """
25
 
import errno
26
 
import subprocess
27
22
import tempfile
28
23
import time
29
24
 
30
25
from bzrlib import (
31
 
    branch as _mod_branch,
32
 
    bzrdir,
33
 
    commands,
34
26
    errors,
35
27
    osutils,
36
 
    patiencediff,
37
 
    textfile,
38
 
    timestamp,
39
28
    )
40
 
""")
41
 
 
42
 
from bzrlib.symbol_versioning import (
43
 
        deprecated_function,
44
 
        one_three
45
 
        )
46
 
from bzrlib.trace import warning
 
29
# compatibility - plugins import compare_trees from diff!!!
 
30
# deprecated as of 0.10
 
31
from bzrlib.delta import compare_trees
 
32
from bzrlib.errors import BzrError
 
33
from bzrlib.patiencediff import unified_diff
 
34
import bzrlib.patiencediff
 
35
from bzrlib.symbol_versioning import (deprecated_function,
 
36
        zero_eight)
 
37
from bzrlib.textfile import check_text_lines
 
38
from bzrlib.trace import mutter, warning
47
39
 
48
40
 
49
41
# TODO: Rather than building a changeset object, we should probably
50
42
# invoke callbacks on an object.  That object can either accumulate a
51
43
# list, write them out directly, etc etc.
52
44
 
53
 
 
54
 
class _PrematchedMatcher(difflib.SequenceMatcher):
55
 
    """Allow SequenceMatcher operations to use predetermined blocks"""
56
 
 
57
 
    def __init__(self, matching_blocks):
58
 
        difflib.SequenceMatcher(self, None, None)
59
 
        self.matching_blocks = matching_blocks
60
 
        self.opcodes = None
61
 
 
62
 
 
63
45
def internal_diff(old_filename, oldlines, new_filename, newlines, to_file,
64
46
                  allow_binary=False, sequence_matcher=None,
65
47
                  path_encoding='utf8'):
80
62
        return
81
63
    
82
64
    if allow_binary is False:
83
 
        textfile.check_text_lines(oldlines)
84
 
        textfile.check_text_lines(newlines)
 
65
        check_text_lines(oldlines)
 
66
        check_text_lines(newlines)
85
67
 
86
68
    if sequence_matcher is None:
87
 
        sequence_matcher = patiencediff.PatienceSequenceMatcher
88
 
    ud = patiencediff.unified_diff(oldlines, newlines,
 
69
        sequence_matcher = bzrlib.patiencediff.PatienceSequenceMatcher
 
70
    ud = unified_diff(oldlines, newlines,
89
71
                      fromfile=old_filename.encode(path_encoding),
90
72
                      tofile=new_filename.encode(path_encoding),
91
73
                      sequencematcher=sequence_matcher)
92
74
 
93
75
    ud = list(ud)
94
 
    if len(ud) == 0: # Identical contents, nothing to do
95
 
        return
96
76
    # work-around for difflib being too smart for its own good
97
77
    # if /dev/null is "1,0", patch won't recognize it as /dev/null
98
78
    if not oldlines:
99
79
        ud[2] = ud[2].replace('-1,0', '-0,0')
100
80
    elif not newlines:
101
81
        ud[2] = ud[2].replace('+1,0', '+0,0')
 
82
    # work around for difflib emitting random spaces after the label
 
83
    ud[0] = ud[0][:-2] + '\n'
 
84
    ud[1] = ud[1][:-2] + '\n'
102
85
 
103
86
    for line in ud:
104
87
        to_file.write(line)
105
88
        if not line.endswith('\n'):
106
89
            to_file.write("\n\\ No newline at end of file\n")
107
 
    to_file.write('\n')
 
90
    print >>to_file
 
91
 
 
92
 
 
93
def _set_lang_C():
 
94
    """Set the env var LANG=C"""
 
95
    osutils.set_or_unset_env('LANG', 'C')
 
96
    osutils.set_or_unset_env('LC_ALL', None)
 
97
    osutils.set_or_unset_env('LC_CTYPE', None)
 
98
    osutils.set_or_unset_env('LANGUAGE', None)
108
99
 
109
100
 
110
101
def _spawn_external_diff(diffcmd, capture_errors=True):
111
102
    """Spawn the external diff process, and return the child handle.
112
103
 
113
104
    :param diffcmd: The command list to spawn
114
 
    :param capture_errors: Capture stderr as well as setting LANG=C
115
 
        and LC_ALL=C. This lets us read and understand the output of diff,
116
 
        and respond to any errors.
 
105
    :param capture_errors: Capture stderr as well as setting LANG=C.
 
106
        This lets us read and understand the output of diff, and respond 
 
107
        to any errors.
117
108
    :return: A Popen object.
118
109
    """
119
110
    if capture_errors:
120
 
        # construct minimal environment
121
 
        env = {}
122
 
        path = os.environ.get('PATH')
123
 
        if path is not None:
124
 
            env['PATH'] = path
125
 
        env['LANGUAGE'] = 'C'   # on win32 only LANGUAGE has effect
126
 
        env['LANG'] = 'C'
127
 
        env['LC_ALL'] = 'C'
 
111
        if sys.platform == 'win32':
 
112
            # Win32 doesn't support preexec_fn, but that is
 
113
            # okay, because it doesn't support LANG either.
 
114
            preexec_fn = None
 
115
        else:
 
116
            preexec_fn = _set_lang_C
128
117
        stderr = subprocess.PIPE
129
118
    else:
130
 
        env = None
 
119
        preexec_fn = None
131
120
        stderr = None
132
121
 
133
122
    try:
135
124
                                stdin=subprocess.PIPE,
136
125
                                stdout=subprocess.PIPE,
137
126
                                stderr=stderr,
138
 
                                env=env)
 
127
                                preexec_fn=preexec_fn)
139
128
    except OSError, e:
140
129
        if e.errno == errno.ENOENT:
141
130
            raise errors.NoDiff(str(e))
223
212
            # Write out the new i18n diff response
224
213
            to_file.write(out+'\n')
225
214
            if pipe.returncode != 2:
226
 
                raise errors.BzrError(
227
 
                               'external diff failed with exit code 2'
228
 
                               ' when run with LANG=C and LC_ALL=C,'
229
 
                               ' but not when run natively: %r' % (diffcmd,))
 
215
                raise BzrError('external diff failed with exit code 2'
 
216
                               ' when run with LANG=C, but not when run'
 
217
                               ' natively: %r' % (diffcmd,))
230
218
 
231
219
            first_line = lang_c_out.split('\n', 1)[0]
232
220
            # Starting with diffutils 2.8.4 the word "binary" was dropped.
233
221
            m = re.match('^(binary )?files.*differ$', first_line, re.I)
234
222
            if m is None:
235
 
                raise errors.BzrError('external diff failed with exit code 2;'
236
 
                                      ' command: %r' % (diffcmd,))
 
223
                raise BzrError('external diff failed with exit code 2;'
 
224
                               ' command: %r' % (diffcmd,))
237
225
            else:
238
226
                # Binary files differ, just return
239
227
                return
248
236
            else:
249
237
                msg = 'exit code %d' % rc
250
238
                
251
 
            raise errors.BzrError('external diff failed with %s; command: %r' 
252
 
                                  % (rc, diffcmd))
 
239
            raise BzrError('external diff failed with %s; command: %r' 
 
240
                           % (rc, diffcmd))
253
241
 
254
242
 
255
243
    finally:
272
260
                        new_abspath, e)
273
261
 
274
262
 
275
 
def _get_trees_to_diff(path_list, revision_specs, old_url, new_url):
276
 
    """Get the trees and specific files to diff given a list of paths.
277
 
 
278
 
    This method works out the trees to be diff'ed and the files of
279
 
    interest within those trees.
280
 
 
281
 
    :param path_list:
282
 
        the list of arguments passed to the diff command
283
 
    :param revision_specs:
284
 
        Zero, one or two RevisionSpecs from the diff command line,
285
 
        saying what revisions to compare.
286
 
    :param old_url:
287
 
        The url of the old branch or tree. If None, the tree to use is
288
 
        taken from the first path, if any, or the current working tree.
289
 
    :param new_url:
290
 
        The url of the new branch or tree. If None, the tree to use is
291
 
        taken from the first path, if any, or the current working tree.
292
 
    :returns:
293
 
        a tuple of (old_tree, new_tree, specific_files, extra_trees) where
294
 
        extra_trees is a sequence of additional trees to search in for
295
 
        file-ids.
296
 
    """
297
 
    # Get the old and new revision specs
298
 
    old_revision_spec = None
299
 
    new_revision_spec = None
300
 
    if revision_specs is not None:
301
 
        if len(revision_specs) > 0:
302
 
            old_revision_spec = revision_specs[0]
303
 
            if old_url is None:
304
 
                old_url = old_revision_spec.get_branch()
305
 
        if len(revision_specs) > 1:
306
 
            new_revision_spec = revision_specs[1]
307
 
            if new_url is None:
308
 
                new_url = new_revision_spec.get_branch()
309
 
 
310
 
    other_paths = []
311
 
    make_paths_wt_relative = True
312
 
    consider_relpath = True
313
 
    if path_list is None or len(path_list) == 0:
314
 
        # If no path is given, the current working tree is used
315
 
        default_location = u'.'
316
 
        consider_relpath = False
317
 
    elif old_url is not None and new_url is not None:
318
 
        other_paths = path_list
319
 
        make_paths_wt_relative = False
320
 
    else:
321
 
        default_location = path_list[0]
322
 
        other_paths = path_list[1:]
323
 
 
324
 
    # Get the old location
325
 
    specific_files = []
326
 
    if old_url is None:
327
 
        old_url = default_location
328
 
    working_tree, branch, relpath = \
329
 
        bzrdir.BzrDir.open_containing_tree_or_branch(old_url)
330
 
    if consider_relpath and relpath != '':
331
 
        specific_files.append(relpath)
332
 
    old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch)
333
 
 
334
 
    # Get the new location
335
 
    if new_url is None:
336
 
        new_url = default_location
337
 
    if new_url != old_url:
338
 
        working_tree, branch, relpath = \
339
 
            bzrdir.BzrDir.open_containing_tree_or_branch(new_url)
340
 
        if consider_relpath and relpath != '':
341
 
            specific_files.append(relpath)
342
 
    new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch,
343
 
        basis_is_default=working_tree is None)
344
 
 
345
 
    # Get the specific files (all files is None, no files is [])
346
 
    if make_paths_wt_relative and working_tree is not None:
347
 
        other_paths = _relative_paths_in_tree(working_tree, other_paths)
348
 
    specific_files.extend(other_paths)
349
 
    if len(specific_files) == 0:
350
 
        specific_files = None
351
 
 
352
 
    # Get extra trees that ought to be searched for file-ids
353
 
    extra_trees = None
354
 
    if working_tree is not None and working_tree not in (old_tree, new_tree):
355
 
        extra_trees = (working_tree,)
356
 
    return old_tree, new_tree, specific_files, extra_trees
357
 
 
358
 
 
359
 
def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True):
360
 
    if branch is None and tree is not None:
361
 
        branch = tree.branch
362
 
    if spec is None or spec.spec is None:
363
 
        if basis_is_default:
364
 
            if tree is not None:
365
 
                return tree.basis_tree()
366
 
            else:
367
 
                return branch.basis_tree()
368
 
        else:
369
 
            return tree
370
 
    return spec.as_tree(branch)
371
 
 
372
 
 
373
 
def _relative_paths_in_tree(tree, paths):
374
 
    """Get the relative paths within a working tree.
375
 
 
376
 
    Each path may be either an absolute path or a path relative to the
377
 
    current working directory.
378
 
    """
379
 
    result = []
380
 
    for filename in paths:
381
 
        try:
382
 
            result.append(tree.relpath(osutils.dereference_path(filename)))
383
 
        except errors.PathNotChild:
384
 
            raise errors.BzrCommandError("Files are in different branches")
385
 
    return result
 
263
@deprecated_function(zero_eight)
 
264
def show_diff(b, from_spec, specific_files, external_diff_options=None,
 
265
              revision2=None, output=None, b2=None):
 
266
    """Shortcut for showing the diff to the working tree.
 
267
 
 
268
    Please use show_diff_trees instead.
 
269
 
 
270
    b
 
271
        Branch.
 
272
 
 
273
    revision
 
274
        None for 'basis tree', or otherwise the old revision to compare against.
 
275
    
 
276
    The more general form is show_diff_trees(), where the caller
 
277
    supplies any two trees.
 
278
    """
 
279
    if output is None:
 
280
        output = sys.stdout
 
281
 
 
282
    if from_spec is None:
 
283
        old_tree = b.bzrdir.open_workingtree()
 
284
        if b2 is None:
 
285
            old_tree = old_tree = old_tree.basis_tree()
 
286
    else:
 
287
        old_tree = b.repository.revision_tree(from_spec.in_history(b).rev_id)
 
288
 
 
289
    if revision2 is None:
 
290
        if b2 is None:
 
291
            new_tree = b.bzrdir.open_workingtree()
 
292
        else:
 
293
            new_tree = b2.bzrdir.open_workingtree()
 
294
    else:
 
295
        new_tree = b.repository.revision_tree(revision2.in_history(b).rev_id)
 
296
 
 
297
    return show_diff_trees(old_tree, new_tree, output, specific_files,
 
298
                           external_diff_options)
 
299
 
 
300
 
 
301
def diff_cmd_helper(tree, specific_files, external_diff_options, 
 
302
                    old_revision_spec=None, new_revision_spec=None,
 
303
                    old_label='a/', new_label='b/'):
 
304
    """Helper for cmd_diff.
 
305
 
 
306
   tree 
 
307
        A WorkingTree
 
308
 
 
309
    specific_files
 
310
        The specific files to compare, or None
 
311
 
 
312
    external_diff_options
 
313
        If non-None, run an external diff, and pass it these options
 
314
 
 
315
    old_revision_spec
 
316
        If None, use basis tree as old revision, otherwise use the tree for
 
317
        the specified revision. 
 
318
 
 
319
    new_revision_spec
 
320
        If None, use working tree as new revision, otherwise use the tree for
 
321
        the specified revision.
 
322
    
 
323
    The more general form is show_diff_trees(), where the caller
 
324
    supplies any two trees.
 
325
    """
 
326
    def spec_tree(spec):
 
327
        if tree:
 
328
            revision = spec.in_store(tree.branch)
 
329
        else:
 
330
            revision = spec.in_store(None)
 
331
        revision_id = revision.rev_id
 
332
        branch = revision.branch
 
333
        return branch.repository.revision_tree(revision_id)
 
334
    if old_revision_spec is None:
 
335
        old_tree = tree.basis_tree()
 
336
    else:
 
337
        old_tree = spec_tree(old_revision_spec)
 
338
 
 
339
    if new_revision_spec is None:
 
340
        new_tree = tree
 
341
    else:
 
342
        new_tree = spec_tree(new_revision_spec)
 
343
    if new_tree is not tree:
 
344
        extra_trees = (tree,)
 
345
    else:
 
346
        extra_trees = None
 
347
 
 
348
    return show_diff_trees(old_tree, new_tree, sys.stdout, specific_files,
 
349
                           external_diff_options,
 
350
                           old_label=old_label, new_label=new_label,
 
351
                           extra_trees=extra_trees)
386
352
 
387
353
 
388
354
def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
389
355
                    external_diff_options=None,
390
356
                    old_label='a/', new_label='b/',
391
 
                    extra_trees=None,
392
 
                    path_encoding='utf8',
393
 
                    using=None):
 
357
                    extra_trees=None):
394
358
    """Show in text form the changes from one tree to another.
395
359
 
396
 
    to_file
397
 
        The output stream.
398
 
 
399
 
    specific_files
400
 
        Include only changes to these files - None for all changes.
 
360
    to_files
 
361
        If set, include only changes to these files.
401
362
 
402
363
    external_diff_options
403
364
        If set, use an external GNU diff and pass these options.
404
365
 
405
366
    extra_trees
406
367
        If set, more Trees to use for looking up file ids
407
 
 
408
 
    path_encoding
409
 
        If set, the path will be encoded as specified, otherwise is supposed
410
 
        to be utf8
411
368
    """
412
369
    old_tree.lock_read()
413
370
    try:
414
 
        if extra_trees is not None:
415
 
            for tree in extra_trees:
416
 
                tree.lock_read()
417
371
        new_tree.lock_read()
418
372
        try:
419
 
            differ = DiffTree.from_trees_options(old_tree, new_tree, to_file,
420
 
                                                 path_encoding,
421
 
                                                 external_diff_options,
422
 
                                                 old_label, new_label, using)
423
 
            return differ.show_diff(specific_files, extra_trees)
 
373
            return _show_diff_trees(old_tree, new_tree, to_file,
 
374
                                    specific_files, external_diff_options,
 
375
                                    old_label=old_label, new_label=new_label,
 
376
                                    extra_trees=extra_trees)
424
377
        finally:
425
378
            new_tree.unlock()
426
 
            if extra_trees is not None:
427
 
                for tree in extra_trees:
428
 
                    tree.unlock()
429
379
    finally:
430
380
        old_tree.unlock()
431
381
 
432
382
 
 
383
def _show_diff_trees(old_tree, new_tree, to_file,
 
384
                     specific_files, external_diff_options, 
 
385
                     old_label='a/', new_label='b/', extra_trees=None):
 
386
 
 
387
    # GNU Patch uses the epoch date to detect files that are being added
 
388
    # or removed in a diff.
 
389
    EPOCH_DATE = '1970-01-01 00:00:00 +0000'
 
390
 
 
391
    # TODO: Generation of pseudo-diffs for added/deleted files could
 
392
    # be usefully made into a much faster special case.
 
393
 
 
394
    if external_diff_options:
 
395
        assert isinstance(external_diff_options, basestring)
 
396
        opts = external_diff_options.split()
 
397
        def diff_file(olab, olines, nlab, nlines, to_file):
 
398
            external_diff(olab, olines, nlab, nlines, to_file, opts)
 
399
    else:
 
400
        diff_file = internal_diff
 
401
    
 
402
    delta = new_tree.changes_from(old_tree,
 
403
        specific_files=specific_files,
 
404
        extra_trees=extra_trees, require_versioned=True)
 
405
 
 
406
    has_changes = 0
 
407
    for path, file_id, kind in delta.removed:
 
408
        has_changes = 1
 
409
        print >>to_file, '=== removed %s %r' % (kind, path.encode('utf8'))
 
410
        old_name = '%s%s\t%s' % (old_label, path,
 
411
                                 _patch_header_date(old_tree, file_id, path))
 
412
        new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
 
413
        old_tree.inventory[file_id].diff(diff_file, old_name, old_tree,
 
414
                                         new_name, None, None, to_file)
 
415
    for path, file_id, kind in delta.added:
 
416
        has_changes = 1
 
417
        print >>to_file, '=== added %s %r' % (kind, path.encode('utf8'))
 
418
        old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
 
419
        new_name = '%s%s\t%s' % (new_label, path,
 
420
                                 _patch_header_date(new_tree, file_id, path))
 
421
        new_tree.inventory[file_id].diff(diff_file, new_name, new_tree,
 
422
                                         old_name, None, None, to_file, 
 
423
                                         reverse=True)
 
424
    for (old_path, new_path, file_id, kind,
 
425
         text_modified, meta_modified) in delta.renamed:
 
426
        has_changes = 1
 
427
        prop_str = get_prop_change(meta_modified)
 
428
        print >>to_file, '=== renamed %s %r => %r%s' % (
 
429
                    kind, old_path.encode('utf8'),
 
430
                    new_path.encode('utf8'), prop_str)
 
431
        old_name = '%s%s\t%s' % (old_label, old_path,
 
432
                                 _patch_header_date(old_tree, file_id,
 
433
                                                    old_path))
 
434
        new_name = '%s%s\t%s' % (new_label, new_path,
 
435
                                 _patch_header_date(new_tree, file_id,
 
436
                                                    new_path))
 
437
        _maybe_diff_file_or_symlink(old_name, old_tree, file_id,
 
438
                                    new_name, new_tree,
 
439
                                    text_modified, kind, to_file, diff_file)
 
440
    for path, file_id, kind, text_modified, meta_modified in delta.modified:
 
441
        has_changes = 1
 
442
        prop_str = get_prop_change(meta_modified)
 
443
        print >>to_file, '=== modified %s %r%s' % (kind, path.encode('utf8'), prop_str)
 
444
        old_name = '%s%s\t%s' % (old_label, path,
 
445
                                 _patch_header_date(old_tree, file_id, path))
 
446
        new_name = '%s%s\t%s' % (new_label, path,
 
447
                                 _patch_header_date(new_tree, file_id, path))
 
448
        if text_modified:
 
449
            _maybe_diff_file_or_symlink(old_name, old_tree, file_id,
 
450
                                        new_name, new_tree,
 
451
                                        True, kind, to_file, diff_file)
 
452
 
 
453
    return has_changes
 
454
 
 
455
 
433
456
def _patch_header_date(tree, file_id, path):
434
457
    """Returns a timestamp suitable for use in a patch header."""
435
 
    mtime = tree.get_file_mtime(file_id, path)
436
 
    return timestamp.format_patch_date(mtime)
437
 
 
438
 
 
439
 
@deprecated_function(one_three)
 
458
    tm = time.gmtime(tree.get_file_mtime(file_id, path))
 
459
    return time.strftime('%Y-%m-%d %H:%M:%S +0000', tm)
 
460
 
 
461
 
 
462
def _raise_if_nonexistent(paths, old_tree, new_tree):
 
463
    """Complain if paths are not in either inventory or tree.
 
464
 
 
465
    It's OK if the files exist in either tree's inventory, or
 
466
    if they exist in the tree but are not versioned.
 
467
    
 
468
    This can be used by operations such as bzr status that can accept
 
469
    unknown or ignored files.
 
470
    """
 
471
    mutter("check paths: %r", paths)
 
472
    if not paths:
 
473
        return
 
474
    s = old_tree.filter_unversioned_files(paths)
 
475
    s = new_tree.filter_unversioned_files(s)
 
476
    s = [path for path in s if not new_tree.has_filename(path)]
 
477
    if s:
 
478
        raise errors.PathsDoNotExist(sorted(s))
 
479
 
 
480
 
440
481
def get_prop_change(meta_modified):
441
482
    if meta_modified:
442
483
        return " (properties changed)"
443
484
    else:
444
485
        return  ""
445
486
 
446
 
def get_executable_change(old_is_x, new_is_x):
447
 
    descr = { True:"+x", False:"-x", None:"??" }
448
 
    if old_is_x != new_is_x:
449
 
        return ["%s to %s" % (descr[old_is_x], descr[new_is_x],)]
450
 
    else:
451
 
        return []
452
 
 
453
 
 
454
 
class DiffPath(object):
455
 
    """Base type for command objects that compare files"""
456
 
 
457
 
    # The type or contents of the file were unsuitable for diffing
458
 
    CANNOT_DIFF = 'CANNOT_DIFF'
459
 
    # The file has changed in a semantic way
460
 
    CHANGED = 'CHANGED'
461
 
    # The file content may have changed, but there is no semantic change
462
 
    UNCHANGED = 'UNCHANGED'
463
 
 
464
 
    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8'):
465
 
        """Constructor.
466
 
 
467
 
        :param old_tree: The tree to show as the old tree in the comparison
468
 
        :param new_tree: The tree to show as new in the comparison
469
 
        :param to_file: The file to write comparison data to
470
 
        :param path_encoding: The character encoding to write paths in
471
 
        """
472
 
        self.old_tree = old_tree
473
 
        self.new_tree = new_tree
474
 
        self.to_file = to_file
475
 
        self.path_encoding = path_encoding
476
 
 
477
 
    def finish(self):
478
 
        pass
479
 
 
480
 
    @classmethod
481
 
    def from_diff_tree(klass, diff_tree):
482
 
        return klass(diff_tree.old_tree, diff_tree.new_tree,
483
 
                     diff_tree.to_file, diff_tree.path_encoding)
484
 
 
485
 
    @staticmethod
486
 
    def _diff_many(differs, file_id, old_path, new_path, old_kind, new_kind):
487
 
        for file_differ in differs:
488
 
            result = file_differ.diff(file_id, old_path, new_path, old_kind,
489
 
                                      new_kind)
490
 
            if result is not DiffPath.CANNOT_DIFF:
491
 
                return result
492
 
        else:
493
 
            return DiffPath.CANNOT_DIFF
494
 
 
495
 
 
496
 
class DiffKindChange(object):
497
 
    """Special differ for file kind changes.
498
 
 
499
 
    Represents kind change as deletion + creation.  Uses the other differs
500
 
    to do this.
501
 
    """
502
 
    def __init__(self, differs):
503
 
        self.differs = differs
504
 
 
505
 
    def finish(self):
506
 
        pass
507
 
 
508
 
    @classmethod
509
 
    def from_diff_tree(klass, diff_tree):
510
 
        return klass(diff_tree.differs)
511
 
 
512
 
    def diff(self, file_id, old_path, new_path, old_kind, new_kind):
513
 
        """Perform comparison
514
 
 
515
 
        :param file_id: The file_id of the file to compare
516
 
        :param old_path: Path of the file in the old tree
517
 
        :param new_path: Path of the file in the new tree
518
 
        :param old_kind: Old file-kind of the file
519
 
        :param new_kind: New file-kind of the file
520
 
        """
521
 
        if None in (old_kind, new_kind):
522
 
            return DiffPath.CANNOT_DIFF
523
 
        result = DiffPath._diff_many(self.differs, file_id, old_path,
524
 
                                       new_path, old_kind, None)
525
 
        if result is DiffPath.CANNOT_DIFF:
526
 
            return result
527
 
        return DiffPath._diff_many(self.differs, file_id, old_path, new_path,
528
 
                                     None, new_kind)
529
 
 
530
 
 
531
 
class DiffDirectory(DiffPath):
532
 
 
533
 
    def diff(self, file_id, old_path, new_path, old_kind, new_kind):
534
 
        """Perform comparison between two directories.  (dummy)
535
 
 
536
 
        """
537
 
        if 'directory' not in (old_kind, new_kind):
538
 
            return self.CANNOT_DIFF
539
 
        if old_kind not in ('directory', None):
540
 
            return self.CANNOT_DIFF
541
 
        if new_kind not in ('directory', None):
542
 
            return self.CANNOT_DIFF
543
 
        return self.CHANGED
544
 
 
545
 
 
546
 
class DiffSymlink(DiffPath):
547
 
 
548
 
    def diff(self, file_id, old_path, new_path, old_kind, new_kind):
549
 
        """Perform comparison between two symlinks
550
 
 
551
 
        :param file_id: The file_id of the file to compare
552
 
        :param old_path: Path of the file in the old tree
553
 
        :param new_path: Path of the file in the new tree
554
 
        :param old_kind: Old file-kind of the file
555
 
        :param new_kind: New file-kind of the file
556
 
        """
557
 
        if 'symlink' not in (old_kind, new_kind):
558
 
            return self.CANNOT_DIFF
559
 
        if old_kind == 'symlink':
560
 
            old_target = self.old_tree.get_symlink_target(file_id)
561
 
        elif old_kind is None:
562
 
            old_target = None
563
 
        else:
564
 
            return self.CANNOT_DIFF
565
 
        if new_kind == 'symlink':
566
 
            new_target = self.new_tree.get_symlink_target(file_id)
567
 
        elif new_kind is None:
568
 
            new_target = None
569
 
        else:
570
 
            return self.CANNOT_DIFF
571
 
        return self.diff_symlink(old_target, new_target)
572
 
 
573
 
    def diff_symlink(self, old_target, new_target):
574
 
        if old_target is None:
575
 
            self.to_file.write('=== target is %r\n' % new_target)
576
 
        elif new_target is None:
577
 
            self.to_file.write('=== target was %r\n' % old_target)
578
 
        else:
579
 
            self.to_file.write('=== target changed %r => %r\n' %
580
 
                              (old_target, new_target))
581
 
        return self.CHANGED
582
 
 
583
 
 
584
 
class DiffText(DiffPath):
585
 
 
586
 
    # GNU Patch uses the epoch date to detect files that are being added
587
 
    # or removed in a diff.
588
 
    EPOCH_DATE = '1970-01-01 00:00:00 +0000'
589
 
 
590
 
    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
591
 
                 old_label='', new_label='', text_differ=internal_diff):
592
 
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
593
 
        self.text_differ = text_differ
594
 
        self.old_label = old_label
595
 
        self.new_label = new_label
596
 
        self.path_encoding = path_encoding
597
 
 
598
 
    def diff(self, file_id, old_path, new_path, old_kind, new_kind):
599
 
        """Compare two files in unified diff format
600
 
 
601
 
        :param file_id: The file_id of the file to compare
602
 
        :param old_path: Path of the file in the old tree
603
 
        :param new_path: Path of the file in the new tree
604
 
        :param old_kind: Old file-kind of the file
605
 
        :param new_kind: New file-kind of the file
606
 
        """
607
 
        if 'file' not in (old_kind, new_kind):
608
 
            return self.CANNOT_DIFF
609
 
        from_file_id = to_file_id = file_id
610
 
        if old_kind == 'file':
611
 
            old_date = _patch_header_date(self.old_tree, file_id, old_path)
612
 
        elif old_kind is None:
613
 
            old_date = self.EPOCH_DATE
614
 
            from_file_id = None
615
 
        else:
616
 
            return self.CANNOT_DIFF
617
 
        if new_kind == 'file':
618
 
            new_date = _patch_header_date(self.new_tree, file_id, new_path)
619
 
        elif new_kind is None:
620
 
            new_date = self.EPOCH_DATE
621
 
            to_file_id = None
622
 
        else:
623
 
            return self.CANNOT_DIFF
624
 
        from_label = '%s%s\t%s' % (self.old_label, old_path, old_date)
625
 
        to_label = '%s%s\t%s' % (self.new_label, new_path, new_date)
626
 
        return self.diff_text(from_file_id, to_file_id, from_label, to_label)
627
 
 
628
 
    def diff_text(self, from_file_id, to_file_id, from_label, to_label):
629
 
        """Diff the content of given files in two trees
630
 
 
631
 
        :param from_file_id: The id of the file in the from tree.  If None,
632
 
            the file is not present in the from tree.
633
 
        :param to_file_id: The id of the file in the to tree.  This may refer
634
 
            to a different file from from_file_id.  If None,
635
 
            the file is not present in the to tree.
636
 
        """
637
 
        def _get_text(tree, file_id):
638
 
            if file_id is not None:
639
 
                return tree.get_file(file_id).readlines()
640
 
            else:
641
 
                return []
642
 
        try:
643
 
            from_text = _get_text(self.old_tree, from_file_id)
644
 
            to_text = _get_text(self.new_tree, to_file_id)
645
 
            self.text_differ(from_label, from_text, to_label, to_text,
646
 
                             self.to_file)
647
 
        except errors.BinaryFile:
648
 
            self.to_file.write(
649
 
                  ("Binary files %s and %s differ\n" %
650
 
                  (from_label, to_label)).encode(self.path_encoding))
651
 
        return self.CHANGED
652
 
 
653
 
 
654
 
class DiffFromTool(DiffPath):
655
 
 
656
 
    def __init__(self, command_template, old_tree, new_tree, to_file,
657
 
                 path_encoding='utf-8'):
658
 
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
659
 
        self.command_template = command_template
660
 
        self._root = osutils.mkdtemp(prefix='bzr-diff-')
661
 
 
662
 
    @classmethod
663
 
    def from_string(klass, command_string, old_tree, new_tree, to_file,
664
 
                    path_encoding='utf-8'):
665
 
        command_template = commands.shlex_split_unicode(command_string)
666
 
        command_template.extend(['%(old_path)s', '%(new_path)s'])
667
 
        return klass(command_template, old_tree, new_tree, to_file,
668
 
                     path_encoding)
669
 
 
670
 
    @classmethod
671
 
    def make_from_diff_tree(klass, command_string):
672
 
        def from_diff_tree(diff_tree):
673
 
            return klass.from_string(command_string, diff_tree.old_tree,
674
 
                                     diff_tree.new_tree, diff_tree.to_file)
675
 
        return from_diff_tree
676
 
 
677
 
    def _get_command(self, old_path, new_path):
678
 
        my_map = {'old_path': old_path, 'new_path': new_path}
679
 
        return [t % my_map for t in self.command_template]
680
 
 
681
 
    def _execute(self, old_path, new_path):
682
 
        command = self._get_command(old_path, new_path)
683
 
        try:
684
 
            proc = subprocess.Popen(command, stdout=subprocess.PIPE,
685
 
                                    cwd=self._root)
686
 
        except OSError, e:
687
 
            if e.errno == errno.ENOENT:
688
 
                raise errors.ExecutableMissing(command[0])
689
 
            else:
690
 
                raise
691
 
        self.to_file.write(proc.stdout.read())
692
 
        return proc.wait()
693
 
 
694
 
    def _try_symlink_root(self, tree, prefix):
695
 
        if (getattr(tree, 'abspath', None) is None
696
 
            or not osutils.host_os_dereferences_symlinks()):
697
 
            return False
698
 
        try:
699
 
            os.symlink(tree.abspath(''), osutils.pathjoin(self._root, prefix))
700
 
        except OSError, e:
701
 
            if e.errno != errno.EEXIST:
702
 
                raise
703
 
        return True
704
 
 
705
 
    def _write_file(self, file_id, tree, prefix, relpath):
706
 
        full_path = osutils.pathjoin(self._root, prefix, relpath)
707
 
        if self._try_symlink_root(tree, prefix):
708
 
            return full_path
709
 
        parent_dir = osutils.dirname(full_path)
710
 
        try:
711
 
            os.makedirs(parent_dir)
712
 
        except OSError, e:
713
 
            if e.errno != errno.EEXIST:
714
 
                raise
715
 
        source = tree.get_file(file_id, relpath)
716
 
        try:
717
 
            target = open(full_path, 'wb')
718
 
            try:
719
 
                osutils.pumpfile(source, target)
720
 
            finally:
721
 
                target.close()
722
 
        finally:
723
 
            source.close()
724
 
        osutils.make_readonly(full_path)
725
 
        mtime = tree.get_file_mtime(file_id)
726
 
        os.utime(full_path, (mtime, mtime))
727
 
        return full_path
728
 
 
729
 
    def _prepare_files(self, file_id, old_path, new_path):
730
 
        old_disk_path = self._write_file(file_id, self.old_tree, 'old',
731
 
                                         old_path)
732
 
        new_disk_path = self._write_file(file_id, self.new_tree, 'new',
733
 
                                         new_path)
734
 
        return old_disk_path, new_disk_path
735
 
 
736
 
    def finish(self):
737
 
        osutils.rmtree(self._root)
738
 
 
739
 
    def diff(self, file_id, old_path, new_path, old_kind, new_kind):
740
 
        if (old_kind, new_kind) != ('file', 'file'):
741
 
            return DiffPath.CANNOT_DIFF
742
 
        self._prepare_files(file_id, old_path, new_path)
743
 
        self._execute(osutils.pathjoin('old', old_path),
744
 
                      osutils.pathjoin('new', new_path))
745
 
 
746
 
 
747
 
class DiffTree(object):
    """Provides textual representations of the difference between two trees.

    A DiffTree examines two trees and, where a file-id has altered between
    them, generates a textual representation of the difference.  It holds
    a sequence of DiffPath objects, each of which is given the opportunity
    to handle a given altered file-id.  That sequence can be extended
    globally by appending to DiffTree.diff_factories, or for one diff
    operation by supplying extra_factories.
    """

    # list of factories that can provide instances of DiffPath objects
    # may be extended by plugins.
    diff_factories = [DiffSymlink.from_diff_tree,
                      DiffDirectory.from_diff_tree]

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 diff_text=None, extra_factories=None):
        """Constructor

        :param old_tree: Tree to show as old in the comparison
        :param new_tree: Tree to show as new in the comparison
        :param to_file: File to write comparison to
        :param path_encoding: Character encoding to write paths in
        :param diff_text: DiffPath-type object to use as a last resort for
            diffing text files.
        :param extra_factories: Factories of DiffPaths to try before any other
            DiffPaths"""
        if diff_text is None:
            diff_text = DiffText(old_tree, new_tree, to_file, path_encoding,
                                 '', '',  internal_diff)
        self.old_tree = old_tree
        self.new_tree = new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding
        # Differs are tried in this order: extra factories, the class-wide
        # factories, the text differ, and finally the kind-change differ.
        self.differs = []
        if extra_factories is not None:
            for factory in extra_factories:
                self.differs.append(factory(self))
        for factory in self.diff_factories:
            self.differs.append(factory(self))
        kind_change = DiffKindChange.from_diff_tree(self)
        self.differs.append(diff_text)
        self.differs.append(kind_change)

    @classmethod
    def from_trees_options(klass, old_tree, new_tree, to_file,
                           path_encoding, external_diff_options, old_label,
                           new_label, using):
        """Factory for producing a DiffTree.

        Designed to accept options used by show_diff_trees.
        :param old_tree: The tree to show as old in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: File to write comparisons to
        :param path_encoding: Character encoding to use for writing paths
        :param external_diff_options: If supplied, use the installed diff
            binary to perform file comparison, using supplied options.
        :param old_label: Prefix to use for old file labels
        :param new_label: Prefix to use for new file labels
        :param using: Commandline to use to invoke an external diff tool
        """
        extra_factories = []
        if using is not None:
            extra_factories.append(DiffFromTool.make_from_diff_tree(using))
        if external_diff_options:
            opts = external_diff_options.split()
            def diff_file(olab, olines, nlab, nlines, to_file):
                external_diff(olab, olines, nlab, nlines, to_file, opts)
        else:
            diff_file = internal_diff
        text_differ = DiffText(old_tree, new_tree, to_file, path_encoding,
                               old_label, new_label, diff_file)
        return klass(old_tree, new_tree, to_file, path_encoding, text_differ,
                     extra_factories)

    def show_diff(self, specific_files, extra_trees=None):
        """Write tree diff to self.to_file

        Every differ's finish() is called afterwards, whether or not the
        diff succeeded.

        :param specific_files: the specific files to compare (recursive)
        :param extra_trees: extra trees to use for mapping paths to file_ids
        :return: 1 if any content change or rename was reported, else 0
        """
        try:
            return self._show_diff(specific_files, extra_trees)
        finally:
            for differ in self.differs:
                differ.finish()

    def _show_diff(self, specific_files, extra_trees):
        """Write a header, and a diff where content changed, for each change

        Changes are processed in order of path (new path, or old path when
        there is no new path).

        :return: 1 if any content change or rename was reported, else 0
        """
        # TODO: Generation of pseudo-diffs for added/deleted files could
        # be usefully made into a much faster special case.
        def changes_key(change):
            before, after = change[1]
            if after is not None:
                return after
            return before

        def get_encoded_path(path):
            if path is None:
                return None
            return path.encode(self.path_encoding, "replace")

        iterator = self.new_tree.iter_changes(self.old_tree,
                                               specific_files=specific_files,
                                               extra_trees=extra_trees,
                                               require_versioned=True)
        has_changes = 0
        for (file_id, paths, changed_content, versioned, parent, name, kind,
             executable) in sorted(iterator, key=changes_key):
            # The root does not get diffed, and items with no known kind (that
            # is, missing) in both trees are skipped as well.
            if parent == (None, None) or kind == (None, None):
                continue
            oldpath, newpath = paths
            oldpath_encoded = get_encoded_path(oldpath)
            newpath_encoded = get_encoded_path(newpath)
            presence = (kind[0] is not None and versioned[0],
                        kind[1] is not None and versioned[1])
            old_location = (parent[0], name[0])
            new_location = (parent[1], name[1])
            renamed = old_location != new_location

            properties_changed = list(
                get_executable_change(executable[0], executable[1]))
            prop_str = ""
            if properties_changed:
                prop_str = " (properties changed: %s)" % (
                    ", ".join(properties_changed),)

            if presence == (True, False):
                header = "=== removed %s '%s'\n" % (kind[0], oldpath_encoded)
                # Only one path exists; use it for both sides of the diff.
                newpath = oldpath
            elif presence == (False, True):
                header = "=== added %s '%s'\n" % (kind[1], newpath_encoded)
                oldpath = newpath
            elif renamed:
                header = "=== renamed %s '%s' => '%s'%s\n" % (
                    kind[0], oldpath_encoded, newpath_encoded, prop_str)
            else:
                # if it was produced by iter_changes, it must be
                # modified *somehow*, either content or execute bit.
                header = "=== modified %s '%s'%s\n" % (
                    kind[0], newpath_encoded, prop_str)
            self.to_file.write(header)

            if changed_content:
                self.diff(file_id, oldpath, newpath)
            if changed_content or renamed:
                has_changes = 1
        return has_changes

    def diff(self, file_id, old_path, new_path):
        """Perform a diff of a single file

        The kind of the file is looked up in each tree (None where the tree
        reports NoSuchId or NoSuchFile) and the differs are tried through
        DiffPath._diff_many.

        :param file_id: file-id of the file
        :param old_path: The path of the file in the old tree
        :param new_path: The path of the file in the new tree
        :raises errors.NoDiffFound: if the result is DiffPath.CANNOT_DIFF
        """
        def kind_in(tree):
            try:
                return tree.kind(file_id)
            except (errors.NoSuchId, errors.NoSuchFile):
                return None

        old_kind = kind_in(self.old_tree)
        new_kind = kind_in(self.new_tree)

        result = DiffPath._diff_many(self.differs, file_id, old_path,
                                     new_path, old_kind, new_kind)
        if result is DiffPath.CANNOT_DIFF:
            # Report the new path when there is one, else the old path.
            if new_path is not None:
                error_path = new_path
            else:
                error_path = old_path
            raise errors.NoDiffFound(error_path)
 
487
 
 
488
def _maybe_diff_file_or_symlink(old_path, old_tree, file_id,
 
489
                                new_path, new_tree, text_modified,
 
490
                                kind, to_file, diff_file):
 
491
    if text_modified:
 
492
        new_entry = new_tree.inventory[file_id]
 
493
        old_tree.inventory[file_id].diff(diff_file,
 
494
                                         old_path, old_tree,
 
495
                                         new_path, new_entry, 
 
496
                                         new_tree, to_file)