2
# -*- coding: UTF-8 -*-
1
# Copyright (C) 2005-2014 Canonical Ltd.
4
3
# This program is free software; you can redistribute it and/or modify
5
4
# it under the terms of the GNU General Public License as published by
6
5
# the Free Software Foundation; either version 2 of the License, or
7
6
# (at your option) any later version.
9
8
# This program is distributed in the hope that it will be useful,
10
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
11
# GNU General Public License for more details.
14
13
# You should have received a copy of the GNU General Public License
15
14
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
from trace import mutter
21
from errors import BzrError
24
def diff_trees(old_tree, new_tree):
25
"""Compute diff between two trees.
27
They may be in different branches and may be working or historical
30
Yields a sequence of (state, id, old_name, new_name, kind).
31
Each filename and each id is listed only once.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
25
from bzrlib.lazy_import import lazy_import
26
lazy_import(globals(), """
43
from bzrlib.workingtree import WorkingTree
44
from bzrlib.i18n import gettext
47
from bzrlib.registry import (
50
from bzrlib.trace import mutter, note, warning
52
DEFAULT_CONTEXT_AMOUNT = 3
54
class AtTemplate(string.Template):
55
"""Templating class that uses @ instead of $."""
60
# TODO: Rather than building a changeset object, we should probably
61
# invoke callbacks on an object. That object can either accumulate a
62
# list, write them out directly, etc etc.
65
class _PrematchedMatcher(difflib.SequenceMatcher):
    """Allow SequenceMatcher operations to use predetermined blocks.

    No sequences are compared by this class; the caller supplies the
    matching blocks up front and they are stored on the
    ``matching_blocks`` attribute.
    """

    def __init__(self, matching_blocks):
        """Initialise the matcher with precomputed matching blocks.

        :param matching_blocks: The matching blocks to store on the
            ``matching_blocks`` attribute.
        """
        # Initialise the base class on *this* instance.  Writing
        # ``difflib.SequenceMatcher(self, None, None)`` instead constructs
        # and throws away an unrelated matcher, leaving ``self`` without
        # any of the base-class attributes.
        difflib.SequenceMatcher.__init__(self, None, None)
        self.matching_blocks = matching_blocks
74
def internal_diff(old_filename, oldlines, new_filename, newlines, to_file,
75
allow_binary=False, sequence_matcher=None,
76
path_encoding='utf8', context_lines=DEFAULT_CONTEXT_AMOUNT):
77
# FIXME: difflib is wrong if there is no trailing newline.
78
# The syntax used by patch seems to be "\ No newline at
79
# end of file" following the last diff line from that
80
# file. This is not trivial to insert into the
81
# unified_diff output and it might be better to just fix
82
# or replace that function.
84
# In the meantime we at least make sure the patch isn't
88
# Special workaround for Python2.3, where difflib fails if
89
# both sequences are empty.
90
if not oldlines and not newlines:
93
if allow_binary is False:
94
textfile.check_text_lines(oldlines)
95
textfile.check_text_lines(newlines)
97
if sequence_matcher is None:
98
sequence_matcher = patiencediff.PatienceSequenceMatcher
99
ud = patiencediff.unified_diff(oldlines, newlines,
100
fromfile=old_filename.encode(path_encoding, 'replace'),
101
tofile=new_filename.encode(path_encoding, 'replace'),
102
n=context_lines, sequencematcher=sequence_matcher)
105
if len(ud) == 0: # Identical contents, nothing to do
107
# work-around for difflib being too smart for its own good
108
# if /dev/null is "1,0", patch won't recognize it as /dev/null
110
ud[2] = ud[2].replace('-1,0', '-0,0')
112
ud[2] = ud[2].replace('+1,0', '+0,0')
116
if not line.endswith('\n'):
117
to_file.write("\n\\ No newline at end of file\n")
121
def _spawn_external_diff(diffcmd, capture_errors=True):
122
"""Spawn the external diff process, and return the child handle.
124
:param diffcmd: The command list to spawn
125
:param capture_errors: Capture stderr as well as setting LANG=C
126
and LC_ALL=C. This lets us read and understand the output of diff,
127
and respond to any errors.
128
:return: A Popen object.
34
## TODO: Compare files before diffing; only mention those that have changed
36
## TODO: Set nice names in the headers, maybe include diffstat
38
## TODO: Perhaps make this a generator rather than using
41
## TODO: Allow specifying a list of files to compare, rather than
42
## doing the whole tree? (Not urgent.)
44
## TODO: Allow diffing any two inventories, not just the
45
## current one against one. We might need to specify two
46
## stores to look for the files if diffing two branches. That
47
## might imply this shouldn't be primarily a Branch method.
49
## XXX: This doesn't report on unknown files; that can be done
50
## from a separate method.
52
old_it = old_tree.list_files()
53
new_it = new_tree.list_files()
61
old_item = next(old_it)
62
new_item = next(new_it)
64
# We step through the two sorted iterators in parallel, trying to
67
while (old_item != None) or (new_item != None):
68
# OK, we still have some remaining on both, but they may be
71
old_name, old_class, old_kind, old_id = old_item
76
new_name, new_class, new_kind, new_id = new_item
80
mutter(" diff pairwise %r" % (old_item,))
81
mutter(" %r" % (new_item,))
84
# can't handle the old tree being a WorkingTree
85
assert old_class == 'V'
87
if new_item and (new_class != 'V'):
88
yield new_class, None, None, new_name, new_kind
89
new_item = next(new_it)
90
elif (not new_item) or (old_item and (old_name < new_name)):
91
mutter(" extra entry in old-tree sequence")
92
if new_tree.has_id(old_id):
93
# will be mentioned as renamed under new name
96
yield 'D', old_id, old_name, None, old_kind
97
old_item = next(old_it)
98
elif (not old_item) or (new_item and (new_name < old_name)):
99
mutter(" extra entry in new-tree sequence")
100
if old_tree.has_id(new_id):
101
yield 'R', new_id, old_tree.id2path(new_id), new_name, new_kind
103
yield 'A', new_id, None, new_name, new_kind
104
new_item = next(new_it)
105
elif old_id != new_id:
106
assert old_name == new_name
107
# both trees have a file of this name, but it is not the
108
# same file. in other words, the old filename has been
109
# overwritten by either a newly-added or a renamed file.
110
# (should we return something about the overwritten file?)
111
if old_tree.has_id(new_id):
112
# renaming, overlying a deleted file
113
yield 'R', new_id, old_tree.id2path(new_id), new_name, new_kind
115
yield 'A', new_id, None, new_name, new_kind
117
new_item = next(new_it)
118
old_item = next(old_it)
120
assert old_id == new_id
121
assert old_id != None
122
assert old_name == new_name
123
assert old_kind == new_kind
125
if old_kind == 'directory':
126
yield '.', new_id, old_name, new_name, new_kind
127
elif old_tree.get_file_size(old_id) != new_tree.get_file_size(old_id):
128
mutter(" file size has changed, must be different")
129
yield 'M', new_id, old_name, new_name, new_kind
130
elif old_tree.get_file_sha1(old_id) == new_tree.get_file_sha1(old_id):
131
mutter(" SHA1 indicates they're identical")
132
## assert compare_files(old_tree.get_file(i), new_tree.get_file(i))
133
yield '.', new_id, old_name, new_name, new_kind
135
mutter(" quick compare shows different")
136
yield 'M', new_id, old_name, new_name, new_kind
138
new_item = next(new_it)
139
old_item = next(old_it)
143
def show_diff(b, revision, file_list):
144
import difflib, sys, types
147
old_tree = b.basis_tree()
131
# construct minimal environment
133
path = os.environ.get('PATH')
136
env['LANGUAGE'] = 'C' # on win32 only LANGUAGE has effect
139
stderr = subprocess.PIPE
149
old_tree = b.revision_tree(b.lookup_revision(revision))
151
new_tree = b.working_tree()
153
# TODO: Options to control putting on a prefix or suffix, perhaps as a format string
157
DEVNULL = '/dev/null'
158
# Windows users, don't panic about this filename -- it is a
159
# special signal to GNU patch that the file should be created or
160
# deleted respectively.
162
# TODO: Generation of pseudo-diffs for added/deleted files could
163
# be usefully made into a much faster special case.
165
# TODO: Better to return them in sorted order I think.
168
file_list = [b.relpath(f) for f in file_list]
170
# FIXME: If given a file list, compare only those files rather
171
# than comparing everything and then throwing stuff away.
173
for file_state, fid, old_name, new_name, kind in diff_trees(old_tree, new_tree):
175
if file_list and (new_name not in file_list):
145
pipe = subprocess.Popen(diffcmd,
146
stdin=subprocess.PIPE,
147
stdout=subprocess.PIPE,
151
if e.errno == errno.ENOENT:
152
raise errors.NoDiff(str(e))
157
# diff style options as of GNU diff v3.2
158
style_option_list = ['-c', '-C', '--context',
160
'-f', '--forward-ed',
164
'-u', '-U', '--unified',
165
'-y', '--side-by-side',
168
def default_style_unified(diff_opts):
169
"""Default to unified diff style if alternative not specified in diff_opts.
171
diff only allows one style to be specified; they don't override.
172
Note that some of these take optargs, and the optargs can be
173
directly appended to the options.
174
This is only an approximate parser; it doesn't properly understand
177
:param diff_opts: List of options for external (GNU) diff.
178
:return: List of options with default style=='unified'.
180
for s in style_option_list:
178
# Don't show this by default; maybe do it if an option is passed
179
# idlabel = ' {%s}' % fid
182
# FIXME: Something about the diff format makes patch unhappy
183
# with newly-added files.
185
def diffit(oldlines, newlines, **kw):
187
# FIXME: difflib is wrong if there is no trailing newline.
188
# The syntax used by patch seems to be "\ No newline at
189
# end of file" following the last diff line from that
190
# file. This is not trivial to insert into the
191
# unified_diff output and it might be better to just fix
192
# or replace that function.
194
# In the meantime we at least make sure the patch isn't
198
# Special workaround for Python2.3, where difflib fails if
199
# both sequences are empty.
200
if not oldlines and not newlines:
188
diff_opts.append('-u')
192
def external_diff(old_filename, oldlines, new_filename, newlines, to_file,
194
"""Display a diff by calling out to the external diff program."""
195
# make sure our own output is properly ordered before the diff
198
oldtmp_fd, old_abspath = tempfile.mkstemp(prefix='bzr-diff-old-')
199
newtmp_fd, new_abspath = tempfile.mkstemp(prefix='bzr-diff-new-')
200
oldtmpf = os.fdopen(oldtmp_fd, 'wb')
201
newtmpf = os.fdopen(newtmp_fd, 'wb')
204
# TODO: perhaps a special case for comparing to or from the empty
205
# sequence; can just use /dev/null on Unix
207
# TODO: if either of the files being compared already exists as a
208
# regular named file (e.g. in the working directory) then we can
209
# compare directly to that, rather than copying it.
211
oldtmpf.writelines(oldlines)
212
newtmpf.writelines(newlines)
219
if sys.platform == 'win32':
220
# Popen doesn't do the proper encoding for external commands
221
# Since we are dealing with an ANSI api, use mbcs encoding
222
old_filename = old_filename.encode('mbcs')
223
new_filename = new_filename.encode('mbcs')
225
'--label', old_filename,
227
'--label', new_filename,
232
diff_opts = default_style_unified(diff_opts)
235
diffcmd.extend(diff_opts)
237
pipe = _spawn_external_diff(diffcmd, capture_errors=True)
238
out,err = pipe.communicate()
241
# internal_diff() adds a trailing newline, add one here for consistency
244
# 'diff' gives retcode == 2 for all sorts of errors
245
# one of those is 'Binary files differ'.
246
# Bad options could also be the problem.
247
# 'Binary files' is not a real error, so we suppress that error.
250
# Since we got here, we want to make sure to give an i18n error
251
pipe = _spawn_external_diff(diffcmd, capture_errors=False)
252
out, err = pipe.communicate()
254
# Write out the new i18n diff response
255
to_file.write(out+'\n')
256
if pipe.returncode != 2:
257
raise errors.BzrError(
258
'external diff failed with exit code 2'
259
' when run with LANG=C and LC_ALL=C,'
260
' but not when run natively: %r' % (diffcmd,))
262
first_line = lang_c_out.split('\n', 1)[0]
263
# Starting with diffutils 2.8.4 the word "binary" was dropped.
264
m = re.match('^(binary )?files.*differ$', first_line, re.I)
266
raise errors.BzrError('external diff failed with exit code 2;'
267
' command: %r' % (diffcmd,))
269
# Binary files differ, just return
205
if oldlines and (oldlines[-1][-1] != '\n'):
208
if newlines and (newlines[-1][-1] != '\n'):
212
ud = difflib.unified_diff(oldlines, newlines, **kw)
213
sys.stdout.writelines(ud)
215
print "\\ No newline at end of file"
216
sys.stdout.write('\n')
218
if file_state in ['.', '?', 'I']:
220
elif file_state == 'A':
221
print '*** added %s %r' % (kind, new_name)
224
new_tree.get_file(fid).readlines(),
226
tofile=new_label + new_name + idlabel)
227
elif file_state == 'D':
228
assert isinstance(old_name, types.StringTypes)
229
print '*** deleted %s %r' % (kind, old_name)
231
diffit(old_tree.get_file(fid).readlines(), [],
232
fromfile=old_label + old_name + idlabel,
234
elif file_state in ['M', 'R']:
235
if file_state == 'M':
236
assert kind == 'file'
237
assert old_name == new_name
238
print '*** modified %s %r' % (kind, new_name)
239
elif file_state == 'R':
240
print '*** renamed %s %r => %r' % (kind, old_name, new_name)
243
diffit(old_tree.get_file(fid).readlines(),
244
new_tree.get_file(fid).readlines(),
245
fromfile=old_label + old_name + idlabel,
246
tofile=new_label + new_name)
248
raise BzrError("can't represent state %s {%s}" % (file_state, fid))
253
"""Describes changes from one tree to another.
262
(oldpath, newpath, id)
266
A path may occur in more than one list if it was e.g. deleted
267
under an old id and renamed into place in a new id.
269
Files are listed in either modified or renamed, not both. In
270
other words, renamed files may also be modified.
279
def compare_inventories(old_inv, new_inv):
280
"""Return a TreeDelta object describing changes between inventories.
282
This only describes changes in the shape of the tree, not the
285
This is an alternative to diff_trees() and should probably
286
eventually replace it.
288
old_ids = old_inv.id_set()
289
new_ids = new_inv.id_set()
292
delta.removed = [(old_inv.id2path(fid), fid) for fid in (old_ids - new_ids)]
295
delta.added = [(new_inv.id2path(fid), fid) for fid in (new_ids - old_ids)]
298
for fid in old_ids & new_ids:
299
old_ie = old_inv[fid]
300
new_ie = new_inv[fid]
301
old_path = old_inv.id2path(fid)
302
new_path = new_inv.id2path(fid)
304
if old_path != new_path:
305
delta.renamed.append((old_path, new_path, fid))
306
elif old_ie.text_sha1 != new_ie.text_sha1:
307
delta.modified.append((new_path, fid))
309
delta.modified.sort()
272
# If we got to here, we haven't written out the output of diff
276
# returns 1 if files differ; that's OK
278
msg = 'signal %d' % (-rc)
280
msg = 'exit code %d' % rc
282
raise errors.BzrError('external diff failed with %s; command: %r'
287
oldtmpf.close() # and delete
291
# Warn in case the file couldn't be deleted (in case windows still
292
# holds the file open, but not if the files have already been
297
if e.errno not in (errno.ENOENT,):
298
warning('Failed to delete temporary file: %s %s', path, e)
304
def get_trees_and_branches_to_diff_locked(
305
path_list, revision_specs, old_url, new_url, add_cleanup, apply_view=True):
306
"""Get the trees and specific files to diff given a list of paths.
308
This method works out the trees to be diff'ed and the files of
309
interest within those trees.
312
the list of arguments passed to the diff command
313
:param revision_specs:
314
Zero, one or two RevisionSpecs from the diff command line,
315
saying what revisions to compare.
317
The url of the old branch or tree. If None, the tree to use is
318
taken from the first path, if any, or the current working tree.
320
The url of the new branch or tree. If None, the tree to use is
321
taken from the first path, if any, or the current working tree.
323
a callable like Command.add_cleanup. get_trees_and_branches_to_diff
324
will register cleanups that must be run to unlock the trees, etc.
326
if True and a view is set, apply the view or check that the paths
329
a tuple of (old_tree, new_tree, old_branch, new_branch,
330
specific_files, extra_trees) where extra_trees is a sequence of
331
additional trees to search in for file-ids. The trees and branches
332
will be read-locked until the cleanups registered via the add_cleanup
335
# Get the old and new revision specs
336
old_revision_spec = None
337
new_revision_spec = None
338
if revision_specs is not None:
339
if len(revision_specs) > 0:
340
old_revision_spec = revision_specs[0]
342
old_url = old_revision_spec.get_branch()
343
if len(revision_specs) > 1:
344
new_revision_spec = revision_specs[1]
346
new_url = new_revision_spec.get_branch()
349
make_paths_wt_relative = True
350
consider_relpath = True
351
if path_list is None or len(path_list) == 0:
352
# If no path is given, the current working tree is used
353
default_location = u'.'
354
consider_relpath = False
355
elif old_url is not None and new_url is not None:
356
other_paths = path_list
357
make_paths_wt_relative = False
359
default_location = path_list[0]
360
other_paths = path_list[1:]
362
def lock_tree_or_branch(wt, br):
365
add_cleanup(wt.unlock)
368
add_cleanup(br.unlock)
370
# Get the old location
373
old_url = default_location
374
working_tree, branch, relpath = \
375
controldir.ControlDir.open_containing_tree_or_branch(old_url)
376
lock_tree_or_branch(working_tree, branch)
377
if consider_relpath and relpath != '':
378
if working_tree is not None and apply_view:
379
views.check_path_in_view(working_tree, relpath)
380
specific_files.append(relpath)
381
old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch)
384
# Get the new location
386
new_url = default_location
387
if new_url != old_url:
388
working_tree, branch, relpath = \
389
controldir.ControlDir.open_containing_tree_or_branch(new_url)
390
lock_tree_or_branch(working_tree, branch)
391
if consider_relpath and relpath != '':
392
if working_tree is not None and apply_view:
393
views.check_path_in_view(working_tree, relpath)
394
specific_files.append(relpath)
395
new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch,
396
basis_is_default=working_tree is None)
399
# Get the specific files (all files is None, no files is [])
400
if make_paths_wt_relative and working_tree is not None:
401
other_paths = working_tree.safe_relpath_files(
403
apply_view=apply_view)
404
specific_files.extend(other_paths)
405
if len(specific_files) == 0:
406
specific_files = None
407
if (working_tree is not None and working_tree.supports_views()
409
view_files = working_tree.views.lookup_view()
411
specific_files = view_files
412
view_str = views.view_display_str(view_files)
413
note(gettext("*** Ignoring files outside view. View is %s") % view_str)
415
# Get extra trees that ought to be searched for file-ids
417
if working_tree is not None and working_tree not in (old_tree, new_tree):
418
extra_trees = (working_tree,)
419
return (old_tree, new_tree, old_branch, new_branch,
420
specific_files, extra_trees)
423
def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True):
424
if branch is None and tree is not None:
426
if spec is None or spec.spec is None:
429
return tree.basis_tree()
431
return branch.basis_tree()
434
return spec.as_tree(branch)
437
def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
438
external_diff_options=None,
439
old_label='a/', new_label='b/',
441
path_encoding='utf8',
444
context=DEFAULT_CONTEXT_AMOUNT):
445
"""Show in text form the changes from one tree to another.
447
:param to_file: The output stream.
448
:param specific_files: Include only changes to these files - None for all
450
:param external_diff_options: If set, use an external GNU diff and pass
452
:param extra_trees: If set, more Trees to use for looking up file ids
453
:param path_encoding: If set, the path will be encoded as specified,
454
otherwise is supposed to be utf8
455
:param format_cls: Formatter class (DiffTree subclass)
458
context = DEFAULT_CONTEXT_AMOUNT
459
if format_cls is None:
460
format_cls = DiffTree
463
if extra_trees is not None:
464
for tree in extra_trees:
468
differ = format_cls.from_trees_options(old_tree, new_tree, to_file,
470
external_diff_options,
471
old_label, new_label, using,
472
context_lines=context)
473
return differ.show_diff(specific_files, extra_trees)
476
if extra_trees is not None:
477
for tree in extra_trees:
483
def _patch_header_date(tree, file_id, path):
484
"""Returns a timestamp suitable for use in a patch header."""
486
mtime = tree.get_file_mtime(file_id, path)
487
except errors.FileTimestampUnavailable:
489
return timestamp.format_patch_date(mtime)
492
def get_executable_change(old_is_x, new_is_x):
493
descr = { True:"+x", False:"-x", None:"??" }
494
if old_is_x != new_is_x:
495
return ["%s to %s" % (descr[old_is_x], descr[new_is_x],)]
500
class DiffPath(object):
501
"""Base type for command objects that compare files"""
503
# The type or contents of the file were unsuitable for diffing
504
CANNOT_DIFF = 'CANNOT_DIFF'
505
# The file has changed in a semantic way
507
# The file content may have changed, but there is no semantic change
508
UNCHANGED = 'UNCHANGED'
510
def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8'):
    """Record the two trees being compared and where to report to.

    :param old_tree: The tree to show as the old tree in the comparison
    :param new_tree: The tree to show as new in the comparison
    :param to_file: The file to write comparison data to
    :param path_encoding: The character encoding to write paths in
    """
    self.old_tree, self.new_tree = old_tree, new_tree
    self.to_file, self.path_encoding = to_file, path_encoding
527
def from_diff_tree(klass, diff_tree):
    """Build a ``klass`` instance from the settings held by ``diff_tree``.

    :param klass: The callable used to construct the result.
    :param diff_tree: Object providing ``old_tree``, ``new_tree``,
        ``to_file`` and ``path_encoding`` attributes.
    :return: ``klass(old_tree, new_tree, to_file, path_encoding)``.
    """
    old_tree, new_tree = diff_tree.old_tree, diff_tree.new_tree
    return klass(old_tree, new_tree, diff_tree.to_file,
                 diff_tree.path_encoding)
532
def _diff_many(differs, file_id, old_path, new_path, old_kind, new_kind):
533
for file_differ in differs:
534
result = file_differ.diff(file_id, old_path, new_path, old_kind,
536
if result is not DiffPath.CANNOT_DIFF:
539
return DiffPath.CANNOT_DIFF
542
class DiffKindChange(object):
543
"""Special differ for file kind changes.
545
Represents kind change as deletion + creation. Uses the other differs
548
def __init__(self, differs):
549
self.differs = differs
555
def from_diff_tree(klass, diff_tree):
556
return klass(diff_tree.differs)
558
def diff(self, file_id, old_path, new_path, old_kind, new_kind):
559
"""Perform comparison
561
:param file_id: The file_id of the file to compare
562
:param old_path: Path of the file in the old tree
563
:param new_path: Path of the file in the new tree
564
:param old_kind: Old file-kind of the file
565
:param new_kind: New file-kind of the file
567
if None in (old_kind, new_kind):
568
return DiffPath.CANNOT_DIFF
569
result = DiffPath._diff_many(self.differs, file_id, old_path,
570
new_path, old_kind, None)
571
if result is DiffPath.CANNOT_DIFF:
573
return DiffPath._diff_many(self.differs, file_id, old_path, new_path,
577
class DiffDirectory(DiffPath):
579
def diff(self, file_id, old_path, new_path, old_kind, new_kind):
580
"""Perform comparison between two directories. (dummy)
583
if 'directory' not in (old_kind, new_kind):
584
return self.CANNOT_DIFF
585
if old_kind not in ('directory', None):
586
return self.CANNOT_DIFF
587
if new_kind not in ('directory', None):
588
return self.CANNOT_DIFF
592
class DiffSymlink(DiffPath):
594
def diff(self, file_id, old_path, new_path, old_kind, new_kind):
595
"""Perform comparison between two symlinks
597
:param file_id: The file_id of the file to compare
598
:param old_path: Path of the file in the old tree
599
:param new_path: Path of the file in the new tree
600
:param old_kind: Old file-kind of the file
601
:param new_kind: New file-kind of the file
603
if 'symlink' not in (old_kind, new_kind):
604
return self.CANNOT_DIFF
605
if old_kind == 'symlink':
606
old_target = self.old_tree.get_symlink_target(file_id)
607
elif old_kind is None:
610
return self.CANNOT_DIFF
611
if new_kind == 'symlink':
612
new_target = self.new_tree.get_symlink_target(file_id)
613
elif new_kind is None:
616
return self.CANNOT_DIFF
617
return self.diff_symlink(old_target, new_target)
619
def diff_symlink(self, old_target, new_target):
620
if old_target is None:
621
self.to_file.write('=== target is %r\n' % new_target)
622
elif new_target is None:
623
self.to_file.write('=== target was %r\n' % old_target)
625
self.to_file.write('=== target changed %r => %r\n' %
626
(old_target, new_target))
630
class DiffText(DiffPath):
632
# GNU Patch uses the epoch date to detect files that are being added
633
# or removed in a diff.
634
EPOCH_DATE = '1970-01-01 00:00:00 +0000'
636
def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
             old_label='', new_label='', text_differ=internal_diff,
             context_lines=DEFAULT_CONTEXT_AMOUNT):
    """Set up a differ for file texts.

    The trees, output file and path encoding are handed to
    DiffPath.__init__; the remaining arguments are stored on the
    instance for later use.

    :param old_tree: The tree to show as the old tree in the comparison
    :param new_tree: The tree to show as new in the comparison
    :param to_file: The file to write comparison data to
    :param path_encoding: The character encoding to write paths in
    :param old_label: Prefix placed before the old path in the diff label
    :param new_label: Prefix placed before the new path in the diff label
    :param text_differ: Callable that performs the actual text diff
    :param context_lines: Value passed to text_differ as context_lines
    """
    DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
    self.text_differ = text_differ
    self.old_label, self.new_label = old_label, new_label
    self.path_encoding = path_encoding
    self.context_lines = context_lines
646
def diff(self, file_id, old_path, new_path, old_kind, new_kind):
647
"""Compare two files in unified diff format
649
:param file_id: The file_id of the file to compare
650
:param old_path: Path of the file in the old tree
651
:param new_path: Path of the file in the new tree
652
:param old_kind: Old file-kind of the file
653
:param new_kind: New file-kind of the file
655
if 'file' not in (old_kind, new_kind):
656
return self.CANNOT_DIFF
657
from_file_id = to_file_id = file_id
658
if old_kind == 'file':
659
old_date = _patch_header_date(self.old_tree, file_id, old_path)
660
elif old_kind is None:
661
old_date = self.EPOCH_DATE
664
return self.CANNOT_DIFF
665
if new_kind == 'file':
666
new_date = _patch_header_date(self.new_tree, file_id, new_path)
667
elif new_kind is None:
668
new_date = self.EPOCH_DATE
671
return self.CANNOT_DIFF
672
from_label = '%s%s\t%s' % (self.old_label, old_path, old_date)
673
to_label = '%s%s\t%s' % (self.new_label, new_path, new_date)
674
return self.diff_text(from_file_id, to_file_id, from_label, to_label,
677
def diff_text(self, from_file_id, to_file_id, from_label, to_label,
678
from_path=None, to_path=None):
679
"""Diff the content of given files in two trees
681
:param from_file_id: The id of the file in the from tree. If None,
682
the file is not present in the from tree.
683
:param to_file_id: The id of the file in the to tree. This may refer
684
to a different file from from_file_id. If None,
685
the file is not present in the to tree.
686
:param from_path: The path in the from tree or None if unknown.
687
:param to_path: The path in the to tree or None if unknown.
689
def _get_text(tree, file_id, path):
690
if file_id is not None:
691
return tree.get_file_lines(file_id, path)
695
from_text = _get_text(self.old_tree, from_file_id, from_path)
696
to_text = _get_text(self.new_tree, to_file_id, to_path)
697
self.text_differ(from_label, from_text, to_label, to_text,
698
self.to_file, path_encoding=self.path_encoding,
699
context_lines=self.context_lines)
700
except errors.BinaryFile:
702
("Binary files %s and %s differ\n" %
703
(from_label, to_label)).encode(self.path_encoding,'replace'))
707
class DiffFromTool(DiffPath):
709
def __init__(self, command_template, old_tree, new_tree, to_file,
710
path_encoding='utf-8'):
711
DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
712
self.command_template = command_template
713
self._root = osutils.mkdtemp(prefix='bzr-diff-')
716
def from_string(klass, command_string, old_tree, new_tree, to_file,
717
path_encoding='utf-8'):
718
command_template = cmdline.split(command_string)
719
if '@' not in command_string:
720
command_template.extend(['@old_path', '@new_path'])
721
return klass(command_template, old_tree, new_tree, to_file,
725
def make_from_diff_tree(klass, command_string, external_diff_options=None):
    """Return a factory that builds this differ for a given diff tree.

    :param command_string: The external tool command line, as a string.
    :param external_diff_options: Optional string of extra options; when
        given it is appended to command_string after a single space.
    :return: A callable taking a diff_tree and returning the result of
        klass.from_string() for that tree's old_tree, new_tree and
        to_file.
    """
    def from_diff_tree(diff_tree):
        # Keep the command as a *string*: from_string() splits it itself.
        # Wrapping it in a list made ``+= ' ' + options`` extend the list
        # one character at a time and handed from_string() a list.
        full_command_string = command_string
        if external_diff_options is not None:
            full_command_string += ' ' + external_diff_options
        return klass.from_string(full_command_string, diff_tree.old_tree,
                                 diff_tree.new_tree, diff_tree.to_file)
    return from_diff_tree
734
def _get_command(self, old_path, new_path):
735
my_map = {'old_path': old_path, 'new_path': new_path}
736
command = [AtTemplate(t).substitute(my_map) for t in
737
self.command_template]
738
if sys.platform == 'win32': # Popen doesn't accept unicode on win32
741
if isinstance(c, unicode):
742
command_encoded.append(c.encode('mbcs'))
744
command_encoded.append(c)
745
return command_encoded
749
def _execute(self, old_path, new_path):
750
command = self._get_command(old_path, new_path)
752
proc = subprocess.Popen(command, stdout=subprocess.PIPE,
755
if e.errno == errno.ENOENT:
756
raise errors.ExecutableMissing(command[0])
759
self.to_file.write(proc.stdout.read())
762
def _try_symlink_root(self, tree, prefix):
763
if (getattr(tree, 'abspath', None) is None
764
or not osutils.host_os_dereferences_symlinks()):
767
os.symlink(tree.abspath(''), osutils.pathjoin(self._root, prefix))
769
if e.errno != errno.EEXIST:
775
"""Returns safe encoding for passing file path to diff tool"""
776
if sys.platform == 'win32':
779
# Don't fallback to 'utf-8' because subprocess may not be able to
780
# handle utf-8 correctly when locale is not utf-8.
781
return sys.getfilesystemencoding() or 'ascii'
783
def _is_safepath(self, path):
784
"""Return true if `path` may be safely passed to a subprocess."""
787
return path == path.encode(fenc).decode(fenc)
791
def _safe_filename(self, prefix, relpath):
792
"""Replace unsafe characters in `relpath` then join `self._root`,
793
`prefix` and `relpath`."""
795
# encoded_str.replace('?', '_') may break multibyte char.
796
# So we should encode, decode, then replace(u'?', u'_')
797
relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
798
relpath_tmp = relpath_tmp.replace(u'?', u'_')
799
return osutils.pathjoin(self._root, prefix, relpath_tmp)
801
def _write_file(self, file_id, tree, prefix, relpath, force_temp=False,
803
if not force_temp and isinstance(tree, WorkingTree):
804
full_path = tree.abspath(tree.id2path(file_id))
805
if self._is_safepath(full_path):
808
full_path = self._safe_filename(prefix, relpath)
809
if not force_temp and self._try_symlink_root(tree, prefix):
811
parent_dir = osutils.dirname(full_path)
813
os.makedirs(parent_dir)
815
if e.errno != errno.EEXIST:
817
source = tree.get_file(file_id, relpath)
819
target = open(full_path, 'wb')
821
osutils.pumpfile(source, target)
827
mtime = tree.get_file_mtime(file_id)
828
except errors.FileTimestampUnavailable:
831
os.utime(full_path, (mtime, mtime))
833
osutils.make_readonly(full_path)
836
def _prepare_files(self, file_id, old_path, new_path, force_temp=False,
                   allow_write_new=False):
    """Write out both versions of a file and return their disk paths.

    Each side is produced by ``self._write_file``: the old side from
    ``self.old_tree`` under the ``'old'`` prefix, then the new side from
    ``self.new_tree`` under the ``'new'`` prefix.

    :param file_id: The file id passed to both _write_file calls.
    :param old_path: Relative path used for the old side.
    :param new_path: Relative path used for the new side.
    :param force_temp: Forwarded unchanged to both _write_file calls.
    :param allow_write_new: Forwarded as ``allow_write`` for the new side
        only.
    :return: A tuple ``(old_disk_path, new_disk_path)``.
    """
    write = self._write_file
    old_disk_path = write(file_id, self.old_tree, 'old', old_path,
                          force_temp)
    new_disk_path = write(file_id, self.new_tree, 'new', new_path,
                          force_temp, allow_write=allow_write_new)
    return (old_disk_path, new_disk_path)
847
osutils.rmtree(self._root)
849
if e.errno != errno.ENOENT:
850
mutter("The temporary directory \"%s\" was not "
851
"cleanly removed: %s." % (self._root, e))
853
def diff(self, file_id, old_path, new_path, old_kind, new_kind):
    """Run the external tool on one file when both sides are files.

    :param file_id: The file_id of the file to compare
    :param old_path: Path of the file in the old tree
    :param new_path: Path of the file in the new tree
    :param old_kind: Old file-kind of the file
    :param new_kind: New file-kind of the file
    :return: DiffPath.CANNOT_DIFF unless both kinds are 'file'.
    """
    both_are_files = old_kind == 'file' and new_kind == 'file'
    if not both_are_files:
        return DiffPath.CANNOT_DIFF
    # Materialise both versions on disk, then hand them to the tool.
    disk_paths = self._prepare_files(file_id, old_path, new_path)
    old_disk_path, new_disk_path = disk_paths
    self._execute(old_disk_path, new_disk_path)
860
def edit_file(self, file_id):
861
"""Use this tool to edit a file.
863
A temporary copy will be edited, and the new contents will be
866
:param file_id: The id of the file to edit.
867
:return: The new contents of the file.
869
old_path = self.old_tree.id2path(file_id)
870
new_path = self.new_tree.id2path(file_id)
871
old_abs_path, new_abs_path = self._prepare_files(
872
file_id, old_path, new_path,
873
allow_write_new=True,
875
command = self._get_command(old_abs_path, new_abs_path)
876
subprocess.call(command, cwd=self._root)
877
new_file = open(new_abs_path, 'rb')
879
return new_file.read()
884
class DiffTree(object):
    """Provides textual representations of the difference between two trees.

    A DiffTree examines two trees and where a file-id has altered
    between them, generates a textual representation of the difference.
    DiffTree uses a sequence of DiffPath objects which are each
    given the opportunity to handle a given altered fileid. The list
    of DiffPath objects can be extended globally by appending to
    DiffTree.diff_factories, or for a specific diff operation by
    supplying the extra_factories option to the appropriate method.
    """

    # list of factories that can provide instances of DiffPath objects
    # may be extended by plugins.
    diff_factories = [DiffSymlink.from_diff_tree,
                      DiffDirectory.from_diff_tree]

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 diff_text=None, extra_factories=None):
        """Constructor

        :param old_tree: Tree to show as old in the comparison
        :param new_tree: Tree to show as new in the comparison
        :param to_file: File to write comparison to
        :param path_encoding: Character encoding to write paths in
        :param diff_text: DiffPath-type object to use as a last resort for
            diffing text files.
        :param extra_factories: Factories of DiffPaths to try before any other
            DiffPaths.
        """
        if diff_text is None:
            diff_text = DiffText(old_tree, new_tree, to_file, path_encoding,
                                 '', '', internal_diff)
        self.old_tree = old_tree
        self.new_tree = new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding
        # Caller-supplied factories go first, then the class-wide factories,
        # then the text differ and the kind-change differ.
        self.differs = []
        if extra_factories is not None:
            self.differs.extend(f(self) for f in extra_factories)
        self.differs.extend(f(self) for f in self.diff_factories)
        self.differs.extend([diff_text, DiffKindChange.from_diff_tree(self)])

    @classmethod
    def from_trees_options(klass, old_tree, new_tree, to_file,
                           path_encoding, external_diff_options, old_label,
                           new_label, using, context_lines):
        """Factory for producing a DiffTree.

        Designed to accept options used by show_diff_trees.

        :param old_tree: The tree to show as old in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: File to write comparisons to
        :param path_encoding: Character encoding to use for writing paths
        :param external_diff_options: If supplied, use the installed diff
            binary to perform file comparison, using supplied options.
        :param old_label: Prefix to use for old file labels
        :param new_label: Prefix to use for new file labels
        :param using: Commandline to use to invoke an external diff tool
        :param context_lines: Passed to DiffText as context_lines.
        """
        if using is not None:
            extra_factories = [DiffFromTool.make_from_diff_tree(
                using, external_diff_options)]
        else:
            extra_factories = []
        if external_diff_options:
            opts = external_diff_options.split()

            def diff_file(olab, olines, nlab, nlines, to_file,
                          path_encoding=None, context_lines=None):
                """:param path_encoding: not used but required
                        to match the signature of internal_diff.
                """
                external_diff(olab, olines, nlab, nlines, to_file, opts)
        else:
            diff_file = internal_diff
        diff_text = DiffText(old_tree, new_tree, to_file, path_encoding,
                             old_label, new_label, diff_file,
                             context_lines=context_lines)
        return klass(old_tree, new_tree, to_file, path_encoding, diff_text,
                     extra_factories)

    def show_diff(self, specific_files, extra_trees=None):
        """Write tree diff to self.to_file

        :param specific_files: the specific files to compare (recursive)
        :param extra_trees: extra trees to use for mapping paths to file_ids
        :return: Whatever self._show_diff returns.
        """
        try:
            return self._show_diff(specific_files, extra_trees)
        finally:
            # NOTE(review): per-differ cleanup call reconstructed; confirm
            # that every DiffPath exposes finish().
            for differ in self.differs:
                differ.finish()

    def _show_diff(self, specific_files, extra_trees):
        """Write a header and, where content changed, a diff for each change.

        :param specific_files: the specific files to compare (recursive)
        :param extra_trees: extra trees to use for mapping paths to file_ids
        :return: 1 if any reported change had changed content or was a
            rename, otherwise 0.
        """
        # TODO: Generation of pseudo-diffs for added/deleted files could
        # be usefully made into a much faster special case.
        iterator = self.new_tree.iter_changes(self.old_tree,
                                              specific_files=specific_files,
                                              extra_trees=extra_trees,
                                              require_versioned=True)
        has_changes = 0

        def changes_key(change):
            # Sort by the new path, falling back to the old path when the
            # entry has no new path.
            old_path, new_path = change[1]
            path = new_path
            if path is None:
                path = old_path
            return path

        def get_encoded_path(path):
            # A missing path stays None rather than being encoded.
            if path is not None:
                return path.encode(self.path_encoding, "replace")

        for (file_id, paths, changed_content, versioned, parent, name, kind,
             executable) in sorted(iterator, key=changes_key):
            # The root does not get diffed, and items with no known kind (that
            # is, missing) in both trees are skipped as well.
            if parent == (None, None) or kind == (None, None):
                continue
            oldpath, newpath = paths
            oldpath_encoded = get_encoded_path(paths[0])
            newpath_encoded = get_encoded_path(paths[1])
            old_present = (kind[0] is not None and versioned[0])
            new_present = (kind[1] is not None and versioned[1])
            renamed = (parent[0], name[0]) != (parent[1], name[1])

            properties_changed = []
            properties_changed.extend(
                get_executable_change(executable[0], executable[1]))

            if properties_changed:
                prop_str = " (properties changed: %s)" % (
                    ", ".join(properties_changed),)
            else:
                prop_str = ""

            if (old_present, new_present) == (True, False):
                self.to_file.write("=== removed %s '%s'\n" %
                                   (kind[0], oldpath_encoded))
                # NOTE(review): reconstructed -- the differs are given the
                # surviving path on both sides; confirm.
                newpath = oldpath
            elif (old_present, new_present) == (False, True):
                self.to_file.write("=== added %s '%s'\n" %
                                   (kind[1], newpath_encoded))
                oldpath = newpath
            elif renamed:
                self.to_file.write("=== renamed %s '%s' => '%s'%s\n" %
                                   (kind[0], oldpath_encoded,
                                    newpath_encoded, prop_str))
            else:
                # if it was produced by iter_changes, it must be
                # modified *somehow*, either content or execute bit.
                self.to_file.write("=== modified %s '%s'%s\n" % (kind[0],
                                   newpath_encoded, prop_str))
            if changed_content:
                self._diff(file_id, oldpath, newpath, kind[0], kind[1])
                has_changes = 1
            if renamed:
                has_changes = 1
        return has_changes

    def diff(self, file_id, old_path, new_path):
        """Perform a diff of a single file

        :param file_id: file-id of the file
        :param old_path: The path of the file in the old tree
        :param new_path: The path of the file in the new tree
        """
        # A file-id absent from a tree is given kind None on that side.
        try:
            old_kind = self.old_tree.kind(file_id)
        except (errors.NoSuchId, errors.NoSuchFile):
            old_kind = None
        try:
            new_kind = self.new_tree.kind(file_id)
        except (errors.NoSuchId, errors.NoSuchFile):
            new_kind = None
        self._diff(file_id, old_path, new_path, old_kind, new_kind)

    def _diff(self, file_id, old_path, new_path, old_kind, new_kind):
        """Run the differs on one file, failing if none can handle it.

        :raises errors.NoDiffFound: if DiffPath._diff_many reports
            DiffPath.CANNOT_DIFF; the error names new_path, or old_path
            when new_path is None.
        """
        result = DiffPath._diff_many(self.differs, file_id, old_path,
                                     new_path, old_kind, new_kind)
        if result is DiffPath.CANNOT_DIFF:
            error_path = new_path
            if error_path is None:
                error_path = old_path
            raise errors.NoDiffFound(error_path)
1063
# Module-level registry of diff implementations; DiffTree is registered
# under the key 'default'.
format_registry = Registry()
format_registry.register('default', DiffTree)