~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/diff.py

(jam) Handle bug #382709 by encoding paths as 'mbcs' when spawning
        external diff.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2005-2011 Canonical Ltd.
 
1
# Copyright (C) 2004, 2005, 2006 Canonical Ltd.
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
17
17
import difflib
18
18
import os
19
19
import re
20
 
import string
 
20
import shutil
21
21
import sys
22
22
 
23
23
from bzrlib.lazy_import import lazy_import
25
25
import errno
26
26
import subprocess
27
27
import tempfile
 
28
import time
28
29
 
29
30
from bzrlib import (
30
 
    cleanup,
31
 
    cmdline,
32
 
    controldir,
 
31
    branch as _mod_branch,
 
32
    bzrdir,
 
33
    commands,
33
34
    errors,
34
35
    osutils,
35
36
    patiencediff,
37
38
    timestamp,
38
39
    views,
39
40
    )
40
 
 
41
 
from bzrlib.workingtree import WorkingTree
42
 
from bzrlib.i18n import gettext
43
41
""")
44
42
 
45
 
from bzrlib.registry import (
46
 
    Registry,
 
43
from bzrlib.symbol_versioning import (
 
44
    deprecated_function,
47
45
    )
48
46
from bzrlib.trace import mutter, note, warning
49
47
 
50
48
 
51
 
class AtTemplate(string.Template):
52
 
    """Templating class that uses @ instead of $."""
53
 
 
54
 
    delimiter = '@'
55
 
 
56
 
 
57
49
# TODO: Rather than building a changeset object, we should probably
58
50
# invoke callbacks on an object.  That object can either accumulate a
59
51
# list, write them out directly, etc etc.
94
86
    if sequence_matcher is None:
95
87
        sequence_matcher = patiencediff.PatienceSequenceMatcher
96
88
    ud = patiencediff.unified_diff(oldlines, newlines,
97
 
                      fromfile=old_filename.encode(path_encoding, 'replace'),
98
 
                      tofile=new_filename.encode(path_encoding, 'replace'),
 
89
                      fromfile=old_filename.encode(path_encoding),
 
90
                      tofile=new_filename.encode(path_encoding),
99
91
                      sequencematcher=sequence_matcher)
100
92
 
101
93
    ud = list(ud)
285
277
                        new_abspath, e)
286
278
 
287
279
 
288
 
def get_trees_and_branches_to_diff_locked(
289
 
    path_list, revision_specs, old_url, new_url, add_cleanup, apply_view=True):
 
280
def _get_trees_to_diff(path_list, revision_specs, old_url, new_url,
 
281
    apply_view=True):
290
282
    """Get the trees and specific files to diff given a list of paths.
291
283
 
292
284
    This method works out the trees to be diff'ed and the files of
303
295
    :param new_url:
304
296
        The url of the new branch or tree. If None, the tree to use is
305
297
        taken from the first path, if any, or the current working tree.
306
 
    :param add_cleanup:
307
 
        a callable like Command.add_cleanup.  get_trees_and_branches_to_diff
308
 
        will register cleanups that must be run to unlock the trees, etc.
309
298
    :param apply_view:
310
299
        if True and a view is set, apply the view or check that the paths
311
300
        are within it
312
301
    :returns:
313
 
        a tuple of (old_tree, new_tree, old_branch, new_branch,
314
 
        specific_files, extra_trees) where extra_trees is a sequence of
315
 
        additional trees to search in for file-ids.  The trees and branches
316
 
        will be read-locked until the cleanups registered via the add_cleanup
317
 
        param are run.
 
302
        a tuple of (old_tree, new_tree, specific_files, extra_trees) where
 
303
        extra_trees is a sequence of additional trees to search in for
 
304
        file-ids.
318
305
    """
319
306
    # Get the old and new revision specs
320
307
    old_revision_spec = None
343
330
        default_location = path_list[0]
344
331
        other_paths = path_list[1:]
345
332
 
346
 
    def lock_tree_or_branch(wt, br):
347
 
        if wt is not None:
348
 
            wt.lock_read()
349
 
            add_cleanup(wt.unlock)
350
 
        elif br is not None:
351
 
            br.lock_read()
352
 
            add_cleanup(br.unlock)
353
 
 
354
333
    # Get the old location
355
334
    specific_files = []
356
335
    if old_url is None:
357
336
        old_url = default_location
358
337
    working_tree, branch, relpath = \
359
 
        controldir.ControlDir.open_containing_tree_or_branch(old_url)
360
 
    lock_tree_or_branch(working_tree, branch)
 
338
        bzrdir.BzrDir.open_containing_tree_or_branch(old_url)
361
339
    if consider_relpath and relpath != '':
362
340
        if working_tree is not None and apply_view:
363
341
            views.check_path_in_view(working_tree, relpath)
364
342
        specific_files.append(relpath)
365
343
    old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch)
366
 
    old_branch = branch
367
344
 
368
345
    # Get the new location
369
346
    if new_url is None:
370
347
        new_url = default_location
371
348
    if new_url != old_url:
372
349
        working_tree, branch, relpath = \
373
 
            controldir.ControlDir.open_containing_tree_or_branch(new_url)
374
 
        lock_tree_or_branch(working_tree, branch)
 
350
            bzrdir.BzrDir.open_containing_tree_or_branch(new_url)
375
351
        if consider_relpath and relpath != '':
376
352
            if working_tree is not None and apply_view:
377
353
                views.check_path_in_view(working_tree, relpath)
378
354
            specific_files.append(relpath)
379
355
    new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch,
380
356
        basis_is_default=working_tree is None)
381
 
    new_branch = branch
382
357
 
383
358
    # Get the specific files (all files is None, no files is [])
384
359
    if make_paths_wt_relative and working_tree is not None:
385
 
        other_paths = working_tree.safe_relpath_files(
386
 
            other_paths,
 
360
        try:
 
361
            from bzrlib.builtins import safe_relpath_files
 
362
            other_paths = safe_relpath_files(working_tree, other_paths,
387
363
            apply_view=apply_view)
 
364
        except errors.FileInWrongBranch:
 
365
            raise errors.BzrCommandError("Files are in different branches")
388
366
    specific_files.extend(other_paths)
389
367
    if len(specific_files) == 0:
390
368
        specific_files = None
394
372
            if view_files:
395
373
                specific_files = view_files
396
374
                view_str = views.view_display_str(view_files)
397
 
                note(gettext("*** Ignoring files outside view. View is %s") % view_str)
 
375
                note("*** Ignoring files outside view. View is %s" % view_str)
398
376
 
399
377
    # Get extra trees that ought to be searched for file-ids
400
378
    extra_trees = None
401
379
    if working_tree is not None and working_tree not in (old_tree, new_tree):
402
380
        extra_trees = (working_tree,)
403
 
    return (old_tree, new_tree, old_branch, new_branch,
404
 
            specific_files, extra_trees)
405
 
 
 
381
    return old_tree, new_tree, specific_files, extra_trees
406
382
 
407
383
def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True):
408
384
    if branch is None and tree is not None:
423
399
                    old_label='a/', new_label='b/',
424
400
                    extra_trees=None,
425
401
                    path_encoding='utf8',
426
 
                    using=None,
427
 
                    format_cls=None):
 
402
                    using=None):
428
403
    """Show in text form the changes from one tree to another.
429
404
 
430
 
    :param to_file: The output stream.
431
 
    :param specific_files: Include only changes to these files - None for all
432
 
        changes.
433
 
    :param external_diff_options: If set, use an external GNU diff and pass 
434
 
        these options.
435
 
    :param extra_trees: If set, more Trees to use for looking up file ids
436
 
    :param path_encoding: If set, the path will be encoded as specified, 
437
 
        otherwise is supposed to be utf8
438
 
    :param format_cls: Formatter class (DiffTree subclass)
 
405
    to_file
 
406
        The output stream.
 
407
 
 
408
    specific_files
 
409
        Include only changes to these files - None for all changes.
 
410
 
 
411
    external_diff_options
 
412
        If set, use an external GNU diff and pass these options.
 
413
 
 
414
    extra_trees
 
415
        If set, more Trees to use for looking up file ids
 
416
 
 
417
    path_encoding
 
418
        If set, the path will be encoded as specified, otherwise is supposed
 
419
        to be utf8
439
420
    """
440
 
    if format_cls is None:
441
 
        format_cls = DiffTree
442
421
    old_tree.lock_read()
443
422
    try:
444
423
        if extra_trees is not None:
446
425
                tree.lock_read()
447
426
        new_tree.lock_read()
448
427
        try:
449
 
            differ = format_cls.from_trees_options(old_tree, new_tree, to_file,
450
 
                                                   path_encoding,
451
 
                                                   external_diff_options,
452
 
                                                   old_label, new_label, using)
 
428
            differ = DiffTree.from_trees_options(old_tree, new_tree, to_file,
 
429
                                                 path_encoding,
 
430
                                                 external_diff_options,
 
431
                                                 old_label, new_label, using)
453
432
            return differ.show_diff(specific_files, extra_trees)
454
433
        finally:
455
434
            new_tree.unlock()
462
441
 
463
442
def _patch_header_date(tree, file_id, path):
464
443
    """Returns a timestamp suitable for use in a patch header."""
465
 
    try:
466
 
        mtime = tree.get_file_mtime(file_id, path)
467
 
    except errors.FileTimestampUnavailable:
468
 
        mtime = 0
 
444
    mtime = tree.get_file_mtime(file_id, path)
469
445
    return timestamp.format_patch_date(mtime)
470
446
 
471
447
 
666
642
        """
667
643
        def _get_text(tree, file_id, path):
668
644
            if file_id is not None:
669
 
                return tree.get_file_lines(file_id, path)
 
645
                return tree.get_file(file_id, path).readlines()
670
646
            else:
671
647
                return []
672
648
        try:
673
649
            from_text = _get_text(self.old_tree, from_file_id, from_path)
674
650
            to_text = _get_text(self.new_tree, to_file_id, to_path)
675
651
            self.text_differ(from_label, from_text, to_label, to_text,
676
 
                             self.to_file, path_encoding=self.path_encoding)
 
652
                             self.to_file)
677
653
        except errors.BinaryFile:
678
654
            self.to_file.write(
679
655
                  ("Binary files %s and %s differ\n" %
680
 
                  (from_label, to_label)).encode(self.path_encoding,'replace'))
 
656
                  (from_label, to_label)).encode(self.path_encoding))
681
657
        return self.CHANGED
682
658
 
683
659
 
692
668
    @classmethod
693
669
    def from_string(klass, command_string, old_tree, new_tree, to_file,
694
670
                    path_encoding='utf-8'):
695
 
        command_template = cmdline.split(command_string)
696
 
        if '@' not in command_string:
697
 
            command_template.extend(['@old_path', '@new_path'])
 
671
        command_template = commands.shlex_split_unicode(command_string)
 
672
        command_template.extend(['%(old_path)s', '%(new_path)s'])
698
673
        return klass(command_template, old_tree, new_tree, to_file,
699
674
                     path_encoding)
700
675
 
701
676
    @classmethod
702
 
    def make_from_diff_tree(klass, command_string, external_diff_options=None):
 
677
    def make_from_diff_tree(klass, command_string):
703
678
        def from_diff_tree(diff_tree):
704
 
            full_command_string = [command_string]
705
 
            if external_diff_options is not None:
706
 
                full_command_string += ' ' + external_diff_options
707
 
            return klass.from_string(full_command_string, diff_tree.old_tree,
 
679
            return klass.from_string(command_string, diff_tree.old_tree,
708
680
                                     diff_tree.new_tree, diff_tree.to_file)
709
681
        return from_diff_tree
710
682
 
711
683
    def _get_command(self, old_path, new_path):
712
684
        my_map = {'old_path': old_path, 'new_path': new_path}
713
 
        command = [AtTemplate(t).substitute(my_map) for t in
714
 
                   self.command_template]
715
 
        if sys.platform == 'win32': # Popen doesn't accept unicode on win32
716
 
            command_encoded = []
717
 
            for c in command:
718
 
                if isinstance(c, unicode):
719
 
                    command_encoded.append(c.encode('mbcs'))
720
 
                else:
721
 
                    command_encoded.append(c)
722
 
            return command_encoded
723
 
        else:
724
 
            return command
 
685
        return [t % my_map for t in self.command_template]
725
686
 
726
687
    def _execute(self, old_path, new_path):
727
688
        command = self._get_command(old_path, new_path)
747
708
                raise
748
709
        return True
749
710
 
750
 
    @staticmethod
751
 
    def _fenc():
752
 
        """Returns safe encoding for passing file path to diff tool"""
753
 
        if sys.platform == 'win32':
754
 
            return 'mbcs'
755
 
        else:
756
 
            # Don't fallback to 'utf-8' because subprocess may not be able to
757
 
            # handle utf-8 correctly when locale is not utf-8.
758
 
            return sys.getfilesystemencoding() or 'ascii'
759
 
 
760
 
    def _is_safepath(self, path):
761
 
        """Return true if `path` may be able to pass to subprocess."""
762
 
        fenc = self._fenc()
763
 
        try:
764
 
            return path == path.encode(fenc).decode(fenc)
765
 
        except UnicodeError:
766
 
            return False
767
 
 
768
 
    def _safe_filename(self, prefix, relpath):
769
 
        """Replace unsafe character in `relpath` then join `self._root`,
770
 
        `prefix` and `relpath`."""
771
 
        fenc = self._fenc()
772
 
        # encoded_str.replace('?', '_') may break multibyte char.
773
 
        # So we should encode, decode, then replace(u'?', u'_')
774
 
        relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
775
 
        relpath_tmp = relpath_tmp.replace(u'?', u'_')
776
 
        return osutils.pathjoin(self._root, prefix, relpath_tmp)
777
 
 
778
 
    def _write_file(self, file_id, tree, prefix, relpath, force_temp=False,
779
 
                    allow_write=False):
780
 
        if not force_temp and isinstance(tree, WorkingTree):
781
 
            full_path = tree.abspath(tree.id2path(file_id))
782
 
            if self._is_safepath(full_path):
783
 
                return full_path
784
 
 
785
 
        full_path = self._safe_filename(prefix, relpath)
786
 
        if not force_temp and self._try_symlink_root(tree, prefix):
 
711
    def _write_file(self, file_id, tree, prefix, relpath):
 
712
        full_path = osutils.pathjoin(self._root, prefix, relpath)
 
713
        if self._try_symlink_root(tree, prefix):
787
714
            return full_path
788
715
        parent_dir = osutils.dirname(full_path)
789
716
        try:
800
727
                target.close()
801
728
        finally:
802
729
            source.close()
803
 
        try:
804
 
            mtime = tree.get_file_mtime(file_id)
805
 
        except errors.FileTimestampUnavailable:
806
 
            pass
807
 
        else:
808
 
            os.utime(full_path, (mtime, mtime))
809
 
        if not allow_write:
810
 
            osutils.make_readonly(full_path)
 
730
        osutils.make_readonly(full_path)
 
731
        mtime = tree.get_file_mtime(file_id)
 
732
        os.utime(full_path, (mtime, mtime))
811
733
        return full_path
812
734
 
813
 
    def _prepare_files(self, file_id, old_path, new_path, force_temp=False,
814
 
                       allow_write_new=False):
 
735
    def _prepare_files(self, file_id, old_path, new_path):
815
736
        old_disk_path = self._write_file(file_id, self.old_tree, 'old',
816
 
                                         old_path, force_temp)
 
737
                                         old_path)
817
738
        new_disk_path = self._write_file(file_id, self.new_tree, 'new',
818
 
                                         new_path, force_temp,
819
 
                                         allow_write=allow_write_new)
 
739
                                         new_path)
820
740
        return old_disk_path, new_disk_path
821
741
 
822
742
    def finish(self):
830
750
    def diff(self, file_id, old_path, new_path, old_kind, new_kind):
831
751
        if (old_kind, new_kind) != ('file', 'file'):
832
752
            return DiffPath.CANNOT_DIFF
833
 
        (old_disk_path, new_disk_path) = self._prepare_files(
834
 
                                                file_id, old_path, new_path)
835
 
        self._execute(old_disk_path, new_disk_path)
836
 
 
837
 
    def edit_file(self, file_id):
838
 
        """Use this tool to edit a file.
839
 
 
840
 
        A temporary copy will be edited, and the new contents will be
841
 
        returned.
842
 
 
843
 
        :param file_id: The id of the file to edit.
844
 
        :return: The new contents of the file.
845
 
        """
846
 
        old_path = self.old_tree.id2path(file_id)
847
 
        new_path = self.new_tree.id2path(file_id)
848
 
        old_abs_path, new_abs_path = self._prepare_files(
849
 
                                            file_id, old_path, new_path,
850
 
                                            allow_write_new=True,
851
 
                                            force_temp=True)
852
 
        command = self._get_command(old_abs_path, new_abs_path)
853
 
        subprocess.call(command, cwd=self._root)
854
 
        new_file = open(new_abs_path, 'rb')
855
 
        try:
856
 
            return new_file.read()
857
 
        finally:
858
 
            new_file.close()
 
753
        self._prepare_files(file_id, old_path, new_path)
 
754
        self._execute(osutils.pathjoin('old', old_path),
 
755
                      osutils.pathjoin('new', new_path))
859
756
 
860
757
 
861
758
class DiffTree(object):
907
804
        """Factory for producing a DiffTree.
908
805
 
909
806
        Designed to accept options used by show_diff_trees.
910
 
 
911
807
        :param old_tree: The tree to show as old in the comparison
912
808
        :param new_tree: The tree to show as new in the comparison
913
809
        :param to_file: File to write comparisons to
919
815
        :param using: Commandline to use to invoke an external diff tool
920
816
        """
921
817
        if using is not None:
922
 
            extra_factories = [DiffFromTool.make_from_diff_tree(using, external_diff_options)]
 
818
            extra_factories = [DiffFromTool.make_from_diff_tree(using)]
923
819
        else:
924
820
            extra_factories = []
925
821
        if external_diff_options:
926
822
            opts = external_diff_options.split()
927
 
            def diff_file(olab, olines, nlab, nlines, to_file, path_encoding=None):
928
 
                """:param path_encoding: not used but required
929
 
                        to match the signature of internal_diff.
930
 
                """
 
823
            def diff_file(olab, olines, nlab, nlines, to_file):
931
824
                external_diff(olab, olines, nlab, nlines, to_file, opts)
932
825
        else:
933
826
            diff_file = internal_diff
939
832
    def show_diff(self, specific_files, extra_trees=None):
940
833
        """Write tree diff to self.to_file
941
834
 
942
 
        :param specific_files: the specific files to compare (recursive)
 
835
        :param sepecific_files: the specific files to compare (recursive)
943
836
        :param extra_trees: extra trees to use for mapping paths to file_ids
944
837
        """
945
838
        try:
1035
928
            if error_path is None:
1036
929
                error_path = old_path
1037
930
            raise errors.NoDiffFound(error_path)
1038
 
 
1039
 
 
1040
 
format_registry = Registry()
1041
 
format_registry.register('default', DiffTree)