~bzr-pqm/bzr/bzr.dev

2781.1.1 by Martin Pool
merge cpatiencediff from Lukas
1
#!/usr/bin/env python
2
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
3
#
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2781.1.1 by Martin Pool
merge cpatiencediff from Lukas
17
6379.6.3 by Jelmer Vernooij
Use absolute_import.
18
from __future__ import absolute_import
2781.1.1 by Martin Pool
merge cpatiencediff from Lukas
19
20
from bzrlib.lazy_import import lazy_import
21
lazy_import(globals(), """
22
import os
23
import sys
24
import time
25
import difflib
26
""")
27
28
29
__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']
30
31
32
def _format_range_unified(start, stop):
    """Return the 'start,length' part of a hunk header for lines [start, stop).

    :param start: 0-based index of the first line of the range.
    :param stop: 0-based index one past the last line of the range.
    :return: A string such as '1,4'.  Line numbers are 1-based, except that
        an empty range is anchored on the line that precedes it (0 when the
        range sits at the very start), which is what the unified format
        expects and what patch needs to recognise an empty file.
    """
    length = stop - start
    if length == 0:
        return '%d,0' % (start,)
    return '%d,%d' % (start + 1, length)


# This is a version of difflib's unified_diff which adds a factory parameter
# so that you can override the default SequenceMatcher, separates file names
# from timestamps with a tab, and anchors empty hunk ranges correctly.
# The factory parameter has been submitted as a patch to python.
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().
    A timestamp, when given, is separated from the file name by a tab; when
    it is omitted (or otherwise false) nothing is appended, so the header
    line carries no trailing whitespace.

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    the object it returns must provide get_grouped_opcodes(n).  It defaults
    to difflib.SequenceMatcher.

    Nothing at all is generated when the matcher reports no differences.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)                 # doctest: +NORMALIZE_WHITESPACE
    --- Original        Sat Jan 26 23:30:50 1991
    +++ Current         Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    # Normalise the timestamps up front: a false value (including None)
    # contributes nothing to the header instead of its string form.
    if fromfiledate:
        fromfiledate = '\t' + str(fromfiledate)
    else:
        fromfiledate = ''
    if tofiledate:
        tofiledate = '\t' + str(tofiledate)
    else:
        tofiledate = ''

    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted lazily so that identical inputs
            # produce no output at all.
            yield '--- %s%s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s%s%s' % (tofile, tofiledate, lineterm)
            started = True
        # The hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield "@@ -%s +%s @@%s" % (_format_range_unified(i1, i2),
                                   _format_range_unified(j1, j2),
                                   lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A 'replace' is rendered as its deletions followed by its
            # insertions.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line
103
104
105
def unified_diff_files(a, b, sequencematcher=None):
    """Generate the unified diff between the contents of two files.

    :param a: Path of the 'from' file, or '-' to read it from sys.stdin.
    :param b: Path of the 'to' file, or '-' to read it from sys.stdin.
    :param sequencematcher: Matcher factory passed through to unified_diff.
    :return: An empty list when a and b are the same name; otherwise the
        iterable of diff lines produced by unified_diff, with a and b used
        as the file names in the header.

    Named files are opened in binary mode.  Both inputs are read completely
    and every file opened here is closed again before this function
    returns, so the returned lines do not depend on any open file.
    Errors raised by open() propagate to the caller.
    """
    # Should this actually be an error?
    if a == b:
        return []

    file_a = file_b = None
    try:
        if a == '-':
            file_a = sys.stdin
        else:
            file_a = open(a, 'rb')
        if b == '-':
            file_b = sys.stdin
        else:
            file_b = open(b, 'rb')
        lines_a = file_a.readlines()
        lines_b = file_b.readlines()
    finally:
        # Close only what was opened here; stdin belongs to the process.
        for handle in (file_a, file_b):
            if handle is not None and handle is not sys.stdin:
                handle.close()

    # TODO: Include fromfiledate and tofiledate
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)
129
130
131
try:
132
    from bzrlib._patiencediff_c import (
133
        unique_lcs_c as unique_lcs,
134
        recurse_matches_c as recurse_matches,
135
        PatienceSequenceMatcher_c as PatienceSequenceMatcher
136
        )
137
except ImportError:
138
    from bzrlib._patiencediff_py import (
139
        unique_lcs_py as unique_lcs,
140
        recurse_matches_py as recurse_matches,
141
        PatienceSequenceMatcher_py as PatienceSequenceMatcher
142
        )
143
144
145
def main(args):
    """Command-line entry point: write the unified diff of two files to stdout.

    :param args: Command-line arguments without the program name.  Exactly
        two file names are expected ('-' means stdin), optionally preceded
        by --patience (the default) or --difflib to pick the matcher.
    :return: -1 when the number of file names is not two; None otherwise.
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    p.add_option('--patience', dest='matcher', action='store_const', const='patience',
                 default='patience', help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
                 default='patience', help='Use python\'s difflib algorithm')

    (opts, args) = p.parse_args(args)

    if len(args) != 2:
        # Diagnostics go to stderr so they can never be mistaken for, or
        # mixed into, diff output on stdout.
        sys.stderr.write('You must supply 2 filenames to diff\n')
        return -1

    algorithms = {'patience': PatienceSequenceMatcher,
                  'difflib': difflib.SequenceMatcher}
    matcher = algorithms[opts.matcher]

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)
165
166
167
# When executed as a script, diff the files named on the command line and
# use main()'s return value as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)