~bzr-pqm/bzr/bzr.dev : contents of bzrlib/patiencediff.py at revision 3945.1.7

~bzr-pqm/bzr/bzr.dev : (revision 3945.1.7)

#!/usr/bin/env python
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import os
import sys
import time
import difflib
""")


__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']


# This is a version of unified_diff which only adds a factory parameter
# so that you can override the default SequenceMatcher
# this has been submitted as a patch to python
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().
    A non-empty date is separated from its filename by a single tab; when
    the date is empty nothing is appended, so the header line carries no
    trailing whitespace.

    In each "@@" hunk header an empty range (length 0) is reported with the
    number of the line just before it, or 0 when it is at the start of the
    file -- e.g. "-0,0" when 'a' is empty -- which is what patch tools
    expect.

    :param a: Sequence of lines for the "from" side.
    :param b: Sequence of lines for the "to" side.
    :param n: Number of context lines passed to get_grouped_opcodes().
    :param lineterm: Terminator appended to the control lines only.
    :param sequencematcher: Factory called as sequencematcher(None, a, b);
        the result must provide get_grouped_opcodes(n).  Defaults to
        difflib.SequenceMatcher.
    :return: A generator of the diff lines.  Nothing is generated when
        the matcher reports no differences.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print line                  # doctest: +NORMALIZE_WHITESPACE
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        import difflib
        sequencematcher = difflib.SequenceMatcher

    # Only prepend the tab separator when there is a date to show, so that
    # headers without a timestamp do not end in whitespace.
    if fromfiledate:
        fromfiledate = '\t' + str(fromfiledate)
    if tofiledate:
        tofiledate = '\t' + str(tofiledate)

    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted lazily so that identical inputs
            # produce no output at all.
            yield '--- %s%s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s%s%s' % (tofile, tofiledate, lineterm)
            started = True
        # A hunk spans from the start of its first opcode to the end of
        # its last one, on both sides.
        first, last = group[0], group[-1]
        yield '@@ -%s +%s @@%s' % (
            _format_unified_range(first[1], last[2]),
            _format_unified_range(first[3], last[4]),
            lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A 'replace' is shown as the removed lines followed by the
            # added lines.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line


def _format_unified_range(start, stop):
    """Format the half-open, 0-based range [start, stop) as 'first,length'.

    'first' is 1-based.  For an empty range it is the number of the line
    preceding the range (0 at the start of the file).
    """
    length = stop - start
    if length:
        first = start + 1
    else:
        first = start
    return '%d,%d' % (first, length)


def unified_diff_files(a, b, sequencematcher=None):
    """Generate the unified diff for two files given by name.

    Both files are read completely before this function returns; files
    opened here are closed again once their lines have been read.

    :param a: Name of the "from" file, or '-' to read standard input.
    :param b: Name of the "to" file, or '-' to read standard input.
    :param sequencematcher: Matcher factory handed on to unified_diff().
    :return: An empty list when a and b are the same name, otherwise the
        generator returned by unified_diff(), with the two names used as
        the fromfile/tofile header labels.
    """
    # Should this actually be an error?
    if a == b:
        return []

    lines_a = _read_lines_for_diff(a)
    lines_b = _read_lines_for_diff(b)

    # TODO: Include fromfiledate and tofiledate (e.g. from
    # os.stat(path).st_mtime, or time.time() for stdin)
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)


def _read_lines_for_diff(path):
    """Return the lines of the file at path, or of stdin when path is '-'."""
    if path == '-':
        # stdin belongs to the caller, so it is left open.
        return sys.stdin.readlines()
    f = open(path, 'rb')
    try:
        return f.readlines()
    finally:
        f.close()


# Bind the public names unique_lcs, recurse_matches and
# PatienceSequenceMatcher to the implementations in bzrlib._patiencediff_c
# when that module can be imported, and to the ones in
# bzrlib._patiencediff_py otherwise.
# NOTE(review): presumably _patiencediff_c is an optional compiled extension
# and _patiencediff_py the pure-Python equivalent -- confirm against the
# build setup.
try:
    from bzrlib._patiencediff_c import (
        unique_lcs_c as unique_lcs,
        recurse_matches_c as recurse_matches,
        PatienceSequenceMatcher_c as PatienceSequenceMatcher
        )
except ImportError:
    from bzrlib._patiencediff_py import (
        unique_lcs_py as unique_lcs,
        recurse_matches_py as recurse_matches,
        PatienceSequenceMatcher_py as PatienceSequenceMatcher
        )


def main(args):
    """Command-line entry point: write the unified diff of two files to stdout.

    :param args: Command-line arguments, without the program name.
        Accepts --patience (the default) or --difflib to select the
        sequence matcher, followed by exactly two filenames; a filename
        of '-' means standard input.
    :return: -1 when the number of filenames is not two, otherwise None.
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    p.add_option('--patience', dest='matcher', action='store_const', const='patience',
                 default='patience', help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
                 default='patience', help='Use python\'s difflib algorithm')

    # Maps the value stored by the options above to a matcher factory.
    algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}

    (opts, args) = p.parse_args(args)
    matcher = algorithms[opts.matcher]

    if len(args) != 2:
        # Report on stderr so the message cannot be mistaken for (or
        # captured together with) diff output written to stdout.
        sys.stderr.write('You must supply 2 filenames to diff\n')
        return -1

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)


# When run as a script, call main() with the command-line arguments (minus
# the program name) and pass its return value to sys.exit().
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

2781.1.1 by Martin Pool merge cpatiencediff from Lukas	1	#!/usr/bin/env python
	2	# Copyright (C) 2005, 2006, 2007 Canonical Ltd
	3	#
	4	# This program is free software; you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation; either version 2 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program; if not, write to the Free Software
	16	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17
	18
	19	from bzrlib.lazy_import import lazy_import
	20	lazy_import(globals(), """
	21	import os
	22	import sys
	23	import time
	24	import difflib
	25	""")
	26
	27
	28	__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']
	29
	30
	31	# This is a version of unified_diff which only adds a factory parameter
	32	# so that you can override the default SequenceMatcher
	33	# this has been submitted as a patch to python
	34	def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
	35	tofiledate='', n=3, lineterm='\n',
	36	sequencematcher=None):
	37	r"""
	38	Compare two sequences of lines; generate the delta as a unified diff.
	39
	40	Unified diffs are a compact way of showing line changes and a few
	41	lines of context. The number of context lines is set by 'n' which
	42	defaults to three.
	43
	44	By default, the diff control lines (those with ---, +++, or @@) are
	45	created with a trailing newline. This is helpful so that inputs
	46	created from file.readlines() result in diffs that are suitable for
	47	file.writelines() since both the inputs and outputs have trailing
	48	newlines.
	49
	50	For inputs that do not have trailing newlines, set the lineterm
	51	argument to "" so that the output will be uniformly newline free.
	52
	53	The unidiff format normally has a header for filenames and modification
	54	times. Any or all of these may be specified using strings for
	55	'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
	56	times are normally expressed in the format returned by time.ctime().
	57
	58	Example:
	59
	60	>>> for line in unified_diff('one two three four'.split(),
	61	... 'zero one tree four'.split(), 'Original', 'Current',
	62	... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
	63	... lineterm=''):
	64	... print line
65	--- Original Sat Jan 26 23:30:50 1991
66	+++ Current Fri Jun 06 10:20:52 2003
67	@@ -1,4 +1,4 @@
68	+zero
69	one
70	-two
71	-three
72	+tree
73	four
74	"""
75	if sequencematcher is None:
76	import difflib
77	sequencematcher = difflib.SequenceMatcher
78
3922.1.1 by Adeodato Simo Change the patience_diff.unified_diff code to not add trailing whitespace when no timestamp is supplied.	79	if fromfiledate:
3922.1.4 by John Arbash Meinel It turns out that internal_diff worked around the trailing whitespace problem	80	fromfiledate = '\t' + str(fromfiledate)
3922.1.1 by Adeodato Simo Change the patience_diff.unified_diff code to not add trailing whitespace when no timestamp is supplied.	81	if tofiledate:
3922.1.4 by John Arbash Meinel It turns out that internal_diff worked around the trailing whitespace problem	82	tofiledate = '\t' + str(tofiledate)
3922.1.1 by Adeodato Simo Change the patience_diff.unified_diff code to not add trailing whitespace when no timestamp is supplied.	83
2781.1.1 by Martin Pool merge cpatiencediff from Lukas	84	started = False
	85	for group in sequencematcher(None,a,b).get_grouped_opcodes(n):
	86	if not started:
3922.1.1 by Adeodato Simo Change the patience_diff.unified_diff code to not add trailing whitespace when no timestamp is supplied.	87	yield '--- %s%s%s' % (fromfile, fromfiledate, lineterm)
	88	yield '+++ %s%s%s' % (tofile, tofiledate, lineterm)
2781.1.1 by Martin Pool merge cpatiencediff from Lukas	89	started = True
	90	i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
	91	yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
	92	for tag, i1, i2, j1, j2 in group:
	93	if tag == 'equal':
	94	for line in a[i1:i2]:
	95	yield ' ' + line
	96	continue
	97	if tag == 'replace' or tag == 'delete':
	98	for line in a[i1:i2]:
	99	yield '-' + line
	100	if tag == 'replace' or tag == 'insert':
	101	for line in b[j1:j2]:
	102	yield '+' + line
	103
	104
	105	def unified_diff_files(a, b, sequencematcher=None):
	106	"""Generate the diff for two files.
	107	"""
	108	# Should this actually be an error?
	109	if a == b:
	110	return []
	111	if a == '-':
	112	file_a = sys.stdin
	113	time_a = time.time()
	114	else:
	115	file_a = open(a, 'rb')
	116	time_a = os.stat(a).st_mtime
	117
	118	if b == '-':
	119	file_b = sys.stdin
	120	time_b = time.time()
	121	else:
	122	file_b = open(b, 'rb')
	123	time_b = os.stat(b).st_mtime
	124
	125	# TODO: Include fromfiledate and tofiledate
	126	return unified_diff(file_a.readlines(), file_b.readlines(),
	127	fromfile=a, tofile=b,
	128	sequencematcher=sequencematcher)
	129
	130
	131	try:
	132	from bzrlib._patiencediff_c import (
	133	unique_lcs_c as unique_lcs,
	134	recurse_matches_c as recurse_matches,
	135	PatienceSequenceMatcher_c as PatienceSequenceMatcher
	136	)
	137	except ImportError:
	138	from bzrlib._patiencediff_py import (
	139	unique_lcs_py as unique_lcs,
	140	recurse_matches_py as recurse_matches,
	141	PatienceSequenceMatcher_py as PatienceSequenceMatcher
	142	)
	143
	144
	145	def main(args):
	146	import optparse
	147	p = optparse.OptionParser(usage='%prog [options] file_a file_b'
	148	'\nFiles can be "-" to read from stdin')
	149	p.add_option('--patience', dest='matcher', action='store_const', const='patience',
	150	default='patience', help='Use the patience difference algorithm')
	151	p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
	152	default='patience', help='Use python\'s difflib algorithm')
153
154	algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}
155
156	(opts, args) = p.parse_args(args)
157	matcher = algorithms[opts.matcher]
158
159	if len(args) != 2:
160	print 'You must supply 2 filenames to diff'
161	return -1
162
163	for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
164	sys.stdout.write(line)
165
166
167	if __name__ == '__main__':
168	sys.exit(main(sys.argv[1:]))