~bzr-pqm/bzr/bzr.dev : contents of bzrlib/patches.py at revision 6311

~bzr-pqm/bzr/bzr.dev : (revision 6311)

# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
# <aaron.bentley@utoronto.ca>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from bzrlib.errors import (
    BinaryFiles,
    MalformedHunkHeader,
    MalformedLine,
    MalformedPatchHeader,
    PatchConflict,
    PatchSyntax,
    )

import re


# Pattern for the "Binary files X and Y differ" line that stands in for a
# textual patch; group 1 is the original file name, group 2 the modified one.
binary_files_re = 'Binary files (.*) and (.*) differ\n'

def get_patch_names(iter_lines):
    """Read the file-name header lines that start a single patch.

    :param iter_lines: iterator over the lines of one patch; the header
        lines are consumed from it
    :return: (orig_name, mod_name): the text following "--- " and "+++ "
        with trailing newlines stripped
    :raises BinaryFiles: if the first line matches binary_files_re
    :raises MalformedPatchHeader: if the input ends before either header
        line, or the first line does not start with "--- "
    :raises PatchSyntax: if the second line does not start with "+++ "
    """
    # The next() builtin (Python 2.6+) is used instead of the .next() method
    # so that this also works with Python 3 iterators.  Only the read itself
    # can raise StopIteration, so only the read is guarded.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)


def parse_range(textrange):
    """Split the "pos[,length]" part of a hunk header into two integers.

    When the comma and length are omitted, the length defaults to 1.

    :param textrange: The text to parse, e.g. "12,5" or "12"
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    :raises ValueError: if there are more than two comma-separated fields
        or a field is not an integer
    """
    fields = textrange.split(',')
    if len(fields) == 1:
        fields.append("1")
    # Unpacking raises ValueError when more than two fields were supplied.
    start, length = fields
    return (int(start), int(length))


def hunk_from_header(line):
    """Build an empty Hunk from a "@@ -pos,range +pos,range @@ tail" line.

    :param line: the header line, including its trailing newline
    :return: a Hunk carrying the parsed positions, ranges and optional tail
        text (None when nothing follows the closing "@@"); its line list
        is empty
    :raises MalformedHunkHeader: if the line does not have the expected
        shape, a position or range is not an integer, or a range is negative
    """
    # Function-level import kept from the original code.
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    # "except ... as" is accepted by Python 2.6+ and Python 3, unlike the
    # older comma form.
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)


class HunkLine:
    """Base class for one line of a hunk.

    ``contents`` holds the line text without the leading marker character.
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Return the line as patch text, prefixed with ``leadchar``.

        If the contents do not end with a newline, a newline followed by
        the NO_NL marker line is appended.

        :param leadchar: the marker character to put in front of the contents
        :return: the formatted patch text for this line
        """
        # A branch guarded by "... and False" used to sit here; it could
        # never execute and has been removed.
        if self.contents.endswith('\n'):
            terminator = ''
        else:
            terminator = '\n' + NO_NL
        return leadchar + self.contents + terminator


class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    # The constructor is inherited from HunkLine unchanged.

    def __str__(self):
        lead = " "
        return self.get_str(lead)


class InsertLine(HunkLine):
    """A hunk line rendered with a leading "+"."""

    # The constructor is inherited from HunkLine unchanged.

    def __str__(self):
        lead = "+"
        return self.get_str(lead)


class RemoveLine(HunkLine):
    """A hunk line rendered with a leading "-"."""

    # The constructor is inherited from HunkLine unchanged.

    def __str__(self):
        lead = "-"
        return self.get_str(lead)

# Marker line appended after a hunk line whose contents lack a trailing
# newline (see HunkLine.get_str) and recognised by iter_lines_handle_nl.
NO_NL = '\\ No newline at end of file\n'
__pychecker__="no-returnvalues"

def parse_line(line):
    """Turn one raw hunk line into the matching HunkLine subclass.

    The first character selects the type: a space gives a ContextLine,
    "+" an InsertLine and "-" a RemoveLine, each holding the rest of the
    line.  A line that begins with a newline becomes a ContextLine
    holding the whole line.

    :param line: a line from the body of a hunk
    :raises MalformedLine: if the line starts with anything else
    """
    marker = line[:1]
    if marker == "\n":
        # Nothing is stripped here: the whole line is the contents.
        return ContextLine(line)
    if marker == " ":
        return ContextLine(line[1:])
    if marker == "+":
        return InsertLine(line[1:])
    if marker == "-":
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)
__pychecker__=""


class Hunk:
    """One hunk of a patch: header positions/ranges plus its lines."""

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.lines = []
        self.tail = tail
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range

    def get_header(self):
        """Return the "@@ -orig +mod @@[ tail]" header line for this hunk."""
        orig_text = self.range_str(self.orig_pos, self.orig_range)
        mod_text = self.range_str(self.mod_pos, self.mod_range)
        tail_str = ''
        if self.tail is not None:
            tail_str = ' ' + self.tail
        return "@@ -%s +%s @@%s\n" % (orig_text, mod_text, tail_str)

    def range_str(self, pos, range):
        """Format a position/range pair for a hunk header.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: "pos,range", or just "pos" when range == 1
        """
        if range != 1:
            return "%i,%i" % (pos, range)
        return "%i" % pos

    def __str__(self):
        """Return the header followed by the text of every line."""
        pieces = [self.get_header()]
        pieces.extend([str(hunk_line) for hunk_line in self.lines])
        return "".join(pieces)

    def shift_to_mod(self, pos):
        """Return the shift this hunk applies to position ``pos``.

        Positions before the hunk get 0; positions past it get the
        difference between the modified and original ranges; positions in
        between are resolved line by line by shift_to_mod_lines, which may
        return None.
        """
        if pos < self.orig_pos - 1:
            return 0
        if pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the hunk lines, accumulating the shift up to ``pos``.

        :return: the accumulated shift, or None if a RemoveLine is found
            exactly at ``pos``
        """
        cursor = self.orig_pos - 1
        delta = 0
        for hunk_line in self.lines:
            if isinstance(hunk_line, InsertLine):
                delta += 1
            elif isinstance(hunk_line, RemoveLine):
                if cursor == pos:
                    return None
                delta -= 1
                cursor += 1
            elif isinstance(hunk_line, ContextLine):
                cursor += 1
            if cursor > pos:
                break
        return delta


def iter_hunks(iter_lines, allow_dirty=False):
    '''
    Parse the lines that follow a patch header and yield one Hunk at a time.

    A line consisting only of a newline between hunks is skipped.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk).  Default False
    :raises MalformedHunkHeader: if a hunk header is expected but the line
        is not one and allow_dirty is False
    '''
    # Header lines are taken by the for loop and body lines by next(), so
    # both must share one iterator.  iter() is a no-op on an iterator and
    # lets a plain list be passed as well.
    iter_lines = iter(iter_lines)
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end.  Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # The input ended inside a hunk.  As before, iteration just
                # stops and the incomplete hunk is not yielded; returning
                # explicitly keeps that behaviour where a StopIteration
                # escaping a generator is an error (PEP 479).
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk


class BinaryPatch(object):
    """A patch entry that only records the old and new file names."""

    def __init__(self, oldname, newname):
        self.oldname, self.newname = oldname, newname

    def __str__(self):
        """Return the "Binary files ... differ" line for this entry."""
        names = (self.oldname, self.newname)
        return 'Binary files %s and %s differ\n' % names


class Patch(BinaryPatch):
    """A textual patch for one file: the two file names plus its hunks."""

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def __str__(self):
        """Return the header followed by the text of every hunk."""
        hunk_text = "".join([str(hunk) for hunk in self.hunks])
        return self.get_header() + hunk_text

    def get_header(self):
        """Return the two-line "---"/"+++" file-name header."""
        names = (self.oldname, self.newname)
        return "--- %s\n+++ %s\n" % names

    def stats_values(self):
        """Count inserted lines, removed lines and hunks.

        :return: (inserts, removes, number of hunks)
        """
        inserts = 0
        removes = 0
        for hunk in self.hunks:
            for hunk_line in hunk.lines:
                if isinstance(hunk_line, InsertLine):
                    inserts += 1
                elif isinstance(hunk_line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        template = "%i inserts, %i removes in %i hunks"
        return template % self.stats_values()

    def pos_in_mod(self, position):
        """Add every hunk's shift for ``position`` to ``position``.

        :return: the shifted position, or None as soon as any hunk's
            shift_to_mod returns None
        """
        shifted = position
        for hunk in self.hunks:
            delta = hunk.shift_to_mod(position)
            if delta is None:
                return None
            shifted += delta
        return shifted

    def iter_inserted(self):
        """Iterates through inserted lines

        The number paired with each line starts at ``mod_pos - 1`` for its
        hunk and advances over inserted and context lines.

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            mod_index = hunk.mod_pos - 1
            for hunk_line in hunk.lines:
                if isinstance(hunk_line, InsertLine):
                    yield (mod_index, hunk_line)
                    mod_index += 1
                if isinstance(hunk_line, ContextLine):
                    mod_index += 1


def parse_patch(iter_lines, allow_dirty=False):
    '''
    Parse the lines of a single patch into a Patch or BinaryPatch.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch if get_patch_names raises BinaryFiles,
        otherwise a Patch holding every hunk found
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    # "except ... as" is accepted by Python 2.6+ and Python 3, unlike the
    # older comma form.
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    patch.hunks.extend(iter_hunks(iter_lines, allow_dirty))
    return patch


def iter_file_patch(iter_lines, allow_dirty=False):
    '''
    Split a stream of lines into one list of lines per patch.

    A new patch starts at each line beginning with "--- " or matching
    binary_files_re, unless that line falls inside the original-side range
    announced by the preceding hunk header.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch.  Note that the algorithm here can only find
        such text before any patches have been found.  Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True.  Default False.
    '''
    ### FIXME: Docstring is not quite true.  We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    binary_re = re.compile(binary_files_re)
    pending = []
    # Original-side lines still expected in the current hunk; while this is
    # positive a "--- " line is hunk content, not a new patch header.
    remaining_orig = 0
    at_start = True
    for line in iter_lines:
        # These lines are always dropped, whatever allow_dirty says.
        if line.startswith(('=== ', '*** ', '#')):
            continue
        if remaining_orig > 0:
            if line.startswith(('-', ' ')):
                remaining_orig -= 1
        elif line.startswith('--- ') or binary_re.match(line):
            if allow_dirty and at_start:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                at_start = False
            elif pending:
                yield pending
            pending = []
        elif line.startswith('@@'):
            remaining_orig = hunk_from_header(line).orig_range
        pending.append(line)
    if pending:
        yield pending


def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one.  This transformation may
    be applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    Each NO_NL marker line is dropped and the trailing newline is removed
    from the line that preceded it.

    :param iter_lines: iterable of patch lines
    :raises AssertionError: if a NO_NL marker has no preceding line to
        attach to, or that line does not end with a newline
    """
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            # last_line is None when the marker is the first line or comes
            # straight after another marker; report that the same way as
            # the other malformed case instead of failing on None.endswith.
            if last_line is None or not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line


def parse_patches(iter_lines, allow_dirty=False):
    '''
    Parse a stream of lines holding any number of patches.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: list with one parse_patch() result per group of lines
        produced by iter_file_patch()
    '''
    patches = []
    for file_lines in iter_file_patch(iter_lines, allow_dirty):
        patches.append(parse_patch(iter(file_lines), allow_dirty))
    return patches


def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the overlapping part of the two texts is compared, so a text that
    is a prefix of the other counts as having no difference.

    :param atext: The first text
    :type atext: str
    :param btext: The second text
    :type btext: str
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    for index, (a_char, b_char) in enumerate(zip(atext, btext)):
        if a_char != b_char:
            return index
    return None


def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: The lines of the patch, including its file-name
        header.
    """
    cleaned = iter_lines_handle_nl(iter(patch_lines))
    # Consume the file-name header; the names themselves are not used.
    get_patch_names(cleaned)
    hunks = iter_hunks(cleaned)
    return iter_patched_from_hunks(orig_lines, hunks)


def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.  May be None, in which case
        the hunks must not need any original line (no context or removed
        lines, and nothing to copy before a hunk).
    :param hunks: An iterable of Hunk instances.
    :raises PatchConflict: if a context or removed line of a hunk differs
        from the corresponding original line
    """
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy through the untouched lines that precede this hunk.
        while line_no < hunk.orig_pos:
            try:
                orig_line = next(orig_lines)
            except StopIteration:
                # NOTE(review): the original text is shorter than the hunk
                # expects.  As before, output just stops here; returning
                # explicitly keeps that behaviour where a StopIteration
                # escaping a generator is an error (PEP 479).
                return
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                try:
                    orig_line = next(orig_lines)
                except StopIteration:
                    # Same situation as above: original text exhausted.
                    return
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line, "".join(seen_patch))
                # Context lines are passed through; removed lines are
                # consumed from the original without being emitted.
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                line_no += 1
    if orig_lines is not None:
        for line in orig_lines:
            yield line

4763.2.4 by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry.	1	# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
0.5.93 by Aaron Bentley Added patches.py	2	# <aaron.bentley@utoronto.ca>
0.5.93 by Aaron Bentley Added patches.py	3	#
2052.3.1 by John Arbash Meinel Add tests to cleanup the copyright of all source files	4	# This program is free software; you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation; either version 2 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	16	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6289.2.1 by Jelmer Vernooij Move the primary definition of the patches exceptions to bzrlib.errors.	17
	18	from bzrlib.errors import (
	19	BinaryFiles,
	20	MalformedHunkHeader,
	21	MalformedLine,
	22	MalformedPatchHeader,
	23	PatchConflict,
	24	PatchSyntax,
	25	)
	26
4634.80.1 by Aaron Bentley Parse binary files.	27	import re
	28
	29
4634.98.1 by Aaron Bentley Improve patch binary section handling.	30	binary_files_re = 'Binary files (.*) and (.*) differ\n'
	31
0.5.93 by Aaron Bentley Added patches.py	32	def get_patch_names(iter_lines):
	33	try:
	34	line = iter_lines.next()
4634.98.1 by Aaron Bentley Improve patch binary section handling.	35	match = re.match(binary_files_re, line)
4634.80.1 by Aaron Bentley Parse binary files.	36	if match is not None:
4634.80.1 by Aaron Bentley Parse binary files.	37	raise BinaryFiles(match.group(1), match.group(2))
0.5.93 by Aaron Bentley Added patches.py	38	if not line.startswith("--- "):
	39	raise MalformedPatchHeader("No orig name", line)
	40	else:
	41	orig_name = line[4:].rstrip("\n")
	42	except StopIteration:
	43	raise MalformedPatchHeader("No orig line", "")
	44	try:
	45	line = iter_lines.next()
	46	if not line.startswith("+++ "):
	47	raise PatchSyntax("No mod name")
	48	else:
	49	mod_name = line[4:].rstrip("\n")
	50	except StopIteration:
	51	raise MalformedPatchHeader("No mod line", "")
	52	return (orig_name, mod_name)
	53
1185.82.123 by Aaron Bentley Cleanups to prepare for review	54
0.5.93 by Aaron Bentley Added patches.py	55	def parse_range(textrange):
	56	"""Parse a patch range, handling the "1" special-case
	57
	58	:param textrange: The text to parse
	59	:type textrange: str
	60	:return: the position and range, as a tuple
	61	:rtype: (int, int)
	62	"""
	63	tmp = textrange.split(',')
	64	if len(tmp) == 1:
	65	pos = tmp[0]
	66	range = "1"
	67	else:
	68	(pos, range) = tmp
	69	pos = int(pos)
	70	range = int(range)
	71	return (pos, range)
	72
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	73
0.5.93 by Aaron Bentley Added patches.py	74	def hunk_from_header(line):
3224.5.1 by Andrew Bennetts Lots of assorted hackery to reduce the number of imports for common operations. Improves 'rocks', 'st' and 'help' times by ~50ms on my laptop.	75	import re
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	76	matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
	77	if matches is None:
	78	raise MalformedHunkHeader("Does not match format.", line)
0.5.93 by Aaron Bentley Added patches.py	79	try:
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	80	(orig, mod) = matches.group(1).split(" ")
2358.3.1 by Martin Pool Update some too-general exception blocks	81	except (ValueError, IndexError), e:
0.5.93 by Aaron Bentley Added patches.py	82	raise MalformedHunkHeader(str(e), line)
	83	if not orig.startswith('-') or not mod.startswith('+'):
	84	raise MalformedHunkHeader("Positions don't start with + or -.", line)
	85	try:
	86	(orig_pos, orig_range) = parse_range(orig[1:])
	87	(mod_pos, mod_range) = parse_range(mod[1:])
2358.3.1 by Martin Pool Update some too-general exception blocks	88	except (ValueError, IndexError), e:
0.5.93 by Aaron Bentley Added patches.py	89	raise MalformedHunkHeader(str(e), line)
	90	if mod_range < 0 or orig_range < 0:
	91	raise MalformedHunkHeader("Hunk range is negative", line)
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	92	tail = matches.group(3)
	93	return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
0.5.93 by Aaron Bentley Added patches.py	94
	95
	96	class HunkLine:
	97	def __init__(self, contents):
	98	self.contents = contents
	99
	100	def get_str(self, leadchar):
	101	if self.contents == "\n" and leadchar == " " and False:
	102	return "\n"
	103	if not self.contents.endswith('\n'):
	104	terminator = '\n' + NO_NL
	105	else:
	106	terminator = ''
	107	return leadchar + self.contents + terminator
	108
	109
	110	class ContextLine(HunkLine):
	111	def __init__(self, contents):
	112	HunkLine.__init__(self, contents)
	113
	114	def __str__(self):
	115	return self.get_str(" ")
	116
	117
	118	class InsertLine(HunkLine):
	119	def __init__(self, contents):
	120	HunkLine.__init__(self, contents)
	121
	122	def __str__(self):
	123	return self.get_str("+")
	124
	125
	126	class RemoveLine(HunkLine):
	127	def __init__(self, contents):
	128	HunkLine.__init__(self, contents)
	129
	130	def __str__(self):
	131	return self.get_str("-")
	132
	133	NO_NL = '\\ No newline at end of file\n'
	134	__pychecker__="no-returnvalues"
	135
	136	def parse_line(line):
	137	if line.startswith("\n"):
	138	return ContextLine(line)
	139	elif line.startswith(" "):
	140	return ContextLine(line[1:])
	141	elif line.startswith("+"):
	142	return InsertLine(line[1:])
	143	elif line.startswith("-"):
	144	return RemoveLine(line[1:])
	145	else:
	146	raise MalformedLine("Unknown line type", line)
	147	__pychecker__=""
	148
	149
	150	class Hunk:
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	151	def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
0.5.93 by Aaron Bentley Added patches.py	152	self.orig_pos = orig_pos
	153	self.orig_range = orig_range
	154	self.mod_pos = mod_pos
	155	self.mod_range = mod_range
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	156	self.tail = tail
0.5.93 by Aaron Bentley Added patches.py	157	self.lines = []
	158
	159	def get_header(self):
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	160	if self.tail is None:
	161	tail_str = ''
	162	else:
	163	tail_str = ' ' + self.tail
	164	return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
	165	self.orig_range),
	166	self.range_str(self.mod_pos,
	167	self.mod_range),
	168	tail_str)
0.5.93 by Aaron Bentley Added patches.py	169
	170	def range_str(self, pos, range):
	171	"""Return a file range, special-casing for 1-line files.
	172
	173	:param pos: The position in the file
	174	:type pos: int
	175	:range: The range in the file
	176	:type range: int
	177	:return: a string in the format 1,4 except when range == pos == 1
	178	"""
	179	if range == 1:
	180	return "%i" % pos
	181	else:
	182	return "%i,%i" % (pos, range)
	183
	184	def __str__(self):
	185	lines = [self.get_header()]
	186	for line in self.lines:
	187	lines.append(str(line))
	188	return "".join(lines)
	189
	190	def shift_to_mod(self, pos):
	191	if pos < self.orig_pos-1:
	192	return 0
	193	elif pos > self.orig_pos+self.orig_range:
	194	return self.mod_range - self.orig_range
	195	else:
	196	return self.shift_to_mod_lines(pos)
	197
	198	def shift_to_mod_lines(self, pos):
	199	position = self.orig_pos-1
	200	shift = 0
	201	for line in self.lines:
	202	if isinstance(line, InsertLine):
	203	shift += 1
	204	elif isinstance(line, RemoveLine):
	205	if position == pos:
	206	return None
	207	shift -= 1
	208	position += 1
	209	elif isinstance(line, ContextLine):
	210	position += 1
	211	if position > pos:
	212	break
	213	return shift
	214
1185.82.123 by Aaron Bentley Cleanups to prepare for review	215
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	216	def iter_hunks(iter_lines, allow_dirty=False):
	217	'''
	218	:arg iter_lines: iterable of lines to parse for hunks
	219	:kwarg allow_dirty: If True, when we encounter something that is not
	220	a hunk header when we're looking for one, assume the rest of the lines
	221	are not part of the patch (comments or other junk). Default False
	222	'''
0.5.93 by Aaron Bentley Added patches.py	223	hunk = None
0.5.93 by Aaron Bentley Added patches.py	224	for line in iter_lines:
0.5.96 by Aaron Bentley Cleaned up handling of files with no terminating \n	225	if line == "\n":
0.5.93 by Aaron Bentley Added patches.py	226	if hunk is not None:
	227	yield hunk
	228	hunk = None
	229	continue
	230	if hunk is not None:
	231	yield hunk
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	232	try:
	233	hunk = hunk_from_header(line)
	234	except MalformedHunkHeader:
	235	if allow_dirty:
	236	# If the line isn't a hunk header, then we've reached the end
	237	# of this patch and there's "junk" at the end. Ignore the
	238	# rest of this patch.
	239	return
	240	raise
0.5.93 by Aaron Bentley Added patches.py	241	orig_size = 0
	242	mod_size = 0
	243	while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
	244	hunk_line = parse_line(iter_lines.next())
0.5.96 by Aaron Bentley Cleaned up handling of files with no terminating \n	245	hunk.lines.append(hunk_line)
0.5.93 by Aaron Bentley Added patches.py	246	if isinstance(hunk_line, (RemoveLine, ContextLine)):
	247	orig_size += 1
	248	if isinstance(hunk_line, (InsertLine, ContextLine)):
	249	mod_size += 1
	250	if hunk is not None:
	251	yield hunk
	252
1185.82.123 by Aaron Bentley Cleanups to prepare for review	253
4634.80.1 by Aaron Bentley Parse binary files.	254	class BinaryPatch(object):
0.5.93 by Aaron Bentley Added patches.py	255	def __init__(self, oldname, newname):
	256	self.oldname = oldname
	257	self.newname = newname
4634.80.1 by Aaron Bentley Parse binary files.	258
4634.80.1 by Aaron Bentley Parse binary files.	259	def __str__(self):
4634.80.2 by Aaron Bentley Ensure patch roundtripping.	260	return 'Binary files %s and %s differ\n' % (self.oldname, self.newname)
4634.80.1 by Aaron Bentley Parse binary files.	261
	262
	263	class Patch(BinaryPatch):
	264
	265	def __init__(self, oldname, newname):
	266	BinaryPatch.__init__(self, oldname, newname)
0.5.93 by Aaron Bentley Added patches.py	267	self.hunks = []
	268
	269	def __str__(self):
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	270	ret = self.get_header()
0.5.93 by Aaron Bentley Added patches.py	271	ret += "".join([str(h) for h in self.hunks])
	272	return ret
	273
0.5.95 by Aaron Bentley Updated patch to match bzrtools	274	def get_header(self):
	275	return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
	276
3946.4.1 by Tim Penhey Extract out the counting of the stats values.	277	def stats_values(self):
	278	"""Calculate the number of inserts and removes."""
0.5.93 by Aaron Bentley Added patches.py	279	removes = 0
	280	inserts = 0
	281	for hunk in self.hunks:
	282	for line in hunk.lines:
	283	if isinstance(line, InsertLine):
	284	inserts+=1;
	285	elif isinstance(line, RemoveLine):
	286	removes+=1;
3946.4.1 by Tim Penhey Extract out the counting of the stats values.	287	return (inserts, removes, len(self.hunks))
	288
	289	def stats_str(self):
	290	"""Return a string of patch statistics"""
0.5.93 by Aaron Bentley Added patches.py	291	return "%i inserts, %i removes in %i hunks" % \
3946.4.1 by Tim Penhey Extract out the counting of the stats values.	292	self.stats_values()
0.5.93 by Aaron Bentley Added patches.py	293
	294	def pos_in_mod(self, position):
	295	newpos = position
	296	for hunk in self.hunks:
	297	shift = hunk.shift_to_mod(position)
	298	if shift is None:
	299	return None
	300	newpos += shift
	301	return newpos
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	302
0.5.93 by Aaron Bentley Added patches.py	303	def iter_inserted(self):
0.5.93 by Aaron Bentley Added patches.py	304	"""Iteraties through inserted lines
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	305
0.5.93 by Aaron Bentley Added patches.py	306	:return: Pair of line number, line
	307	:rtype: iterator of (int, InsertLine)
	308	"""
	309	for hunk in self.hunks:
	310	pos = hunk.mod_pos - 1;
	311	for line in hunk.lines:
	312	if isinstance(line, InsertLine):
	313	yield (pos, line)
	314	pos += 1
	315	if isinstance(line, ContextLine):
	316	pos += 1
	317
1185.82.123 by Aaron Bentley Cleanups to prepare for review	318
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	319	def parse_patch(iter_lines, allow_dirty=False):
	320	'''
	321	:arg iter_lines: iterable of lines to parse
	322	:kwarg allow_dirty: If True, allow the patch to have trailing junk.
	323	Default False
	324	'''
3873.1.8 by Benoît Pierre Fix regressions in other parts of the testsuite.	325	iter_lines = iter_lines_handle_nl(iter_lines)
4634.80.1 by Aaron Bentley Parse binary files.	326	try:
	327	(orig_name, mod_name) = get_patch_names(iter_lines)
	328	except BinaryFiles, e:
	329	return BinaryPatch(e.orig_name, e.mod_name)
	330	else:
	331	patch = Patch(orig_name, mod_name)
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	332	for hunk in iter_hunks(iter_lines, allow_dirty):
4634.80.1 by Aaron Bentley Parse binary files.	333	patch.hunks.append(hunk)
4634.80.1 by Aaron Bentley Parse binary files.	334	return patch
0.5.93 by Aaron Bentley Added patches.py	335
0.5.93 by Aaron Bentley Added patches.py	336
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	337	def iter_file_patch(iter_lines, allow_dirty=False):
	338	'''
	339	:arg iter_lines: iterable of lines to parse for patches
	340	:kwarg allow_dirty: If True, allow comments and other non-patch text
	341	before the first patch. Note that the algorithm here can only find
	342	such text before any patches have been found. Comments after the
	343	first patch are stripped away in iter_hunks() if it is also passed
	344	allow_dirty=True. Default False.
	345	'''
	346	### FIXME: Docstring is not quite true. We allow certain comments no
	347	# matter what, If they startwith '===', '***', or '#' Someone should
	348	# reexamine this logic and decide if we should include those in
	349	# allow_dirty or restrict those to only being before the patch is found
	350	# (as allow_dirty does).
4634.98.1 by Aaron Bentley Improve patch binary section handling.	351	regex = re.compile(binary_files_re)
0.5.93 by Aaron Bentley Added patches.py	352	saved_lines = []
2298.6.1 by Johan Dahlberg Fix bzrtools shelve command for removed lines beginning with "--"	353	orig_range = 0
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	354	beginning = True
0.5.93 by Aaron Bentley Added patches.py	355	for line in iter_lines:
0.5.106 by John Arbash Meinel Allowing *** to be a patch header.	356	if line.startswith('=== ') or line.startswith('*** '):
0.5.93 by Aaron Bentley Added patches.py	357	continue
1770.1.1 by Aaron Bentley Ignore lines that start with '#' in patch parser	358	if line.startswith('#'):
	359	continue
2298.6.1 by Johan Dahlberg Fix bzrtools shelve command for removed lines beginning with "--"	360	elif orig_range > 0:
	361	if line.startswith('-') or line.startswith(' '):
	362	orig_range -= 1
4634.98.1 by Aaron Bentley Improve patch binary section handling.	363	elif line.startswith('--- ') or regex.match(line):
5016.3.1 by Toshio Kuratomi iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.	364	if allow_dirty and beginning:
	365	# Patches can have "junk" at the beginning
	366	# Stripping junk from the end of patches is handled when we
	367	# parse the patch
	368	beginning = False
	369	elif len(saved_lines) > 0:
0.5.93 by Aaron Bentley Added patches.py	370	yield saved_lines
0.5.93 by Aaron Bentley Added patches.py	371	saved_lines = []
2298.6.1 by Johan Dahlberg Fix bzrtools shelve command for removed lines beginning with "--"	372	elif line.startswith('@@'):
	373	hunk = hunk_from_header(line)
	374	orig_range = hunk.orig_range
0.5.93 by Aaron Bentley Added patches.py	375	saved_lines.append(line)
	376	if len(saved_lines) > 0:
	377	yield saved_lines
	378
	379
def iter_lines_handle_nl(iter_lines):
    r"""Yield lines, dropping the newline of any line followed by NO_NL.

    Iterates through lines, ensuring that lines that originally had no
    terminating \n are produced without one.  A line equal to the
    module-level NO_NL marker is never yielded itself; instead the line
    just before it is yielded with its final character (the newline)
    removed.  This transformation may be applied at any point up until
    hunk line parsing, and is safe to apply repeatedly.

    :param iter_lines: iterable of patch lines
    :return: a generator of the adjusted lines
    :raises AssertionError: if a NO_NL marker has no preceding line to
        modify (it is the first line, or directly follows another marker),
        or if the preceding line does not end with a newline.
    """
    # Each line is held back for one iteration so that a following NO_NL
    # marker can still strip its newline before it is yielded.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            # Guard against a marker with nothing before it: without the
            # None check this would fail with an AttributeError instead of
            # the AssertionError used for a misplaced marker.
            if last_line is None or not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            # The marker itself must not be emitted.
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    # Flush the line still being held back, if any.
    if last_line is not None:
        yield last_line
	399
	400
def parse_patches(iter_lines, allow_dirty=False):
    '''Parse every per-file patch found in a stream of lines.

    The input is split into per-file groups of lines by iter_file_patch,
    and each group is handed to parse_patch.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: a list with the parse_patch result for each group, in the
        order the groups were produced
    '''
    parsed = []
    for file_lines in iter_file_patch(iter_lines, allow_dirty):
        # parse_patch is given a fresh iterator over the group's lines.
        parsed.append(parse_patch(iter(file_lines), allow_dirty))
    return parsed
0.5.93 by Aaron Bentley Added patches.py	410
	411
	412	def difference_index(atext, btext):
1759.2.1 by Jelmer Vernooij Fix some types (found using aspell).	413	"""Find the indext of the first character that differs between two texts
0.5.93 by Aaron Bentley Added patches.py	414
	415	:param atext: The first text
	416	:type atext: str
	417	:param btext: The second text
	418	:type str: str
	419	:return: The index, or None if there are no differences within the range
	420	:rtype: int or NoneType
	421	"""
	422	length = len(atext)
	423	if len(btext) < length:
	424	length = len(btext)
	425	for i in range(length):
	426	if atext[i] != btext[i]:
	427	return i;
	428	return None
	429
1185.82.123 by Aaron Bentley Cleanups to prepare for review	430
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    The patch header is read as soon as this function is called, so any
    error raised by get_patch_names surfaces here rather than during
    iteration of the result.

    :param orig_lines: The unpatched lines.
    :param patch_lines: The lines of the patch, starting with its header
        lines.
    :return: the iterator produced by iter_patched_from_hunks
    """
    normalized = iter_lines_handle_nl(iter(patch_lines))
    # Called only to consume the header lines; the names are not needed.
    get_patch_names(normalized)
    hunks = iter_hunks(normalized)
    return iter_patched_from_hunks(orig_lines, hunks)
	438
3363.18.4 by Aaron Bentley Updates from review (and a doc update)	439
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines, or None when there is no
        original text (handled the same as an empty sequence).
    :param hunks: An iterable of Hunk instances.
    :raises PatchConflict: if an original line differs from the context or
        removed line the patch expects at that position, or if the original
        text ends before a line that the patch refers to.
    """
    # Text of every hunk line processed so far; reported in conflicts.
    seen_patch = []
    # 1-based number of the next original line to be consumed.
    line_no = 1
    # Normalise None to an empty iterator so that a hunk needing original
    # lines produces a PatchConflict rather than an AttributeError.
    if orig_lines is None:
        orig_lines = ()
    orig_iter = iter(orig_lines)

    def next_orig_line():
        # Fetch the next original line.  Running out of original text is a
        # conflict; letting StopIteration escape from inside the generator
        # would silently truncate the output (or become a RuntimeError on
        # interpreters implementing PEP 479).
        try:
            return next(orig_iter)
        except StopIteration:
            raise PatchConflict(line_no, "", "".join(seen_patch))

    for hunk in hunks:
        # Copy through the untouched lines that precede this hunk.
        while line_no < hunk.orig_pos:
            yield next_orig_line()
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                # Inserted lines do not consume any original text.
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                orig_line = next_orig_line()
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line,
                                        "".join(seen_patch))
                # Context lines are kept; removed lines are dropped.
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                else:
                    if not isinstance(hunk_line, RemoveLine):
                        raise AssertionError(hunk_line)
                line_no += 1
    # Whatever follows the last hunk is passed through unchanged.
    for line in orig_iter:
        yield line