~bzr-pqm/bzr/bzr.dev : contents of bzrlib/patches.py at revision 6379.6.7

~bzr-pqm/bzr/bzr.dev : (revision 6379.6.7)

4763.2.4 by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry.	1	# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
0.5.93 by Aaron Bentley Added patches.py	2	# <aaron.bentley@utoronto.ca>
0.5.93 by Aaron Bentley Added patches.py	3	#
2052.3.1 by John Arbash Meinel Add tests to cleanup the copyright of all source files	4	# This program is free software; you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation; either version 2 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU General Public License
	15	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	16	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6289.2.1 by Jelmer Vernooij Move the primary definition of the patches exceptions to bzrlib.errors.	17
6379.6.3 by Jelmer Vernooij Use absolute_import.	18	from __future__ import absolute_import
6379.6.3 by Jelmer Vernooij Use absolute_import.	19
6289.2.1 by Jelmer Vernooij Move the primary definition of the patches exceptions to bzrlib.errors.	20	from bzrlib.errors import (
	21	BinaryFiles,
	22	MalformedHunkHeader,
	23	MalformedLine,
	24	MalformedPatchHeader,
	25	PatchConflict,
	26	PatchSyntax,
	27	)
	28
4634.80.1 by Aaron Bentley Parse binary files.	29	import re
	30
	31
# Pattern for the one-line "Binary files X and Y differ" marker, i.e. the
# text that BinaryPatch.__str__ produces.  Group 1 is the old file name and
# group 2 the new one; both must be able to span more than one character.
binary_files_re = 'Binary files (.*) and (.*) differ\n'
	33
def get_patch_names(iter_lines):
    """Read the two file-name header lines from the start of a patch.

    :param iter_lines: iterator over the lines of a single-file patch; it is
        advanced past the header lines that are consumed
    :return: (orig_name, mod_name), taken from the "--- " and "+++ " lines
        with the four-character prefix and trailing newlines removed
    :raises BinaryFiles: if the first line matches binary_files_re
    :raises MalformedPatchHeader: if either header line is missing, or the
        first line does not start with "--- "
    :raises PatchSyntax: if the second line does not start with "+++ "
    """
    # The builtin next() is used instead of the .next() method so that any
    # iterator works; only the fetch itself can raise StopIteration, so only
    # the fetch is guarded.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)
	55
1185.82.123 by Aaron Bentley Cleanups to prepare for review	56
def parse_range(textrange):
    """Convert the textual range of a hunk header into two integers.

    A range written without a comma (for example "7") carries no explicit
    length; the length then defaults to 1.

    :param textrange: The text to parse, either "pos" or "pos,length"
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    :raises ValueError: if there are more than two comma-separated fields or
        a field is not an integer
    """
    fields = textrange.split(',')
    if len(fields) == 1:
        # Bare position: supply the implied length.
        fields.append("1")
    (start, length) = fields
    return (int(start), int(length))
	74
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	75
def hunk_from_header(line):
    """Build an empty Hunk from a hunk header line.

    The accepted form is "@@ -<orig> +<mod> @@", optionally followed by a
    space and trailing text, and terminated by a newline.  Each of <orig>
    and <mod> is a range understood by parse_range().

    :param line: the header line, including its trailing newline
    :return: a Hunk with the parsed positions and ranges, the trailing text
        (or None when absent) as its tail, and no lines
    :raises MalformedHunkHeader: if the line does not have the expected
        shape, a range is not numeric, or a range is negative
    """
    import re
    # Group 1 is everything between the @@ markers; group 3 is the optional
    # trailing text.  Both need '*' so that they can span the whole field
    # rather than a single character.
    matches = re.match(r'@@ ([^@]*) @@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
0.5.93 by Aaron Bentley Added patches.py	96
	97
	98	class HunkLine:
	99	def __init__(self, contents):
	100	self.contents = contents
	101
	102	def get_str(self, leadchar):
	103	if self.contents == "\n" and leadchar == " " and False:
	104	return "\n"
	105	if not self.contents.endswith('\n'):
	106	terminator = '\n' + NO_NL
	107	else:
	108	terminator = ''
	109	return leadchar + self.contents + terminator
	110
	111
	112	class ContextLine(HunkLine):
	113	def __init__(self, contents):
	114	HunkLine.__init__(self, contents)
	115
	116	def __str__(self):
	117	return self.get_str(" ")
	118
	119
	120	class InsertLine(HunkLine):
	121	def __init__(self, contents):
	122	HunkLine.__init__(self, contents)
	123
	124	def __str__(self):
	125	return self.get_str("+")
	126
	127
	128	class RemoveLine(HunkLine):
	129	def __init__(self, contents):
	130	HunkLine.__init__(self, contents)
	131
	132	def __str__(self):
	133	return self.get_str("-")
	134
	135	NO_NL = '\\ No newline at end of file\n'
	136	__pychecker__="no-returnvalues"
	137
	138	def parse_line(line):
	139	if line.startswith("\n"):
	140	return ContextLine(line)
	141	elif line.startswith(" "):
	142	return ContextLine(line[1:])
	143	elif line.startswith("+"):
	144	return InsertLine(line[1:])
	145	elif line.startswith("-"):
	146	return RemoveLine(line[1:])
	147	else:
	148	raise MalformedLine("Unknown line type", line)
	149	__pychecker__=""
	150
	151
	152	class Hunk:
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	153	def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
0.5.93 by Aaron Bentley Added patches.py	154	self.orig_pos = orig_pos
	155	self.orig_range = orig_range
	156	self.mod_pos = mod_pos
	157	self.mod_range = mod_range
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	158	self.tail = tail
0.5.93 by Aaron Bentley Added patches.py	159	self.lines = []
	160
	161	def get_header(self):
1551.18.6 by Aaron Bentley Add support for diff -p-style diffs to patch parser	162	if self.tail is None:
	163	tail_str = ''
	164	else:
	165	tail_str = ' ' + self.tail
	166	return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
	167	self.orig_range),
	168	self.range_str(self.mod_pos,
	169	self.mod_range),
	170	tail_str)
0.5.93 by Aaron Bentley Added patches.py	171
	172	def range_str(self, pos, range):
	173	"""Return a file range, special-casing for 1-line files.
	174
	175	:param pos: The position in the file
	176	:type pos: int
	177	:range: The range in the file
	178	:type range: int
	179	:return: a string in the format 1,4 except when range == pos == 1
	180	"""
	181	if range == 1:
	182	return "%i" % pos
	183	else:
	184	return "%i,%i" % (pos, range)
	185
	186	def __str__(self):
	187	lines = [self.get_header()]
	188	for line in self.lines:
	189	lines.append(str(line))
	190	return "".join(lines)
	191
	192	def shift_to_mod(self, pos):
	193	if pos < self.orig_pos-1:
	194	return 0
	195	elif pos > self.orig_pos+self.orig_range:
	196	return self.mod_range - self.orig_range
	197	else:
	198	return self.shift_to_mod_lines(pos)
	199
	200	def shift_to_mod_lines(self, pos):
	201	position = self.orig_pos-1
	202	shift = 0
	203	for line in self.lines:
	204	if isinstance(line, InsertLine):
	205	shift += 1
	206	elif isinstance(line, RemoveLine):
	207	if position == pos:
	208	return None
	209	shift -= 1
	210	position += 1
	211	elif isinstance(line, ContextLine):
	212	position += 1
	213	if position > pos:
	214	break
	215	return shift
	216
1185.82.123 by Aaron Bentley Cleanups to prepare for review	217
def iter_hunks(iter_lines, allow_dirty=False):
    '''
    Parse lines into Hunk objects, yielding each hunk once it is complete.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk).  Default False
    :raises MalformedHunkHeader: if a hunk header is expected but the line is
        not one, and allow_dirty is False
    '''
    # The body of a hunk is pulled from the same iterator that drives the
    # outer loop, so make sure we hold an iterator and not, say, a list.
    iter_lines = iter(iter_lines)
    hunk = None
    for line in iter_lines:
        if line == "\n":
            # A bare newline between hunks ends the current hunk, if any.
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end.  Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        # Read body lines until both sides of the hunk have as many lines as
        # the header announced.
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # The input ended in the middle of a hunk.  Stop here without
                # yielding the incomplete hunk; this is spelled out because a
                # StopIteration must not be left to escape from a generator.
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
	254
1185.82.123 by Aaron Bentley Cleanups to prepare for review	255
class BinaryPatch(object):
    """A patch entry that only records the names of two differing files."""

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def __str__(self):
        """Return the "Binary files ... differ" line for this entry."""
        names = (self.oldname, self.newname)
        return 'Binary files %s and %s differ\n' % names
4634.80.1 by Aaron Bentley Parse binary files.	263
	264
	265	class Patch(BinaryPatch):
	266
	267	def __init__(self, oldname, newname):
	268	BinaryPatch.__init__(self, oldname, newname)
0.5.93 by Aaron Bentley Added patches.py	269	self.hunks = []
	270
	271	def __str__(self):
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	272	ret = self.get_header()
0.5.93 by Aaron Bentley Added patches.py	273	ret += "".join([str(h) for h in self.hunks])
	274	return ret
	275
0.5.95 by Aaron Bentley Updated patch to match bzrtools	276	def get_header(self):
	277	return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
	278
3946.4.1 by Tim Penhey Extract out the counting of the stats values.	279	def stats_values(self):
	280	"""Calculate the number of inserts and removes."""
0.5.93 by Aaron Bentley Added patches.py	281	removes = 0
	282	inserts = 0
	283	for hunk in self.hunks:
	284	for line in hunk.lines:
	285	if isinstance(line, InsertLine):
	286	inserts+=1;
	287	elif isinstance(line, RemoveLine):
	288	removes+=1;
3946.4.1 by Tim Penhey Extract out the counting of the stats values.	289	return (inserts, removes, len(self.hunks))
	290
	291	def stats_str(self):
	292	"""Return a string of patch statistics"""
0.5.93 by Aaron Bentley Added patches.py	293	return "%i inserts, %i removes in %i hunks" % \
3946.4.1 by Tim Penhey Extract out the counting of the stats values.	294	self.stats_values()
0.5.93 by Aaron Bentley Added patches.py	295
	296	def pos_in_mod(self, position):
	297	newpos = position
	298	for hunk in self.hunks:
	299	shift = hunk.shift_to_mod(position)
	300	if shift is None:
	301	return None
	302	newpos += shift
	303	return newpos
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	304
0.5.93 by Aaron Bentley Added patches.py	305	def iter_inserted(self):
0.5.93 by Aaron Bentley Added patches.py	306	"""Iteraties through inserted lines
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	307
0.5.93 by Aaron Bentley Added patches.py	308	:return: Pair of line number, line
	309	:rtype: iterator of (int, InsertLine)
	310	"""
	311	for hunk in self.hunks:
	312	pos = hunk.mod_pos - 1;
	313	for line in hunk.lines:
	314	if isinstance(line, InsertLine):
	315	yield (pos, line)
	316	pos += 1
	317	if isinstance(line, ContextLine):
	318	pos += 1
	319
1185.82.123 by Aaron Bentley Cleanups to prepare for review	320
def parse_patch(iter_lines, allow_dirty=False):
    '''
    Parse the lines of a patch for a single file.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch when get_patch_names() reports BinaryFiles,
        otherwise a Patch whose hunks come from iter_hunks()
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        # The exception carries the two names taken from the marker line.
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines, allow_dirty):
        patch.hunks.append(hunk)
    return patch
0.5.93 by Aaron Bentley Added patches.py	337
0.5.93 by Aaron Bentley Added patches.py	338
def iter_file_patch(iter_lines, allow_dirty=False):
    '''
    Split a stream of patch lines into one list of lines per file.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch.  Note that the algorithm here can only find
        such text before any patches have been found.  Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True.  Default False.
    :return: iterator of lists of lines, one list per file patch
    '''
    ### FIXME: Docstring is not quite true.  We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    binary_marker = re.compile(binary_files_re)
    pending = []
    # Lines of the current hunk's original side that are still to come.
    # While this is positive a "--- " line is hunk content (a removed line
    # starting with "--"), not the start of a new file.
    remaining_orig = 0
    at_start = True
    for line in iter_lines:
        if line.startswith(('=== ', '*** ', '#')):
            continue
        if remaining_orig > 0:
            if line.startswith(('-', ' ')):
                remaining_orig -= 1
        elif line.startswith('--- ') or binary_marker.match(line):
            if allow_dirty and at_start:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                at_start = False
            elif pending:
                yield pending
            pending = []
        elif line.startswith('@@'):
            remaining_orig = hunk_from_header(line).orig_range
        pending.append(line)
    if pending:
        yield pending
	380
	381
def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one.  This transformation may
    be applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    Each NO_NL marker line is dropped from the output and the trailing
    newline of the line before it is removed.

    :param iter_lines: iterable of patch lines
    :return: iterator of lines
    :raises AssertionError: if a NO_NL marker has no preceding line to apply
        to, or the preceding line does not end with a newline
    """
    # Output is delayed by one line so that a following marker can still
    # change the line before it.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if last_line is None:
                # The marker is the very first line, or directly follows
                # another marker: there is nothing for it to modify.
                raise AssertionError(
                    'NO_NL marker is not preceded by a line to apply it to')
            if not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
	401
	402
def parse_patches(iter_lines, allow_dirty=False):
    '''
    Parse a stream of lines into a list with one parsed patch per file.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: list of the objects returned by parse_patch()
    '''
    patches = []
    for file_lines in iter_file_patch(iter_lines, allow_dirty):
        patches.append(parse_patch(iter(file_lines), allow_dirty))
    return patches
0.5.93 by Aaron Bentley Added patches.py	412
	413
	414	def difference_index(atext, btext):
1759.2.1 by Jelmer Vernooij Fix some types (found using aspell).	415	"""Find the indext of the first character that differs between two texts
0.5.93 by Aaron Bentley Added patches.py	416
	417	:param atext: The first text
	418	:type atext: str
	419	:param btext: The second text
	420	:type str: str
	421	:return: The index, or None if there are no differences within the range
	422	:rtype: int or NoneType
	423	"""
	424	length = len(atext)
	425	if len(btext) < length:
	426	length = len(btext)
	427	for i in range(length):
	428	if atext[i] != btext[i]:
	429	return i;
	430	return None
	431
1185.82.123 by Aaron Bentley Cleanups to prepare for review	432
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: iterable of the lines of the patch, file header
        included
    """
    remaining = iter_lines_handle_nl(iter(patch_lines))
    # Called only to consume and check the header lines; the names it
    # returns are not needed here.
    get_patch_names(remaining)
    hunks = iter_hunks(remaining)
    return iter_patched_from_hunks(orig_lines, hunks)
	440
3363.18.4 by Aaron Bentley Updates from review (and a doc update)	441
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines, or None.
    :param hunks: An iterable of Hunk instances.
    :return: iterator of the patched lines
    :raises PatchConflict: if a context or removed line of a hunk differs
        from the corresponding original line
    """
    # Rendered hunk lines seen so far; reported when a conflict is raised.
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy through the untouched lines in front of the hunk.
        while line_no < hunk.orig_pos:
            try:
                orig_line = next(orig_lines)
            except StopIteration:
                # The original text is shorter than the hunk expects: the
                # output simply ends here.  Spelled out because a
                # StopIteration must not be left to escape from a generator.
                return
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                try:
                    orig_line = next(orig_lines)
                except StopIteration:
                    # Same as above: original exhausted, output ends.
                    return
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line, "".join(seen_patch))
                # Context lines are passed through; removed lines are
                # consumed from the original without being emitted.
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                line_no += 1
    # Whatever follows the last hunk is passed through unchanged.
    if orig_lines is not None:
        for line in orig_lines:
            yield line