# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
# <aaron.bentley@utoronto.ca>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
import re

from bzrlib.errors import (
    BinaryFiles,
    MalformedHunkHeader,
    MalformedLine,
    MalformedPatchHeader,
    PatchConflict,
    PatchSyntax,
    )
# Pattern for the single line diff emits in place of hunks when the two files
# are binary; group 1 is the original name, group 2 the modified name.
binary_files_re = 'Binary files (.*) and (.*) differ\n'
def get_patch_names(iter_lines):
    """Read the two file-name header lines from the start of a patch.

    :param iter_lines: iterator over the lines of a single-file patch
    :return: the original and modified names, as a tuple
    :raise BinaryFiles: if the first line matches binary_files_re
    :raise MalformedPatchHeader: if the first line does not start with
        "--- ", or if the input ends before either header line
    :raise PatchSyntax: if the second line does not start with "+++ "
    """
    # Only the read itself is guarded, so that a StopIteration cannot be
    # confused with anything raised by the header checks below.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    A range written as a single number (no comma) has an implied length of 1.

    :param textrange: The text to parse
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    :raise ValueError: if the text is not one or two comma-separated integers
    """
    # NOTE(review): everything after the split was missing from the damaged
    # source; rebuilt from the docstring and from Hunk.range_str, which
    # omits the length when it is 1.
    tmp = textrange.split(',')
    if len(tmp) == 1:
        pos = tmp[0]
        length = "1"
    else:
        # More than two fields raises ValueError here, which
        # hunk_from_header converts into MalformedHunkHeader.
        (pos, length) = tmp
    return (int(pos), int(length))
def hunk_from_header(line):
    """Build an empty Hunk from a "@@ -a,b +c,d @@ tail" header line.

    :param line: the header line, including its trailing newline
    :return: a Hunk carrying the parsed positions, ranges and optional tail
    :raise MalformedHunkHeader: if the line does not match the header format,
        the positions are not prefixed with "-" and "+", a range cannot be
        parsed, or a range is negative
    """
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Group 3 is the text after the closing "@@"; None when absent.
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
class HunkLine(object):
    """One line of a hunk body, stored without its leading marker character.

    NOTE(review): the class header was missing from the damaged source; the
    name comes from the subclasses, which call HunkLine.__init__.
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Render the line as patch text.

        :param leadchar: the marker to prefix (" ", "+" or "-")
        :return: leadchar plus the contents; when the contents lack a
            trailing newline, a newline and the NO_NL marker are appended
        """
        # The original also had a branch guarded by "... and False", which
        # could never run; it has been dropped.
        if not self.contents.endswith('\n'):
            terminator = '\n' + NO_NL
        else:
            terminator = ''
        return leadchar + self.contents + terminator
class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        return self.get_str(" ")
class InsertLine(HunkLine):
    """A hunk line rendered with a leading "+"."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        return self.get_str("+")
class RemoveLine(HunkLine):
    """A hunk line rendered with a leading "-"."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        return self.get_str("-")
# Marker line that follows a hunk line whose text has no trailing newline.
NO_NL = '\\ No newline at end of file\n'
# pychecker option string; presumably silences its return-value check for
# parse_line below -- verify against the pychecker documentation.
__pychecker__="no-returnvalues"
def parse_line(line):
    """Turn one raw hunk body line into a HunkLine subclass instance.

    :param line: the raw line, including its leading marker character
    :return: a ContextLine, InsertLine or RemoveLine holding the text after
        the marker
    :raise MalformedLine: if the line starts with none of "\\n", " ", "+"
        or "-"
    """
    # A bare newline is accepted as an empty context line; there is no
    # marker to strip, so the line is kept whole.
    if line.startswith("\n"):
        return ContextLine(line)
    elif line.startswith(" "):
        return ContextLine(line[1:])
    elif line.startswith("+"):
        return InsertLine(line[1:])
    elif line.startswith("-"):
        return RemoveLine(line[1:])
    else:
        raise MalformedLine("Unknown line type", line)
class Hunk(object):
    """One hunk of a unified diff: a header plus a list of HunkLine objects.

    NOTE(review): the class header, the tail/lines initialisation, __str__'s
    def line and most of shift_to_mod_lines were missing from the damaged
    source.  They were rebuilt from how the rest of the module uses Hunk;
    confirm against a pristine copy.
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the "@@ -a,b +c,d @@ tail" header line for this hunk."""
        if self.tail is None:
            tail_str = ''
        else:
            tail_str = ' ' + self.tail
        return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
                                                     self.orig_range),
                                      self.range_str(self.mod_pos,
                                                     self.mod_range),
                                      tail_str)

    def range_str(self, pos, range):
        """Return a file range, special-casing for 1-line ranges.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: a string in the format 1,4, or just the position when
            range == 1
        """
        if range == 1:
            return "%i" % pos
        else:
            return "%i,%i" % (pos, range)

    def __str__(self):
        lines = [self.get_header()]
        for line in self.lines:
            lines.append(str(line))
        return "".join(lines)

    def shift_to_mod(self, pos):
        """Return how far this hunk shifts original position pos.

        :return: 0 if pos lies before the hunk, the hunk's net size change
            if pos lies after it, otherwise the result of
            shift_to_mod_lines (which may be None)
        """
        if pos < self.orig_pos-1:
            return 0
        elif pos > self.orig_pos+self.orig_range:
            return self.mod_range - self.orig_range
        else:
            return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the hunk lines to find the shift for a position inside it.

        :return: the accumulated shift, or None when the line at pos is
            removed by this hunk
        """
        position = self.orig_pos-1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
def iter_hunks(iter_lines, allow_dirty=False):
    '''Yield the Hunk objects parsed from the body of a single-file patch.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk).  Default False
    :raise MalformedHunkHeader: if a non-header line is found where a header
        is expected and allow_dirty is False
    '''
    hunk = None
    for line in iter_lines:
        if line == "\n":
            # A blank line between hunks flushes the pending hunk.
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end.  Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # Input ended in the middle of a hunk.  Stop quietly, as a
                # leaked StopIteration used to; under PEP 479 letting it
                # escape would become a RuntimeError.
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
class BinaryPatch(object):
    """A patch between two binary files; it records only the two names."""

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def __str__(self):
        return 'Binary files %s and %s differ\n' % (self.oldname, self.newname)
class Patch(BinaryPatch):
    """A textual patch for one file: two names plus a list of hunks."""

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def __str__(self):
        ret = self.get_header()
        ret += "".join([str(h) for h in self.hunks])
        return ret

    def get_header(self):
        """Return the "--- old" / "+++ new" header lines."""
        return "--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: the insert count, remove count and hunk count, as a tuple
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original file to one in the modified file.

        :return: the shifted position, or None if any hunk reports None for
            this position
        """
        newpos = position
        for hunk in self.hunks:
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
def parse_patch(iter_lines, allow_dirty=False):
    '''Parse the lines of a single-file patch into a patch object.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch if the header reports binary files, otherwise a
        Patch with its hunks filled in
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines, allow_dirty):
        patch.hunks.append(hunk)
    return patch
def iter_file_patch(iter_lines, allow_dirty=False):
    '''Split a multi-file patch into one list of lines per file.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch.  Note that the algorithm here can only find
        such text before any patches have been found.  Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True.  Default False.
    :return: iterator of lists of lines
    '''
    ### FIXME: Docstring is not quite true.  We allow certain comments no
    # matter what, if they start with '===', '***', or '#'.  Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    regex = re.compile(binary_files_re)
    saved_lines = []
    # Original-side lines still expected in the current hunk; while it is
    # positive a line starting with '---' is hunk content, not a new header.
    orig_range = 0
    beginning = True
    for line in iter_lines:
        if line.startswith('=== ') or line.startswith('*** '):
            continue
        if line.startswith('#'):
            continue
        elif orig_range > 0:
            if line.startswith('-') or line.startswith(' '):
                orig_range -= 1
        elif line.startswith('--- ') or regex.match(line):
            if allow_dirty and beginning:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                beginning = False
            elif len(saved_lines) > 0:
                yield saved_lines
            saved_lines = []
        elif line.startswith('@@'):
            hunk = hunk_from_header(line)
            orig_range = hunk.orig_range
        saved_lines.append(line)
    if len(saved_lines) > 0:
        yield saved_lines
def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating \\n are produced without one.  This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: iterable of patch lines
    :return: iterator of lines with each NO_NL marker removed and the line
        before it stripped of its trailing newline
    :raise AssertionError: if the line before a NO_NL marker does not end
        with a newline
    """
    # Lines are emitted one step late so that a following NO_NL marker can
    # still modify the line before it.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
def parse_patches(iter_lines, allow_dirty=False):
    '''Parse a multi-file patch into a list of patch objects.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: list of the objects returned by parse_patch, one per file
    '''
    return [parse_patch(iter(f), allow_dirty) for f in
            iter_file_patch(iter_lines, allow_dirty)]
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the overlapping prefix (the length of the shorter text) is compared.

    :param atext: The first text
    :type atext: str
    :param btext: The second text
    :type btext: str
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip stops at the shorter text, which is the range to compare.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: The lines of the patch, header included.
    :return: whatever iter_patched_from_hunks returns for the parsed hunks
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # Called only to consume (and validate) the two header lines; the names
    # themselves are not needed here.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
440
def iter_patched_from_hunks(orig_lines, hunks):
441
"""Iterate through a series of lines with a patch applied.
442
This handles a single file, and does exact, not fuzzy patching.
444
:param orig_lines: The unpatched lines.
445
:param hunks: An iterable of Hunk instances.
449
if orig_lines is not None:
450
orig_lines = iter(orig_lines)
452
while line_no < hunk.orig_pos:
453
orig_line = orig_lines.next()
456
for hunk_line in hunk.lines:
457
seen_patch.append(str(hunk_line))
458
if isinstance(hunk_line, InsertLine):
459
yield hunk_line.contents
460
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
461
orig_line = orig_lines.next()
462
if orig_line != hunk_line.contents:
463
raise PatchConflict(line_no, orig_line, "".join(seen_patch))
464
if isinstance(hunk_line, ContextLine):
467
if not isinstance(hunk_line, RemoveLine):
468
raise AssertionError(hunk_line)
470
if orig_lines is not None:
471
for line in orig_lines: