1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
from __future__ import absolute_import
20
from bzrlib.errors import (
32
binary_files_re = 'Binary files (.*) and (.*) differ\n'
34
def get_patch_names(iter_lines):
    """Read the two file-name header lines that start a patch.

    :param iter_lines: iterator over the lines of the patch; it is advanced
        past every line that is read here
    :return: ``(orig_name, mod_name)``, the text following ``--- `` and
        ``+++ `` with trailing newlines stripped
    :raise BinaryFiles: if the first line matches ``binary_files_re``
    :raise MalformedPatchHeader: if the first line does not start with
        ``--- ``, or if the input runs out before either header line
    :raise PatchSyntax: if the second line does not start with ``+++ ``
    """
    # The builtin next() works on Python 2.6+ and Python 3 iterators alike.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)
57
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    :param textrange: The text to parse, e.g. ``"12,4"`` or ``"12"``
    :return: the position and range, as a tuple of two ints
    :raise ValueError: if a field is not an integer or there is more than
        one comma
    """
    fields = textrange.split(',')
    if len(fields) == 1:
        # A bare position with no ",range" part stands for a range of 1.
        fields.append("1")
    # Unpacking raises ValueError when there are more than two fields.
    (pos, length) = fields
    return (int(pos), int(length))
76
def hunk_from_header(line):
    """Build a Hunk from a ``@@ -pos,range +pos,range @@`` header line.

    :param line: the header line; the pattern requires a trailing newline
    :return: ``Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)`` where
        tail is the text after the closing ``@@`` (None when absent)
    :raise MalformedHunkHeader: if the line does not match the header
        pattern, does not hold exactly two space-separated ranges, the
        ranges do not start with ``-`` and ``+``, a range cannot be parsed,
        or a parsed range is negative
    """
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    # "except ... as" is accepted by Python 2.6+ as well as Python 3.
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        # Drop the leading sign before handing the numbers to parse_range.
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
99
def __init__(self, contents):
100
self.contents = contents
102
def get_str(self, leadchar):
103
if self.contents == "\n" and leadchar == " " and False:
105
if not self.contents.endswith('\n'):
106
terminator = '\n' + NO_NL
109
return leadchar + self.contents + terminator
112
class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        # str() of a hunk line is what gets joined into the hunk text.
        return self.get_str(" ")
120
class InsertLine(HunkLine):
    """A hunk line rendered with a leading ``+``."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        # str() of a hunk line is what gets joined into the hunk text.
        return self.get_str("+")
128
class RemoveLine(HunkLine):
    """A hunk line rendered with a leading ``-``."""

    def __init__(self, contents):
        HunkLine.__init__(self, contents)

    def __str__(self):
        # str() of a hunk line is what gets joined into the hunk text.
        return self.get_str("-")
135
NO_NL = '\\ No newline at end of file\n'
136
__pychecker__="no-returnvalues"
138
def parse_line(line):
    """Turn one raw hunk body line into the matching HunkLine object.

    :param line: a line from the body of a hunk, including its lead character
    :return: a ContextLine, InsertLine or RemoveLine holding the line with
        its lead character removed; a line that starts with a newline is
        kept whole as a ContextLine
    :raise MalformedLine: if the line starts with none of the known markers
    """
    if line.startswith("\n"):
        # An empty line has no lead character to strip.
        return ContextLine(line)
    line_types = {" ": ContextLine, "+": InsertLine, "-": RemoveLine}
    line_type = line_types.get(line[:1])
    if line_type is None:
        raise MalformedLine("Unknown line type", line)
    return line_type(line[1:])
153
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
154
self.orig_pos = orig_pos
155
self.orig_range = orig_range
156
self.mod_pos = mod_pos
157
self.mod_range = mod_range
161
def get_header(self):
162
if self.tail is None:
165
tail_str = ' ' + self.tail
166
return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
168
self.range_str(self.mod_pos,
172
def range_str(self, pos, range):
173
"""Return a file range, special-casing for 1-line files.
175
:param pos: The position in the file
177
:range: The range in the file
179
:return: a string in the format 1,4 except when range == pos == 1
184
return "%i,%i" % (pos, range)
187
lines = [self.get_header()]
188
for line in self.lines:
189
lines.append(str(line))
190
return "".join(lines)
192
def shift_to_mod(self, pos):
193
if pos < self.orig_pos-1:
195
elif pos > self.orig_pos+self.orig_range:
196
return self.mod_range - self.orig_range
198
return self.shift_to_mod_lines(pos)
200
def shift_to_mod_lines(self, pos):
201
position = self.orig_pos-1
203
for line in self.lines:
204
if isinstance(line, InsertLine):
206
elif isinstance(line, RemoveLine):
211
elif isinstance(line, ContextLine):
218
def iter_hunks(iter_lines, allow_dirty=False):
220
:arg iter_lines: iterable of lines to parse for hunks
221
:kwarg allow_dirty: If True, when we encounter something that is not
222
a hunk header when we're looking for one, assume the rest of the lines
223
are not part of the patch (comments or other junk). Default False
226
for line in iter_lines:
235
hunk = hunk_from_header(line)
236
except MalformedHunkHeader:
238
# If the line isn't a hunk header, then we've reached the end
239
# of this patch and there's "junk" at the end. Ignore the
240
# rest of this patch.
245
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
246
hunk_line = parse_line(iter_lines.next())
247
hunk.lines.append(hunk_line)
248
if isinstance(hunk_line, (RemoveLine, ContextLine)):
250
if isinstance(hunk_line, (InsertLine, ContextLine)):
256
class BinaryPatch(object):
257
def __init__(self, oldname, newname):
258
self.oldname = oldname
259
self.newname = newname
262
return 'Binary files %s and %s differ\n' % (self.oldname, self.newname)
265
class Patch(BinaryPatch):
267
def __init__(self, oldname, newname):
268
BinaryPatch.__init__(self, oldname, newname)
272
ret = self.get_header()
273
ret += "".join([str(h) for h in self.hunks])
276
def get_header(self):
277
return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
279
def stats_values(self):
280
"""Calculate the number of inserts and removes."""
283
for hunk in self.hunks:
284
for line in hunk.lines:
285
if isinstance(line, InsertLine):
287
elif isinstance(line, RemoveLine):
289
return (inserts, removes, len(self.hunks))
292
"""Return a string of patch statistics"""
293
return "%i inserts, %i removes in %i hunks" % \
296
def pos_in_mod(self, position):
298
for hunk in self.hunks:
299
shift = hunk.shift_to_mod(position)
305
def iter_inserted(self):
306
"""Iterates through inserted lines
308
:return: Pair of line number, line
309
:rtype: iterator of (int, InsertLine)
311
for hunk in self.hunks:
312
pos = hunk.mod_pos - 1;
313
for line in hunk.lines:
314
if isinstance(line, InsertLine):
317
if isinstance(line, ContextLine):
321
def parse_patch(iter_lines, allow_dirty=False):
    """Parse the lines of one patch into a patch object.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch when get_patch_names() raises BinaryFiles,
        otherwise a Patch whose ``hunks`` holds every hunk produced by
        iter_hunks()
    """
    iter_lines = iter_lines_handle_nl(iter_lines)
    # "except ... as" is accepted by Python 2.6+ as well as Python 3.
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines, allow_dirty):
        patch.hunks.append(hunk)
    return patch
339
def iter_file_patch(iter_lines, allow_dirty=False):
341
:arg iter_lines: iterable of lines to parse for patches
342
:kwarg allow_dirty: If True, allow comments and other non-patch text
343
before the first patch. Note that the algorithm here can only find
344
such text before any patches have been found. Comments after the
345
first patch are stripped away in iter_hunks() if it is also passed
346
allow_dirty=True. Default False.
348
### FIXME: Docstring is not quite true. We allow certain comments no
349
# matter what, if they start with '===', '***', or '#'. Someone should
350
# reexamine this logic and decide if we should include those in
351
# allow_dirty or restrict those to only being before the patch is found
352
# (as allow_dirty does).
353
regex = re.compile(binary_files_re)
357
for line in iter_lines:
358
if line.startswith('=== ') or line.startswith('*** '):
360
if line.startswith('#'):
363
if line.startswith('-') or line.startswith(' '):
365
elif line.startswith('--- ') or regex.match(line):
366
if allow_dirty and beginning:
367
# Patches can have "junk" at the beginning
368
# Stripping junk from the end of patches is handled when we
371
elif len(saved_lines) > 0:
374
elif line.startswith('@@'):
375
hunk = hunk_from_header(line)
376
orig_range = hunk.orig_range
377
saved_lines.append(line)
378
if len(saved_lines) > 0:
382
def iter_lines_handle_nl(iter_lines):
384
Iterates through lines, ensuring that lines that originally had no
385
terminating \n are produced without one. This transformation may be
386
applied at any point up until hunk line parsing, and is safe to apply
390
for line in iter_lines:
392
if not last_line.endswith('\n'):
393
raise AssertionError()
394
last_line = last_line[:-1]
396
if last_line is not None:
399
if last_line is not None:
403
def parse_patches(iter_lines, allow_dirty=False):
    """Parse every patch found in a stream of lines.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: a list holding one parse_patch() result for each group of
        lines yielded by iter_file_patch()
    """
    file_patches = iter_file_patch(iter_lines, allow_dirty)
    return [parse_patch(iter(patch_lines), allow_dirty)
            for patch_lines in file_patches]
414
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the part both texts have in common is compared, so a text that is
    a prefix of the other counts as having no differences.

    :param atext: The first text
    :param btext: The second text
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the end of the shorter text.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
433
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.

    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: the unpatched lines, passed through to
        iter_patched_from_hunks()
    :param patch_lines: iterable of the patch's lines, starting with its
        two file-name header lines
    :return: whatever iter_patched_from_hunks() returns for the hunks that
        follow the header
    """
    remaining = iter_lines_handle_nl(iter(patch_lines))
    # The names are not needed; this call only consumes the header lines.
    get_patch_names(remaining)
    hunks = iter_hunks(remaining)
    return iter_patched_from_hunks(orig_lines, hunks)
442
def iter_patched_from_hunks(orig_lines, hunks):
443
"""Iterate through a series of lines with a patch applied.
444
This handles a single file, and does exact, not fuzzy patching.
446
:param orig_lines: The unpatched lines.
447
:param hunks: An iterable of Hunk instances.
451
if orig_lines is not None:
452
orig_lines = iter(orig_lines)
454
while line_no < hunk.orig_pos:
455
orig_line = orig_lines.next()
458
for hunk_line in hunk.lines:
459
seen_patch.append(str(hunk_line))
460
if isinstance(hunk_line, InsertLine):
461
yield hunk_line.contents
462
elif isinstance(hunk_line, (ContextLine, RemoveLine)):
463
orig_line = orig_lines.next()
464
if orig_line != hunk_line.contents:
465
raise PatchConflict(line_no, orig_line, "".join(seen_patch))
466
if isinstance(hunk_line, ContextLine):
469
if not isinstance(hunk_line, RemoveLine):
470
raise AssertionError(hunk_line)
472
if orig_lines is not None:
473
for line in orig_lines: