1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
1
# Copyright (C) 2004 - 2006 Aaron Bentley
2
2
# <aaron.bentley@utoronto.ca>
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
binary_files_re = 'Binary files (.*) and (.*) differ\n'
23
class BinaryFiles(Exception):
25
def __init__(self, orig_name, mod_name):
26
self.orig_name = orig_name
27
self.mod_name = mod_name
28
Exception.__init__(self, 'Binary files section encountered.')
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31
19
class PatchSyntax(Exception):
69
57
def get_patch_names(iter_lines):
71
59
line = iter_lines.next()
72
match = re.match(binary_files_re, line)
74
raise BinaryFiles(match.group(1), match.group(2))
75
60
if not line.startswith("--- "):
76
61
raise MalformedPatchHeader("No orig name", line)
107
92
range = int(range)
108
93
return (pos, range)
111
96
def hunk_from_header(line):
113
matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
115
raise MalformedHunkHeader("Does not match format.", line)
97
if not line.startswith("@@") or not line.endswith("@@\n") \
99
raise MalformedHunkHeader("Does not start and end with @@.", line)
117
(orig, mod) = matches.group(1).split(" ")
118
except (ValueError, IndexError), e:
101
(orig, mod) = line[3:-4].split(" ")
119
103
raise MalformedHunkHeader(str(e), line)
120
104
if not orig.startswith('-') or not mod.startswith('+'):
121
105
raise MalformedHunkHeader("Positions don't start with + or -.", line)
123
107
(orig_pos, orig_range) = parse_range(orig[1:])
124
108
(mod_pos, mod_range) = parse_range(mod[1:])
125
except (ValueError, IndexError), e:
126
110
raise MalformedHunkHeader(str(e), line)
127
111
if mod_range < 0 or orig_range < 0:
128
112
raise MalformedHunkHeader("Hunk range is negative", line)
129
tail = matches.group(3)
130
return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
113
return Hunk(orig_pos, orig_range, mod_pos, mod_range)
179
162
return InsertLine(line[1:])
180
163
elif line.startswith("-"):
181
164
return RemoveLine(line[1:])
183
168
raise MalformedLine("Unknown line type", line)
188
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
173
def __init__(self, orig_pos, orig_range, mod_pos, mod_range):
189
174
self.orig_pos = orig_pos
190
175
self.orig_range = orig_range
191
176
self.mod_pos = mod_pos
192
177
self.mod_range = mod_range
196
180
def get_header(self):
197
if self.tail is None:
200
tail_str = ' ' + self.tail
201
return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
203
self.range_str(self.mod_pos,
181
return "@@ -%s +%s @@\n" % (self.range_str(self.orig_pos,
183
self.range_str(self.mod_pos,
207
186
def range_str(self, pos, range):
208
187
"""Return a file range, special-casing for 1-line files.
253
def iter_hunks(iter_lines, allow_dirty=False):
255
:arg iter_lines: iterable of lines to parse for hunks
256
:kwarg allow_dirty: If True, when we encounter something that is not
257
a hunk header when we're looking for one, assume the rest of the lines
258
are not part of the patch (comments or other junk). Default False
233
def iter_hunks(iter_lines):
261
235
for line in iter_lines:
267
241
if hunk is not None:
270
hunk = hunk_from_header(line)
271
except MalformedHunkHeader:
273
# If the line isn't a hunk header, then we've reached the end
274
# of this patch and there's "junk" at the end. Ignore the
275
# rest of this patch.
243
hunk = hunk_from_header(line)
280
246
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
291
class BinaryPatch(object):
292
258
def __init__(self, oldname, newname):
293
259
self.oldname = oldname
294
260
self.newname = newname
297
return 'Binary files %s and %s differ\n' % (self.oldname, self.newname)
300
class Patch(BinaryPatch):
302
def __init__(self, oldname, newname):
303
BinaryPatch.__init__(self, oldname, newname)
306
263
def __str__(self):
307
ret = self.get_header()
264
ret = self.get_header()
308
265
ret += "".join([str(h) for h in self.hunks])
311
268
def get_header(self):
312
269
return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
314
def stats_values(self):
315
"""Calculate the number of inserts and removes."""
272
"""Return a string of patch statistics"""
318
275
for hunk in self.hunks:
322
279
elif isinstance(line, RemoveLine):
324
return (inserts, removes, len(self.hunks))
327
"""Return a string of patch statistics"""
328
281
return "%i inserts, %i removes in %i hunks" % \
282
(inserts, removes, len(self.hunks))
331
284
def pos_in_mod(self, position):
332
285
newpos = position
356
def parse_patch(iter_lines, allow_dirty=False):
358
:arg iter_lines: iterable of lines to parse
359
:kwarg allow_dirty: If True, allow the patch to have trailing junk.
362
iter_lines = iter_lines_handle_nl(iter_lines)
364
(orig_name, mod_name) = get_patch_names(iter_lines)
365
except BinaryFiles, e:
366
return BinaryPatch(e.orig_name, e.mod_name)
368
patch = Patch(orig_name, mod_name)
369
for hunk in iter_hunks(iter_lines, allow_dirty):
370
patch.hunks.append(hunk)
374
def iter_file_patch(iter_lines, allow_dirty=False):
376
:arg iter_lines: iterable of lines to parse for patches
377
:kwarg allow_dirty: If True, allow comments and other non-patch text
378
before the first patch. Note that the algorithm here can only find
379
such text before any patches have been found. Comments after the
380
first patch are stripped away in iter_hunks() if it is also passed
381
allow_dirty=True. Default False.
383
### FIXME: Docstring is not quite true. We allow certain comments no
384
# matter what, If they startwith '===', '***', or '#' Someone should
385
# reexamine this logic and decide if we should include those in
386
# allow_dirty or restrict those to only being before the patch is found
387
# (as allow_dirty does).
388
regex = re.compile(binary_files_re)
309
def parse_patch(iter_lines):
310
(orig_name, mod_name) = get_patch_names(iter_lines)
311
patch = Patch(orig_name, mod_name)
312
for hunk in iter_hunks(iter_lines):
313
patch.hunks.append(hunk)
317
def iter_file_patch(iter_lines):
392
319
for line in iter_lines:
393
320
if line.startswith('=== ') or line.startswith('*** '):
395
if line.startswith('#'):
398
if line.startswith('-') or line.startswith(' '):
400
elif line.startswith('--- ') or regex.match(line):
401
if allow_dirty and beginning:
402
# Patches can have "junk" at the beginning
403
# Stripping junk from the end of patches is handled when we
406
elif len(saved_lines) > 0:
322
elif line.startswith('--- '):
323
if len(saved_lines) > 0:
407
324
yield saved_lines
409
elif line.startswith('@@'):
410
hunk = hunk_from_header(line)
411
orig_range = hunk.orig_range
412
326
saved_lines.append(line)
413
327
if len(saved_lines) > 0:
414
328
yield saved_lines
438
def parse_patches(iter_lines, allow_dirty=False):
440
:arg iter_lines: iterable of lines to parse for patches
441
:kwarg allow_dirty: If True, allow text that's not part of the patch at
442
selected places. This includes comments before and after a patch
443
for instance. Default False.
445
return [parse_patch(f.__iter__(), allow_dirty) for f in
446
iter_file_patch(iter_lines, allow_dirty)]
351
def parse_patches(iter_lines):
352
iter_lines = iter_lines_handle_nl(iter_lines)
353
return [parse_patch(f.__iter__()) for f in iter_file_patch(iter_lines)]
449
356
def difference_index(atext, btext):
450
"""Find the indext of the first character that differs between two texts
357
"""Find the indext of the first character that differs betweeen two texts
452
359
:param atext: The first text
469
376
"""Iterate through a series of lines with a patch applied.
470
377
This handles a single file, and does exact, not fuzzy patching.
472
patch_lines = iter_lines_handle_nl(iter(patch_lines))
379
if orig_lines is not None:
380
orig_lines = orig_lines.__iter__()
382
patch_lines = iter_lines_handle_nl(patch_lines.__iter__())
473
383
get_patch_names(patch_lines)
474
return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
477
def iter_patched_from_hunks(orig_lines, hunks):
478
"""Iterate through a series of lines with a patch applied.
479
This handles a single file, and does exact, not fuzzy patching.
481
:param orig_lines: The unpatched lines.
482
:param hunks: An iterable of Hunk instances.
486
if orig_lines is not None:
487
orig_lines = iter(orig_lines)
385
for hunk in iter_hunks(patch_lines):
489
386
while line_no < hunk.orig_pos:
490
387
orig_line = orig_lines.next()
501
398
if isinstance(hunk_line, ContextLine):
504
if not isinstance(hunk_line, RemoveLine):
505
raise AssertionError(hunk_line)
401
assert isinstance(hunk_line, RemoveLine)
507
403
if orig_lines is not None:
508
404
for line in orig_lines: