1
# Copyright (C) 2004 - 2006 Aaron Bentley, Canonical Ltd
1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
2
2
# <aaron.bentley@utoronto.ca>
4
4
# This program is free software; you can redistribute it and/or modify
14
14
# You should have received a copy of the GNU General Public License
15
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
binary_files_re = 'Binary files (.*) and (.*) differ\n'
23
class BinaryFiles(Exception):
    """Signal that a binary-files notice was found instead of a patch header.

    get_patch_names() raises this when a line matches binary_files_re,
    passing along the two file names captured from that line.

    :ivar orig_name: first file name captured from the notice
    :ivar mod_name: second file name captured from the notice
    """

    def __init__(self, orig_name, mod_name):
        self.orig_name = orig_name
        self.mod_name = mod_name
        Exception.__init__(self, 'Binary files section encountered.')
19
31
class PatchSyntax(Exception):
57
69
def get_patch_names(iter_lines):
59
71
line = iter_lines.next()
72
match = re.match(binary_files_re, line)
74
raise BinaryFiles(match.group(1), match.group(2))
60
75
if not line.startswith("--- "):
61
76
raise MalformedPatchHeader("No orig name", line)
92
107
range = int(range)
93
108
return (pos, range)
96
111
def hunk_from_header(line):
97
if not line.startswith("@@") or not line.endswith("@@\n") \
99
raise MalformedHunkHeader("Does not start and end with @@.", line)
113
matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
115
raise MalformedHunkHeader("Does not match format.", line)
101
(orig, mod) = line[3:-4].split(" ")
117
(orig, mod) = matches.group(1).split(" ")
118
except (ValueError, IndexError), e:
103
119
raise MalformedHunkHeader(str(e), line)
104
120
if not orig.startswith('-') or not mod.startswith('+'):
105
121
raise MalformedHunkHeader("Positions don't start with + or -.", line)
107
123
(orig_pos, orig_range) = parse_range(orig[1:])
108
124
(mod_pos, mod_range) = parse_range(mod[1:])
125
except (ValueError, IndexError), e:
110
126
raise MalformedHunkHeader(str(e), line)
111
127
if mod_range < 0 or orig_range < 0:
112
128
raise MalformedHunkHeader("Hunk range is negative", line)
113
return Hunk(orig_pos, orig_range, mod_pos, mod_range)
129
tail = matches.group(3)
130
return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
162
179
return InsertLine(line[1:])
163
180
elif line.startswith("-"):
164
181
return RemoveLine(line[1:])
168
183
raise MalformedLine("Unknown line type", line)
173
def __init__(self, orig_pos, orig_range, mod_pos, mod_range):
188
def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
174
189
self.orig_pos = orig_pos
175
190
self.orig_range = orig_range
176
191
self.mod_pos = mod_pos
177
192
self.mod_range = mod_range
180
196
def get_header(self):
181
return "@@ -%s +%s @@\n" % (self.range_str(self.orig_pos,
183
self.range_str(self.mod_pos,
197
if self.tail is None:
200
tail_str = ' ' + self.tail
201
return "@@ -%s +%s @@%s\n" % (self.range_str(self.orig_pos,
203
self.range_str(self.mod_pos,
186
207
def range_str(self, pos, range):
187
208
"""Return a file range, special-casing for 1-line files.
212
233
return self.shift_to_mod_lines(pos)
214
235
def shift_to_mod_lines(self, pos):
215
assert (pos >= self.orig_pos-1 and pos <= self.orig_pos+self.orig_range)
216
236
position = self.orig_pos-1
218
238
for line in self.lines:
233
def iter_hunks(iter_lines):
253
def iter_hunks(iter_lines, allow_dirty=False):
255
:arg iter_lines: iterable of lines to parse for hunks
256
:kwarg allow_dirty: If True, when we encounter something that is not
257
a hunk header when we're looking for one, assume the rest of the lines
258
are not part of the patch (comments or other junk). Default False
235
261
for line in iter_lines:
241
267
if hunk is not None:
243
hunk = hunk_from_header(line)
270
hunk = hunk_from_header(line)
271
except MalformedHunkHeader:
273
# If the line isn't a hunk header, then we've reached the end
274
# of this patch and there's "junk" at the end. Ignore the
275
# rest of this patch.
246
280
while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
291
class BinaryPatch(object):
258
292
def __init__(self, oldname, newname):
259
293
self.oldname = oldname
260
294
self.newname = newname
297
return 'Binary files %s and %s differ\n' % (self.oldname, self.newname)
300
class Patch(BinaryPatch):
302
def __init__(self, oldname, newname):
303
BinaryPatch.__init__(self, oldname, newname)
263
306
def __str__(self):
264
ret = self.get_header()
307
ret = self.get_header()
265
308
ret += "".join([str(h) for h in self.hunks])
268
311
def get_header(self):
269
312
return "--- %s\n+++ %s\n" % (self.oldname, self.newname)
272
"""Return a string of patch statistics"""
314
def stats_values(self):
315
"""Calculate the number of inserts and removes."""
275
318
for hunk in self.hunks:
279
322
elif isinstance(line, RemoveLine):
324
return (inserts, removes, len(self.hunks))
327
"""Return a string of patch statistics"""
281
328
return "%i inserts, %i removes in %i hunks" % \
282
(inserts, removes, len(self.hunks))
284
331
def pos_in_mod(self, position):
285
332
newpos = position
309
def parse_patch(iter_lines):
310
(orig_name, mod_name) = get_patch_names(iter_lines)
311
patch = Patch(orig_name, mod_name)
312
for hunk in iter_hunks(iter_lines):
313
patch.hunks.append(hunk)
317
def iter_file_patch(iter_lines):
356
def parse_patch(iter_lines, allow_dirty=False):
358
:arg iter_lines: iterable of lines to parse
359
:kwarg allow_dirty: If True, allow the patch to have trailing junk.
362
iter_lines = iter_lines_handle_nl(iter_lines)
364
(orig_name, mod_name) = get_patch_names(iter_lines)
365
except BinaryFiles, e:
366
return BinaryPatch(e.orig_name, e.mod_name)
368
patch = Patch(orig_name, mod_name)
369
for hunk in iter_hunks(iter_lines, allow_dirty):
370
patch.hunks.append(hunk)
374
def iter_file_patch(iter_lines, allow_dirty=False):
376
:arg iter_lines: iterable of lines to parse for patches
377
:kwarg allow_dirty: If True, allow comments and other non-patch text
378
before the first patch. Note that the algorithm here can only find
379
such text before any patches have been found. Comments after the
380
first patch are stripped away in iter_hunks() if it is also passed
381
allow_dirty=True. Default False.
383
### FIXME: Docstring is not quite true. We allow certain comments no
384
# matter what, if they start with '===', '***', or '#'.  Someone should
385
# reexamine this logic and decide if we should include those in
386
# allow_dirty or restrict those to only being before the patch is found
387
# (as allow_dirty does).
388
regex = re.compile(binary_files_re)
320
392
for line in iter_lines:
321
393
if line.startswith('=== ') or line.startswith('*** '):
325
397
elif orig_range > 0:
326
398
if line.startswith('-') or line.startswith(' '):
328
elif line.startswith('--- '):
329
if len(saved_lines) > 0:
400
elif line.startswith('--- ') or regex.match(line):
401
if allow_dirty and beginning:
402
# Patches can have "junk" at the beginning
403
# Stripping junk from the end of patches is handled when we
406
elif len(saved_lines) > 0:
330
407
yield saved_lines
332
409
elif line.startswith('@@'):
360
def parse_patches(iter_lines, allow_dirty=False):
    """Parse every file patch found in iter_lines.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: a list with one parse_patch() result per group of lines
        yielded by iter_file_patch()
    """
    # Newline handling is done by parse_patch() for each file's lines, so
    # the raw lines are passed straight to iter_file_patch().
    return [parse_patch(iter(file_lines), allow_dirty)
            for file_lines in iter_file_patch(iter_lines, allow_dirty)]
365
449
def difference_index(atext, btext):
385
469
"""Iterate through a series of lines with a patch applied.
386
470
This handles a single file, and does exact, not fuzzy patching.
388
if orig_lines is not None:
389
orig_lines = orig_lines.__iter__()
472
patch_lines = iter_lines_handle_nl(iter(patch_lines))
473
get_patch_names(patch_lines)
474
return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
477
def iter_patched_from_hunks(orig_lines, hunks):
478
"""Iterate through a series of lines with a patch applied.
479
This handles a single file, and does exact, not fuzzy patching.
481
:param orig_lines: The unpatched lines.
482
:param hunks: An iterable of Hunk instances.
391
patch_lines = iter_lines_handle_nl(patch_lines.__iter__())
392
get_patch_names(patch_lines)
394
for hunk in iter_hunks(patch_lines):
486
if orig_lines is not None:
487
orig_lines = iter(orig_lines)
395
489
while line_no < hunk.orig_pos:
396
490
orig_line = orig_lines.next()
407
501
if isinstance(hunk_line, ContextLine):
410
assert isinstance(hunk_line, RemoveLine)
504
if not isinstance(hunk_line, RemoveLine):
505
raise AssertionError(hunk_line)
412
507
if orig_lines is not None:
413
508
for line in orig_lines: