~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2005-2010 Aaron Bentley, Canonical Ltd
0.5.93 by Aaron Bentley
Added patches.py
2
# <aaron.bentley@utoronto.ca>
3
#
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6289.2.1 by Jelmer Vernooij
Move the primary definition of the patches exceptions to bzrlib.errors.
17
18
from bzrlib.errors import (
19
    BinaryFiles,
20
    MalformedHunkHeader,
21
    MalformedLine,
22
    MalformedPatchHeader,
23
    PatchConflict,
24
    PatchSyntax,
25
    )
26
4634.80.1 by Aaron Bentley
Parse binary files.
27
import re
28
29
4634.98.1 by Aaron Bentley
Improve patch binary section handling.
30
# Pattern for the single line emitted in place of a text diff when the two
# files are binary; the two groups capture the old and new file names.
binary_files_re = 'Binary files (.*) and (.*) differ\n'
31
0.5.93 by Aaron Bentley
Added patches.py
32
def get_patch_names(iter_lines):
    """Read the two header lines of a patch and return the file names.

    Exactly two lines are consumed from iter_lines when the header is well
    formed; fewer if an error is raised first.

    :param iter_lines: an *iterator* (not merely an iterable) of patch lines,
        positioned at the "--- " line.  It is advanced in place so the
        caller can keep reading hunks from it afterwards.
    :return: (orig_name, mod_name), each with its trailing newline removed
    :raises BinaryFiles: if the first line matches binary_files_re
    :raises MalformedPatchHeader: if either line is missing, or the first
        line does not start with "--- "
    :raises PatchSyntax: if the second line does not start with "+++ "
    """
    # Only the next() calls can raise StopIteration, so keep the try bodies
    # limited to them.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match(binary_files_re, line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        raise PatchSyntax("No mod name")
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)
53
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
54
0.5.93 by Aaron Bentley
Added patches.py
55
def parse_range(textrange):
    """Split a hunk-header range such as "12,5" or "12" into two ints.

    A range written without a comma has an implied length of 1.

    :param textrange: The text to parse
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    """
    parts = textrange.split(',')
    if len(parts) == 1:
        # "N" is shorthand for "N,1"
        parts.append("1")
    # Anything other than exactly two fields fails here with ValueError.
    start, length = parts
    return (int(start), int(length))
72
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
73
0.5.93 by Aaron Bentley
Added patches.py
74
def hunk_from_header(line):
    """Build an empty Hunk from a "@@ -a,b +c,d @@ tail" header line.

    :param line: the header line, including its trailing newline
    :return: a Hunk carrying the parsed positions and ranges, plus any text
        that followed the closing "@@" (None when there was none).  Its
        lines list is empty.
    :raises MalformedHunkHeader: if the line does not match the header
        format, the two ranges are not prefixed with "-" and "+", a range
        is not numeric, or a range is negative
    """
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        # Exactly two space-separated fields are expected between the @@s.
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
0.5.93 by Aaron Bentley
Added patches.py
94
95
96
class HunkLine:
    """Base class for a single line inside a hunk.

    contents holds the line text without the leading marker character.
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Render the line as patch text.

        :param leadchar: the marker character to put in front of the contents
        :return: leadchar + contents; when contents has no trailing newline,
            a newline and the NO_NL marker line are appended
        """
        if self.contents.endswith('\n'):
            return leadchar + self.contents
        return leadchar + self.contents + '\n' + NO_NL
108
109
110
class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    # Construction is inherited unchanged from HunkLine.

    def __str__(self):
        return self.get_str(" ")
116
117
118
class InsertLine(HunkLine):
    """A hunk line rendered with a leading "+"."""

    # Construction is inherited unchanged from HunkLine.

    def __str__(self):
        return self.get_str("+")
124
125
126
class RemoveLine(HunkLine):
    """A hunk line rendered with a leading "-"."""

    # Construction is inherited unchanged from HunkLine.

    def __str__(self):
        return self.get_str("-")
132
133
NO_NL = '\\ No newline at end of file\n'
134
__pychecker__="no-returnvalues"
135
136
def parse_line(line):
    """Turn one raw hunk line into the matching HunkLine subclass.

    :param line: a line from the body of a hunk
    :return: a ContextLine, InsertLine or RemoveLine.  A line that begins
        with a newline becomes a ContextLine holding the whole line; in
        every other case the first character is the marker and is dropped.
    :raises MalformedLine: if the line starts with none of "\\n", " ",
        "+" or "-"
    """
    if line.startswith("\n"):
        return ContextLine(line)
    markers = ((" ", ContextLine), ("+", InsertLine), ("-", RemoveLine))
    for lead, line_class in markers:
        if line.startswith(lead):
            return line_class(line[1:])
    raise MalformedLine("Unknown line type", line)
147
# NOTE(review): presumably restores pychecker's default options after the
# "no-returnvalues" setting made before parse_line -- confirm.
__pychecker__=""
148
149
150
class Hunk:
    """One hunk of a patch: header numbers, optional tail text, and lines.

    lines starts empty and is filled by whoever parses the hunk body.
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the "@@ -a,b +c,d @@[ tail]" header line for this hunk."""
        suffix = '' if self.tail is None else ' ' + self.tail
        orig_part = self.range_str(self.orig_pos, self.orig_range)
        mod_part = self.range_str(self.mod_pos, self.mod_range)
        return "@@ -%s +%s @@%s\n" % (orig_part, mod_part, suffix)

    def range_str(self, pos, range):
        """Format a position/range pair for a hunk header.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: "pos" alone when range is 1, otherwise "pos,range"
        """
        if range != 1:
            return "%i,%i" % (pos, range)
        return "%i" % pos

    def __str__(self):
        """Return the header followed by every line rendered with str()."""
        pieces = [self.get_header()]
        pieces.extend([str(entry) for entry in self.lines])
        return "".join(pieces)

    def shift_to_mod(self, pos):
        """Return the offset this hunk contributes for position pos.

        :return: 0 when pos is below orig_pos - 1; mod_range - orig_range
            when pos is above orig_pos + orig_range; otherwise whatever
            shift_to_mod_lines returns (which may be None)
        """
        if pos < self.orig_pos - 1:
            return 0
        if pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the hunk lines, accumulating the offset up to pos.

        Inserted lines add one to the offset; removed lines subtract one.
        Removed and context lines advance the running position, which
        starts at orig_pos - 1; the walk stops once it passes pos.

        :return: the accumulated offset, or None if a removed line is met
            while the running position equals pos
        """
        cursor = self.orig_pos - 1
        offset = 0
        for entry in self.lines:
            if isinstance(entry, InsertLine):
                offset += 1
            elif isinstance(entry, RemoveLine):
                if cursor == pos:
                    return None
                offset -= 1
                cursor += 1
            elif isinstance(entry, ContextLine):
                cursor += 1
            if cursor > pos:
                break
        return offset
214
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
215
5016.3.1 by Toshio Kuratomi
iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.
216
def iter_hunks(iter_lines, allow_dirty=False):
    '''Parse lines into Hunk objects, yielding each hunk once it is complete.

    Lines consisting only of a newline that appear between hunks are
    skipped.

    :arg iter_lines: iterable of lines to parse for hunks
    :kwarg allow_dirty: If True, when we encounter something that is not
        a hunk header when we're looking for one, assume the rest of the lines
        are not part of the patch (comments or other junk).  Default False
    :raises MalformedHunkHeader: if a hunk header is expected but the line
        is not one and allow_dirty is False
    '''
    # The header loop and the body loop must share one iterator.
    iter_lines = iter(iter_lines)
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        try:
            hunk = hunk_from_header(line)
        except MalformedHunkHeader:
            if allow_dirty:
                # If the line isn't a hunk header, then we've reached the end
                # of this patch and there's "junk" at the end.  Ignore the
                # rest of this patch.
                return
            raise
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # Input ended in the middle of a hunk.  Stop here without
                # yielding the partial hunk; a StopIteration must not be
                # allowed to escape a generator body (PEP 479).
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
252
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
253
4634.80.1 by Aaron Bentley
Parse binary files.
254
class BinaryPatch(object):
    """A patch entry that records only the old and new file names."""

    def __init__(self, oldname, newname):
        self.oldname, self.newname = oldname, newname

    def __str__(self):
        """Return the "Binary files ... differ" line for these names."""
        names = (self.oldname, self.newname)
        return 'Binary files %s and %s differ\n' % names
4634.80.1 by Aaron Bentley
Parse binary files.
261
262
263
class Patch(BinaryPatch):
    """A text patch: old/new file names plus a list of hunks."""

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def __str__(self):
        """Return the header followed by every hunk rendered with str()."""
        body = "".join([str(hunk) for hunk in self.hunks])
        return self.get_header() + body

    def get_header(self):
        """Return the "--- old" / "+++ new" header lines."""
        return "--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: (inserts, removes, number of hunks)
        """
        inserted = 0
        removed = 0
        for hunk in self.hunks:
            for entry in hunk.lines:
                if isinstance(entry, InsertLine):
                    inserted += 1
                elif isinstance(entry, RemoveLine):
                    removed += 1
        return (inserted, removed, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        counts = self.stats_values()
        return "%i inserts, %i removes in %i hunks" % counts

    def pos_in_mod(self, position):
        """Add every hunk's shift_to_mod(position) to position.

        :return: the adjusted position, or None as soon as any hunk's
            shift_to_mod returns None
        """
        adjusted = position
        for hunk in self.hunks:
            delta = hunk.shift_to_mod(position)
            if delta is None:
                return None
            adjusted += delta
        return adjusted

    def iter_inserted(self):
        """Iterates through inserted lines

        The counter for each hunk starts at mod_pos - 1 and advances on
        every inserted or context line.

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            line_number = hunk.mod_pos - 1
            for entry in hunk.lines:
                if isinstance(entry, InsertLine):
                    yield (line_number, entry)
                    line_number += 1
                if isinstance(entry, ContextLine):
                    line_number += 1
317
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
318
5016.3.1 by Toshio Kuratomi
iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.
319
def parse_patch(iter_lines, allow_dirty=False):
    '''Parse the lines of a single-file patch.

    The lines are first passed through iter_lines_handle_nl, then the
    header names are read and the remaining lines parsed as hunks.

    :arg iter_lines: iterable of lines to parse
    :kwarg allow_dirty: If True, allow the patch to have trailing junk.
        Default False
    :return: a BinaryPatch if the header reading raised BinaryFiles,
        otherwise a Patch holding every hunk found
    '''
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    patch.hunks.extend(iter_hunks(iter_lines, allow_dirty))
    return patch
0.5.93 by Aaron Bentley
Added patches.py
335
336
5016.3.1 by Toshio Kuratomi
iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.
337
def iter_file_patch(iter_lines, allow_dirty=False):
    '''Split a stream of lines into one list of lines per file patch.

    A new group begins at each line that starts with "--- " or matches
    binary_files_re, unless that line falls inside a hunk body that is
    still being counted off.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow comments and other non-patch text
        before the first patch.  Note that the algorithm here can only find
        such text before any patches have been found.  Comments after the
        first patch are stripped away in iter_hunks() if it is also passed
        allow_dirty=True.  Default False.
    '''
    ### FIXME: Docstring is not quite true.  We allow certain comments no
    # matter what, If they startwith '===', '***', or '#' Someone should
    # reexamine this logic and decide if we should include those in
    # allow_dirty or restrict those to only being before the patch is found
    # (as allow_dirty does).
    binary_matcher = re.compile(binary_files_re)
    pending = []
    # Lines of the current hunk's original side still to be seen; while this
    # is positive a "--- " line is hunk content, not a new patch header.
    remaining_orig = 0
    before_first_patch = True
    for line in iter_lines:
        if line.startswith(('=== ', '*** ', '#')):
            continue
        if remaining_orig > 0:
            if line.startswith(('-', ' ')):
                remaining_orig -= 1
        elif line.startswith('--- ') or binary_matcher.match(line):
            if allow_dirty and before_first_patch:
                # Patches can have "junk" at the beginning
                # Stripping junk from the end of patches is handled when we
                # parse the patch
                before_first_patch = False
            elif pending:
                yield pending
            pending = []
        elif line.startswith('@@'):
            remaining_orig = hunk_from_header(line).orig_range
        pending.append(line)
    if pending:
        yield pending
378
379
3873.1.6 by Benoît Pierre
OK, so now patches should handle '\ No newline at end of file' in both
380
def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating \n are produced without one.  This transformation may be
    applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    Each NO_NL marker line is dropped, and the line that preceded it is
    yielded with its final newline removed.
    """
    # Output lags one line behind input so a following NO_NL marker can
    # still change the line before it is emitted.
    held = None
    for current in iter_lines:
        if current == NO_NL:
            if not held.endswith('\n'):
                raise AssertionError()
            yield held[:-1]
            held = None
            continue
        if held is not None:
            yield held
        held = current
    if held is not None:
        yield held
399
400
5016.3.1 by Toshio Kuratomi
iAdd an allow_dirty parameter that allows patch files with non-patch data to be used.
401
def parse_patches(iter_lines, allow_dirty=False):
    '''Parse a multi-file patch into a list of patch objects.

    The lines are grouped per file by iter_file_patch and each group is
    handed to parse_patch.

    :arg iter_lines: iterable of lines to parse for patches
    :kwarg allow_dirty: If True, allow text that's not part of the patch at
        selected places.  This includes comments before and after a patch
        for instance.  Default False.
    :return: list of whatever parse_patch returns for each group
    '''
    patches = []
    for file_lines in iter_file_patch(iter_lines, allow_dirty):
        patches.append(parse_patch(iter(file_lines), allow_dirty))
    return patches
0.5.93 by Aaron Bentley
Added patches.py
410
411
412
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the span covered by the shorter text is compared.

    :param atext: The first text
    :type atext: str
    :param btext: The second text
    :type btext: str
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    for index, (a_char, b_char) in enumerate(zip(atext, btext)):
        if a_char != b_char:
            return index
    return None
429
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
430
0.5.93 by Aaron Bentley
Added patches.py
431
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: The lines of the patch, header included.
    """
    cleaned = iter_lines_handle_nl(iter(patch_lines))
    # Consume the two header lines; the names themselves are not used.
    get_patch_names(cleaned)
    hunks = iter_hunks(cleaned)
    return iter_patched_from_hunks(orig_lines, hunks)
438
3363.18.4 by Aaron Bentley
Updates from review (and a doc update)
439
3363.18.1 by Aaron Bentley
Allow patching directly from parsed hunks
440
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    If the original lines run out while a hunk still needs them, iteration
    simply stops; no error is raised for that case.

    :param orig_lines: The unpatched lines.  May be None, provided no hunk
        needs to read an original line.
    :param hunks: An iterable of Hunk instances.
    :raises PatchConflict: if a context or removed line in a hunk differs
        from the corresponding original line
    """
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy through the untouched lines that precede this hunk.
        while line_no < hunk.orig_pos:
            try:
                orig_line = next(orig_lines)
            except StopIteration:
                # A StopIteration must not escape a generator body
                # (PEP 479); end the output explicitly instead.
                return
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                try:
                    orig_line = next(orig_lines)
                except StopIteration:
                    return
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line, "".join(seen_patch))
                # Context lines are passed through; removed lines are
                # consumed from the original but not emitted.
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                line_no += 1
    if orig_lines is not None:
        for line in orig_lines:
            yield line