~bzr-pqm/bzr/bzr.dev

3363.18.3 by Aaron Bentley
Add tests for iter_patched_from_hunks
1
# Copyright (C) 2004 - 2006, 2008 Aaron Bentley, Canonical Ltd
0.5.93 by Aaron Bentley
Added patches.py
2
# <aaron.bentley@utoronto.ca>
3
#
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
4634.80.1 by Aaron Bentley
Parse binary files.
17
import re
18
19
20
class BinaryFiles(Exception):
    """Signals that a "Binary files ... differ" line was found.

    :ivar orig_name: name of the original file, as given on that line
    :ivar mod_name: name of the modified file, as given on that line
    """

    def __init__(self, orig_name, mod_name):
        Exception.__init__(self, 'Binary files section encountered.')
        self.orig_name = orig_name
        self.mod_name = mod_name
0.5.93 by Aaron Bentley
Added patches.py
26
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
27
0.5.93 by Aaron Bentley
Added patches.py
28
class PatchSyntax(Exception):
    """Base class for errors caused by text that is not a valid patch."""

    def __init__(self, msg):
        # Explicit base-class call: the message becomes the exception text.
        Exception.__init__(self, msg)
31
32
33
class MalformedPatchHeader(PatchSyntax):
    """The ``---``/``+++`` header of a patch could not be parsed.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line (shown with ``repr`` in the message)
    """

    def __init__(self, desc, line):
        self.desc = desc
        self.line = line
        PatchSyntax.__init__(
            self, "Malformed patch header.  %s\n%r" % (desc, line))
39
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
40
0.5.93 by Aaron Bentley
Added patches.py
41
class MalformedHunkHeader(PatchSyntax):
    """An ``@@ ... @@`` hunk header could not be parsed.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line (shown with ``repr`` in the message)
    """

    def __init__(self, desc, line):
        self.desc = desc
        self.line = line
        PatchSyntax.__init__(
            self, "Malformed hunk header.  %s\n%r" % (desc, line))
47
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
48
0.5.93 by Aaron Bentley
Added patches.py
49
class MalformedLine(PatchSyntax):
    """A line inside a hunk body has an unrecognised leading character.

    :ivar desc: short description of what is wrong
    :ivar line: the offending line (included verbatim in the message)
    """

    def __init__(self, desc, line):
        self.desc = desc
        self.line = line
        PatchSyntax.__init__(
            self, "Malformed line.  %s\n%s" % (desc, line))
55
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
56
1185.82.125 by Aaron Bentley
More cleanups
57
class PatchConflict(Exception):
    """The text being patched does not match what the patch expects.

    :param line_no: line number at which the mismatch was detected
    :param orig_line: the line actually found in the original text
    :param patch_line: what the patch expected; converted with ``str``
    """

    def __init__(self, line_no, orig_line, patch_line):
        # Trailing newlines are stripped so the message stays on one line.
        found = orig_line.rstrip('\n')
        expected = str(patch_line).rstrip('\n')
        template = ('Text contents mismatch at line %d.  Original has "%s",'
                    ' but patch says it should be "%s"')
        Exception.__init__(self, template % (line_no, found, expected))
64
65
0.5.93 by Aaron Bentley
Added patches.py
66
def get_patch_names(iter_lines):
    """Read the two header lines of a patch and return the file names.

    Consumes up to two lines from ``iter_lines``.

    :param iter_lines: an iterator (not merely an iterable) of patch lines
    :return: the original and modified names, as a tuple
    :rtype: (str, str)
    :raises BinaryFiles: if the first line is a
        "Binary files X and Y differ" line
    :raises MalformedPatchHeader: if either header line is missing or does
        not start with the expected "--- " / "+++ " prefix
    """
    # Only the next() calls can raise StopIteration, so keep the try
    # blocks limited to them.
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    match = re.match('Binary files (.*) and (.*) differ\n', line)
    if match is not None:
        raise BinaryFiles(match.group(1), match.group(2))
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        # MalformedPatchHeader is a PatchSyntax, so callers catching
        # PatchSyntax still work; the offending line is now reported too.
        raise MalformedPatchHeader("No mod name", line)
    mod_name = line[4:].rstrip("\n")
    return (orig_name, mod_name)
87
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
88
0.5.93 by Aaron Bentley
Added patches.py
89
def parse_range(textrange):
    """Parse a patch range, where a missing length means one line.

    :param textrange: The text to parse, either "pos" or "pos,length"
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    :raises ValueError: if there are more than two comma-separated fields
        or a field is not an integer
    """
    fields = textrange.split(',')
    if len(fields) == 1:
        # A bare position implies a range of exactly one line.
        fields.append("1")
    start, length = fields
    return (int(start), int(length))
106
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
107
0.5.93 by Aaron Bentley
Added patches.py
108
def hunk_from_header(line):
    """Build an empty Hunk from an ``@@ -a,b +c,d @@`` header line.

    :param line: the header line, including its trailing newline
    :type line: str
    :return: a Hunk carrying the parsed positions and ranges, plus any
        text that followed the closing ``@@`` (None when absent); its
        ``lines`` list is left empty
    :rtype: Hunk
    :raises MalformedHunkHeader: if the line does not have the expected
        shape, a number cannot be parsed, or a range is negative
    """
    # Imported here rather than relying on module scope so the function
    # works even where the module avoids importing re at load time.
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    # Group 3 is the optional text after the closing "@@".
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
0.5.93 by Aaron Bentley
Added patches.py
128
129
130
class HunkLine:
    """Base class for one line of a hunk body.

    :ivar contents: the line's text without its leading marker character
    """

    def __init__(self, contents):
        self.contents = contents

    def get_str(self, leadchar):
        """Return the line as it appears in a patch.

        :param leadchar: the marker character to put in front of the text
        :type leadchar: str
        :return: ``leadchar`` followed by the contents; when the contents
            lack a trailing newline, a newline and the NO_NL marker line
            are appended
        :rtype: str
        """
        if self.contents.endswith('\n'):
            return leadchar + self.contents
        # The text has no final newline: terminate the patch line and
        # record that fact with the marker line.
        return leadchar + self.contents + '\n' + NO_NL
142
143
144
class ContextLine(HunkLine):
    """A hunk line present in both texts; rendered with a leading space.

    Construction is inherited unchanged from HunkLine.
    """

    def __str__(self):
        return self.get_str(" ")
150
151
152
class InsertLine(HunkLine):
    """A hunk line added by the patch; rendered with a leading "+".

    Construction is inherited unchanged from HunkLine.
    """

    def __str__(self):
        return self.get_str("+")
158
159
160
class RemoveLine(HunkLine):
    """A hunk line deleted by the patch; rendered with a leading "-".

    Construction is inherited unchanged from HunkLine.
    """

    def __str__(self):
        return self.get_str("-")
166
167
# Marker line placed after a hunk line whose text has no trailing newline.
NO_NL = '\\ No newline at end of file\n'
168
# parse_line returns instances of different classes; silence pychecker's
# return-value consistency warning for the duration of its definition.
__pychecker__ = "no-returnvalues"


def parse_line(line):
    """Turn one line of a hunk body into the matching HunkLine object.

    :param line: a line from a hunk body
    :type line: str
    :return: a ContextLine, InsertLine or RemoveLine holding the text
        after the marker character; a line that begins with a newline is
        kept whole as a ContextLine
    :raises MalformedLine: if the line starts with any other character
        (or is empty)
    """
    if line.startswith("\n"):
        # Context line whose leading space was lost: keep the whole line.
        return ContextLine(line)
    if line.startswith(" "):
        return ContextLine(line[1:])
    if line.startswith("+"):
        return InsertLine(line[1:])
    if line.startswith("-"):
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)

__pychecker__ = ""
182
183
184
class Hunk:
    """One hunk of a patch: header values plus the body lines.

    :ivar orig_pos: position of the hunk in the original text
    :ivar orig_range: number of original lines the hunk covers
    :ivar mod_pos: position of the hunk in the modified text
    :ivar mod_range: number of modified lines the hunk covers
    :ivar tail: text following the closing "@@" of the header, or None
    :ivar lines: the body, a list of HunkLine objects
    """

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        self.lines = []

    def get_header(self):
        """Return the "@@ -a,b +c,d @@" header line, with tail if any."""
        orig_span = self.range_str(self.orig_pos, self.orig_range)
        mod_span = self.range_str(self.mod_pos, self.mod_range)
        tail_str = ''
        if self.tail is not None:
            tail_str = ' ' + self.tail
        return "@@ -%s +%s @@%s\n" % (orig_span, mod_span, tail_str)

    def range_str(self, pos, range):
        """Return a file range, omitting the length when it is 1.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: a string in the format "pos,range", or just "pos" when
            range == 1
        """
        if range != 1:
            return "%i,%i" % (pos, range)
        return "%i" % pos

    def __str__(self):
        """Return the header followed by every body line, as patch text."""
        return "".join([self.get_header()] + [str(l) for l in self.lines])

    def shift_to_mod(self, pos):
        """Return the offset this hunk applies to original position pos.

        :param pos: a position in the original text (compared against
            ``orig_pos - 1``, so presumably zero-based)
        :return: 0 when pos lies before the hunk, the hunk's net size
            change when pos lies after it, otherwise whatever
            shift_to_mod_lines reports (which may be None)
        """
        if pos < self.orig_pos - 1:
            return 0
        if pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the body to find the offset for a position near the hunk.

        :param pos: a position in the original text
        :return: the number of inserted minus removed lines seen before
            the walk passes pos, or None if the original line at pos is
            itself removed by this hunk
        """
        cursor = self.orig_pos - 1
        delta = 0
        for hunk_line in self.lines:
            if isinstance(hunk_line, InsertLine):
                delta += 1
            elif isinstance(hunk_line, RemoveLine):
                if cursor == pos:
                    return None
                delta -= 1
                cursor += 1
            elif isinstance(hunk_line, ContextLine):
                cursor += 1
            if cursor > pos:
                break
        return delta
248
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
249
0.5.93 by Aaron Bentley
Added patches.py
250
def iter_hunks(iter_lines):
    """Parse hunks from the lines that follow a patch header.

    Every line that is not a bare newline is taken to be a hunk header;
    body lines are then consumed until both ranges announced by that
    header are satisfied.  Bare newline lines between hunks are skipped.

    :param iter_lines: an iterator (not merely an iterable) of patch
        lines; it is advanced both by the loop and while reading bodies
    :return: iterator of Hunk objects
    :raises MalformedHunkHeader: if a header line cannot be parsed
    :raises MalformedLine: if a body line has an unknown marker
    """
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        hunk = hunk_from_header(line)
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # Input ended in the middle of a hunk.  End the iteration
                # without yielding the incomplete hunk, as this function
                # has always done; an explicit return keeps that behaviour
                # where StopIteration may not escape a generator.
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
272
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
273
4634.80.1 by Aaron Bentley
Parse binary files.
274
class BinaryPatch(object):
    """A patch entry that only records that two binary files differ.

    :ivar oldname: name of the original file
    :ivar newname: name of the modified file
    """

    def __init__(self, oldname, newname):
        self.oldname = oldname
        self.newname = newname

    def __str__(self):
        """Return the "Binary files ... differ" line for this entry."""
        names = (self.oldname, self.newname)
        return 'Binary files %s and %s differ\n' % names
4634.80.1 by Aaron Bentley
Parse binary files.
281
282
283
class Patch(BinaryPatch):
    """A textual patch for one file: the two names plus a list of hunks.

    :ivar hunks: list of Hunk objects, initially empty
    """

    def __init__(self, oldname, newname):
        BinaryPatch.__init__(self, oldname, newname)
        self.hunks = []

    def __str__(self):
        """Return the header followed by the text of every hunk."""
        return self.get_header() + "".join([str(h) for h in self.hunks])

    def get_header(self):
        """Return the two-line "---" / "+++" header."""
        return "--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: (inserted line count, removed line count, hunk count)
        :rtype: (int, int, int)
        """
        inserts = 0
        removes = 0
        for hunk in self.hunks:
            for hunk_line in hunk.lines:
                if isinstance(hunk_line, InsertLine):
                    inserts += 1
                elif isinstance(hunk_line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original text to the modified text.

        :param position: a position in the original text
        :return: the position plus the shift reported by every hunk, or
            None as soon as any hunk reports None for that position
        """
        total_shift = 0
        for hunk in self.hunks:
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            total_shift += shift
        return position + total_shift

    def iter_inserted(self):
        """Iterates through inserted lines

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            # Counting starts one below the hunk's position in the
            # modified text.
            mod_index = hunk.mod_pos - 1
            for hunk_line in hunk.lines:
                if isinstance(hunk_line, InsertLine):
                    yield (mod_index, hunk_line)
                    mod_index += 1
                # Context lines occupy a slot in the modified text too;
                # removed lines do not.
                if isinstance(hunk_line, ContextLine):
                    mod_index += 1
337
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
338
0.5.93 by Aaron Bentley
Added patches.py
339
def parse_patch(iter_lines):
    """Parse the lines of a single-file patch.

    :param iter_lines: iterable of the patch's lines
    :return: a BinaryPatch if the patch starts with a
        "Binary files ... differ" line, otherwise a Patch with its hunks
        filled in
    :raises MalformedPatchHeader: if the header lines cannot be parsed
    """
    # Lines followed by the no-newline marker lose their trailing newline
    # here, before any hunk line is parsed.
    iter_lines = iter_lines_handle_nl(iter_lines)
    try:
        (orig_name, mod_name) = get_patch_names(iter_lines)
    except BinaryFiles as e:
        return BinaryPatch(e.orig_name, e.mod_name)
    patch = Patch(orig_name, mod_name)
    for hunk in iter_hunks(iter_lines):
        patch.hunks.append(hunk)
    return patch
0.5.93 by Aaron Bentley
Added patches.py
350
351
352
def iter_file_patch(iter_lines):
    """Split a stream of patch lines into one list of lines per file.

    Lines starting with "=== ", "*** " or "#" are dropped.  A line that
    starts with "--- " begins a new group, except while the original-side
    lines announced by the latest hunk header are still being consumed,
    so that removed lines whose text begins with "--" are not mistaken
    for a header.

    :param iter_lines: iterable of patch lines
    :return: iterator of lists of lines, one list per file patch
    :raises MalformedHunkHeader: if a line starting with "@@" outside a
        hunk body cannot be parsed as a hunk header
    """
    pending = []
    # Number of original-side lines still expected in the current hunk.
    remaining_orig = 0
    for line in iter_lines:
        if line[:4] in ('=== ', '*** ') or line[:1] == '#':
            continue
        if remaining_orig > 0:
            if line.startswith('-') or line.startswith(' '):
                remaining_orig -= 1
        elif line.startswith('--- '):
            if pending:
                yield pending
            pending = []
        elif line.startswith('@@'):
            remaining_orig = hunk_from_header(line).orig_range
        pending.append(line)
    if pending:
        yield pending
373
374
3873.1.6 by Benoît Pierre
OK, so now patches should handle '\ No newline at end of file' in both
375
def iter_lines_handle_nl(iter_lines):
    """Yield lines, dropping the newline of lines marked as lacking one.

    Whenever the NO_NL marker line is seen, the line before it is yielded
    with its final newline removed and the marker itself is swallowed.
    This transformation may be applied at any point up until hunk line
    parsing, and is safe to apply repeatedly.

    :param iter_lines: iterable of patch lines
    :return: iterator of lines
    :raises AssertionError: if the marker is not preceded by a line that
        ends with a newline (including when it is the very first line or
        directly follows another marker)
    """
    # Each line is held back one step so it can still be altered if the
    # marker turns out to follow it.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if last_line is None or not last_line.endswith('\n'):
                raise AssertionError()
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
394
395
0.5.93 by Aaron Bentley
Added patches.py
396
def parse_patches(iter_lines):
    """Parse a multi-file patch into a list of patch objects.

    :param iter_lines: iterable of patch lines, possibly covering
        several files
    :return: one parsed patch per group produced by iter_file_patch
    :rtype: list
    """
    patches = []
    for file_lines in iter_file_patch(iter_lines):
        patches.append(parse_patch(iter(file_lines)))
    return patches
398
399
400
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the common prefix length is compared; a length difference alone
    is not reported.

    :param atext: The first text
    :type atext: str
    :param btext: The second text
    :type btext: str
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    common = min(len(atext), len(btext))
    for index in range(common):
        if atext[index] != btext[index]:
            return index
    return None
417
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
418
0.5.93 by Aaron Bentley
Added patches.py
419
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines.
    :param patch_lines: iterable of the lines of a single-file patch,
        header included.
    :return: iterator of the patched lines
    """
    normalized = iter_lines_handle_nl(iter(patch_lines))
    # The names are not needed; this just consumes and checks the header.
    get_patch_names(normalized)
    hunks = iter_hunks(normalized)
    return iter_patched_from_hunks(orig_lines, hunks)
426
3363.18.4 by Aaron Bentley
Updates from review (and a doc update)
427
3363.18.1 by Aaron Bentley
Allow patching directly from parsed hunks
428
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines, or None when there is no
        original text to read from.
    :param hunks: An iterable of Hunk instances.
    :return: iterator of the patched lines
    :raises PatchConflict: if a context or removed line of a hunk does
        not equal the corresponding original line
    """
    # Text of every hunk line processed so far, reported on conflict.
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy untouched original lines up to the start of the hunk.
        while line_no < hunk.orig_pos:
            try:
                orig_line = next(orig_lines)
            except StopIteration:
                # NOTE(review): the original text is shorter than the
                # patch expects.  Iteration has always simply ended here;
                # an explicit return preserves that where StopIteration
                # may not escape a generator.
                return
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                try:
                    orig_line = next(orig_lines)
                except StopIteration:
                    # Same historic behaviour as above.
                    return
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line,
                                        "".join(seen_patch))
                # Context lines are passed through; removed lines are
                # consumed without being emitted.
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                line_no += 1
    # Whatever follows the last hunk is copied unchanged.
    if orig_lines is not None:
        for line in orig_lines:
            yield line