~bzr-pqm/bzr/bzr.dev

3363.18.3 by Aaron Bentley
Add tests for iter_patched_from_hunks
1
# Copyright (C) 2004 - 2006, 2008 Aaron Bentley, Canonical Ltd
0.5.93 by Aaron Bentley
Added patches.py
2
# <aaron.bentley@utoronto.ca>
3
#
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.5.93 by Aaron Bentley
Added patches.py
17
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
18
0.5.93 by Aaron Bentley
Added patches.py
19
class PatchSyntax(Exception):
    """Base class for errors raised when patch text cannot be parsed."""

    def __init__(self, msg):
        """Create the error.

        :param msg: Human-readable description of the syntax problem
        :type msg: str
        """
        Exception.__init__(self, msg)
22
23
24
class MalformedPatchHeader(PatchSyntax):
    """Raised when a patch header line is missing or invalid."""

    def __init__(self, desc, line):
        """Create the error.

        :param desc: Short description of what is wrong with the header
        :param line: The offending line; shown with repr() in the message
        """
        self.desc = desc
        self.line = line
        msg = "Malformed patch header.  %s\n%r" % (self.desc, self.line)
        PatchSyntax.__init__(self, msg)
30
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
31
0.5.93 by Aaron Bentley
Added patches.py
32
class MalformedHunkHeader(PatchSyntax):
    """Raised when a hunk header line cannot be parsed."""

    def __init__(self, desc, line):
        """Create the error.

        :param desc: Short description of what is wrong with the header
        :param line: The offending line; shown with repr() in the message
        """
        self.desc = desc
        self.line = line
        msg = "Malformed hunk header.  %s\n%r" % (self.desc, self.line)
        PatchSyntax.__init__(self, msg)
38
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
39
0.5.93 by Aaron Bentley
Added patches.py
40
class MalformedLine(PatchSyntax):
    """Raised when a line inside a hunk has an unrecognised form."""

    def __init__(self, desc, line):
        """Create the error.

        :param desc: Short description of what is wrong with the line
        :param line: The offending line; included verbatim in the message
        """
        self.desc = desc
        self.line = line
        msg = "Malformed line.  %s\n%s" % (self.desc, self.line)
        PatchSyntax.__init__(self, msg)
46
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
47
1185.82.125 by Aaron Bentley
More cleanups
48
class PatchConflict(Exception):
    """Raised when the text being patched does not match the patch."""

    def __init__(self, line_no, orig_line, patch_line):
        """Create the error.

        :param line_no: Line number at which the mismatch was found
        :type line_no: int
        :param orig_line: The line found in the original text
        :type orig_line: str
        :param patch_line: What the patch expected; converted with str()
        """
        # Trailing newlines are stripped so the message stays readable.
        orig = orig_line.rstrip('\n')
        patch = str(patch_line).rstrip('\n')
        msg = ('Text contents mismatch at line %d.  Original has "%s",'
               ' but patch says it should be "%s"' % (line_no, orig, patch))
        Exception.__init__(self, msg)
55
56
0.5.93 by Aaron Bentley
Added patches.py
57
def get_patch_names(iter_lines):
    """Read the "--- " / "+++ " header pair from the start of a patch.

    Exactly two lines are consumed from iter_lines on success.

    :param iter_lines: Iterator over the lines of a single-file patch
    :return: (orig_name, mod_name), with the 4-character prefix and any
        trailing newline removed
    :rtype: (str, str)
    :raises MalformedPatchHeader: if either line is missing or does not
        start with the expected prefix
    """
    # Only the next() call sits inside each try, so a StopIteration can only
    # mean "ran out of input".
    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No orig line", "")
    if not line.startswith("--- "):
        raise MalformedPatchHeader("No orig name", line)
    orig_name = line[4:].rstrip("\n")

    try:
        line = next(iter_lines)
    except StopIteration:
        raise MalformedPatchHeader("No mod line", "")
    if not line.startswith("+++ "):
        # MalformedPatchHeader is a PatchSyntax, so callers catching
        # PatchSyntax still see this; the offending line is now reported too.
        raise MalformedPatchHeader("No mod name", line)
    mod_name = line[4:].rstrip("\n")

    return (orig_name, mod_name)
75
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
76
0.5.93 by Aaron Bentley
Added patches.py
77
def parse_range(textrange):
    """Parse a patch range, handling the "1" special-case

    A bare position such as "7" is shorthand for "7,1".

    :param textrange: The text to parse
    :type textrange: str
    :return: the position and range, as a tuple
    :rtype: (int, int)
    :raises ValueError: if the text has more than one comma or a part is
        not an integer
    """
    parts = textrange.split(',')
    if len(parts) == 1:
        return (int(parts[0]), 1)
    # Unpacking raises ValueError when there are more than two parts.
    (pos, length) = parts
    return (int(pos), int(length))
94
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
95
0.5.93 by Aaron Bentley
Added patches.py
96
def hunk_from_header(line):
    """Build an empty Hunk from a "@@ -pos,range +pos,range @@ tail" line.

    The line must end with a newline. Any text after the closing "@@" is
    kept as the hunk's tail.

    :param line: The hunk header line
    :type line: str
    :return: A Hunk with positions, ranges and tail set and no lines
    :raises MalformedHunkHeader: if the line does not match the format, the
        positions lack their "-"/"+" markers, a number cannot be parsed,
        or a range is negative
    """
    # Kept function-local so that importing this module does not import re.
    import re
    matches = re.match(r'\@\@ ([^@]*) \@\@( (.*))?\n', line)
    if matches is None:
        raise MalformedHunkHeader("Does not match format.", line)
    try:
        (orig, mod) = matches.group(1).split(" ")
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if not orig.startswith('-') or not mod.startswith('+'):
        raise MalformedHunkHeader("Positions don't start with + or -.", line)
    try:
        (orig_pos, orig_range) = parse_range(orig[1:])
        (mod_pos, mod_range) = parse_range(mod[1:])
    except (ValueError, IndexError) as e:
        raise MalformedHunkHeader(str(e), line)
    if mod_range < 0 or orig_range < 0:
        raise MalformedHunkHeader("Hunk range is negative", line)
    tail = matches.group(3)
    return Hunk(orig_pos, orig_range, mod_pos, mod_range, tail)
0.5.93 by Aaron Bentley
Added patches.py
116
117
118
class HunkLine(object):
    """A single line of a hunk, stored without its leading marker character."""

    def __init__(self, contents):
        """Create the line.

        :param contents: The line text, without the leading marker character
        :type contents: str
        """
        self.contents = contents

    def get_str(self, leadchar):
        """Render the line as patch text.

        :param leadchar: Marker character to put in front of the contents
        :type leadchar: str
        :return: leadchar followed by the contents; if the contents do not
            end with a newline, a newline and the NO_NL marker line are
            appended
        :rtype: str
        """
        if self.contents.endswith('\n'):
            terminator = ''
        else:
            terminator = '\n' + NO_NL
        return leadchar + self.contents + terminator
130
131
132
class ContextLine(HunkLine):
    """A hunk line rendered with a leading space."""

    # The constructor is inherited unchanged from HunkLine.

    def __str__(self):
        return self.get_str(" ")
138
139
140
class InsertLine(HunkLine):
    """A hunk line rendered with a leading "+"."""

    # The constructor is inherited unchanged from HunkLine.

    def __str__(self):
        return self.get_str("+")
146
147
148
class RemoveLine(HunkLine):
    """A hunk line rendered with a leading "-"."""

    # The constructor is inherited unchanged from HunkLine.

    def __str__(self):
        return self.get_str("-")
154
155
# Marker line that follows a hunk line whose text has no trailing newline.
NO_NL = '\\ No newline at end of file\n'
156
# Directive for the pychecker tool, reset again after parse_line; presumably
# it silences the return-value check for that function -- TODO confirm.
__pychecker__ = "no-returnvalues"


def parse_line(line):
    """Turn one raw hunk line into a ContextLine, InsertLine or RemoveLine.

    The first character selects the type and is dropped from the contents.
    A line that starts with a newline is accepted as a context line and is
    kept whole.

    :param line: The raw line from the hunk body
    :type line: str
    :return: The parsed line object
    :raises MalformedLine: if the first character is not recognised
    """
    if line.startswith("\n"):
        return ContextLine(line)
    if line.startswith(" "):
        return ContextLine(line[1:])
    if line.startswith("+"):
        return InsertLine(line[1:])
    if line.startswith("-"):
        return RemoveLine(line[1:])
    raise MalformedLine("Unknown line type", line)


__pychecker__ = ""
170
171
172
class Hunk(object):
    """One hunk of a patch: header values plus the hunk's lines."""

    def __init__(self, orig_pos, orig_range, mod_pos, mod_range, tail=None):
        """Create a hunk with no lines.

        :param orig_pos: Position given for the original text in the header
        :type orig_pos: int
        :param orig_range: Range given for the original text in the header
        :type orig_range: int
        :param mod_pos: Position given for the modified text in the header
        :type mod_pos: int
        :param mod_range: Range given for the modified text in the header
        :type mod_range: int
        :param tail: Text that follows the closing "@@", or None
        """
        self.orig_pos = orig_pos
        self.orig_range = orig_range
        self.mod_pos = mod_pos
        self.mod_range = mod_range
        self.tail = tail
        # Parsed hunk line objects, in order; filled in by the caller.
        self.lines = []

    def get_header(self):
        """Return the "@@ -... +... @@" header line, newline-terminated."""
        if self.tail is None:
            tail_str = ''
        else:
            tail_str = ' ' + self.tail
        return "@@ -%s +%s @@%s\n" % (
            self.range_str(self.orig_pos, self.orig_range),
            self.range_str(self.mod_pos, self.mod_range),
            tail_str)

    def range_str(self, pos, range):
        """Return a file range, omitting the range when it is 1.

        :param pos: The position in the file
        :type pos: int
        :param range: The range in the file
        :type range: int
        :return: "pos,range", or just "pos" when range == 1
        :rtype: str
        """
        if range == 1:
            return "%i" % pos
        return "%i,%i" % (pos, range)

    def __str__(self):
        """Return the header followed by every line rendered with str()."""
        parts = [self.get_header()]
        parts.extend([str(line) for line in self.lines])
        return "".join(parts)

    def shift_to_mod(self, pos):
        """Return how far this hunk shifts the given original position.

        :param pos: A position in the original text
        :type pos: int
        :return: 0 if pos is before the hunk, mod_range - orig_range if it
            is past the hunk, otherwise the result of shift_to_mod_lines
            (which may be None)
        """
        if pos < self.orig_pos - 1:
            return 0
        if pos > self.orig_pos + self.orig_range:
            return self.mod_range - self.orig_range
        return self.shift_to_mod_lines(pos)

    def shift_to_mod_lines(self, pos):
        """Walk the hunk's lines to find the shift for a position.

        :param pos: A position in the original text
        :type pos: int
        :return: The net count of inserted minus removed lines seen before
            walking past pos, or None if pos falls on a removed line
        """
        position = self.orig_pos - 1
        shift = 0
        for line in self.lines:
            if isinstance(line, InsertLine):
                shift += 1
            elif isinstance(line, RemoveLine):
                if position == pos:
                    # The requested line does not exist in the modified text.
                    return None
                shift -= 1
                position += 1
            elif isinstance(line, ContextLine):
                position += 1
            if position > pos:
                break
        return shift
236
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
237
0.5.93 by Aaron Bentley
Added patches.py
238
def iter_hunks(iter_lines):
    """Iterate through the hunks found in the body of a single-file patch.

    Each non-blank line outside a hunk body is parsed as a hunk header.
    Hunk lines are then read until both the original and the modified
    ranges announced by the header are filled. A line consisting only of a
    newline between hunks is skipped.

    :param iter_lines: Iterable of patch lines, positioned after the
        "---"/"+++" header
    :return: iterator of Hunk instances
    :raises MalformedHunkHeader: if a header line cannot be parsed
    :raises MalformedLine: if a hunk body line cannot be parsed
    """
    # A single shared iterator is needed because hunk bodies are pulled with
    # next() inside the for loop; iter() is a no-op on an iterator.
    iter_lines = iter(iter_lines)
    hunk = None
    for line in iter_lines:
        if line == "\n":
            if hunk is not None:
                yield hunk
                hunk = None
            continue
        if hunk is not None:
            yield hunk
        hunk = hunk_from_header(line)
        orig_size = 0
        mod_size = 0
        while orig_size < hunk.orig_range or mod_size < hunk.mod_range:
            try:
                raw_line = next(iter_lines)
            except StopIteration:
                # Input ended mid-hunk: stop here without yielding the
                # incomplete hunk. Returning explicitly keeps StopIteration
                # from escaping the generator (a RuntimeError under PEP 479).
                return
            hunk_line = parse_line(raw_line)
            hunk.lines.append(hunk_line)
            if isinstance(hunk_line, (RemoveLine, ContextLine)):
                orig_size += 1
            if isinstance(hunk_line, (InsertLine, ContextLine)):
                mod_size += 1
    if hunk is not None:
        yield hunk
260
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
261
0.5.93 by Aaron Bentley
Added patches.py
262
class Patch(object):
    """The changes to one file: old and new names plus a list of hunks."""

    def __init__(self, oldname, newname):
        """Create a patch with no hunks.

        :param oldname: Name written on the "--- " header line
        :param newname: Name written on the "+++ " header line
        """
        self.oldname = oldname
        self.newname = newname
        self.hunks = []

    def __str__(self):
        """Return the header followed by every hunk rendered with str()."""
        return self.get_header() + "".join([str(h) for h in self.hunks])

    def get_header(self):
        """Return the two-line "--- "/"+++ " header."""
        return "--- %s\n+++ %s\n" % (self.oldname, self.newname)

    def stats_values(self):
        """Calculate the number of inserts and removes.

        :return: (inserts, removes, number of hunks)
        :rtype: (int, int, int)
        """
        removes = 0
        inserts = 0
        for hunk in self.hunks:
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    inserts += 1
                elif isinstance(line, RemoveLine):
                    removes += 1
        return (inserts, removes, len(self.hunks))

    def stats_str(self):
        """Return a string of patch statistics"""
        return "%i inserts, %i removes in %i hunks" % self.stats_values()

    def pos_in_mod(self, position):
        """Map a position in the original text to the modified text.

        Every hunk's shift for the original position is added up.

        :param position: A position in the original text
        :type position: int
        :return: The shifted position, or None if any hunk reports None
            for that position
        """
        newpos = position
        for hunk in self.hunks:
            shift = hunk.shift_to_mod(position)
            if shift is None:
                return None
            newpos += shift
        return newpos

    def iter_inserted(self):
        """Iterates through inserted lines

        The line number starts at hunk.mod_pos - 1 for each hunk and
        advances on inserted and context lines.

        :return: Pair of line number, line
        :rtype: iterator of (int, InsertLine)
        """
        for hunk in self.hunks:
            pos = hunk.mod_pos - 1
            for line in hunk.lines:
                if isinstance(line, InsertLine):
                    yield (pos, line)
                    pos += 1
                if isinstance(line, ContextLine):
                    pos += 1
316
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
317
0.5.93 by Aaron Bentley
Added patches.py
318
def parse_patch(iter_lines):
    """Parse the lines of a single-file patch into a Patch.

    :param iter_lines: Iterator over the patch lines, starting at the
        "--- " header line
    :return: A Patch holding the two names and every parsed hunk
    :raises PatchSyntax: (or a subclass) if the header or a hunk is malformed
    """
    # Lines followed by the "no newline" marker lose their trailing newline
    # before any parsing happens.
    iter_lines = iter_lines_handle_nl(iter_lines)
    (orig_name, mod_name) = get_patch_names(iter_lines)
    patch = Patch(orig_name, mod_name)
    patch.hunks.extend(iter_hunks(iter_lines))
    return patch
325
326
327
def iter_file_patch(iter_lines):
    """Split a multi-file patch into one list of lines per file.

    Lines starting with "=== ", "*** " or "#" are dropped. A new group
    starts at each "--- " line seen outside a hunk's original-side lines.

    :param iter_lines: Iterable of lines from a patch covering any number
        of files
    :return: iterator of lists of lines, one list per file
    :raises MalformedHunkHeader: if an "@@" line cannot be parsed
    """
    saved_lines = []
    # Number of original-side lines still expected in the current hunk.
    # While it is positive, a line starting with "--- " is a removed line
    # whose text begins with "-- ", not a new file header.
    orig_range = 0
    for line in iter_lines:
        if line.startswith(('=== ', '*** ', '#')):
            continue
        if orig_range > 0:
            # A bare newline is a context line too (parse_line accepts it),
            # so it must be counted or the next file header is swallowed.
            if line.startswith(('-', ' ', '\n')):
                orig_range -= 1
        elif line.startswith('--- '):
            if saved_lines:
                yield saved_lines
            saved_lines = []
        elif line.startswith('@@'):
            orig_range = hunk_from_header(line).orig_range
        saved_lines.append(line)
    if saved_lines:
        yield saved_lines
348
349
3873.1.6 by Benoît Pierre
OK, so now patches should handle '\ No newline at end of file' in both
350
def iter_lines_handle_nl(iter_lines):
    """
    Iterates through lines, ensuring that lines that originally had no
    terminating newline are produced without one.  This transformation may
    be applied at any point up until hunk line parsing, and is safe to apply
    repeatedly.

    :param iter_lines: Iterable of patch lines
    :return: iterator of lines, with each NO_NL marker line removed and the
        line before it stripped of its final newline
    :raises AssertionError: if a NO_NL marker has no preceding line, or the
        preceding line does not end with a newline
    """
    # Output runs one line behind the input so the previous line can still
    # be altered when a marker is seen.
    last_line = None
    for line in iter_lines:
        if line == NO_NL:
            if last_line is None or not last_line.endswith('\n'):
                raise AssertionError(
                    'NO_NL marker is not preceded by a newline-terminated'
                    ' line')
            last_line = last_line[:-1]
            line = None
        if last_line is not None:
            yield last_line
        last_line = line
    if last_line is not None:
        yield last_line
369
370
0.5.93 by Aaron Bentley
Added patches.py
371
def parse_patches(iter_lines):
    """Parse a patch covering any number of files.

    :param iter_lines: Iterable of lines from the whole patch
    :return: One Patch per file found in the input
    :rtype: list of Patch
    """
    return [parse_patch(iter(file_lines))
            for file_lines in iter_file_patch(iter_lines)]
373
374
375
def difference_index(atext, btext):
    """Find the index of the first character that differs between two texts

    Only the common prefix length is compared; a length difference alone
    does not count as a difference.

    :param atext: The first text
    :type atext: str
    :param btext: The second text
    :type btext: str
    :return: The index, or None if there are no differences within the range
    :rtype: int or NoneType
    """
    # zip() stops at the shorter text, which bounds the comparison.
    for index, (achar, bchar) in enumerate(zip(atext, btext)):
        if achar != bchar:
            return index
    return None
392
1185.82.123 by Aaron Bentley
Cleanups to prepare for review
393
0.5.93 by Aaron Bentley
Added patches.py
394
def iter_patched(orig_lines, patch_lines):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines
    :param patch_lines: Iterable of lines of a single-file patch, starting
        at the "--- " header line
    :return: iterator of the patched lines
    :raises MalformedPatchHeader: if the "---"/"+++" header is missing or
        invalid
    """
    patch_lines = iter_lines_handle_nl(iter(patch_lines))
    # The names are not needed here; the call validates and skips the header.
    get_patch_names(patch_lines)
    return iter_patched_from_hunks(orig_lines, iter_hunks(patch_lines))
401
3363.18.4 by Aaron Bentley
Updates from review (and a doc update)
402
3363.18.1 by Aaron Bentley
Allow patching directly from parsed hunks
403
def iter_patched_from_hunks(orig_lines, hunks):
    """Iterate through a series of lines with a patch applied.
    This handles a single file, and does exact, not fuzzy patching.

    :param orig_lines: The unpatched lines, or None.
    :param hunks: An iterable of Hunk instances.
    :return: iterator of the patched lines
    :raises PatchConflict: if a context or removed line in a hunk differs
        from the corresponding original line
    """
    # Rendered hunk lines seen so far; only used for the conflict message.
    seen_patch = []
    line_no = 1
    if orig_lines is not None:
        orig_lines = iter(orig_lines)
    for hunk in hunks:
        # Copy through the untouched lines that precede this hunk.
        while line_no < hunk.orig_pos:
            try:
                orig_line = next(orig_lines)
            except StopIteration:
                # NOTE(review): the original text is shorter than the patch
                # expects, so output simply ends here. Returning explicitly
                # avoids a RuntimeError under PEP 479; consider whether this
                # should be reported as a conflict instead.
                return
            yield orig_line
            line_no += 1
        for hunk_line in hunk.lines:
            seen_patch.append(str(hunk_line))
            if isinstance(hunk_line, InsertLine):
                yield hunk_line.contents
            elif isinstance(hunk_line, (ContextLine, RemoveLine)):
                try:
                    orig_line = next(orig_lines)
                except StopIteration:
                    # Same short-input case as above.
                    return
                if orig_line != hunk_line.contents:
                    raise PatchConflict(line_no, orig_line,
                                        "".join(seen_patch))
                # Context lines are kept; removed lines are dropped.
                if isinstance(hunk_line, ContextLine):
                    yield orig_line
                line_no += 1
    # Copy through whatever follows the last hunk.
    if orig_lines is not None:
        for line in orig_lines:
            yield line