198
by mbp at sourcefrog
- experimental compressed Revfile support |
1 |
# (C) 2005 Matt Mackall
|
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
2 |
# (C) 2005 Canonical Ltd
|
3 |
||
4 |
# based on code by Matt Mackall, hacked by Martin Pool
|
|
5 |
||
6 |
# mm's code works line-by-line; this just works on byte strings.
|
|
7 |
# Possibly slower; possibly gives better results for code not
|
|
8 |
# regularly separated by newlines and anyhow a bit simpler.
|
|
9 |
||
198
by mbp at sourcefrog
- experimental compressed Revfile support |
10 |
|
11 |
# This program is free software; you can redistribute it and/or modify
|
|
12 |
# it under the terms of the GNU General Public License as published by
|
|
13 |
# the Free Software Foundation; either version 2 of the License, or
|
|
14 |
# (at your option) any later version.
|
|
15 |
||
16 |
# This program is distributed in the hope that it will be useful,
|
|
17 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 |
# GNU General Public License for more details.
|
|
20 |
||
21 |
# You should have received a copy of the GNU General Public License
|
|
22 |
# along with this program; if not, write to the Free Software
|
|
23 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
24 |
||
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
25 |
|
26 |
# TODO: maybe work on files not strings?
|
|
27 |
||
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
28 |
# FIXME: doesn't work properly on files without trailing newlines
|
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
29 |
|
1185.1.41
by Robert Collins
massive patch from Alexander Belchenko - many PEP8 fixes, removes unused function uuid |
30 |
import difflib |
31 |
import struct |
|
32 |
import sys |
|
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
33 |
import unittest |
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
34 |
from cStringIO import StringIO |
198
by mbp at sourcefrog
- experimental compressed Revfile support |
35 |
|
1185.1.41
by Robert Collins
massive patch from Alexander Belchenko - many PEP8 fixes, removes unused function uuid |
36 |
|
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
37 |
def linesplit(a): |
38 |
"""Split into two lists: content and line positions.
|
|
39 |
||
40 |
This returns (al, ap).
|
|
41 |
||
42 |
al[i] is the string content of line i of the file, including its
|
|
43 |
newline (if any).
|
|
44 |
||
45 |
ap[i] is the byte position in the file where that line starts.
|
|
46 |
||
47 |
ap[-1] is the byte position of the end of the file (i.e. the
|
|
48 |
length of the file.)
|
|
49 |
||
50 |
This transformation allows us to do a line-based diff and then map
|
|
51 |
back to byte positions.
|
|
52 |
"""
|
|
53 |
||
54 |
al, ap = [], [] |
|
55 |
last = 0 |
|
56 |
||
57 |
n = a.find("\n") + 1 |
|
58 |
while n > 0: |
|
59 |
ap.append(last) |
|
60 |
al.append(a[last:n]) |
|
61 |
last = n |
|
62 |
n = a.find("\n", n) + 1 |
|
63 |
||
64 |
if last < len(a): |
|
65 |
al.append(a[last:]) |
|
66 |
ap.append(last) |
|
67 |
||
68 |
# position at the end
|
|
69 |
ap.append(len(a)) |
|
70 |
||
71 |
return (al, ap) |
|
72 |
||
73 |
||
198
by mbp at sourcefrog
- experimental compressed Revfile support |
74 |
def diff(a, b): |
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
75 |
# TODO: Use different splits, perhaps rsync-like, for binary files?
|
76 |
||
77 |
(al, ap) = linesplit(a) |
|
78 |
(bl, bp) = linesplit(b) |
|
79 |
||
80 |
d = difflib.SequenceMatcher(None, al, bl) |
|
225
by mbp at sourcefrog
debug output |
81 |
|
82 |
## sys.stderr.write(' ~ real_quick_ratio: %.4f\n' % d.real_quick_ratio())
|
|
83 |
||
198
by mbp at sourcefrog
- experimental compressed Revfile support |
84 |
for o, m, n, s, t in d.get_opcodes(): |
85 |
if o == 'equal': continue |
|
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
86 |
# a[m:n] should be replaced by b[s:t]
|
87 |
if s == t: |
|
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
88 |
yield ap[m], ap[n], '' |
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
89 |
else: |
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
90 |
yield ap[m], ap[n], ''.join(bl[s:t]) |
198
by mbp at sourcefrog
- experimental compressed Revfile support |
91 |
|
92 |
||
93 |
def tobinary(ops): |
|
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
94 |
b = StringIO() |
198
by mbp at sourcefrog
- experimental compressed Revfile support |
95 |
for f in ops: |
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
96 |
b.write(struct.pack(">III", f[0], f[1], len(f[2]))) |
97 |
b.write(f[2]) |
|
98 |
return b.getvalue() |
|
99 |
||
198
by mbp at sourcefrog
- experimental compressed Revfile support |
100 |
|
101 |
def bdiff(a, b): |
|
102 |
return tobinary(diff(a, b)) |
|
103 |
||
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
104 |
|
198
by mbp at sourcefrog
- experimental compressed Revfile support |
105 |
def patch(t, ops): |
106 |
last = 0 |
|
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
107 |
b = StringIO() |
108 |
||
109 |
for m, n, r in ops: |
|
110 |
b.write(t[last:m]) |
|
111 |
if r: |
|
112 |
b.write(r) |
|
113 |
last = n |
|
114 |
||
115 |
b.write(t[last:]) |
|
116 |
return b.getvalue() |
|
117 |
||
198
by mbp at sourcefrog
- experimental compressed Revfile support |
118 |
|
119 |
def frombinary(b): |
|
205
by mbp at sourcefrog
Revfile:- store and retrieve deltas!mdiff:- work on bytes not lines |
120 |
bin = StringIO(b) |
121 |
while True: |
|
122 |
p = bin.read(12) |
|
123 |
if not p: |
|
124 |
break
|
|
125 |
||
126 |
m, n, l = struct.unpack(">III", p) |
|
127 |
||
128 |
if l == 0: |
|
129 |
r = '' |
|
130 |
else: |
|
131 |
r = bin.read(l) |
|
132 |
if len(r) != l: |
|
133 |
raise Exception("truncated patch data") |
|
134 |
||
135 |
yield m, n, r |
|
136 |
||
198
by mbp at sourcefrog
- experimental compressed Revfile support |
137 |
|
138 |
def bpatch(t, b): |
|
139 |
return patch(t, frombinary(b)) |
|
140 |
||
141 |
||
142 |
||
143 |
||
974.1.26
by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472 |
144 |
class TestDiffPatch(unittest.TestCase): |
145 |
def doDiffPatch(self, old, new): |
|
146 |
diff = bdiff(old, new) |
|
147 |
result = bpatch(old, diff) |
|
148 |
self.assertEquals(new, result) |
|
149 |
||
150 |
||
151 |
def testSimpleDiff(self): |
|
152 |
"""Simply add a line at the end"""
|
|
153 |
self.doDiffPatch('a\nb\n', 'a\nb\nc\n') |
|
154 |
||
155 |
||
156 |
||
157 |
def testTrailingLine(self): |
|
158 |
"""Test diff that adds an unterminated line.
|
|
159 |
||
160 |
(Old versions didn't do this properly.)"""
|
|
161 |
self.doDiffPatch('a\nb\nc\n', |
|
162 |
'a\nb\nc\nd') |
|
163 |
||
164 |
||
165 |
if __name__ == '__main__': |
|
166 |
unittest.main() |