3
# Copyright (C) 2005 Canonical Ltd
7
# Author: Martin Pool <mbp@canonical.com>
10
"""knit - a weave-like structure"""
13
class VerInfo(object):
14
included = frozenset()
15
def __init__(self, included=None):
17
self.included = frozenset(included)
20
s = self.__class__.__name__ + '('
22
s += 'included=%r' % (list(self.included))
28
"""knit - versioned text file storage.
30
A Knit manages versions of line-based text files, keeping track of the
31
originating version for each line.
33
Texts can be identified in either of two ways:
35
* a nonnegative index number.
37
* a version-id string.
39
Typically the index number will be valid only inside this knit and
40
the version-id is used to reference it in the larger world.
43
List of edit instructions.
45
Each line is stored as a tuple of (index-id, text). The line
46
is present in the version equal to index-id.
49
List of versions, indexed by index number.
51
For each version we store the tuple (included_versions), which
52
lists the previous versions also considered active.
59
def add(self, parents, text):
60
"""Add a single text on top of the weave.
62
Returns the index number of the newly added version.
65
List or set of parent version numbers.
68
Sequence of lines to be added in the new version."""
69
if not isinstance(text, list):
70
raise ValueError("text should be a list, not %s" % type(text))
72
self._check_versions(parents)
77
parents = frozenset(parents)
78
delta = self._delta(parents, text)
80
for i1, i2, newlines in delta:
81
# TODO: handle lines being offset as we insert stuff
83
raise NotImplementedError("can't handle replacing weave lines %d-%d yet"
89
to_insert.append((idx, line))
91
self._l[i1:i1] = to_insert
93
self._v.append(VerInfo(parents))
95
# special case; adding with no parents revision; can do this
96
# more quickly by just appending unconditionally
98
self._l.append((idx, line))
100
self._v.append(VerInfo())
105
def _check_versions(self, indexes):
106
"""Check everything in the sequence of indexes is valid"""
111
raise IndexError("invalid version number %r" % i)
114
def annotate(self, index):
115
return list(self.annotate_iter(index))
118
def annotate_iter(self, index):
119
"""Yield list of (index-id, line) pairs for the specified version.
121
The index indicates when the line originated in the weave."""
125
raise IndexError('version index %d out of range' % index)
126
included = set(vi.included)
128
return iter(self._extract(included))
131
def _extract(self, included):
132
"""Yield annotation of lines in included set.
134
The set typically but not necessarily corresponds to a version.
136
for origin, line in self._l:
137
if origin in included:
142
def getiter(self, index):
143
"""Yield lines for the specified version."""
144
for origin, line in self.annotate_iter(index):
148
def get(self, index):
149
return list(self.getiter(index))
152
def dump(self, to_file):
153
from pprint import pprint
154
print >>to_file, "Knit._l = ",
155
pprint(self._l, to_file)
156
print >>to_file, "Knit._v = ",
157
pprint(self._v, to_file)
161
for vers_info in self._v:
163
for vi in vers_info[0]:
164
if vi < 0 or vi >= index:
165
raise ValueError("invalid included version %d for index %d"
168
raise ValueError("repeated included version %d for index %d"
174
def _delta(self, included, lines):
175
"""Return changes from basis to new revision.
177
The old text for comparison is the union of included revisions.
179
This is used in inserting a new text.
181
Delta is returned as a sequence of (line1, line2, newlines),
182
indicating that line1 through line2 of the old weave should be
183
replaced by the sequence of lines in newlines. Note that
184
these line numbers are positions in the total weave and don't
185
correspond to the lines in any extracted version, or even the
186
extracted union of included versions.
188
If line1=line2, this is a pure insert; if newlines=[] this is a
189
pure delete. (Similar to difflib.)
192
self._check_versions(included)
194
##from pprint import pprint
196
# first get basis for comparison
197
# basis holds (lineno, origin, line)
204
for origin, line in self._l:
205
if origin in included:
206
basis.append((lineno, line))
209
assert lineno == len(self._l)
211
# now make a parallel list with only the text, to pass to the differ
212
basis_lines = [line for (lineno, line) in basis]
214
# add a sentinal, because we can also match against the final line
215
basis.append((len(self._l), None))
217
# XXX: which line of the weave should we really consider matches the end of the file?
218
# the current code says it's the last line of the weave?
220
from difflib import SequenceMatcher
221
s = SequenceMatcher(None, basis_lines, lines)
223
##print 'basis sequence:'
226
for tag, i1, i2, j1, j2 in s.get_opcodes():
227
##print tag, i1, i2, j1, j2
232
# i1,i2 are given in offsets within basis_lines; we need to map them
233
# back to offsets within the entire weave
234
real_i1 = basis[i1][0]
235
real_i2 = basis[i2][0]
237
# find the text identified by j:
243
assert j2 <= len(lines)
244
newlines = lines[j1:j2]
246
yield real_i1, real_i2, newlines
249
def update_knit(knit, new_vers, new_lines):
250
"""Return a new knit whose text matches new_lines.
252
First of all the knit is diffed against the new lines, considering
253
only the text of the lines from the knit. This identifies lines
254
unchanged from the knit, plus insertions and deletions.
256
The deletions are marked as deleted. The insertions are added
257
with their new values.
261
if not isinstance(new_vers, int):
262
raise TypeError('new version-id must be an int: %r' % new_vers)
264
from difflib import SequenceMatcher
265
knit_lines = knit2text(knit)
266
m = SequenceMatcher(None, knit_lines, new_lines)
268
for block in m.get_matching_blocks():
269
print "a[%d] and b[%d] match for %d elements" % block
272
for tag, i1, i2, j1, j2 in m.get_opcodes():
273
print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
274
(tag, i1, i2, knit_lines[i1:i2], j1, j2, new_lines[j1:j2]))
277
new_knit.extend(knit[i1:i2])
278
elif tag == 'delete':
279
for i in range(i1, i2):
281
new_knit.append((kl[0], kl[1], False))