3
# Copyright (C) 2005 Canonical Ltd
7
# Author: Martin Pool <mbp@canonical.com>
10
"""knit - a weave-like structure"""
13
class VerInfo(object):
14
included = frozenset()
15
def __init__(self, included=None):
17
self.included = frozenset(included)
20
s = self.__class__.__name__ + '('
22
s += 'included=%r' % (list(self.included))
28
"""knit - versioned text file storage.
30
A Knit manages versions of line-based text files, keeping track of the
31
originating version for each line.
33
Texts can be identified in either of two ways:
35
* a nonnegative index number.
37
* a version-id string.
39
Typically the index number will be valid only inside this knit and
40
the version-id is used to reference it in the larger world.
43
List of edit instructions.
45
Each line is stored as a tuple of (index-id, text). The line
46
is present in the version equal to index-id.
49
List of versions, indexed by index number.
51
For each version we store the tuple (included_versions), which
52
lists the previous versions also considered active.
59
def add(self, parents, text):
60
"""Add a single text on top of the weave.
62
Returns the index number of the newly added version.
65
List or set of parent version numbers.
68
Sequence of lines to be added in the new version."""
69
if not isinstance(text, list):
70
raise ValueError("text should be a list, not %s" % type(text))
75
parents = frozenset(parents)
76
delta = self._delta(parents, text)
78
for i1, i2, newlines in delta:
79
# TODO: handle lines being offset as we insert stuff
81
raise NotImplementedError("can't handle replacing weave lines %d-%d yet"
87
to_insert.append((idx, line))
89
self._l[i1:i1] = to_insert
91
self._v.append(VerInfo(parents))
93
# special case; adding with no parents revision; can do this
94
# more quickly by just appending unconditionally
96
self._l.append((idx, line))
98
self._v.append(VerInfo())
103
def annotate(self, index):
104
return list(self.annotate_iter(index))
107
def annotate_iter(self, index):
108
"""Yield list of (index-id, line) pairs for the specified version.
110
The index indicates when the line originated in the weave."""
114
raise IndexError('version index %d out of range' % index)
115
included = set(vi.included)
117
return iter(self._extract(included))
120
def _extract(self, included):
121
"""Yield annotation of lines in included set.
123
The set typically but not necessarily corresponds to a version.
125
for origin, line in self._l:
126
if origin in included:
131
def getiter(self, index):
132
"""Yield lines for the specified version."""
133
for origin, line in self.annotate_iter(index):
137
def get(self, index):
138
return list(self.getiter(index))
141
def dump(self, to_file):
142
from pprint import pprint
143
print >>to_file, "Knit._l = ",
144
pprint(self._l, to_file)
145
print >>to_file, "Knit._v = ",
146
pprint(self._v, to_file)
150
for vers_info in self._v:
152
for vi in vers_info[0]:
153
if vi < 0 or vi >= index:
154
raise ValueError("invalid included version %d for index %d"
157
raise ValueError("repeated included version %d for index %d"
163
def _delta(self, included, lines):
164
"""Return changes from basis to new revision.
166
The old text for comparison is the union of included revisions.
168
This is used in inserting a new text.
170
Delta is returned as a sequence of (line1, line2, newlines),
171
indicating that line1 through line2 of the old weave should be
172
replaced by the sequence of lines in newlines. Note that
173
these line numbers are positions in the total weave and don't
174
correspond to the lines in any extracted version, or even the
175
extracted union of included versions.
177
If line1=line2, this is a pure insert; if newlines=[] this is a
178
pure delete. (Similar to difflib.)
181
##from pprint import pprint
183
# first get basis for comparison
184
# basis holds (lineno, origin, line)
191
for origin, line in self._l:
192
if origin in included:
193
basis.append((lineno, line))
196
assert lineno == len(self._l)
198
# now make a parallel list with only the text, to pass to the differ
199
basis_lines = [line for (lineno, line) in basis]
201
# add a sentinal, because we can also match against the final line
202
basis.append((len(self._l), None))
204
# XXX: which line of the weave should we really consider matches the end of the file?
205
# the current code says it's the last line of the weave?
207
from difflib import SequenceMatcher
208
s = SequenceMatcher(None, basis_lines, lines)
210
##print 'basis sequence:'
213
for tag, i1, i2, j1, j2 in s.get_opcodes():
214
##print tag, i1, i2, j1, j2
219
# i1,i2 are given in offsets within basis_lines; we need to map them
220
# back to offsets within the entire weave
221
real_i1 = basis[i1][0]
222
real_i2 = basis[i2][0]
224
# find the text identified by j:
230
assert j2 <= len(lines)
231
newlines = lines[j1:j2]
233
yield real_i1, real_i2, newlines
236
def update_knit(knit, new_vers, new_lines):
237
"""Return a new knit whose text matches new_lines.
239
First of all the knit is diffed against the new lines, considering
240
only the text of the lines from the knit. This identifies lines
241
unchanged from the knit, plus insertions and deletions.
243
The deletions are marked as deleted. The insertions are added
244
with their new values.
248
if not isinstance(new_vers, int):
249
raise TypeError('new version-id must be an int: %r' % new_vers)
251
from difflib import SequenceMatcher
252
knit_lines = knit2text(knit)
253
m = SequenceMatcher(None, knit_lines, new_lines)
255
for block in m.get_matching_blocks():
256
print "a[%d] and b[%d] match for %d elements" % block
259
for tag, i1, i2, j1, j2 in m.get_opcodes():
260
print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
261
(tag, i1, i2, knit_lines[i1:i2], j1, j2, new_lines[j1:j2]))
264
new_knit.extend(knit[i1:i2])
265
elif tag == 'delete':
266
for i in range(i1, i2):
268
new_knit.append((kl[0], kl[1], False))