3
# Copyright (C) 2005 Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
# Author: Martin Pool <mbp@canonical.com>
22
"""Weave - storage of related text file versions"""
24
# TODO: Perhaps have copy and comparison methods of Weave instances?
27
class VerInfo(object):
28
"""Information about a version in a Weave."""
29
included = frozenset()
30
def __init__(self, included=None):
32
self.included = frozenset(included)
35
s = self.__class__.__name__ + '('
37
s += 'included=%r' % (list(self.included))
43
"""weave - versioned text file storage.
45
A Weave manages versions of line-based text files, keeping track of the
46
originating version for each line.
48
Texts can be identified in either of two ways:
50
* a nonnegative index number.
52
* a version-id string.
54
Typically the index number will be valid only inside this weave and
55
the version-id is used to reference it in the larger world.
57
The weave is represented as a list mixing edit instructions and
58
literal text. Each entry in _l can be either a string (or
59
unicode), or a tuple. If a string, it means that the given line
60
should be output in the currently active revisions.
62
If a tuple, it gives a processing instruction saying in which
63
revisions the enclosed lines are active. The tuple has the form
64
(instruction, version).
66
The instruction can be '{' or '}' for an insertion block, and '['
67
and ']' for a deletion block respectively. The version is the
68
integer version index.
72
* A later version can delete lines that were introduced by any
73
number of ancestor versions; this implies that deletion
74
instructions can span insertion blocks without regard to the
75
insertion block's nesting.
77
* Similarly, deletions need not be properly nested with regard to
78
each other, because they might have been generated by
79
independent revisions.
81
* It doesn't seem very useful to have an active insertion
82
inside an inactive insertion, but it might happen.
84
* Therefore, all instructions are always"considered"; that
85
is passed onto and off the stack. An outer inactive block
86
doesn't disable an inner block.
88
* Lines are enabled if the most recent enclosing insertion is
89
active and none of the enclosing deletions are active.
96
List of versions, indexed by index number.
98
For each version we store the tuple (included_versions), which
99
lists the previous versions also considered active.
106
def add(self, parents, text):
107
"""Add a single text on top of the weave.
109
Returns the index number of the newly added version.
112
List or set of parent version numbers.
115
Sequence of lines to be added in the new version."""
116
self._check_versions(parents)
117
self._check_lines(text)
122
parents = frozenset(parents)
123
delta = self._delta(parents, text)
125
# offset gives the number of lines that have been inserted
126
# into the weave up to the current point; if the original edit instruction
127
# says to change line A then we actually change (A+offset)
130
for i1, i2, newlines in delta:
133
assert i2 <= len(self._l)
136
raise NotImplementedError("can't handle replacing weave [%d:%d] yet"
139
self._l.insert(i1 + offset, ('{', idx))
141
self._l[i:i] = newlines
142
self._l.insert(i + 1, ('}', idx))
143
offset += 2 + len(newlines)
145
self._v.append(VerInfo(parents))
147
# special case; adding with no parents revision; can do this
148
# more quickly by just appending unconditionally
149
self._l.append(('{', idx))
151
self._l.append(('}', idx))
153
self._v.append(VerInfo())
158
def _check_lines(self, text):
159
if not isinstance(text, list):
160
raise ValueError("text should be a list, not %s" % type(text))
163
if not isinstance(l, basestring):
164
raise ValueError("text line should be a string or unicode, not %s" % type(l))
168
def _check_versions(self, indexes):
169
"""Check everything in the sequence of indexes is valid"""
174
raise IndexError("invalid version number %r" % i)
177
def annotate(self, index):
178
return list(self.annotate_iter(index))
181
def annotate_iter(self, index):
182
"""Yield list of (index-id, line) pairs for the specified version.
184
The index indicates when the line originated in the weave."""
188
raise IndexError('version index %d out of range' % index)
189
included = set(vi.included)
191
for origin, lineno, text in self._extract(included):
195
def _extract(self, included):
196
"""Yield annotation of lines in included set.
198
Yields a sequence of tuples (origin, lineno, text), where
199
origin is the origin version, lineno the index in the weave,
200
and text the text of the line.
202
The set typically but not necessarily corresponds to a version.
209
if isinstance(l, tuple):
213
isactive = (v in included)
215
oldc, oldv = stack.pop()
218
isactive = stack and (stack[-1][1] in included)
220
raise ValueError("invalid processing instruction %r on line %d"
223
assert isinstance(l, basestring)
225
raise ValueError("literal at top level on line %d"
228
origin = stack[-1][1]
229
yield origin, lineno, l
233
raise ValueError("unclosed blocks at end of weave",
237
def getiter(self, index):
238
"""Yield lines for the specified version."""
239
for origin, line in self.annotate_iter(index):
243
def get(self, index):
244
return list(self.getiter(index))
247
def dump(self, to_file):
248
from pprint import pprint
249
print >>to_file, "Weave._l = ",
250
pprint(self._l, to_file)
251
print >>to_file, "Weave._v = ",
252
pprint(self._v, to_file)
256
for vers_info in self._v:
258
for vi in vers_info[0]:
259
if vi < 0 or vi >= index:
260
raise ValueError("invalid included version %d for index %d"
263
raise ValueError("repeated included version %d for index %d"
269
def _delta(self, included, lines):
270
"""Return changes from basis to new revision.
272
The old text for comparison is the union of included revisions.
274
This is used in inserting a new text.
276
Delta is returned as a sequence of (line1, line2, newlines),
277
indicating that line1 through line2 of the old weave should be
278
replaced by the sequence of lines in newlines. Note that
279
these line numbers are positions in the total weave and don't
280
correspond to the lines in any extracted version, or even the
281
extracted union of included versions.
283
If line1=line2, this is a pure insert; if newlines=[] this is a
284
pure delete. (Similar to difflib.)
287
self._check_versions(included)
289
##from pprint import pprint
291
# first get basis for comparison
292
# basis holds (lineno, origin, line)
298
basis = list(self._extract(included))
300
# now make a parallel list with only the text, to pass to the differ
301
basis_lines = [line for (origin, lineno, line) in basis]
303
# add a sentinal, because we can also match against the final line
304
basis.append((len(self._l), None))
306
# XXX: which line of the weave should we really consider matches the end of the file?
307
# the current code says it's the last line of the weave?
309
from difflib import SequenceMatcher
310
s = SequenceMatcher(None, basis_lines, lines)
312
##print 'basis sequence:'
315
for tag, i1, i2, j1, j2 in s.get_opcodes():
316
##print tag, i1, i2, j1, j2
321
# i1,i2 are given in offsets within basis_lines; we need to map them
322
# back to offsets within the entire weave
323
real_i1 = basis[i1][0]
324
real_i2 = basis[i2][0]
328
assert j2 <= len(lines)
330
yield real_i1, real_i2, lines[j1:j2]