~bzr-pqm/bzr/bzr.dev

0.1.1 by Martin Pool
Check in old existing knit code.
1
#! /usr/bin/python
2
3
# Copyright (C) 2005 Canonical Ltd
4
0.1.33 by Martin Pool
add gpl text
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.1.1 by Martin Pool
Check in old existing knit code.
18
19
# Author: Martin Pool <mbp@canonical.com>
20
21
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
22
"""Weave - storage of related text file versions"""
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
23
928 by Martin Pool
- go back to using plain builtin set()
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
27
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
28
# making _extract build and return a list, rather than being a generator
29
# takes 37.94s
30
938 by Martin Pool
- various optimizations to weave add code
31
# with python -O, r923 does 2000 versions in 36.87s
32
33
# with optimizations to avoid mutating lists - 35.75!  I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
36
37
# r931, which avoids using a generator for extract, does 36.98s
38
39
# with memoized inclusions, takes 41.49s; not very good
40
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
42
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
45
46
# with delta folded in and mutation of the list, 36.13s
47
48
# with all this and simplification of add code, 33s 
49
50
0.1.61 by Martin Pool
doc
51
# TODO: Perhaps have copy method for Weave instances?
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
52
0.1.58 by Martin Pool
doc
53
# XXX: If we do weaves this way, will a merge still behave the same
54
# way if it's done in a different order?  That's a pretty desirable
55
# property.
56
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
57
# TODO: Nothing here so far assumes the lines are really \n newlines,
58
# rather than being split up in some other way.  We could accomodate
59
# binaries, perhaps by naively splitting on \n or perhaps using
60
# something like a rolling checksum.
61
62
# TODO: Track version names as well as indexes. 
63
0.1.85 by Martin Pool
doc
64
# TODO: End marker for each version so we can stop reading?
0.1.69 by Martin Pool
Simple text-based format for storing weaves, cleaner than
65
66
# TODO: Check that no insertion occurs inside a deletion that was
67
# active in the version of the insertion.
68
912 by Martin Pool
- update todos for weave
69
# TODO: In addition to the SHA-1 check, perhaps have some code that
70
# checks structural constraints of the weave: ie that insertions are
71
# properly nested, that there is no text outside of an insertion, that
72
# insertions or deletions are not repeated, etc.
0.1.85 by Martin Pool
doc
73
918 by Martin Pool
- start doing new weave-merge algorithm
74
# TODO: Parallel-extract that passes back each line along with a
75
# description of which revisions include it.  Nice for checking all
76
# shas in parallel.
77
78
0.1.85 by Martin Pool
doc
79
924 by Martin Pool
- Add IntSet class
80
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
81
class WeaveError(Exception):
82
    """Exception in processing weave"""
83
84
85
class WeaveFormatError(WeaveError):
86
    """Weave invariant violated"""
87
    
88
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
89
class Weave(object):
90
    """weave - versioned text file storage.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
91
    
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
92
    A Weave manages versions of line-based text files, keeping track
93
    of the originating version for each line.
94
95
    To clients the "lines" of the file are represented as a list of strings.
96
    These strings  will typically have terminal newline characters, but
97
    this is not required.  In particular files commonly do not have a newline
98
    at the end of the file.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
99
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
100
    Texts can be identified in either of two ways:
101
102
    * a nonnegative index number.
103
104
    * a version-id string.
105
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
106
    Typically the index number will be valid only inside this weave and
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
107
    the version-id is used to reference it in the larger world.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
108
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
109
    The weave is represented as a list mixing edit instructions and
944 by Martin Pool
- refactor member names in Weave code
110
    literal text.  Each entry in _weave can be either a string (or
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
111
    unicode), or a tuple.  If a string, it means that the given line
112
    should be output in the currently active revisions.
113
114
    If a tuple, it gives a processing instruction saying in which
115
    revisions the enclosed lines are active.  The tuple has the form
116
    (instruction, version).
117
118
    The instruction can be '{' or '}' for an insertion block, and '['
119
    and ']' for a deletion block respectively.  The version is the
0.1.45 by Martin Pool
doc
120
    integer version index.  There is no replace operator, only deletes
121
    and inserts.
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
122
0.1.41 by Martin Pool
Doc
123
    Constraints/notes:
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
124
125
    * A later version can delete lines that were introduced by any
126
      number of ancestor versions; this implies that deletion
127
      instructions can span insertion blocks without regard to the
128
      insertion block's nesting.
129
0.1.41 by Martin Pool
Doc
130
    * Similarly, deletions need not be properly nested with regard to
131
      each other, because they might have been generated by
132
      independent revisions.
133
0.1.45 by Martin Pool
doc
134
    * Insertions are always made by inserting a new bracketed block
135
      into a single point in the previous weave.  This implies they
136
      can nest but not overlap, and the nesting must always have later
137
      insertions on the inside.
138
0.1.41 by Martin Pool
Doc
139
    * It doesn't seem very useful to have an active insertion
140
      inside an inactive insertion, but it might happen.
0.1.45 by Martin Pool
doc
141
      
0.1.41 by Martin Pool
Doc
142
    * Therefore, all instructions are always"considered"; that
143
      is passed onto and off the stack.  An outer inactive block
144
      doesn't disable an inner block.
145
146
    * Lines are enabled if the most recent enclosing insertion is
147
      active and none of the enclosing deletions are active.
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
148
0.1.49 by Martin Pool
Add another constraint: revisions should not delete text that they
149
    * There is no point having a deletion directly inside its own
150
      insertion; you might as well just not write it.  And there
151
      should be no way to get an earlier version deleting a later
152
      version.
153
944 by Martin Pool
- refactor member names in Weave code
154
    _weave
155
        Text of the weave; list of control instruction tuples and strings.
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
156
944 by Martin Pool
- refactor member names in Weave code
157
    _parents
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
158
        List of parents, indexed by version number.
159
        It is only necessary to store the minimal set of parents for
160
        each version; the parent's parents are implied.
0.1.13 by Martin Pool
Knit structure now allows for versions to include the lines present in other
161
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
162
    _sha1s
163
        List of hex SHA-1 of each version, or None if not recorded.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
164
    """
938 by Martin Pool
- various optimizations to weave add code
165
944 by Martin Pool
- refactor member names in Weave code
166
    __slots__ = ['_weave', '_parents', '_sha1s']
938 by Martin Pool
- various optimizations to weave add code
167
    
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
168
    def __init__(self):
944 by Martin Pool
- refactor member names in Weave code
169
        self._weave = []
170
        self._parents = []
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
171
        self._sha1s = []
0.1.60 by Martin Pool
Weave eq and ne methods
172
173
174
    def __eq__(self, other):
175
        if not isinstance(other, Weave):
176
            return False
944 by Martin Pool
- refactor member names in Weave code
177
        return self._parents == other._parents \
178
               and self._weave == other._weave
0.1.60 by Martin Pool
Weave eq and ne methods
179
    
180
181
    def __ne__(self, other):
182
        return not self.__eq__(other)
183
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
184
        
0.1.26 by Martin Pool
Refactor parameters to add command
185
    def add(self, parents, text):
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
186
        """Add a single text on top of the weave.
0.1.36 by Martin Pool
doc
187
  
0.1.26 by Martin Pool
Refactor parameters to add command
188
        Returns the index number of the newly added version.
189
190
        parents
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
191
            List or set of direct parent version numbers.
192
            
0.1.26 by Martin Pool
Refactor parameters to add command
193
        text
194
            Sequence of lines to be added in the new version."""
938 by Martin Pool
- various optimizations to weave add code
195
196
        self._check_versions(parents)
0.1.82 by Martin Pool
Small weave optimizations
197
        ## self._check_lines(text)
944 by Martin Pool
- refactor member names in Weave code
198
        new_version = len(self._parents)
0.1.5 by Martin Pool
Add test for storing two text versions.
199
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
200
        import sha
201
        s = sha.new()
938 by Martin Pool
- various optimizations to weave add code
202
        map(s.update, text)
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
203
        sha1 = s.hexdigest()
204
        del s
205
938 by Martin Pool
- various optimizations to weave add code
206
        # if we abort after here the weave will be corrupt
944 by Martin Pool
- refactor member names in Weave code
207
        self._parents.append(frozenset(parents))
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
208
        self._sha1s.append(sha1)
938 by Martin Pool
- various optimizations to weave add code
209
210
            
211
        if not parents:
212
            # special case; adding with no parents revision; can do
213
            # this more quickly by just appending unconditionally.
214
            # even more specially, if we're adding an empty text we
215
            # need do nothing at all.
216
            if text:
944 by Martin Pool
- refactor member names in Weave code
217
                self._weave.append(('{', new_version))
218
                self._weave.extend(text)
219
                self._weave.append(('}', new_version))
938 by Martin Pool
- various optimizations to weave add code
220
        
221
            return new_version
222
941 by Martin Pool
- allow for parents specified to Weave.add to be a set
223
        if len(parents) == 1:
224
            pv = list(parents)[0]
225
            if sha1 == self._sha1s[pv]:
226
                # special case: same as the single parent
227
                return new_version
938 by Martin Pool
- various optimizations to weave add code
228
            
229
230
        ancestors = self.inclusions(parents)
231
944 by Martin Pool
- refactor member names in Weave code
232
        l = self._weave
938 by Martin Pool
- various optimizations to weave add code
233
234
        # basis a list of (origin, lineno, line)
235
        basis_lineno = []
236
        basis_lines = []
237
        for origin, lineno, line in self._extract(ancestors):
238
            basis_lineno.append(lineno)
239
            basis_lines.append(line)
240
974.1.26 by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472
241
        # another small special case: a merge, producing the same text
242
        # as auto-merge
938 by Martin Pool
- various optimizations to weave add code
243
        if text == basis_lines:
244
            return new_version            
245
246
        # add a sentinal, because we can also match against the final line
944 by Martin Pool
- refactor member names in Weave code
247
        basis_lineno.append(len(self._weave))
938 by Martin Pool
- various optimizations to weave add code
248
249
        # XXX: which line of the weave should we really consider
250
        # matches the end of the file?  the current code says it's the
251
        # last line of the weave?
252
253
        #print 'basis_lines:', basis_lines
254
        #print 'new_lines:  ', lines
255
256
        from difflib import SequenceMatcher
257
        s = SequenceMatcher(None, basis_lines, text)
258
259
        # offset gives the number of lines that have been inserted
260
        # into the weave up to the current point; if the original edit instruction
261
        # says to change line A then we actually change (A+offset)
262
        offset = 0
263
264
        for tag, i1, i2, j1, j2 in s.get_opcodes():
265
            # i1,i2 are given in offsets within basis_lines; we need to map them
266
            # back to offsets within the entire weave
267
            #print 'raw match', tag, i1, i2, j1, j2
268
            if tag == 'equal':
269
                continue
270
271
            i1 = basis_lineno[i1]
272
            i2 = basis_lineno[i2]
273
274
            assert 0 <= j1 <= j2 <= len(text)
275
276
            #print tag, i1, i2, j1, j2
277
278
            # the deletion and insertion are handled separately.
279
            # first delete the region.
280
            if i1 != i2:
944 by Martin Pool
- refactor member names in Weave code
281
                self._weave.insert(i1+offset, ('[', new_version))
282
                self._weave.insert(i2+offset+1, (']', new_version))
938 by Martin Pool
- various optimizations to weave add code
283
                offset += 2
284
285
            if j1 != j2:
286
                # there may have been a deletion spanning up to
287
                # i2; we want to insert after this region to make sure
288
                # we don't destroy ourselves
289
                i = i2 + offset
944 by Martin Pool
- refactor member names in Weave code
290
                self._weave[i:i] = ([('{', new_version)] 
938 by Martin Pool
- various optimizations to weave add code
291
                                + text[j1:j2] 
292
                                + [('}', new_version)])
293
                offset += 2 + (j2 - j1)
294
295
        return new_version
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
296
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
297
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
298
    def inclusions(self, versions):
893 by Martin Pool
- Refactor weave calculation of inclusions
299
        """Return set of all ancestors of given version(s)."""
928 by Martin Pool
- go back to using plain builtin set()
300
        i = set(versions)
893 by Martin Pool
- Refactor weave calculation of inclusions
301
        v = max(versions)
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
302
        try:
893 by Martin Pool
- Refactor weave calculation of inclusions
303
            while v >= 0:
304
                if v in i:
305
                    # include all its parents
944 by Martin Pool
- refactor member names in Weave code
306
                    i.update(self._parents[v])
893 by Martin Pool
- Refactor weave calculation of inclusions
307
                v -= 1
308
            return i
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
309
        except IndexError:
310
            raise ValueError("version %d not present in weave" % v)
0.1.77 by Martin Pool
New Weave.get_included() does transitive expansion
311
312
890 by Martin Pool
- weave info should show minimal expression of parents
313
    def minimal_parents(self, version):
314
        """Find the minimal set of parents for the version."""
944 by Martin Pool
- refactor member names in Weave code
315
        included = self._parents[version]
890 by Martin Pool
- weave info should show minimal expression of parents
316
        if not included:
317
            return []
318
        
319
        li = list(included)
893 by Martin Pool
- Refactor weave calculation of inclusions
320
        li.sort(reverse=True)
890 by Martin Pool
- weave info should show minimal expression of parents
321
322
        mininc = []
928 by Martin Pool
- go back to using plain builtin set()
323
        gotit = set()
890 by Martin Pool
- weave info should show minimal expression of parents
324
325
        for pv in li:
326
            if pv not in gotit:
327
                mininc.append(pv)
893 by Martin Pool
- Refactor weave calculation of inclusions
328
                gotit.update(self.inclusions(pv))
890 by Martin Pool
- weave info should show minimal expression of parents
329
330
        assert mininc[0] >= 0
331
        assert mininc[-1] < version
332
        return mininc
333
334
0.1.75 by Martin Pool
Remove VerInfo class; just store sets directly in the list of
335
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
336
    def _check_lines(self, text):
337
        if not isinstance(text, list):
338
            raise ValueError("text should be a list, not %s" % type(text))
339
340
        for l in text:
341
            if not isinstance(l, basestring):
869 by Martin Pool
- more weave.py command line options
342
                raise ValueError("text line should be a string or unicode, not %s"
343
                                 % type(l))
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
344
        
345
346
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
347
    def _check_versions(self, indexes):
348
        """Check everything in the sequence of indexes is valid"""
349
        for i in indexes:
350
            try:
944 by Martin Pool
- refactor member names in Weave code
351
                self._parents[i]
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
352
            except IndexError:
353
                raise IndexError("invalid version number %r" % i)
354
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
355
    
0.1.7 by Martin Pool
Add trivial annotate text
356
    def annotate(self, index):
357
        return list(self.annotate_iter(index))
358
359
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
360
    def annotate_iter(self, version):
0.1.7 by Martin Pool
Add trivial annotate text
361
        """Yield list of (index-id, line) pairs for the specified version.
362
363
        The index indicates when the line originated in the weave."""
893 by Martin Pool
- Refactor weave calculation of inclusions
364
        for origin, lineno, text in self._extract([version]):
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
365
            yield origin, text
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
366
367
918 by Martin Pool
- start doing new weave-merge algorithm
368
    def _walk(self):
369
        """Walk the weave.
370
371
        Yields sequence of
372
        (lineno, insert, deletes, text)
373
        for each literal line.
374
        """
375
        
376
        istack = []
928 by Martin Pool
- go back to using plain builtin set()
377
        dset = set()
918 by Martin Pool
- start doing new weave-merge algorithm
378
379
        lineno = 0         # line of weave, 0-based
380
944 by Martin Pool
- refactor member names in Weave code
381
        for l in self._weave:
918 by Martin Pool
- start doing new weave-merge algorithm
382
            if isinstance(l, tuple):
383
                c, v = l
384
                isactive = None
385
                if c == '{':
386
                    istack.append(v)
387
                elif c == '}':
388
                    oldv = istack.pop()
389
                elif c == '[':
926 by Martin Pool
- update more weave code to use intsets
390
                    assert v not in dset
391
                    dset.add(v)
918 by Martin Pool
- start doing new weave-merge algorithm
392
                elif c == ']':
926 by Martin Pool
- update more weave code to use intsets
393
                    dset.remove(v)
918 by Martin Pool
- start doing new weave-merge algorithm
394
                else:
395
                    raise WeaveFormatError('unexpected instruction %r'
396
                                           % v)
397
            else:
398
                assert isinstance(l, basestring)
399
                assert istack
400
                yield lineno, istack[-1], dset, l
401
            lineno += 1
402
403
404
893 by Martin Pool
- Refactor weave calculation of inclusions
405
    def _extract(self, versions):
0.1.20 by Martin Pool
Factor out Knit.extract() method
406
        """Yield annotation of lines in included set.
407
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
408
        Yields a sequence of tuples (origin, lineno, text), where
409
        origin is the origin version, lineno the index in the weave,
410
        and text the text of the line.
411
0.1.20 by Martin Pool
Factor out Knit.extract() method
412
        The set typically but not necessarily corresponds to a version.
413
        """
893 by Martin Pool
- Refactor weave calculation of inclusions
414
        included = self.inclusions(versions)
881 by Martin Pool
- faster weave extraction
415
416
        istack = []
928 by Martin Pool
- go back to using plain builtin set()
417
        dset = set()
0.1.48 by Martin Pool
Basic parsing of delete instructions.
418
419
        lineno = 0         # line of weave, 0-based
891 by Martin Pool
- fix up refactoring of weave
420
894 by Martin Pool
- small optimization for weave extract
421
        isactive = None
0.1.85 by Martin Pool
doc
422
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
423
        result = []
424
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
425
        WFE = WeaveFormatError
0.1.95 by Martin Pool
- preliminary merge conflict detection
426
944 by Martin Pool
- refactor member names in Weave code
427
        for l in self._weave:
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
428
            if isinstance(l, tuple):
429
                c, v = l
894 by Martin Pool
- small optimization for weave extract
430
                isactive = None
891 by Martin Pool
- fix up refactoring of weave
431
                if c == '{':
432
                    assert v not in istack
433
                    istack.append(v)
434
                elif c == '}':
435
                    oldv = istack.pop()
436
                    assert oldv == v
437
                elif c == '[':
438
                    if v in included:
881 by Martin Pool
- faster weave extraction
439
                        assert v not in dset
0.1.48 by Martin Pool
Basic parsing of delete instructions.
440
                        dset.add(v)
891 by Martin Pool
- fix up refactoring of weave
441
                else:
442
                    assert c == ']'
443
                    if v in included:
881 by Martin Pool
- faster weave extraction
444
                        assert v in dset
0.1.48 by Martin Pool
Basic parsing of delete instructions.
445
                        dset.remove(v)
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
446
            else:
447
                assert isinstance(l, basestring)
894 by Martin Pool
- small optimization for weave extract
448
                if isactive is None:
449
                    isactive = (not dset) and istack and (istack[-1] in included)
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
450
                if isactive:
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
451
                    result.append((istack[-1], lineno, l))
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
452
            lineno += 1
0.1.7 by Martin Pool
Add trivial annotate text
453
0.1.46 by Martin Pool
More constraints on structure of weave, and checks that they work
454
        if istack:
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
455
            raise WFE("unclosed insertion blocks at end of weave",
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
456
                                   istack)
0.1.48 by Martin Pool
Basic parsing of delete instructions.
457
        if dset:
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
458
            raise WFE("unclosed deletion blocks at end of weave",
0.1.48 by Martin Pool
Basic parsing of delete instructions.
459
                                   dset)
0.1.40 by Martin Pool
Add test for extracting from weave with nested insertions
460
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
461
        return result
462
    
463
0.1.7 by Martin Pool
Add trivial annotate text
464
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
465
    def get_iter(self, version):
0.1.5 by Martin Pool
Add test for storing two text versions.
466
        """Yield lines for the specified version."""
893 by Martin Pool
- Refactor weave calculation of inclusions
467
        for origin, lineno, line in self._extract([version]):
0.1.8 by Martin Pool
Unify get/annotate code
468
            yield line
0.1.5 by Martin Pool
Add test for storing two text versions.
469
470
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
471
    def get(self, index):
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
472
        return list(self.get_iter(index))
0.1.1 by Martin Pool
Check in old existing knit code.
473
474
0.1.95 by Martin Pool
- preliminary merge conflict detection
475
    def mash_iter(self, included):
0.1.65 by Martin Pool
Add Weave.merge_iter to get automerged lines
476
        """Return composed version of multiple included versions."""
893 by Martin Pool
- Refactor weave calculation of inclusions
477
        for origin, lineno, text in self._extract(included):
0.1.65 by Martin Pool
Add Weave.merge_iter to get automerged lines
478
            yield text
479
480
0.1.11 by Martin Pool
Add Knit.dump method
481
    def dump(self, to_file):
482
        from pprint import pprint
944 by Martin Pool
- refactor member names in Weave code
483
        print >>to_file, "Weave._weave = ",
484
        pprint(self._weave, to_file)
485
        print >>to_file, "Weave._parents = ",
486
        pprint(self._parents, to_file)
0.1.11 by Martin Pool
Add Knit.dump method
487
488
0.1.91 by Martin Pool
Update Weave.check
489
490
    def numversions(self):
944 by Martin Pool
- refactor member names in Weave code
491
        l = len(self._parents)
0.1.91 by Martin Pool
Update Weave.check
492
        assert l == len(self._sha1s)
493
        return l
494
495
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
496
    def __len__(self):
497
        return self.numversions()
498
499
894 by Martin Pool
- small optimization for weave extract
500
    def check(self, progress_bar=None):
0.1.91 by Martin Pool
Update Weave.check
501
        # check no circular inclusions
502
        for version in range(self.numversions()):
944 by Martin Pool
- refactor member names in Weave code
503
            inclusions = list(self._parents[version])
0.1.91 by Martin Pool
Update Weave.check
504
            if inclusions:
505
                inclusions.sort()
506
                if inclusions[-1] >= version:
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
507
                    raise WeaveFormatError("invalid included version %d for index %d"
0.1.91 by Martin Pool
Update Weave.check
508
                                           % (inclusions[-1], version))
509
510
        # try extracting all versions; this is a bit slow and parallel
511
        # extraction could be used
512
        import sha
894 by Martin Pool
- small optimization for weave extract
513
        nv = self.numversions()
514
        for version in range(nv):
515
            if progress_bar:
516
                progress_bar.update('checking text', version, nv)
0.1.91 by Martin Pool
Update Weave.check
517
            s = sha.new()
518
            for l in self.get_iter(version):
519
                s.update(l)
520
            hd = s.hexdigest()
521
            expected = self._sha1s[version]
522
            if hd != expected:
523
                raise WeaveError("mismatched sha1 for version %d; "
524
                                 "got %s, expected %s"
525
                                 % (version, hd, expected))
0.1.18 by Martin Pool
Better Knit.dump method
526
881 by Martin Pool
- faster weave extraction
527
        # TODO: check insertions are properly nested, that there are
528
        # no lines outside of insertion blocks, that deletions are
529
        # properly paired, etc.
530
0.1.13 by Martin Pool
Knit structure now allows for versions to include the lines present in other
531
532
0.1.95 by Martin Pool
- preliminary merge conflict detection
533
    def merge(self, merge_versions):
534
        """Automerge and mark conflicts between versions.
535
536
        This returns a sequence, each entry describing alternatives
537
        for a chunk of the file.  Each of the alternatives is given as
538
        a list of lines.
539
540
        If there is a chunk of the file where there's no diagreement,
541
        only one alternative is given.
542
        """
543
544
        # approach: find the included versions common to all the
545
        # merged versions
546
        raise NotImplementedError()
547
548
549
0.1.21 by Martin Pool
Start computing a delta to insert a new revision
550
    def _delta(self, included, lines):
551
        """Return changes from basis to new revision.
552
553
        The old text for comparison is the union of included revisions.
554
555
        This is used in inserting a new text.
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
556
0.1.55 by Martin Pool
doc
557
        Delta is returned as a sequence of
558
        (weave1, weave2, newlines).
559
560
        This indicates that weave1:weave2 of the old weave should be
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
561
        replaced by the sequence of lines in newlines.  Note that
562
        these line numbers are positions in the total weave and don't
563
        correspond to the lines in any extracted version, or even the
564
        extracted union of included versions.
565
566
        If line1=line2, this is a pure insert; if newlines=[] this is a
567
        pure delete.  (Similar to difflib.)
0.1.21 by Martin Pool
Start computing a delta to insert a new revision
568
        """
569
0.1.1 by Martin Pool
Check in old existing knit code.
570
918 by Martin Pool
- start doing new weave-merge algorithm
571
            
572
    def plan_merge(self, ver_a, ver_b):
573
        """Return pseudo-annotation indicating how the two versions merge.
574
575
        This is computed between versions a and b and their common
576
        base.
577
578
        Weave lines present in none of them are skipped entirely.
579
        """
926 by Martin Pool
- update more weave code to use intsets
580
        inc_a = self.inclusions([ver_a])
581
        inc_b = self.inclusions([ver_b])
918 by Martin Pool
- start doing new weave-merge algorithm
582
        inc_c = inc_a & inc_b
583
584
        for lineno, insert, deleteset, line in self._walk():
585
            if deleteset & inc_c:
586
                # killed in parent; can't be in either a or b
587
                # not relevant to our work
588
                yield 'killed-base', line
926 by Martin Pool
- update more weave code to use intsets
589
            elif insert in inc_c:
918 by Martin Pool
- start doing new weave-merge algorithm
590
                # was inserted in base
591
                killed_a = bool(deleteset & inc_a)
592
                killed_b = bool(deleteset & inc_b)
593
                if killed_a and killed_b:
594
                    yield 'killed-both', line
595
                elif killed_a:
596
                    yield 'killed-a', line
597
                elif killed_b:
598
                    yield 'killed-b', line
599
                else:
600
                    yield 'unchanged', line
926 by Martin Pool
- update more weave code to use intsets
601
            elif insert in inc_a:
918 by Martin Pool
- start doing new weave-merge algorithm
602
                if deleteset & inc_a:
603
                    yield 'ghost-a', line
604
                else:
605
                    # new in A; not in B
606
                    yield 'new-a', line
926 by Martin Pool
- update more weave code to use intsets
607
            elif insert in inc_b:
918 by Martin Pool
- start doing new weave-merge algorithm
608
                if deleteset & inc_b:
609
                    yield 'ghost-b', line
610
                else:
611
                    yield 'new-b', line
612
            else:
613
                # not in either revision
614
                yield 'irrelevant', line
615
919 by Martin Pool
- more development of weave-merge
616
        yield 'unchanged', ''           # terminator
617
618
619
620
    def weave_merge(self, plan):
621
        lines_a = []
622
        lines_b = []
623
        ch_a = ch_b = False
624
625
        for state, line in plan:
626
            if state == 'unchanged' or state == 'killed-both':
627
                # resync and flush queued conflicts changes if any
628
                if not lines_a and not lines_b:
629
                    pass
630
                elif ch_a and not ch_b:
631
                    # one-sided change:                    
632
                    for l in lines_a: yield l
633
                elif ch_b and not ch_a:
634
                    for l in lines_b: yield l
635
                elif lines_a == lines_b:
636
                    for l in lines_a: yield l
637
                else:
638
                    yield '<<<<\n'
639
                    for l in lines_a: yield l
640
                    yield '====\n'
641
                    for l in lines_b: yield l
642
                    yield '>>>>\n'
643
644
                del lines_a[:]
645
                del lines_b[:]
646
                ch_a = ch_b = False
647
                
648
            if state == 'unchanged':
649
                if line:
650
                    yield line
651
            elif state == 'killed-a':
652
                ch_a = True
653
                lines_b.append(line)
654
            elif state == 'killed-b':
655
                ch_b = True
656
                lines_a.append(line)
657
            elif state == 'new-a':
658
                ch_a = True
659
                lines_a.append(line)
660
            elif state == 'new-b':
661
                ch_b = True
662
                lines_b.append(line)
663
            else:
920 by Martin Pool
- add more test cases for weave_merge
664
                assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
665
                                 'killed-both'), \
919 by Martin Pool
- more development of weave-merge
666
                       state
667
668
                
669
670
918 by Martin Pool
- start doing new weave-merge algorithm
671
672
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
673
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
674
def weave_info(w):
0.1.88 by Martin Pool
Add weave info command.
675
    """Show some text information about the weave."""
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
676
    print '%6s %40s %20s' % ('ver', 'sha1', 'parents')
677
    for i in (6, 40, 20):
870 by Martin Pool
- better weave info display
678
        print '-' * i,
679
    print
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
680
    for i in range(w.numversions()):
0.1.91 by Martin Pool
Update Weave.check
681
        sha1 = w._sha1s[i]
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
682
        print '%6d %40s %s' % (i, sha1, ' '.join(map(str, w._parents[i])))
0.1.88 by Martin Pool
Add weave info command.
683
869 by Martin Pool
- more weave.py command line options
684
685
947 by Martin Pool
- new 'weave stats' command
686
def weave_stats(weave_file):
687
    from bzrlib.progress import ProgressBar
688
    from bzrlib.weavefile import read_weave
689
690
    pb = ProgressBar()
691
692
    wf = file(weave_file, 'rb')
693
    w = read_weave(wf)
694
    # FIXME: doesn't work on pipes
695
    weave_size = wf.tell()
696
697
    total = 0
698
    vers = len(w)
699
    for i in range(vers):
700
        pb.update('checking sizes', i, vers)
701
        for line in w.get_iter(i):
702
            total += len(line)
703
704
    pb.clear()
705
706
    print 'versions          %9d' % vers
707
    print 'weave file        %9d bytes' % weave_size
708
    print 'total contents    %9d bytes' % total
709
    print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
974.1.26 by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472
710
    if vers:
711
        avg = total/vers
712
        print 'average size      %9d bytes' % avg
713
        print 'relative size     %9.2fx' % (float(weave_size) / float(avg))
947 by Martin Pool
- new 'weave stats' command
714
715
869 by Martin Pool
- more weave.py command line options
716
def usage():
871 by Martin Pool
- add command for merge-based weave
717
    print """bzr weave tool
718
719
Experimental tool for weave algorithm.
720
869 by Martin Pool
- more weave.py command line options
721
usage:
722
    weave init WEAVEFILE
723
        Create an empty weave file
724
    weave get WEAVEFILE VERSION
725
        Write out specified version.
726
    weave check WEAVEFILE
727
        Check consistency of all versions.
728
    weave info WEAVEFILE
729
        Display table of contents.
730
    weave add WEAVEFILE [BASE...] < NEWTEXT
731
        Add NEWTEXT, with specified parent versions.
732
    weave annotate WEAVEFILE VERSION
733
        Display origin of each line.
734
    weave mash WEAVEFILE VERSION...
735
        Display composite of all selected versions.
736
    weave merge WEAVEFILE VERSION1 VERSION2 > OUT
737
        Auto-merge two versions and display conflicts.
871 by Martin Pool
- add command for merge-based weave
738
739
example:
740
741
    % weave init foo.weave
742
    % vi foo.txt
743
    % weave add foo.weave < foo.txt
744
    added version 0
745
746
    (create updated version)
747
    % vi foo.txt
748
    % weave get foo.weave 0 | diff -u - foo.txt
749
    % weave add foo.weave 0 < foo.txt
750
    added version 1
751
752
    % weave get foo.weave 0 > foo.txt       (create forked version)
753
    % vi foo.txt
754
    % weave add foo.weave 0 < foo.txt
755
    added version 2
756
757
    % weave merge foo.weave 1 2 > foo.txt   (merge them)
758
    % vi foo.txt                            (resolve conflicts)
759
    % weave add foo.weave 1 2 < foo.txt     (commit merged version)     
760
    
869 by Martin Pool
- more weave.py command line options
761
"""
0.1.88 by Martin Pool
Add weave info command.
762
    
763
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
764
765
def main(argv):
766
    import sys
767
    import os
869 by Martin Pool
- more weave.py command line options
768
    from weavefile import write_weave, read_weave
894 by Martin Pool
- small optimization for weave extract
769
    from bzrlib.progress import ProgressBar
770
771
    #import psyco
772
    #psyco.full()
773
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
774
    cmd = argv[1]
869 by Martin Pool
- more weave.py command line options
775
776
    def readit():
777
        return read_weave(file(argv[2], 'rb'))
778
    
779
    if cmd == 'help':
780
        usage()
781
    elif cmd == 'add':
782
        w = readit()
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
783
        # at the moment, based on everything in the file
869 by Martin Pool
- more weave.py command line options
784
        parents = map(int, argv[3:])
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
785
        lines = sys.stdin.readlines()
0.1.69 by Martin Pool
Simple text-based format for storing weaves, cleaner than
786
        ver = w.add(parents, lines)
869 by Martin Pool
- more weave.py command line options
787
        write_weave(w, file(argv[2], 'wb'))
788
        print 'added version %d' % ver
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
789
    elif cmd == 'init':
790
        fn = argv[2]
791
        if os.path.exists(fn):
792
            raise IOError("file exists")
793
        w = Weave()
869 by Martin Pool
- more weave.py command line options
794
        write_weave(w, file(fn, 'wb'))
795
    elif cmd == 'get': # get one version
796
        w = readit()
0.1.94 by Martin Pool
Fix get_iter call
797
        sys.stdout.writelines(w.get_iter(int(argv[3])))
869 by Martin Pool
- more weave.py command line options
798
        
799
    elif cmd == 'mash': # get composite
800
        w = readit()
801
        sys.stdout.writelines(w.mash_iter(map(int, argv[3:])))
802
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
803
    elif cmd == 'annotate':
869 by Martin Pool
- more weave.py command line options
804
        w = readit()
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
805
        # newline is added to all lines regardless; too hard to get
806
        # reasonable formatting otherwise
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
807
        lasto = None
808
        for origin, text in w.annotate(int(argv[3])):
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
809
            text = text.rstrip('\r\n')
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
810
            if origin == lasto:
811
                print '      | %s' % (text)
812
            else:
813
                print '%5d | %s' % (origin, text)
814
                lasto = origin
871 by Martin Pool
- add command for merge-based weave
815
                
0.1.88 by Martin Pool
Add weave info command.
816
    elif cmd == 'info':
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
817
        weave_info(readit())
947 by Martin Pool
- new 'weave stats' command
818
819
    elif cmd == 'stats':
820
        weave_stats(argv[2])
871 by Martin Pool
- add command for merge-based weave
821
        
0.1.91 by Martin Pool
Update Weave.check
822
    elif cmd == 'check':
869 by Martin Pool
- more weave.py command line options
823
        w = readit()
894 by Martin Pool
- small optimization for weave extract
824
        pb = ProgressBar()
825
        w.check(pb)
826
        pb.clear()
938 by Martin Pool
- various optimizations to weave add code
827
        print '%d versions ok' % w.numversions()
871 by Martin Pool
- add command for merge-based weave
828
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
829
    elif cmd == 'inclusions':
830
        w = readit()
831
        print ' '.join(map(str, w.inclusions([int(argv[3])])))
832
833
    elif cmd == 'parents':
834
        w = readit()
944 by Martin Pool
- refactor member names in Weave code
835
        print ' '.join(map(str, w._parents[int(argv[3])]))
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
836
918 by Martin Pool
- start doing new weave-merge algorithm
837
    elif cmd == 'plan-merge':
838
        w = readit()
839
        for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
919 by Martin Pool
- more development of weave-merge
840
            if line:
841
                print '%14s | %s' % (state, line),
918 by Martin Pool
- start doing new weave-merge algorithm
842
871 by Martin Pool
- add command for merge-based weave
843
    elif cmd == 'merge':
919 by Martin Pool
- more development of weave-merge
844
        w = readit()
845
        p = w.plan_merge(int(argv[3]), int(argv[4]))
846
        sys.stdout.writelines(w.weave_merge(p))
847
            
848
    elif cmd == 'mash-merge':
871 by Martin Pool
- add command for merge-based weave
849
        if len(argv) != 5:
850
            usage()
851
            return 1
852
853
        w = readit()
854
        v1, v2 = map(int, argv[3:5])
855
856
        basis = w.inclusions([v1]).intersection(w.inclusions([v2]))
857
858
        base_lines = list(w.mash_iter(basis))
859
        a_lines = list(w.get(v1))
860
        b_lines = list(w.get(v2))
861
862
        from bzrlib.merge3 import Merge3
863
        m3 = Merge3(base_lines, a_lines, b_lines)
864
865
        name_a = 'version %d' % v1
866
        name_b = 'version %d' % v2
867
        sys.stdout.writelines(m3.merge_lines(name_a=name_a, name_b=name_b))
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
868
    else:
869
        raise ValueError('unknown command %r' % cmd)
870
    
871
872
if __name__ == '__main__':
873
    import sys
874
    sys.exit(main(sys.argv))