~bzr-pqm/bzr/bzr.dev

0.1.1 by Martin Pool
Check in old existing knit code.
1
#! /usr/bin/python
2
3
# Copyright (C) 2005 Canonical Ltd
4
0.1.33 by Martin Pool
add gpl text
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.1.1 by Martin Pool
Check in old existing knit code.
18
19
# Author: Martin Pool <mbp@canonical.com>
20
21
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
22
"""Weave - storage of related text file versions"""
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
23
928 by Martin Pool
- go back to using plain builtin set()
24
# before intset (r923) 2000 versions in 41.5s
25
# with intset (r926) 2000 versions in 93s !!!
26
# better to just use plain sets.
27
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
28
# making _extract build and return a list, rather than being a generator
29
# takes 37.94s
30
938 by Martin Pool
- various optimizations to weave add code
31
# with python -O, r923 does 2000 versions in 36.87s
32
33
# with optimizations to avoid mutating lists - 35.75!  I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
36
37
# r931, which avoids using a generator for extract, does 36.98s
38
39
# with memoized inclusions, takes 41.49s; not very good
40
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
42
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
45
46
# with delta folded in and mutation of the list, 36.13s
47
48
# with all this and simplification of add code, 33s 
49
50
0.1.61 by Martin Pool
doc
51
# TODO: Perhaps have copy method for Weave instances?
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
52
0.1.58 by Martin Pool
doc
53
# XXX: If we do weaves this way, will a merge still behave the same
54
# way if it's done in a different order?  That's a pretty desirable
55
# property.
56
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
57
# TODO: Nothing here so far assumes the lines are really \n newlines,
58
# rather than being split up in some other way.  We could accommodate
59
# binaries, perhaps by naively splitting on \n or perhaps using
60
# something like a rolling checksum.
61
62
# TODO: Track version names as well as indexes. 
63
0.1.85 by Martin Pool
doc
64
# TODO: End marker for each version so we can stop reading?
0.1.69 by Martin Pool
Simple text-based format for storing weaves, cleaner than
65
66
# TODO: Check that no insertion occurs inside a deletion that was
67
# active in the version of the insertion.
68
912 by Martin Pool
- update todos for weave
69
# TODO: In addition to the SHA-1 check, perhaps have some code that
70
# checks structural constraints of the weave: ie that insertions are
71
# properly nested, that there is no text outside of an insertion, that
72
# insertions or deletions are not repeated, etc.
0.1.85 by Martin Pool
doc
73
918 by Martin Pool
- start doing new weave-merge algorithm
74
# TODO: Parallel-extract that passes back each line along with a
75
# description of which revisions include it.  Nice for checking all
76
# shas in parallel.
77
78
0.1.85 by Martin Pool
doc
79
924 by Martin Pool
- Add IntSet class
80
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
81
class WeaveError(Exception):
    """Exception in processing weave.

    Base class for the weave-specific errors raised in this module.
    """
83
84
85
class WeaveFormatError(WeaveError):
    """Weave invariant violated.

    Raised when the instruction stream of a weave is malformed, for
    example when a block is left unclosed.
    """
87
    
88
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
89
class Weave(object):
90
    """weave - versioned text file storage.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
91
    
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
92
    A Weave manages versions of line-based text files, keeping track
93
    of the originating version for each line.
94
95
    To clients the "lines" of the file are represented as a list of strings.
96
    These strings will typically have terminal newline characters, but
97
    this is not required.  In particular files commonly do not have a newline
98
    at the end of the file.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
99
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
100
    Texts can be identified in either of two ways:
101
102
    * a nonnegative index number.
103
104
    * a version-id string.
105
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
106
    Typically the index number will be valid only inside this weave and
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
107
    the version-id is used to reference it in the larger world.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
108
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
109
    The weave is represented as a list mixing edit instructions and
944 by Martin Pool
- refactor member names in Weave code
110
    literal text.  Each entry in _weave can be either a string (or
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
111
    unicode), or a tuple.  If a string, it means that the given line
112
    should be output in the currently active revisions.
113
114
    If a tuple, it gives a processing instruction saying in which
115
    revisions the enclosed lines are active.  The tuple has the form
116
    (instruction, version).
117
118
    The instruction can be '{' or '}' for an insertion block, and '['
119
    and ']' for a deletion block respectively.  The version is the
0.1.45 by Martin Pool
doc
120
    integer version index.  There is no replace operator, only deletes
121
    and inserts.
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
122
0.1.41 by Martin Pool
Doc
123
    Constraints/notes:
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
124
125
    * A later version can delete lines that were introduced by any
126
      number of ancestor versions; this implies that deletion
127
      instructions can span insertion blocks without regard to the
128
      insertion block's nesting.
129
0.1.41 by Martin Pool
Doc
130
    * Similarly, deletions need not be properly nested with regard to
131
      each other, because they might have been generated by
132
      independent revisions.
133
0.1.45 by Martin Pool
doc
134
    * Insertions are always made by inserting a new bracketed block
135
      into a single point in the previous weave.  This implies they
136
      can nest but not overlap, and the nesting must always have later
137
      insertions on the inside.
138
0.1.41 by Martin Pool
Doc
139
    * It doesn't seem very useful to have an active insertion
140
      inside an inactive insertion, but it might happen.
0.1.45 by Martin Pool
doc
141
      
0.1.41 by Martin Pool
Doc
142
    * Therefore, all instructions are always "considered"; that
143
      is passed onto and off the stack.  An outer inactive block
144
      doesn't disable an inner block.
145
146
    * Lines are enabled if the most recent enclosing insertion is
147
      active and none of the enclosing deletions are active.
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
148
0.1.49 by Martin Pool
Add another constraint: revisions should not delete text that they
149
    * There is no point having a deletion directly inside its own
150
      insertion; you might as well just not write it.  And there
151
      should be no way to get an earlier version deleting a later
152
      version.
153
944 by Martin Pool
- refactor member names in Weave code
154
    _weave
155
        Text of the weave; list of control instruction tuples and strings.
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
156
944 by Martin Pool
- refactor member names in Weave code
157
    _parents
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
158
        List of parents, indexed by version number.
159
        It is only necessary to store the minimal set of parents for
160
        each version; the parent's parents are implied.
0.1.13 by Martin Pool
Knit structure now allows for versions to include the lines present in other
161
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
162
    _sha1s
163
        List of hex SHA-1 of each version, or None if not recorded.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
164
    """
938 by Martin Pool
- various optimizations to weave add code
165
944 by Martin Pool
- refactor member names in Weave code
166
    __slots__ = ['_weave', '_parents', '_sha1s']
938 by Martin Pool
- various optimizations to weave add code
167
    
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
168
    def __init__(self):
944 by Martin Pool
- refactor member names in Weave code
169
        self._weave = []
170
        self._parents = []
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
171
        self._sha1s = []
0.1.60 by Martin Pool
Weave eq and ne methods
172
173
174
    def __eq__(self, other):
175
        if not isinstance(other, Weave):
176
            return False
944 by Martin Pool
- refactor member names in Weave code
177
        return self._parents == other._parents \
178
               and self._weave == other._weave
0.1.60 by Martin Pool
Weave eq and ne methods
179
    
180
181
    def __ne__(self, other):
182
        return not self.__eq__(other)
183
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
184
        
0.1.26 by Martin Pool
Refactor parameters to add command
185
    def add(self, parents, text):
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
186
        """Add a single text on top of the weave.
0.1.36 by Martin Pool
doc
187
  
0.1.26 by Martin Pool
Refactor parameters to add command
188
        Returns the index number of the newly added version.
189
190
        parents
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
191
            List or set of direct parent version numbers.
192
            
0.1.26 by Martin Pool
Refactor parameters to add command
193
        text
194
            Sequence of lines to be added in the new version."""
938 by Martin Pool
- various optimizations to weave add code
195
196
        self._check_versions(parents)
0.1.82 by Martin Pool
Small weave optimizations
197
        ## self._check_lines(text)
944 by Martin Pool
- refactor member names in Weave code
198
        new_version = len(self._parents)
0.1.5 by Martin Pool
Add test for storing two text versions.
199
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
200
        import sha
201
        s = sha.new()
938 by Martin Pool
- various optimizations to weave add code
202
        map(s.update, text)
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
203
        sha1 = s.hexdigest()
204
        del s
205
938 by Martin Pool
- various optimizations to weave add code
206
        # if we abort after here the weave will be corrupt
944 by Martin Pool
- refactor member names in Weave code
207
        self._parents.append(frozenset(parents))
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
208
        self._sha1s.append(sha1)
938 by Martin Pool
- various optimizations to weave add code
209
210
            
211
        if not parents:
212
            # special case; adding with no parents revision; can do
213
            # this more quickly by just appending unconditionally.
214
            # even more specially, if we're adding an empty text we
215
            # need do nothing at all.
216
            if text:
944 by Martin Pool
- refactor member names in Weave code
217
                self._weave.append(('{', new_version))
218
                self._weave.extend(text)
219
                self._weave.append(('}', new_version))
938 by Martin Pool
- various optimizations to weave add code
220
        
221
            return new_version
222
941 by Martin Pool
- allow for parents specified to Weave.add to be a set
223
        if len(parents) == 1:
224
            pv = list(parents)[0]
225
            if sha1 == self._sha1s[pv]:
226
                # special case: same as the single parent
227
                return new_version
938 by Martin Pool
- various optimizations to weave add code
228
            
229
230
        ancestors = self.inclusions(parents)
231
944 by Martin Pool
- refactor member names in Weave code
232
        l = self._weave
938 by Martin Pool
- various optimizations to weave add code
233
234
        # basis a list of (origin, lineno, line)
235
        basis_lineno = []
236
        basis_lines = []
237
        for origin, lineno, line in self._extract(ancestors):
238
            basis_lineno.append(lineno)
239
            basis_lines.append(line)
240
241
        # another small special case: a merge, producing the same text as auto-merge
242
        if text == basis_lines:
243
            return new_version            
244
245
        # add a sentinal, because we can also match against the final line
944 by Martin Pool
- refactor member names in Weave code
246
        basis_lineno.append(len(self._weave))
938 by Martin Pool
- various optimizations to weave add code
247
248
        # XXX: which line of the weave should we really consider
249
        # matches the end of the file?  the current code says it's the
250
        # last line of the weave?
251
252
        #print 'basis_lines:', basis_lines
253
        #print 'new_lines:  ', lines
254
255
        from difflib import SequenceMatcher
256
        s = SequenceMatcher(None, basis_lines, text)
257
258
        # offset gives the number of lines that have been inserted
259
        # into the weave up to the current point; if the original edit instruction
260
        # says to change line A then we actually change (A+offset)
261
        offset = 0
262
263
        for tag, i1, i2, j1, j2 in s.get_opcodes():
264
            # i1,i2 are given in offsets within basis_lines; we need to map them
265
            # back to offsets within the entire weave
266
            #print 'raw match', tag, i1, i2, j1, j2
267
            if tag == 'equal':
268
                continue
269
270
            i1 = basis_lineno[i1]
271
            i2 = basis_lineno[i2]
272
273
            assert 0 <= j1 <= j2 <= len(text)
274
275
            #print tag, i1, i2, j1, j2
276
277
            # the deletion and insertion are handled separately.
278
            # first delete the region.
279
            if i1 != i2:
944 by Martin Pool
- refactor member names in Weave code
280
                self._weave.insert(i1+offset, ('[', new_version))
281
                self._weave.insert(i2+offset+1, (']', new_version))
938 by Martin Pool
- various optimizations to weave add code
282
                offset += 2
283
284
            if j1 != j2:
285
                # there may have been a deletion spanning up to
286
                # i2; we want to insert after this region to make sure
287
                # we don't destroy ourselves
288
                i = i2 + offset
944 by Martin Pool
- refactor member names in Weave code
289
                self._weave[i:i] = ([('{', new_version)] 
938 by Martin Pool
- various optimizations to weave add code
290
                                + text[j1:j2] 
291
                                + [('}', new_version)])
292
                offset += 2 + (j2 - j1)
293
294
        return new_version
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
295
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
296
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
297
    def inclusions(self, versions):
893 by Martin Pool
- Refactor weave calculation of inclusions
298
        """Return set of all ancestors of given version(s)."""
928 by Martin Pool
- go back to using plain builtin set()
299
        i = set(versions)
893 by Martin Pool
- Refactor weave calculation of inclusions
300
        v = max(versions)
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
301
        try:
893 by Martin Pool
- Refactor weave calculation of inclusions
302
            while v >= 0:
303
                if v in i:
304
                    # include all its parents
944 by Martin Pool
- refactor member names in Weave code
305
                    i.update(self._parents[v])
893 by Martin Pool
- Refactor weave calculation of inclusions
306
                v -= 1
307
            return i
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
308
        except IndexError:
309
            raise ValueError("version %d not present in weave" % v)
0.1.77 by Martin Pool
New Weave.get_included() does transitive expansion
310
311
890 by Martin Pool
- weave info should show minimal expression of parents
312
    def minimal_parents(self, version):
313
        """Find the minimal set of parents for the version."""
944 by Martin Pool
- refactor member names in Weave code
314
        included = self._parents[version]
890 by Martin Pool
- weave info should show minimal expression of parents
315
        if not included:
316
            return []
317
        
318
        li = list(included)
893 by Martin Pool
- Refactor weave calculation of inclusions
319
        li.sort(reverse=True)
890 by Martin Pool
- weave info should show minimal expression of parents
320
321
        mininc = []
928 by Martin Pool
- go back to using plain builtin set()
322
        gotit = set()
890 by Martin Pool
- weave info should show minimal expression of parents
323
324
        for pv in li:
325
            if pv not in gotit:
326
                mininc.append(pv)
893 by Martin Pool
- Refactor weave calculation of inclusions
327
                gotit.update(self.inclusions(pv))
890 by Martin Pool
- weave info should show minimal expression of parents
328
329
        assert mininc[0] >= 0
330
        assert mininc[-1] < version
331
        return mininc
332
333
0.1.75 by Martin Pool
Remove VerInfo class; just store sets directly in the list of
334
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
335
    def _check_lines(self, text):
336
        if not isinstance(text, list):
337
            raise ValueError("text should be a list, not %s" % type(text))
338
339
        for l in text:
340
            if not isinstance(l, basestring):
869 by Martin Pool
- more weave.py command line options
341
                raise ValueError("text line should be a string or unicode, not %s"
342
                                 % type(l))
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
343
        
344
345
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
346
    def _check_versions(self, indexes):
347
        """Check everything in the sequence of indexes is valid"""
348
        for i in indexes:
349
            try:
944 by Martin Pool
- refactor member names in Weave code
350
                self._parents[i]
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
351
            except IndexError:
352
                raise IndexError("invalid version number %r" % i)
353
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
354
    
0.1.7 by Martin Pool
Add trivial annotate text
355
    def annotate(self, index):
356
        return list(self.annotate_iter(index))
357
358
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
359
    def annotate_iter(self, version):
0.1.7 by Martin Pool
Add trivial annotate text
360
        """Yield list of (index-id, line) pairs for the specified version.
361
362
        The index indicates when the line originated in the weave."""
893 by Martin Pool
- Refactor weave calculation of inclusions
363
        for origin, lineno, text in self._extract([version]):
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
364
            yield origin, text
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
365
366
918 by Martin Pool
- start doing new weave-merge algorithm
367
    def _walk(self):
368
        """Walk the weave.
369
370
        Yields sequence of
371
        (lineno, insert, deletes, text)
372
        for each literal line.
373
        """
374
        
375
        istack = []
928 by Martin Pool
- go back to using plain builtin set()
376
        dset = set()
918 by Martin Pool
- start doing new weave-merge algorithm
377
378
        lineno = 0         # line of weave, 0-based
379
944 by Martin Pool
- refactor member names in Weave code
380
        for l in self._weave:
918 by Martin Pool
- start doing new weave-merge algorithm
381
            if isinstance(l, tuple):
382
                c, v = l
383
                isactive = None
384
                if c == '{':
385
                    istack.append(v)
386
                elif c == '}':
387
                    oldv = istack.pop()
388
                elif c == '[':
926 by Martin Pool
- update more weave code to use intsets
389
                    assert v not in dset
390
                    dset.add(v)
918 by Martin Pool
- start doing new weave-merge algorithm
391
                elif c == ']':
926 by Martin Pool
- update more weave code to use intsets
392
                    dset.remove(v)
918 by Martin Pool
- start doing new weave-merge algorithm
393
                else:
394
                    raise WeaveFormatError('unexpected instruction %r'
395
                                           % v)
396
            else:
397
                assert isinstance(l, basestring)
398
                assert istack
399
                yield lineno, istack[-1], dset, l
400
            lineno += 1
401
402
403
893 by Martin Pool
- Refactor weave calculation of inclusions
404
    def _extract(self, versions):
0.1.20 by Martin Pool
Factor out Knit.extract() method
405
        """Yield annotation of lines in included set.
406
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
407
        Yields a sequence of tuples (origin, lineno, text), where
408
        origin is the origin version, lineno the index in the weave,
409
        and text the text of the line.
410
0.1.20 by Martin Pool
Factor out Knit.extract() method
411
        The set typically but not necessarily corresponds to a version.
412
        """
893 by Martin Pool
- Refactor weave calculation of inclusions
413
        included = self.inclusions(versions)
881 by Martin Pool
- faster weave extraction
414
415
        istack = []
928 by Martin Pool
- go back to using plain builtin set()
416
        dset = set()
0.1.48 by Martin Pool
Basic parsing of delete instructions.
417
418
        lineno = 0         # line of weave, 0-based
891 by Martin Pool
- fix up refactoring of weave
419
894 by Martin Pool
- small optimization for weave extract
420
        isactive = None
0.1.85 by Martin Pool
doc
421
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
422
        result = []
423
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
424
        WFE = WeaveFormatError
0.1.95 by Martin Pool
- preliminary merge conflict detection
425
944 by Martin Pool
- refactor member names in Weave code
426
        for l in self._weave:
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
427
            if isinstance(l, tuple):
428
                c, v = l
894 by Martin Pool
- small optimization for weave extract
429
                isactive = None
891 by Martin Pool
- fix up refactoring of weave
430
                if c == '{':
431
                    assert v not in istack
432
                    istack.append(v)
433
                elif c == '}':
434
                    oldv = istack.pop()
435
                    assert oldv == v
436
                elif c == '[':
437
                    if v in included:
881 by Martin Pool
- faster weave extraction
438
                        assert v not in dset
0.1.48 by Martin Pool
Basic parsing of delete instructions.
439
                        dset.add(v)
891 by Martin Pool
- fix up refactoring of weave
440
                else:
441
                    assert c == ']'
442
                    if v in included:
881 by Martin Pool
- faster weave extraction
443
                        assert v in dset
0.1.48 by Martin Pool
Basic parsing of delete instructions.
444
                        dset.remove(v)
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
445
            else:
446
                assert isinstance(l, basestring)
894 by Martin Pool
- small optimization for weave extract
447
                if isactive is None:
448
                    isactive = (not dset) and istack and (istack[-1] in included)
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
449
                if isactive:
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
450
                    result.append((istack[-1], lineno, l))
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
451
            lineno += 1
0.1.7 by Martin Pool
Add trivial annotate text
452
0.1.46 by Martin Pool
More constraints on structure of weave, and checks that they work
453
        if istack:
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
454
            raise WFE("unclosed insertion blocks at end of weave",
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
455
                                   istack)
0.1.48 by Martin Pool
Basic parsing of delete instructions.
456
        if dset:
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
457
            raise WFE("unclosed deletion blocks at end of weave",
0.1.48 by Martin Pool
Basic parsing of delete instructions.
458
                                   dset)
0.1.40 by Martin Pool
Add test for extracting from weave with nested insertions
459
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
460
        return result
461
    
462
0.1.7 by Martin Pool
Add trivial annotate text
463
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
464
    def get_iter(self, version):
0.1.5 by Martin Pool
Add test for storing two text versions.
465
        """Yield lines for the specified version."""
893 by Martin Pool
- Refactor weave calculation of inclusions
466
        for origin, lineno, line in self._extract([version]):
0.1.8 by Martin Pool
Unify get/annotate code
467
            yield line
0.1.5 by Martin Pool
Add test for storing two text versions.
468
469
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
470
    def get(self, index):
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
471
        return list(self.get_iter(index))
0.1.1 by Martin Pool
Check in old existing knit code.
472
473
0.1.95 by Martin Pool
- preliminary merge conflict detection
474
    def mash_iter(self, included):
0.1.65 by Martin Pool
Add Weave.merge_iter to get automerged lines
475
        """Return composed version of multiple included versions."""
893 by Martin Pool
- Refactor weave calculation of inclusions
476
        for origin, lineno, text in self._extract(included):
0.1.65 by Martin Pool
Add Weave.merge_iter to get automerged lines
477
            yield text
478
479
0.1.11 by Martin Pool
Add Knit.dump method
480
    def dump(self, to_file):
481
        from pprint import pprint
944 by Martin Pool
- refactor member names in Weave code
482
        print >>to_file, "Weave._weave = ",
483
        pprint(self._weave, to_file)
484
        print >>to_file, "Weave._parents = ",
485
        pprint(self._parents, to_file)
0.1.11 by Martin Pool
Add Knit.dump method
486
487
0.1.91 by Martin Pool
Update Weave.check
488
489
    def numversions(self):
944 by Martin Pool
- refactor member names in Weave code
490
        l = len(self._parents)
0.1.91 by Martin Pool
Update Weave.check
491
        assert l == len(self._sha1s)
492
        return l
493
494
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
495
    def __len__(self):
496
        return self.numversions()
497
498
894 by Martin Pool
- small optimization for weave extract
499
    def check(self, progress_bar=None):
0.1.91 by Martin Pool
Update Weave.check
500
        # check no circular inclusions
501
        for version in range(self.numversions()):
944 by Martin Pool
- refactor member names in Weave code
502
            inclusions = list(self._parents[version])
0.1.91 by Martin Pool
Update Weave.check
503
            if inclusions:
504
                inclusions.sort()
505
                if inclusions[-1] >= version:
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
506
                    raise WeaveFormatError("invalid included version %d for index %d"
0.1.91 by Martin Pool
Update Weave.check
507
                                           % (inclusions[-1], version))
508
509
        # try extracting all versions; this is a bit slow and parallel
510
        # extraction could be used
511
        import sha
894 by Martin Pool
- small optimization for weave extract
512
        nv = self.numversions()
513
        for version in range(nv):
514
            if progress_bar:
515
                progress_bar.update('checking text', version, nv)
0.1.91 by Martin Pool
Update Weave.check
516
            s = sha.new()
517
            for l in self.get_iter(version):
518
                s.update(l)
519
            hd = s.hexdigest()
520
            expected = self._sha1s[version]
521
            if hd != expected:
522
                raise WeaveError("mismatched sha1 for version %d; "
523
                                 "got %s, expected %s"
524
                                 % (version, hd, expected))
0.1.18 by Martin Pool
Better Knit.dump method
525
881 by Martin Pool
- faster weave extraction
526
        # TODO: check insertions are properly nested, that there are
527
        # no lines outside of insertion blocks, that deletions are
528
        # properly paired, etc.
529
0.1.13 by Martin Pool
Knit structure now allows for versions to include the lines present in other
530
531
0.1.95 by Martin Pool
- preliminary merge conflict detection
532
    def merge(self, merge_versions):
        """Automerge and mark conflicts between versions.

        This returns a sequence, each entry describing alternatives
        for a chunk of the file.  Each of the alternatives is given as
        a list of lines.

        If there is a chunk of the file where there's no disagreement,
        only one alternative is given.

        Not implemented yet: always raises NotImplementedError.
        """

        # approach: find the included versions common to all the
        # merged versions
        raise NotImplementedError()
546
547
548
0.1.21 by Martin Pool
Start computing a delta to insert a new revision
549
    def _delta(self, included, lines):
550
        """Return changes from basis to new revision.
551
552
        The old text for comparison is the union of included revisions.
553
554
        This is used in inserting a new text.
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
555
0.1.55 by Martin Pool
doc
556
        Delta is returned as a sequence of
557
        (weave1, weave2, newlines).
558
559
        This indicates that weave1:weave2 of the old weave should be
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
560
        replaced by the sequence of lines in newlines.  Note that
561
        these line numbers are positions in the total weave and don't
562
        correspond to the lines in any extracted version, or even the
563
        extracted union of included versions.
564
565
        If line1=line2, this is a pure insert; if newlines=[] this is a
566
        pure delete.  (Similar to difflib.)
0.1.21 by Martin Pool
Start computing a delta to insert a new revision
567
        """
568
0.1.1 by Martin Pool
Check in old existing knit code.
569
918 by Martin Pool
- start doing new weave-merge algorithm
570
            
571
    def plan_merge(self, ver_a, ver_b):
        """Classify every weave line by its role in merging two versions.

        Generates (state, line) pairs, one per entry produced by
        self._walk() and in the same order.  The classification uses
        three sets: the versions included by ver_a, those included by
        ver_b, and their intersection, which acts as the common base.

        state is one of:

        'killed-base'  deleted by a version in the common base
        'unchanged'    inserted in the base, deleted on neither side
        'killed-a'     inserted in the base, deleted only on the a side
        'killed-b'     inserted in the base, deleted only on the b side
        'killed-both'  inserted in the base, deleted on both sides
        'new-a'        inserted on the a side only and still alive there
        'ghost-a'      inserted on the a side only but deleted there again
        'new-b'        inserted on the b side only and still alive there
        'ghost-b'      inserted on the b side only but deleted there again
        'irrelevant'   inserted by a version in neither a nor b

        After the last weave line a final ('unchanged', '') pair is
        generated as a terminator.
        """
        included_a = self.inclusions([ver_a])
        included_b = self.inclusions([ver_b])
        base = included_a & included_b

        # state of a line that came from the base, keyed by
        # (deleted on the a side?, deleted on the b side?)
        base_states = {
            (True, True): 'killed-both',
            (True, False): 'killed-a',
            (False, True): 'killed-b',
            (False, False): 'unchanged',
        }

        for lineno, origin, killers, text in self._walk():
            if killers & base:
                # already dead in the base, so it can't be live in
                # either a or b
                state = 'killed-base'
            elif origin in base:
                gone_a = bool(killers & included_a)
                gone_b = bool(killers & included_b)
                state = base_states[gone_a, gone_b]
            elif origin in included_a:
                if killers & included_a:
                    state = 'ghost-a'
                else:
                    state = 'new-a'
            elif origin in included_b:
                if killers & included_b:
                    state = 'ghost-b'
                else:
                    state = 'new-b'
            else:
                # belongs to neither of the two versions
                state = 'irrelevant'
            yield state, text

        yield 'unchanged', ''           # terminator
616
617
618
619
    def weave_merge(self, plan):
        """Generate merged text from a merge plan, marking conflicts.

        plan is an iterable of (state, line) pairs such as the one
        plan_merge() generates.  Lines whose state is 'unchanged' are
        passed straight through (an empty line, as used by the
        plan_merge terminator, is not emitted).  Changes are queued
        per side until the next 'unchanged' or 'killed-both' entry,
        at which point the queued region is resolved:

        * only side a changed: a's lines are emitted
        * only side b changed: b's lines are emitted
        * both changed and their lines are equal: emitted once
        * otherwise a conflict is emitted as '<<<<', a's lines,
          '====', b's lines, '>>>>' (each marker on its own line)

        Anything still queued when the plan runs out is resolved the
        same way, so a plan without a trailing terminator does not
        lose its final region.

        Any state other than the ones handled here must be one of
        'irrelevant', 'ghost-a', 'ghost-b', 'killed-base' or
        'killed-both'; this is checked with an assertion.
        """
        def resolve(lines_a, lines_b, ch_a, ch_b):
            # Return the output lines for one queued region.
            if not lines_a and not lines_b:
                return []
            if ch_a and not ch_b:
                # one-sided change
                return lines_a
            if ch_b and not ch_a:
                return lines_b
            if lines_a == lines_b:
                return lines_a
            return ['<<<<\n'] + lines_a + ['====\n'] + lines_b + ['>>>>\n']

        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        for state, line in plan:
            if state == 'unchanged' or state == 'killed-both':
                # resync and flush queued conflicts changes if any
                for out in resolve(lines_a, lines_b, ch_a, ch_b):
                    yield out
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield line
            elif state == 'killed-a':
                # a dropped the line, b still has it
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            else:
                assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
                                 'killed-both'), \
                       state

        # A plan from plan_merge() ends with an 'unchanged' terminator,
        # which leaves nothing queued; for any other plan make sure the
        # last region is not silently discarded.
        for out in resolve(lines_a, lines_b, ch_a, ch_b):
            yield out
666
667
                
668
669
918 by Martin Pool
- start doing new weave-merge algorithm
670
671
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
672
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
673
def weave_info(w):
    """Print a table of contents for the weave w.

    The output is a header line, a rule of dashes, and then one row
    per version (0 up to w.numversions() - 1) giving the version
    number, its recorded sha1 and its parents separated by spaces.
    Only the weave's header data is read; no version text is extracted.
    """
    column_widths = (6, 40, 20)
    print('%6s %40s %20s' % ('ver', 'sha1', 'parents'))
    print(' '.join(['-' * width for width in column_widths]))
    for version in range(w.numversions()):
        digest = w._sha1s[version]
        parent_text = ' '.join([str(p) for p in w._parents[version]])
        print('%6d %40s %s' % (version, digest, parent_text))
0.1.88 by Martin Pool
Add weave info command.
682
869 by Martin Pool
- more weave.py command line options
683
684
947 by Martin Pool
- new 'weave stats' command
685
def weave_stats(weave_file):
    """Print size statistics for the weave stored in weave_file.

    weave_file is the path of a weave file.  The weave is read, every
    version is extracted to add up the total length of all texts, and
    four figures are printed: the number of versions, the size of the
    weave file (the file position once it has been read), the total
    size of all version texts, and the ratio between the two.

    A progress bar is shown while the versions are being extracted.
    """
    from bzrlib.progress import ProgressBar
    from bzrlib.weavefile import read_weave

    pb = ProgressBar()

    wf = open(weave_file, 'rb')
    try:
        w = read_weave(wf)
        # FIXME: doesn't work on pipes
        weave_size = wf.tell()
    finally:
        # the file is only needed for reading and measuring; don't
        # hold it open while every version is extracted
        wf.close()

    total = 0
    vers = len(w)
    for i in range(vers):
        pb.update('checking sizes', i, vers)
        for line in w.get_iter(i):
            total += len(line)

    pb.clear()

    print('versions          %9d' % vers)
    print('weave file        %9d bytes' % weave_size)
    print('total contents    %9d bytes' % total)
    print('compression ratio %9.2fx' % (float(total) / float(weave_size)))
709
710
711
869 by Martin Pool
- more weave.py command line options
712
def usage():
    """Print the help text for the weave command-line tool.

    The text lists every command that main() accepts, followed by a
    worked example session.
    """
    print("""bzr weave tool

Experimental tool for weave algorithm.

usage:
    weave init WEAVEFILE
        Create an empty weave file
    weave get WEAVEFILE VERSION
        Write out specified version.
    weave check WEAVEFILE
        Check consistency of all versions.
    weave info WEAVEFILE
        Display table of contents.
    weave add WEAVEFILE [BASE...] < NEWTEXT
        Add NEWTEXT, with specified parent versions.
    weave annotate WEAVEFILE VERSION
        Display origin of each line.
    weave mash WEAVEFILE VERSION...
        Display composite of all selected versions.
    weave merge WEAVEFILE VERSION1 VERSION2 > OUT
        Auto-merge two versions and display conflicts.
    weave plan-merge WEAVEFILE VERSION1 VERSION2
        Display the merge state of each weave line.
    weave mash-merge WEAVEFILE VERSION1 VERSION2
        Three-way merge using the common inclusions as the base.
    weave stats WEAVEFILE
        Display version count, sizes and compression ratio.
    weave inclusions WEAVEFILE VERSION
        Display all versions included by VERSION.
    weave parents WEAVEFILE VERSION
        Display the direct parents of VERSION.
    weave help
        Display this message.

example:

    % weave init foo.weave
    % vi foo.txt
    % weave add foo.weave < foo.txt
    added version 0

    (create updated version)
    % vi foo.txt
    % weave get foo.weave 0 | diff -u - foo.txt
    % weave add foo.weave 0 < foo.txt
    added version 1

    % weave get foo.weave 0 > foo.txt       (create forked version)
    % vi foo.txt
    % weave add foo.weave 0 < foo.txt
    added version 2

    % weave merge foo.weave 1 2 > foo.txt   (merge them)
    % vi foo.txt                            (resolve conflicts)
    % weave add foo.weave 1 2 < foo.txt     (commit merged version)     
    
""")
0.1.88 by Martin Pool
Add weave info command.
758
    
759
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
760
761
def main(argv):
762
    import sys
763
    import os
869 by Martin Pool
- more weave.py command line options
764
    from weavefile import write_weave, read_weave
894 by Martin Pool
- small optimization for weave extract
765
    from bzrlib.progress import ProgressBar
766
767
    #import psyco
768
    #psyco.full()
769
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
770
    cmd = argv[1]
869 by Martin Pool
- more weave.py command line options
771
772
    def readit():
773
        return read_weave(file(argv[2], 'rb'))
774
    
775
    if cmd == 'help':
776
        usage()
777
    elif cmd == 'add':
778
        w = readit()
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
779
        # at the moment, based on everything in the file
869 by Martin Pool
- more weave.py command line options
780
        parents = map(int, argv[3:])
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
781
        lines = sys.stdin.readlines()
0.1.69 by Martin Pool
Simple text-based format for storing weaves, cleaner than
782
        ver = w.add(parents, lines)
869 by Martin Pool
- more weave.py command line options
783
        write_weave(w, file(argv[2], 'wb'))
784
        print 'added version %d' % ver
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
785
    elif cmd == 'init':
786
        fn = argv[2]
787
        if os.path.exists(fn):
788
            raise IOError("file exists")
789
        w = Weave()
869 by Martin Pool
- more weave.py command line options
790
        write_weave(w, file(fn, 'wb'))
791
    elif cmd == 'get': # get one version
792
        w = readit()
0.1.94 by Martin Pool
Fix get_iter call
793
        sys.stdout.writelines(w.get_iter(int(argv[3])))
869 by Martin Pool
- more weave.py command line options
794
        
795
    elif cmd == 'mash': # get composite
796
        w = readit()
797
        sys.stdout.writelines(w.mash_iter(map(int, argv[3:])))
798
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
799
    elif cmd == 'annotate':
869 by Martin Pool
- more weave.py command line options
800
        w = readit()
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
801
        # newline is added to all lines regardless; too hard to get
802
        # reasonable formatting otherwise
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
803
        lasto = None
804
        for origin, text in w.annotate(int(argv[3])):
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
805
            text = text.rstrip('\r\n')
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
806
            if origin == lasto:
807
                print '      | %s' % (text)
808
            else:
809
                print '%5d | %s' % (origin, text)
810
                lasto = origin
871 by Martin Pool
- add command for merge-based weave
811
                
0.1.88 by Martin Pool
Add weave info command.
812
    elif cmd == 'info':
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
813
        weave_info(readit())
947 by Martin Pool
- new 'weave stats' command
814
815
    elif cmd == 'stats':
816
        weave_stats(argv[2])
871 by Martin Pool
- add command for merge-based weave
817
        
0.1.91 by Martin Pool
Update Weave.check
818
    elif cmd == 'check':
869 by Martin Pool
- more weave.py command line options
819
        w = readit()
894 by Martin Pool
- small optimization for weave extract
820
        pb = ProgressBar()
821
        w.check(pb)
822
        pb.clear()
938 by Martin Pool
- various optimizations to weave add code
823
        print '%d versions ok' % w.numversions()
871 by Martin Pool
- add command for merge-based weave
824
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
825
    elif cmd == 'inclusions':
826
        w = readit()
827
        print ' '.join(map(str, w.inclusions([int(argv[3])])))
828
829
    elif cmd == 'parents':
830
        w = readit()
944 by Martin Pool
- refactor member names in Weave code
831
        print ' '.join(map(str, w._parents[int(argv[3])]))
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
832
918 by Martin Pool
- start doing new weave-merge algorithm
833
    elif cmd == 'plan-merge':
834
        w = readit()
835
        for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
919 by Martin Pool
- more development of weave-merge
836
            if line:
837
                print '%14s | %s' % (state, line),
918 by Martin Pool
- start doing new weave-merge algorithm
838
871 by Martin Pool
- add command for merge-based weave
839
    elif cmd == 'merge':
919 by Martin Pool
- more development of weave-merge
840
        w = readit()
841
        p = w.plan_merge(int(argv[3]), int(argv[4]))
842
        sys.stdout.writelines(w.weave_merge(p))
843
            
844
    elif cmd == 'mash-merge':
871 by Martin Pool
- add command for merge-based weave
845
        if len(argv) != 5:
846
            usage()
847
            return 1
848
849
        w = readit()
850
        v1, v2 = map(int, argv[3:5])
851
852
        basis = w.inclusions([v1]).intersection(w.inclusions([v2]))
853
854
        base_lines = list(w.mash_iter(basis))
855
        a_lines = list(w.get(v1))
856
        b_lines = list(w.get(v2))
857
858
        from bzrlib.merge3 import Merge3
859
        m3 = Merge3(base_lines, a_lines, b_lines)
860
861
        name_a = 'version %d' % v1
862
        name_b = 'version %d' % v2
863
        sys.stdout.writelines(m3.merge_lines(name_a=name_a, name_b=name_b))
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
864
    else:
865
        raise ValueError('unknown command %r' % cmd)
866
    
867
868
if __name__ == '__main__':
    # Run as a script: hand the raw argument list to main() and use
    # whatever it returns as the process exit status.
    import sys
    exit_status = main(sys.argv)
    sys.exit(exit_status)