~bzr-pqm/bzr/bzr.dev

0.1.1 by Martin Pool
Check in old existing knit code.
1
#! /usr/bin/python
2
3
# Copyright (C) 2005 Canonical Ltd
4
0.1.33 by Martin Pool
add gpl text
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
9
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
# GNU General Public License for more details.
14
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.1.1 by Martin Pool
Check in old existing knit code.
18
19
# Author: Martin Pool <mbp@canonical.com>
20
21
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
22
"""Weave - storage of related text file versions"""
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
23
938 by Martin Pool
- various optimizations to weave add code
24
0.1.58 by Martin Pool
doc
25
# XXX: If we do weaves this way, will a merge still behave the same
26
# way if it's done in a different order?  That's a pretty desirable
27
# property.
28
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
29
# TODO: Nothing here so far assumes the lines are really \n newlines,
30
# rather than being split up in some other way.  We could accomodate
31
# binaries, perhaps by naively splitting on \n or perhaps using
32
# something like a rolling checksum.
33
0.1.85 by Martin Pool
doc
34
# TODO: End marker for each version so we can stop reading?
0.1.69 by Martin Pool
Simple text-based format for storing weaves, cleaner than
35
36
# TODO: Check that no insertion occurs inside a deletion that was
37
# active in the version of the insertion.
38
912 by Martin Pool
- update todos for weave
39
# TODO: In addition to the SHA-1 check, perhaps have some code that
40
# checks structural constraints of the weave: ie that insertions are
41
# properly nested, that there is no text outside of an insertion, that
42
# insertions or deletions are not repeated, etc.
0.1.85 by Martin Pool
doc
43
918 by Martin Pool
- start doing new weave-merge algorithm
44
# TODO: Parallel-extract that passes back each line along with a
45
# description of which revisions include it.  Nice for checking all
1393.1.25 by Martin Pool
- comments in weave code
46
# shas or calculating stats in parallel.
918 by Martin Pool
- start doing new weave-merge algorithm
47
1082 by Martin Pool
- lift imports
48
# TODO: Using a single _extract routine and then processing the output
49
# is probably inefficient.  It's simple enough that we can afford to
50
# have slight specializations for different ways its used: annotate,
51
# basis for add, get, etc.
52
1185.16.13 by Martin Pool
todo
53
# TODO: Probably the API should work only in names to hide the integer
54
# indexes from the user.
1083 by Martin Pool
- add space to store revision-id in weave files
55
1328 by Martin Pool
- slight refactoring of weave code
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
58
# version?
59
1185.16.13 by Martin Pool
todo
60
# TODO: Reweave can possibly be made faster by remembering diffs
61
# where the basis and destination are unchanged.
62
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of 
65
# something in the list.)  We should eliminate them.  This can 
66
# be done fairly efficiently because the sequence numbers constrain
67
# the possible relationships.
1083 by Martin Pool
- add space to store revision-id in weave files
68
1082 by Martin Pool
- lift imports
69
70
import sha
1322 by Martin Pool
- tidy up import
71
from difflib import SequenceMatcher
1237 by Martin Pool
- allow the same version to be repeatedly added to a weave
72
1393.1.68 by Martin Pool
- add reweave that joins with ghosts
73
from bzrlib.trace import mutter
1237 by Martin Pool
- allow the same version to be repeatedly added to a weave
74
924 by Martin Pool
- Add IntSet class
75
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
76
class WeaveError(Exception):
77
    """Exception in processing weave"""
78
79
80
class WeaveFormatError(WeaveError):
81
    """Weave invariant violated"""
1185.13.4 by Robert Collins
make reweave visible as a weave method, and quickly integrate into fetch
82
83
84
class WeaveParentMismatch(WeaveError):
85
    """Parents are mismatched between two revisions."""
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
86
    
87
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
88
class Weave(object):
89
    """weave - versioned text file storage.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
90
    
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
91
    A Weave manages versions of line-based text files, keeping track
92
    of the originating version for each line.
93
94
    To clients the "lines" of the file are represented as a list of strings.
95
    These strings  will typically have terminal newline characters, but
96
    this is not required.  In particular files commonly do not have a newline
97
    at the end of the file.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
98
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
99
    Texts can be identified in either of two ways:
100
101
    * a nonnegative index number.
102
1324 by Martin Pool
- doc
103
    * a version-id string.
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
104
0.1.38 by Martin Pool
Rename knit to weave. (I don't think there's an existing module called weave.)
105
    Typically the index number will be valid only inside this weave and
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
106
    the version-id is used to reference it in the larger world.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
107
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
108
    The weave is represented as a list mixing edit instructions and
944 by Martin Pool
- refactor member names in Weave code
109
    literal text.  Each entry in _weave can be either a string (or
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
110
    unicode), or a tuple.  If a string, it means that the given line
111
    should be output in the currently active revisions.
112
113
    If a tuple, it gives a processing instruction saying in which
114
    revisions the enclosed lines are active.  The tuple has the form
115
    (instruction, version).
116
117
    The instruction can be '{' or '}' for an insertion block, and '['
118
    and ']' for a deletion block respectively.  The version is the
0.1.45 by Martin Pool
doc
119
    integer version index.  There is no replace operator, only deletes
1075 by Martin Pool
- don't store redundant version number at end of insert blocks
120
    and inserts.  For '}', the end of an insertion, there is no
121
    version parameter because it always closes the most recently
122
    opened insertion.
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
123
0.1.41 by Martin Pool
Doc
124
    Constraints/notes:
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
125
126
    * A later version can delete lines that were introduced by any
127
      number of ancestor versions; this implies that deletion
128
      instructions can span insertion blocks without regard to the
129
      insertion block's nesting.
130
0.1.41 by Martin Pool
Doc
131
    * Similarly, deletions need not be properly nested with regard to
132
      each other, because they might have been generated by
133
      independent revisions.
134
0.1.45 by Martin Pool
doc
135
    * Insertions are always made by inserting a new bracketed block
136
      into a single point in the previous weave.  This implies they
137
      can nest but not overlap, and the nesting must always have later
138
      insertions on the inside.
139
0.1.41 by Martin Pool
Doc
140
    * It doesn't seem very useful to have an active insertion
141
      inside an inactive insertion, but it might happen.
0.1.45 by Martin Pool
doc
142
      
0.1.41 by Martin Pool
Doc
143
    * Therefore, all instructions are always"considered"; that
144
      is passed onto and off the stack.  An outer inactive block
145
      doesn't disable an inner block.
146
147
    * Lines are enabled if the most recent enclosing insertion is
148
      active and none of the enclosing deletions are active.
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
149
0.1.49 by Martin Pool
Add another constraint: revisions should not delete text that they
150
    * There is no point having a deletion directly inside its own
151
      insertion; you might as well just not write it.  And there
152
      should be no way to get an earlier version deleting a later
153
      version.
154
944 by Martin Pool
- refactor member names in Weave code
155
    _weave
156
        Text of the weave; list of control instruction tuples and strings.
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
157
944 by Martin Pool
- refactor member names in Weave code
158
    _parents
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
159
        List of parents, indexed by version number.
160
        It is only necessary to store the minimal set of parents for
161
        each version; the parent's parents are implied.
0.1.13 by Martin Pool
Knit structure now allows for versions to include the lines present in other
162
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
163
    _sha1s
1083 by Martin Pool
- add space to store revision-id in weave files
164
        List of hex SHA-1 of each version.
165
166
    _names
167
        List of symbolic names for each version.  Each should be unique.
168
169
    _name_map
170
        For each name, the version number.
1209 by Martin Pool
- Add Weave._weave_name for debugging purposes
171
172
    _weave_name
173
        Descriptive name of this weave; typically the filename if known.
174
        Set by read_weave.
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
175
    """
938 by Martin Pool
- various optimizations to weave add code
176
1209 by Martin Pool
- Add Weave._weave_name for debugging purposes
177
    __slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
178
                 '_weave_name']
938 by Martin Pool
- various optimizations to weave add code
179
    
1209 by Martin Pool
- Add Weave._weave_name for debugging purposes
180
    def __init__(self, weave_name=None):
944 by Martin Pool
- refactor member names in Weave code
181
        self._weave = []
182
        self._parents = []
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
183
        self._sha1s = []
1083 by Martin Pool
- add space to store revision-id in weave files
184
        self._names = []
185
        self._name_map = {}
1209 by Martin Pool
- Add Weave._weave_name for debugging purposes
186
        self._weave_name = weave_name
0.1.60 by Martin Pool
Weave eq and ne methods
187
188
1393.1.51 by Martin Pool
- new Weave.copy()
189
    def copy(self):
190
        """Return a deep copy of self.
191
        
192
        The copy can be modified without affecting the original weave."""
193
        other = Weave()
194
        other._weave = self._weave[:]
195
        other._parents = self._parents[:]
196
        other._sha1s = self._sha1s[:]
197
        other._names = self._names[:]
198
        other._name_map = self._name_map.copy()
199
        other._weave_name = self._weave_name
200
        return other
201
0.1.60 by Martin Pool
Weave eq and ne methods
202
    def __eq__(self, other):
203
        if not isinstance(other, Weave):
204
            return False
944 by Martin Pool
- refactor member names in Weave code
205
        return self._parents == other._parents \
1083 by Martin Pool
- add space to store revision-id in weave files
206
               and self._weave == other._weave \
207
               and self._sha1s == other._sha1s 
208
0.1.60 by Martin Pool
Weave eq and ne methods
209
    
210
    def __ne__(self, other):
211
        return not self.__eq__(other)
212
1083 by Martin Pool
- add space to store revision-id in weave files
213
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
214
    def maybe_lookup(self, name_or_index):
215
        """Convert possible symbolic name to index, or pass through indexes."""
216
        if isinstance(name_or_index, (int, long)):
217
            return name_or_index
218
        else:
219
            return self.lookup(name_or_index)
220
221
        
1083 by Martin Pool
- add space to store revision-id in weave files
222
    def lookup(self, name):
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
223
        """Convert symbolic version name to index."""
1083 by Martin Pool
- add space to store revision-id in weave files
224
        try:
225
            return self._name_map[name]
226
        except KeyError:
1260 by Martin Pool
- some updates for fetch/update function
227
            raise WeaveError("name %r not present in weave %r" %
1209 by Martin Pool
- Add Weave._weave_name for debugging purposes
228
                             (name, self._weave_name))
1083 by Martin Pool
- add space to store revision-id in weave files
229
1229 by Martin Pool
- new Weave.idx_to_name and .parents methods
230
1393.1.49 by Martin Pool
- add Weave._check_consistent_with
231
    def iter_names(self):
232
        """Yield a list of all names in this weave."""
233
        return iter(self._names)
234
1229 by Martin Pool
- new Weave.idx_to_name and .parents methods
235
    def idx_to_name(self, version):
236
        return self._names[version]
237
1237 by Martin Pool
- allow the same version to be repeatedly added to a weave
238
1323 by Martin Pool
- caller can pass SHA-1 to Weave.add for efficiency
239
    def _check_repeated_add(self, name, parents, text, sha1):
1237 by Martin Pool
- allow the same version to be repeatedly added to a weave
240
        """Check that a duplicated add is OK.
241
242
        If it is, return the (old) index; otherwise raise an exception.
243
        """
244
        idx = self.lookup(name)
245
        if sorted(self._parents[idx]) != sorted(parents):
246
            raise WeaveError("name \"%s\" already present in weave "
247
                             "with different parents" % name)
1323 by Martin Pool
- caller can pass SHA-1 to Weave.add for efficiency
248
        if sha1 != self._sha1s[idx]:
1237 by Martin Pool
- allow the same version to be repeatedly added to a weave
249
            raise WeaveError("name \"%s\" already present in weave "
250
                             "with different text" % name)            
251
        return idx
252
        
253
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
254
        
1323 by Martin Pool
- caller can pass SHA-1 to Weave.add for efficiency
255
    def add(self, name, parents, text, sha1=None):
0.1.4 by Martin Pool
Start indexing knits by both integer and version string.
256
        """Add a single text on top of the weave.
0.1.36 by Martin Pool
doc
257
  
0.1.26 by Martin Pool
Refactor parameters to add command
258
        Returns the index number of the newly added version.
259
1083 by Martin Pool
- add space to store revision-id in weave files
260
        name
261
            Symbolic name for this version.
262
            (Typically the revision-id of the revision that added it.)
263
0.1.26 by Martin Pool
Refactor parameters to add command
264
        parents
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
265
            List or set of direct parent version numbers.
266
            
0.1.26 by Martin Pool
Refactor parameters to add command
267
        text
1323 by Martin Pool
- caller can pass SHA-1 to Weave.add for efficiency
268
            Sequence of lines to be added in the new version.
269
270
        sha -- SHA-1 of the file, if known.  This is trusted to be
271
            correct if supplied.
272
        """
1393.1.4 by Martin Pool
- weave tool now finds bzrlib if it's not on the default path
273
        from bzrlib.osutils import sha_strings
938 by Martin Pool
- various optimizations to weave add code
274
1083 by Martin Pool
- add space to store revision-id in weave files
275
        assert isinstance(name, basestring)
1323 by Martin Pool
- caller can pass SHA-1 to Weave.add for efficiency
276
        if sha1 is None:
277
            sha1 = sha_strings(text)
1083 by Martin Pool
- add space to store revision-id in weave files
278
        if name in self._name_map:
1323 by Martin Pool
- caller can pass SHA-1 to Weave.add for efficiency
279
            return self._check_repeated_add(name, parents, text, sha1)
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
280
281
        parents = map(self.maybe_lookup, parents)
938 by Martin Pool
- various optimizations to weave add code
282
        self._check_versions(parents)
0.1.82 by Martin Pool
Small weave optimizations
283
        ## self._check_lines(text)
944 by Martin Pool
- refactor member names in Weave code
284
        new_version = len(self._parents)
0.1.5 by Martin Pool
Add test for storing two text versions.
285
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
286
1083 by Martin Pool
- add space to store revision-id in weave files
287
        # if we abort after here the (in-memory) weave will be corrupt because only
288
        # some fields are updated
289
        self._parents.append(parents[:])
0.1.89 by Martin Pool
Store SHA1 in weave file for later verification
290
        self._sha1s.append(sha1)
1083 by Martin Pool
- add space to store revision-id in weave files
291
        self._names.append(name)
292
        self._name_map[name] = new_version
938 by Martin Pool
- various optimizations to weave add code
293
294
            
295
        if not parents:
296
            # special case; adding with no parents revision; can do
297
            # this more quickly by just appending unconditionally.
298
            # even more specially, if we're adding an empty text we
299
            # need do nothing at all.
300
            if text:
944 by Martin Pool
- refactor member names in Weave code
301
                self._weave.append(('{', new_version))
302
                self._weave.extend(text)
1075 by Martin Pool
- don't store redundant version number at end of insert blocks
303
                self._weave.append(('}', None))
938 by Martin Pool
- various optimizations to weave add code
304
        
305
            return new_version
306
941 by Martin Pool
- allow for parents specified to Weave.add to be a set
307
        if len(parents) == 1:
308
            pv = list(parents)[0]
309
            if sha1 == self._sha1s[pv]:
310
                # special case: same as the single parent
311
                return new_version
938 by Martin Pool
- various optimizations to weave add code
312
            
313
314
        ancestors = self.inclusions(parents)
315
944 by Martin Pool
- refactor member names in Weave code
316
        l = self._weave
938 by Martin Pool
- various optimizations to weave add code
317
318
        # basis a list of (origin, lineno, line)
319
        basis_lineno = []
320
        basis_lines = []
321
        for origin, lineno, line in self._extract(ancestors):
322
            basis_lineno.append(lineno)
323
            basis_lines.append(line)
324
974.1.26 by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472
325
        # another small special case: a merge, producing the same text
326
        # as auto-merge
938 by Martin Pool
- various optimizations to weave add code
327
        if text == basis_lines:
328
            return new_version            
329
330
        # add a sentinal, because we can also match against the final line
944 by Martin Pool
- refactor member names in Weave code
331
        basis_lineno.append(len(self._weave))
938 by Martin Pool
- various optimizations to weave add code
332
333
        # XXX: which line of the weave should we really consider
334
        # matches the end of the file?  the current code says it's the
335
        # last line of the weave?
336
337
        #print 'basis_lines:', basis_lines
338
        #print 'new_lines:  ', lines
339
340
        s = SequenceMatcher(None, basis_lines, text)
341
342
        # offset gives the number of lines that have been inserted
343
        # into the weave up to the current point; if the original edit instruction
344
        # says to change line A then we actually change (A+offset)
345
        offset = 0
346
347
        for tag, i1, i2, j1, j2 in s.get_opcodes():
348
            # i1,i2 are given in offsets within basis_lines; we need to map them
349
            # back to offsets within the entire weave
350
            #print 'raw match', tag, i1, i2, j1, j2
351
            if tag == 'equal':
352
                continue
353
354
            i1 = basis_lineno[i1]
355
            i2 = basis_lineno[i2]
356
357
            assert 0 <= j1 <= j2 <= len(text)
358
359
            #print tag, i1, i2, j1, j2
360
361
            # the deletion and insertion are handled separately.
362
            # first delete the region.
363
            if i1 != i2:
944 by Martin Pool
- refactor member names in Weave code
364
                self._weave.insert(i1+offset, ('[', new_version))
365
                self._weave.insert(i2+offset+1, (']', new_version))
938 by Martin Pool
- various optimizations to weave add code
366
                offset += 2
367
368
            if j1 != j2:
369
                # there may have been a deletion spanning up to
370
                # i2; we want to insert after this region to make sure
371
                # we don't destroy ourselves
372
                i = i2 + offset
944 by Martin Pool
- refactor member names in Weave code
373
                self._weave[i:i] = ([('{', new_version)] 
1075 by Martin Pool
- don't store redundant version number at end of insert blocks
374
                                    + text[j1:j2] 
375
                                    + [('}', None)])
938 by Martin Pool
- various optimizations to weave add code
376
                offset += 2 + (j2 - j1)
377
378
        return new_version
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
379
1092.2.22 by Robert Collins
text_version and name_version unification looking reasonable
380
    def add_identical(self, old_rev_id, new_rev_id, parents):
381
        """Add an identical text to old_rev_id as new_rev_id."""
382
        old_lines = self.get(self.lookup(old_rev_id))
383
        self.add(new_rev_id, parents, old_lines)
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
384
0.1.78 by Martin Pool
Rename Weave.get_included to inclusions and getiter to get_iter
385
    def inclusions(self, versions):
893 by Martin Pool
- Refactor weave calculation of inclusions
386
        """Return set of all ancestors of given version(s)."""
928 by Martin Pool
- go back to using plain builtin set()
387
        i = set(versions)
1328 by Martin Pool
- slight refactoring of weave code
388
        for v in xrange(max(versions), 0, -1):
389
            if v in i:
390
                # include all its parents
391
                i.update(self._parents[v])
392
        return i
393
        ## except IndexError:
394
        ##     raise ValueError("version %d not present in weave" % v)
0.1.77 by Martin Pool
New Weave.get_included() does transitive expansion
395
396
1229 by Martin Pool
- new Weave.idx_to_name and .parents methods
397
    def parents(self, version):
398
        return self._parents[version]
399
400
1393.1.69 by Martin Pool
- new Weave.parent_names
401
    def parent_names(self, version):
402
        """Return version names for parents of a version."""
403
        return map(self.idx_to_name, self._parents[self.lookup(version)])
404
405
890 by Martin Pool
- weave info should show minimal expression of parents
406
    def minimal_parents(self, version):
407
        """Find the minimal set of parents for the version."""
944 by Martin Pool
- refactor member names in Weave code
408
        included = self._parents[version]
890 by Martin Pool
- weave info should show minimal expression of parents
409
        if not included:
410
            return []
411
        
412
        li = list(included)
893 by Martin Pool
- Refactor weave calculation of inclusions
413
        li.sort(reverse=True)
890 by Martin Pool
- weave info should show minimal expression of parents
414
415
        mininc = []
928 by Martin Pool
- go back to using plain builtin set()
416
        gotit = set()
890 by Martin Pool
- weave info should show minimal expression of parents
417
418
        for pv in li:
419
            if pv not in gotit:
420
                mininc.append(pv)
893 by Martin Pool
- Refactor weave calculation of inclusions
421
                gotit.update(self.inclusions(pv))
890 by Martin Pool
- weave info should show minimal expression of parents
422
423
        assert mininc[0] >= 0
424
        assert mininc[-1] < version
425
        return mininc
426
427
0.1.75 by Martin Pool
Remove VerInfo class; just store sets directly in the list of
428
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
429
    def _check_lines(self, text):
430
        if not isinstance(text, list):
431
            raise ValueError("text should be a list, not %s" % type(text))
432
433
        for l in text:
434
            if not isinstance(l, basestring):
869 by Martin Pool
- more weave.py command line options
435
                raise ValueError("text line should be a string or unicode, not %s"
436
                                 % type(l))
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
437
        
438
439
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
440
    def _check_versions(self, indexes):
441
        """Check everything in the sequence of indexes is valid"""
442
        for i in indexes:
443
            try:
944 by Martin Pool
- refactor member names in Weave code
444
                self._parents[i]
0.1.27 by Martin Pool
Check that version numbers passed in are reasonable
445
            except IndexError:
446
                raise IndexError("invalid version number %r" % i)
447
0.1.2 by Martin Pool
Import testsweet module adapted from bzr.
448
    
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
449
    def annotate(self, name_or_index):
450
        return list(self.annotate_iter(name_or_index))
451
452
453
    def annotate_iter(self, name_or_index):
0.1.7 by Martin Pool
Add trivial annotate text
454
        """Yield list of (index-id, line) pairs for the specified version.
455
456
        The index indicates when the line originated in the weave."""
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
457
        incls = [self.maybe_lookup(name_or_index)]
458
        for origin, lineno, text in self._extract(incls):
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
459
            yield origin, text
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
460
461
918 by Martin Pool
- start doing new weave-merge algorithm
462
    def _walk(self):
463
        """Walk the weave.
464
465
        Yields sequence of
466
        (lineno, insert, deletes, text)
467
        for each literal line.
468
        """
469
        
470
        istack = []
928 by Martin Pool
- go back to using plain builtin set()
471
        dset = set()
918 by Martin Pool
- start doing new weave-merge algorithm
472
473
        lineno = 0         # line of weave, 0-based
474
944 by Martin Pool
- refactor member names in Weave code
475
        for l in self._weave:
918 by Martin Pool
- start doing new weave-merge algorithm
476
            if isinstance(l, tuple):
477
                c, v = l
478
                isactive = None
479
                if c == '{':
480
                    istack.append(v)
481
                elif c == '}':
1075 by Martin Pool
- don't store redundant version number at end of insert blocks
482
                    istack.pop()
918 by Martin Pool
- start doing new weave-merge algorithm
483
                elif c == '[':
926 by Martin Pool
- update more weave code to use intsets
484
                    assert v not in dset
485
                    dset.add(v)
918 by Martin Pool
- start doing new weave-merge algorithm
486
                elif c == ']':
926 by Martin Pool
- update more weave code to use intsets
487
                    dset.remove(v)
918 by Martin Pool
- start doing new weave-merge algorithm
488
                else:
489
                    raise WeaveFormatError('unexpected instruction %r'
490
                                           % v)
491
            else:
492
                assert isinstance(l, basestring)
493
                assert istack
494
                yield lineno, istack[-1], dset, l
495
            lineno += 1
496
497
498
893 by Martin Pool
- Refactor weave calculation of inclusions
499
    def _extract(self, versions):
0.1.20 by Martin Pool
Factor out Knit.extract() method
500
        """Yield annotation of lines in included set.
501
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
502
        Yields a sequence of tuples (origin, lineno, text), where
503
        origin is the origin version, lineno the index in the weave,
504
        and text the text of the line.
505
0.1.20 by Martin Pool
Factor out Knit.extract() method
506
        The set typically but not necessarily corresponds to a version.
507
        """
1196 by Martin Pool
- [WIP] retrieve historical texts from weaves
508
        for i in versions:
509
            if not isinstance(i, int):
510
                raise ValueError(i)
511
            
893 by Martin Pool
- Refactor weave calculation of inclusions
512
        included = self.inclusions(versions)
881 by Martin Pool
- faster weave extraction
513
514
        istack = []
928 by Martin Pool
- go back to using plain builtin set()
515
        dset = set()
0.1.48 by Martin Pool
Basic parsing of delete instructions.
516
517
        lineno = 0         # line of weave, 0-based
891 by Martin Pool
- fix up refactoring of weave
518
894 by Martin Pool
- small optimization for weave extract
519
        isactive = None
0.1.85 by Martin Pool
doc
520
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
521
        result = []
522
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
523
        WFE = WeaveFormatError
0.1.95 by Martin Pool
- preliminary merge conflict detection
524
944 by Martin Pool
- refactor member names in Weave code
525
        for l in self._weave:
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
526
            if isinstance(l, tuple):
527
                c, v = l
894 by Martin Pool
- small optimization for weave extract
528
                isactive = None
891 by Martin Pool
- fix up refactoring of weave
529
                if c == '{':
530
                    assert v not in istack
531
                    istack.append(v)
532
                elif c == '}':
1075 by Martin Pool
- don't store redundant version number at end of insert blocks
533
                    istack.pop()
891 by Martin Pool
- fix up refactoring of weave
534
                elif c == '[':
535
                    if v in included:
881 by Martin Pool
- faster weave extraction
536
                        assert v not in dset
0.1.48 by Martin Pool
Basic parsing of delete instructions.
537
                        dset.add(v)
891 by Martin Pool
- fix up refactoring of weave
538
                else:
539
                    assert c == ']'
540
                    if v in included:
881 by Martin Pool
- faster weave extraction
541
                        assert v in dset
0.1.48 by Martin Pool
Basic parsing of delete instructions.
542
                        dset.remove(v)
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
543
            else:
544
                assert isinstance(l, basestring)
894 by Martin Pool
- small optimization for weave extract
545
                if isactive is None:
546
                    isactive = (not dset) and istack and (istack[-1] in included)
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
547
                if isactive:
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
548
                    result.append((istack[-1], lineno, l))
0.1.39 by Martin Pool
Change to a more realistic weave structure which can represent insertions and
549
            lineno += 1
0.1.7 by Martin Pool
Add trivial annotate text
550
0.1.46 by Martin Pool
More constraints on structure of weave, and checks that they work
551
        if istack:
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
552
            raise WFE("unclosed insertion blocks at end of weave",
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
553
                                   istack)
0.1.48 by Martin Pool
Basic parsing of delete instructions.
554
        if dset:
0.1.63 by Martin Pool
Abbreviate WeaveFormatError in some code
555
            raise WFE("unclosed deletion blocks at end of weave",
0.1.48 by Martin Pool
Basic parsing of delete instructions.
556
                                   dset)
0.1.40 by Martin Pool
Add test for extracting from weave with nested insertions
557
931 by Martin Pool
- experiment with making Weave._extract() return a list, not a generator - slightly faster
558
        return result
559
    
560
0.1.7 by Martin Pool
Add trivial annotate text
561
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
562
    def get_iter(self, name_or_index):
0.1.5 by Martin Pool
Add test for storing two text versions.
563
        """Yield lines for the specified version."""
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
564
        incls = [self.maybe_lookup(name_or_index)]
565
        for origin, lineno, line in self._extract(incls):
0.1.8 by Martin Pool
Unify get/annotate code
566
            yield line
0.1.5 by Martin Pool
Add test for storing two text versions.
567
568
1369 by Martin Pool
- try to avoid redundant conversion of strings when retrieving from weaves
569
    def get_text(self, name_or_index):
570
        return ''.join(self.get_iter(name_or_index))
571
        assert isinstance(version, int)
572
573
574
    def get_lines(self, name_or_index):
1269 by Martin Pool
- some weave operations automatically look up symbolic names if supplied
575
        return list(self.get_iter(name_or_index))
0.1.1 by Martin Pool
Check in old existing knit code.
576
577
1369 by Martin Pool
- try to avoid redundant conversion of strings when retrieving from weaves
578
    get = get_lines
579
580
0.1.95 by Martin Pool
- preliminary merge conflict detection
581
    def mash_iter(self, included):
0.1.65 by Martin Pool
Add Weave.merge_iter to get automerged lines
582
        """Return composed version of multiple included versions."""
1338 by Martin Pool
- Weave.mash_iter optionally takes names rather than indexes
583
        included = map(self.maybe_lookup, included)
893 by Martin Pool
- Refactor weave calculation of inclusions
584
        for origin, lineno, text in self._extract(included):
0.1.65 by Martin Pool
Add Weave.merge_iter to get automerged lines
585
            yield text
586
587
0.1.11 by Martin Pool
Add Knit.dump method
588
    def dump(self, to_file):
589
        from pprint import pprint
944 by Martin Pool
- refactor member names in Weave code
590
        print >>to_file, "Weave._weave = ",
591
        pprint(self._weave, to_file)
592
        print >>to_file, "Weave._parents = ",
593
        pprint(self._parents, to_file)
0.1.11 by Martin Pool
Add Knit.dump method
594
595
0.1.91 by Martin Pool
Update Weave.check
596
597
    def numversions(self):
944 by Martin Pool
- refactor member names in Weave code
598
        l = len(self._parents)
0.1.91 by Martin Pool
Update Weave.check
599
        assert l == len(self._sha1s)
600
        return l
601
602
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
603
    def __len__(self):
604
        return self.numversions()
605
606
894 by Martin Pool
- small optimization for weave extract
607
    def check(self, progress_bar=None):
0.1.91 by Martin Pool
Update Weave.check
608
        # check no circular inclusions
609
        for version in range(self.numversions()):
944 by Martin Pool
- refactor member names in Weave code
610
            inclusions = list(self._parents[version])
0.1.91 by Martin Pool
Update Weave.check
611
            if inclusions:
612
                inclusions.sort()
613
                if inclusions[-1] >= version:
0.1.47 by Martin Pool
New WeaveError and WeaveFormatError rather than assertions.
614
                    raise WeaveFormatError("invalid included version %d for index %d"
0.1.91 by Martin Pool
Update Weave.check
615
                                           % (inclusions[-1], version))
616
617
        # try extracting all versions; this is a bit slow and parallel
618
        # extraction could be used
894 by Martin Pool
- small optimization for weave extract
619
        nv = self.numversions()
620
        for version in range(nv):
621
            if progress_bar:
622
                progress_bar.update('checking text', version, nv)
0.1.91 by Martin Pool
Update Weave.check
623
            s = sha.new()
624
            for l in self.get_iter(version):
625
                s.update(l)
626
            hd = s.hexdigest()
627
            expected = self._sha1s[version]
628
            if hd != expected:
629
                raise WeaveError("mismatched sha1 for version %d; "
630
                                 "got %s, expected %s"
631
                                 % (version, hd, expected))
0.1.18 by Martin Pool
Better Knit.dump method
632
881 by Martin Pool
- faster weave extraction
633
        # TODO: check insertions are properly nested, that there are
634
        # no lines outside of insertion blocks, that deletions are
635
        # properly paired, etc.
636
0.1.13 by Martin Pool
Knit structure now allows for versions to include the lines present in other
637
638
0.1.95 by Martin Pool
- preliminary merge conflict detection
639
    def merge(self, merge_versions):
640
        """Automerge and mark conflicts between versions.
641
642
        This returns a sequence, each entry describing alternatives
643
        for a chunk of the file.  Each of the alternatives is given as
644
        a list of lines.
645
646
        If there is a chunk of the file where there's no diagreement,
647
        only one alternative is given.
648
        """
649
        # approach: find the included versions common to all the
650
        # merged versions
651
        raise NotImplementedError()
652
653
654
0.1.21 by Martin Pool
Start computing a delta to insert a new revision
655
    def _delta(self, included, lines):
656
        """Return changes from basis to new revision.
657
658
        The old text for comparison is the union of included revisions.
659
660
        This is used in inserting a new text.
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
661
0.1.55 by Martin Pool
doc
662
        Delta is returned as a sequence of
663
        (weave1, weave2, newlines).
664
665
        This indicates that weave1:weave2 of the old weave should be
0.1.22 by Martin Pool
Calculate delta for new versions relative to a set of parent versions.
666
        replaced by the sequence of lines in newlines.  Note that
667
        these line numbers are positions in the total weave and don't
668
        correspond to the lines in any extracted version, or even the
669
        extracted union of included versions.
670
671
        If line1=line2, this is a pure insert; if newlines=[] this is a
672
        pure delete.  (Similar to difflib.)
0.1.21 by Martin Pool
Start computing a delta to insert a new revision
673
        """
1393.1.47 by Martin Pool
- Weave._delta is not implemented at the moment
674
        raise NotImplementedError()
0.1.1 by Martin Pool
Check in old existing knit code.
675
918 by Martin Pool
- start doing new weave-merge algorithm
676
            
677
    def plan_merge(self, ver_a, ver_b):
678
        """Return pseudo-annotation indicating how the two versions merge.
679
680
        This is computed between versions a and b and their common
681
        base.
682
683
        Weave lines present in none of them are skipped entirely.
684
        """
926 by Martin Pool
- update more weave code to use intsets
685
        inc_a = self.inclusions([ver_a])
686
        inc_b = self.inclusions([ver_b])
918 by Martin Pool
- start doing new weave-merge algorithm
687
        inc_c = inc_a & inc_b
688
689
        for lineno, insert, deleteset, line in self._walk():
690
            if deleteset & inc_c:
691
                # killed in parent; can't be in either a or b
692
                # not relevant to our work
693
                yield 'killed-base', line
926 by Martin Pool
- update more weave code to use intsets
694
            elif insert in inc_c:
918 by Martin Pool
- start doing new weave-merge algorithm
695
                # was inserted in base
696
                killed_a = bool(deleteset & inc_a)
697
                killed_b = bool(deleteset & inc_b)
698
                if killed_a and killed_b:
699
                    yield 'killed-both', line
700
                elif killed_a:
701
                    yield 'killed-a', line
702
                elif killed_b:
703
                    yield 'killed-b', line
704
                else:
705
                    yield 'unchanged', line
926 by Martin Pool
- update more weave code to use intsets
706
            elif insert in inc_a:
918 by Martin Pool
- start doing new weave-merge algorithm
707
                if deleteset & inc_a:
708
                    yield 'ghost-a', line
709
                else:
710
                    # new in A; not in B
711
                    yield 'new-a', line
926 by Martin Pool
- update more weave code to use intsets
712
            elif insert in inc_b:
918 by Martin Pool
- start doing new weave-merge algorithm
713
                if deleteset & inc_b:
714
                    yield 'ghost-b', line
715
                else:
716
                    yield 'new-b', line
717
            else:
718
                # not in either revision
719
                yield 'irrelevant', line
720
919 by Martin Pool
- more development of weave-merge
721
        yield 'unchanged', ''           # terminator
722
723
724
725
    def weave_merge(self, plan):
726
        lines_a = []
727
        lines_b = []
728
        ch_a = ch_b = False
729
730
        for state, line in plan:
731
            if state == 'unchanged' or state == 'killed-both':
732
                # resync and flush queued conflicts changes if any
733
                if not lines_a and not lines_b:
734
                    pass
735
                elif ch_a and not ch_b:
736
                    # one-sided change:                    
737
                    for l in lines_a: yield l
738
                elif ch_b and not ch_a:
739
                    for l in lines_b: yield l
740
                elif lines_a == lines_b:
741
                    for l in lines_a: yield l
742
                else:
743
                    yield '<<<<\n'
744
                    for l in lines_a: yield l
745
                    yield '====\n'
746
                    for l in lines_b: yield l
747
                    yield '>>>>\n'
748
749
                del lines_a[:]
750
                del lines_b[:]
751
                ch_a = ch_b = False
752
                
753
            if state == 'unchanged':
754
                if line:
755
                    yield line
756
            elif state == 'killed-a':
757
                ch_a = True
758
                lines_b.append(line)
759
            elif state == 'killed-b':
760
                ch_b = True
761
                lines_a.append(line)
762
            elif state == 'new-a':
763
                ch_a = True
764
                lines_a.append(line)
765
            elif state == 'new-b':
766
                ch_b = True
767
                lines_b.append(line)
768
            else:
920 by Martin Pool
- add more test cases for weave_merge
769
                assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
770
                                 'killed-both'), \
919 by Martin Pool
- more development of weave-merge
771
                       state
772
773
                
1393.1.48 by Martin Pool
- Add stub Weave.join() method
774
    def join(self, other):
775
        """Integrate versions from other into this weave.
776
1393.1.49 by Martin Pool
- add Weave._check_consistent_with
777
        The resulting weave contains all the history of both weaves; 
778
        any version you could retrieve from either self or other can be 
779
        retrieved from self after this call.
1393.1.48 by Martin Pool
- Add stub Weave.join() method
780
1393.1.49 by Martin Pool
- add Weave._check_consistent_with
781
        It is illegal for the two weaves to contain different values 
1393.1.68 by Martin Pool
- add reweave that joins with ghosts
782
        or different parents for any version.  See also reweave().
1393.1.48 by Martin Pool
- Add stub Weave.join() method
783
        """
784
        if other.numversions() == 0:
785
            return          # nothing to update, easy
1185.13.4 by Robert Collins
make reweave visible as a weave method, and quickly integrate into fetch
786
        # two loops so that we do not change ourselves before verifying it
787
        # will be ok
1393.1.50 by Martin Pool
- more development of Weave.join()
788
        # work through in index order to make sure we get all dependencies
789
        for other_idx, name in enumerate(other._names):
1185.13.4 by Robert Collins
make reweave visible as a weave method, and quickly integrate into fetch
790
            if self._check_version_consistent(other, other_idx, name):
791
                continue
792
        for other_idx, name in enumerate(other._names):
1393.1.50 by Martin Pool
- more development of Weave.join()
793
            # TODO: If all the parents of the other version are already 
794
            # present then we can avoid some work by just taking the delta
795
            # and adjusting the offsets.
1393.1.51 by Martin Pool
- new Weave.copy()
796
            new_parents = self._imported_parents(other, other_idx)
1393.1.50 by Martin Pool
- more development of Weave.join()
797
            lines = other.get_lines(other_idx)
798
            sha1 = other._sha1s[other_idx]
799
            self.add(name, new_parents, lines, sha1)
800
1393.1.51 by Martin Pool
- new Weave.copy()
801
802
    def _imported_parents(self, other, other_idx):
803
        """Return list of parents in self corresponding to indexes in other."""
804
        new_parents = []
805
        for parent_idx in other._parents[other_idx]:
806
            parent_name = other._names[parent_idx]
807
            if parent_name not in self._names:
808
                # should not be possible
809
                raise WeaveError("missing parent {%s} of {%s} in %r" 
810
                                 % (parent_name, other._name_map[other_idx], self))
811
            new_parents.append(self._name_map[parent_name])
812
        return new_parents
813
1393.1.50 by Martin Pool
- more development of Weave.join()
814
    def _check_version_consistent(self, other, other_idx, name):
815
        """Check if a version in consistent in this and other.
1393.1.66 by Martin Pool
- fix join of weaves where parents occur at different offsets
816
817
        To be consistent it must have:
818
819
         * the same text
820
         * the same direct parents (by name, not index, and disregarding
821
           order)
1393.1.50 by Martin Pool
- more development of Weave.join()
822
        
823
        If present & correct return True;
824
        if not present in self return False; 
825
        if inconsistent raise error."""
826
        this_idx = self._name_map.get(name, -1)
827
        if this_idx != -1:
828
            if self._sha1s[this_idx] != other._sha1s[other_idx]:
1393.1.66 by Martin Pool
- fix join of weaves where parents occur at different offsets
829
                raise WeaveError("inconsistent texts for version {%s} "
830
                                 "when joining weaves"
831
                                 % (name))
832
            self_parents = self._parents[this_idx]
833
            other_parents = other._parents[other_idx]
834
            n1 = [self._names[i] for i in self_parents]
835
            n2 = [other._names[i] for i in other_parents]
836
            n1.sort()
837
            n2.sort()
838
            if n1 != n2:
1185.13.4 by Robert Collins
make reweave visible as a weave method, and quickly integrate into fetch
839
                raise WeaveParentMismatch("inconsistent parents "
840
                    "for version {%s}: %s vs %s" % (name, n1, n2))
1393.1.50 by Martin Pool
- more development of Weave.join()
841
            else:
842
                return True         # ok!
843
        else:
844
            return False
918 by Martin Pool
- start doing new weave-merge algorithm
845
1185.13.4 by Robert Collins
make reweave visible as a weave method, and quickly integrate into fetch
846
    def reweave(self, other):
847
        """Reweave self with other."""
848
        new_weave = reweave(self, other)
849
        for attr in self.__slots__:
850
            setattr(self, attr, getattr(new_weave, attr))
851
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
852
1393.1.68 by Martin Pool
- add reweave that joins with ghosts
853
def reweave(wa, wb):
854
    """Combine two weaves and return the result.
855
856
    This works even if a revision R has different parents in 
857
    wa and wb.  In the resulting weave all the parents are given.
858
859
    This is done by just building up a new weave, maintaining ordering 
860
    of the versions in the two inputs.  More efficient approaches
861
    might be possible but it should only be necessary to do 
862
    this operation rarely, when a new previously ghost version is 
863
    inserted.
864
    """
865
    wr = Weave()
866
    ia = ib = 0
867
    queue_a = range(wa.numversions())
868
    queue_b = range(wb.numversions())
869
    # first determine combined parents of all versions
870
    # map from version name -> all parent names
871
    combined_parents = _reweave_parent_graphs(wa, wb)
872
    mutter("combined parents: %r", combined_parents)
873
    order = _make_reweave_order(wa._names, wb._names, combined_parents)
874
    mutter("order to reweave: %r", order)
875
    for name in order:
876
        if name in wa._name_map:
877
            lines = wa.get_lines(name)
878
            if name in wb._name_map:
879
                assert lines == wb.get_lines(name)
880
        else:
881
            lines = wb.get_lines(name)
882
        wr.add(name, combined_parents[name], lines)
883
    return wr
884
885
886
def _reweave_parent_graphs(wa, wb):
887
    """Return combined parent ancestry for two weaves.
888
    
889
    Returned as a list of (version_name, set(parent_names))"""
890
    combined = {}
891
    for weave in [wa, wb]:
892
        for idx, name in enumerate(weave._names):
893
            p = combined.setdefault(name, set())
894
            p.update(map(weave.idx_to_name, weave._parents[idx]))
895
    return combined
896
897
898
def _make_reweave_order(wa_order, wb_order, combined_parents):
899
    """Return an order to reweave versions respecting parents."""
900
    done = set()
901
    result = []
902
    ia = ib = 0
903
    next_a = next_b = None
904
    len_a = len(wa_order)
905
    len_b = len(wb_order)
906
    while ia < len(wa_order) or ib < len(wb_order):
907
        if ia < len_a:
908
            next_a = wa_order[ia]
909
            if next_a in done:
910
                ia += 1
911
                continue
912
            if combined_parents[next_a].issubset(done):
913
                ia += 1
914
                result.append(next_a)
915
                done.add(next_a)
916
                continue
917
        if ib < len_b:
918
            next_b = wb_order[ib]
919
            if next_b in done:
920
                ib += 1
921
                continue
922
            elif combined_parents[next_b].issubset(done):
923
                ib += 1
924
                result.append(next_b)
925
                done.add(next_b)
926
                continue
927
        raise WeaveError("don't know how to reweave at {%s} and {%s}"
928
                         % (next_a, next_b))
929
    return result
930
931
1081 by Martin Pool
- if weave tool is invoked with no arguments, show help
932
def weave_toc(w):
933
    """Show the weave's table-of-contents"""
1083 by Martin Pool
- add space to store revision-id in weave files
934
    print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
935
    for i in (6, 50, 10, 10):
870 by Martin Pool
- better weave info display
936
        print '-' * i,
937
    print
946 by Martin Pool
- weave info only shows the weave headers, doesn't extract every version:
938
    for i in range(w.numversions()):
0.1.91 by Martin Pool
Update Weave.check
939
        sha1 = w._sha1s[i]
1083 by Martin Pool
- add space to store revision-id in weave files
940
        name = w._names[i]
941
        parent_str = ' '.join(map(str, w._parents[i]))
942
        print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
0.1.88 by Martin Pool
Add weave info command.
943
869 by Martin Pool
- more weave.py command line options
944
945
1393.1.4 by Martin Pool
- weave tool now finds bzrlib if it's not on the default path
946
def weave_stats(weave_file, pb):
947 by Martin Pool
- new 'weave stats' command
947
    from bzrlib.weavefile import read_weave
948
949
    wf = file(weave_file, 'rb')
950
    w = read_weave(wf)
951
    # FIXME: doesn't work on pipes
952
    weave_size = wf.tell()
953
954
    total = 0
955
    vers = len(w)
956
    for i in range(vers):
957
        pb.update('checking sizes', i, vers)
1393.1.24 by Martin Pool
- slight speedup for weave stats
958
        for origin, lineno, line in w._extract([i]):
947 by Martin Pool
- new 'weave stats' command
959
            total += len(line)
960
961
    pb.clear()
962
963
    print 'versions          %9d' % vers
964
    print 'weave file        %9d bytes' % weave_size
965
    print 'total contents    %9d bytes' % total
966
    print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
974.1.26 by aaron.bentley at utoronto
merged mbp@sourcefrog.net-20050817233101-0939da1cf91f2472
967
    if vers:
968
        avg = total/vers
969
        print 'average size      %9d bytes' % avg
970
        print 'relative size     %9.2fx' % (float(weave_size) / float(avg))
947 by Martin Pool
- new 'weave stats' command
971
972
869 by Martin Pool
- more weave.py command line options
973
def usage():
871 by Martin Pool
- add command for merge-based weave
974
    print """bzr weave tool
975
976
Experimental tool for weave algorithm.
977
869 by Martin Pool
- more weave.py command line options
978
usage:
979
    weave init WEAVEFILE
980
        Create an empty weave file
981
    weave get WEAVEFILE VERSION
982
        Write out specified version.
983
    weave check WEAVEFILE
984
        Check consistency of all versions.
1081 by Martin Pool
- if weave tool is invoked with no arguments, show help
985
    weave toc WEAVEFILE
869 by Martin Pool
- more weave.py command line options
986
        Display table of contents.
1084 by Martin Pool
- weave add command needs to take a symbolic name too
987
    weave add WEAVEFILE NAME [BASE...] < NEWTEXT
869 by Martin Pool
- more weave.py command line options
988
        Add NEWTEXT, with specified parent versions.
989
    weave annotate WEAVEFILE VERSION
990
        Display origin of each line.
991
    weave mash WEAVEFILE VERSION...
992
        Display composite of all selected versions.
993
    weave merge WEAVEFILE VERSION1 VERSION2 > OUT
994
        Auto-merge two versions and display conflicts.
1329 by Martin Pool
- add diff command to weave utility
995
    weave diff WEAVEFILE VERSION1 VERSION2 
996
        Show differences between two versions.
871 by Martin Pool
- add command for merge-based weave
997
998
example:
999
1000
    % weave init foo.weave
1001
    % vi foo.txt
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1002
    % weave add foo.weave ver0 < foo.txt
871 by Martin Pool
- add command for merge-based weave
1003
    added version 0
1004
1005
    (create updated version)
1006
    % vi foo.txt
1007
    % weave get foo.weave 0 | diff -u - foo.txt
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1008
    % weave add foo.weave ver1 0 < foo.txt
871 by Martin Pool
- add command for merge-based weave
1009
    added version 1
1010
1011
    % weave get foo.weave 0 > foo.txt       (create forked version)
1012
    % vi foo.txt
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1013
    % weave add foo.weave ver2 0 < foo.txt
871 by Martin Pool
- add command for merge-based weave
1014
    added version 2
1015
1016
    % weave merge foo.weave 1 2 > foo.txt   (merge them)
1017
    % vi foo.txt                            (resolve conflicts)
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1018
    % weave add foo.weave merged 1 2 < foo.txt     (commit merged version)     
871 by Martin Pool
- add command for merge-based weave
1019
    
869 by Martin Pool
- more weave.py command line options
1020
"""
0.1.88 by Martin Pool
Add weave info command.
1021
    
1022
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1023
1024
def main(argv):
1025
    import sys
1026
    import os
1393.1.4 by Martin Pool
- weave tool now finds bzrlib if it's not on the default path
1027
    try:
1028
        import bzrlib
1029
    except ImportError:
1030
        # in case we're run directly from the subdirectory
1031
        sys.path.append('..')
1032
        import bzrlib
1336 by Martin Pool
- tidy up import
1033
    from bzrlib.weavefile import write_weave, read_weave
894 by Martin Pool
- small optimization for weave extract
1034
    from bzrlib.progress import ProgressBar
1035
1078 by Martin Pool
- use psyco for weave if possible
1036
    try:
1037
        import psyco
1038
        psyco.full()
1039
    except ImportError:
1040
        pass
894 by Martin Pool
- small optimization for weave extract
1041
1081 by Martin Pool
- if weave tool is invoked with no arguments, show help
1042
    if len(argv) < 2:
1043
        usage()
1044
        return 0
1045
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1046
    cmd = argv[1]
869 by Martin Pool
- more weave.py command line options
1047
1048
    def readit():
1049
        return read_weave(file(argv[2], 'rb'))
1050
    
1051
    if cmd == 'help':
1052
        usage()
1053
    elif cmd == 'add':
1054
        w = readit()
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1055
        # at the moment, based on everything in the file
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1056
        name = argv[3]
1057
        parents = map(int, argv[4:])
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
1058
        lines = sys.stdin.readlines()
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1059
        ver = w.add(name, parents, lines)
869 by Martin Pool
- more weave.py command line options
1060
        write_weave(w, file(argv[2], 'wb'))
1084 by Martin Pool
- weave add command needs to take a symbolic name too
1061
        print 'added version %r %d' % (name, ver)
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1062
    elif cmd == 'init':
1063
        fn = argv[2]
1064
        if os.path.exists(fn):
1065
            raise IOError("file exists")
1066
        w = Weave()
869 by Martin Pool
- more weave.py command line options
1067
        write_weave(w, file(fn, 'wb'))
1068
    elif cmd == 'get': # get one version
1069
        w = readit()
0.1.94 by Martin Pool
Fix get_iter call
1070
        sys.stdout.writelines(w.get_iter(int(argv[3])))
869 by Martin Pool
- more weave.py command line options
1071
        
1072
    elif cmd == 'mash': # get composite
1073
        w = readit()
1074
        sys.stdout.writelines(w.mash_iter(map(int, argv[3:])))
1075
1329 by Martin Pool
- add diff command to weave utility
1076
    elif cmd == 'diff':
1077
        from difflib import unified_diff
1078
        w = readit()
1079
        fn = argv[2]
1080
        v1, v2 = map(int, argv[3:5])
1081
        lines1 = w.get(v1)
1082
        lines2 = w.get(v2)
1083
        diff_gen = unified_diff(lines1, lines2,
1084
                                '%s version %d' % (fn, v1),
1085
                                '%s version %d' % (fn, v2))
1086
        sys.stdout.writelines(diff_gen)
1087
            
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1088
    elif cmd == 'annotate':
869 by Martin Pool
- more weave.py command line options
1089
        w = readit()
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
1090
        # newline is added to all lines regardless; too hard to get
1091
        # reasonable formatting otherwise
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1092
        lasto = None
1093
        for origin, text in w.annotate(int(argv[3])):
0.1.72 by Martin Pool
Go back to weave lines normally having newlines at the end.
1094
            text = text.rstrip('\r\n')
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1095
            if origin == lasto:
1096
                print '      | %s' % (text)
1097
            else:
1098
                print '%5d | %s' % (origin, text)
1099
                lasto = origin
871 by Martin Pool
- add command for merge-based weave
1100
                
1081 by Martin Pool
- if weave tool is invoked with no arguments, show help
1101
    elif cmd == 'toc':
1102
        weave_toc(readit())
947 by Martin Pool
- new 'weave stats' command
1103
1104
    elif cmd == 'stats':
1393.1.4 by Martin Pool
- weave tool now finds bzrlib if it's not on the default path
1105
        weave_stats(argv[2], ProgressBar())
871 by Martin Pool
- add command for merge-based weave
1106
        
0.1.91 by Martin Pool
Update Weave.check
1107
    elif cmd == 'check':
869 by Martin Pool
- more weave.py command line options
1108
        w = readit()
894 by Martin Pool
- small optimization for weave extract
1109
        pb = ProgressBar()
1110
        w.check(pb)
1111
        pb.clear()
938 by Martin Pool
- various optimizations to weave add code
1112
        print '%d versions ok' % w.numversions()
871 by Martin Pool
- add command for merge-based weave
1113
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
1114
    elif cmd == 'inclusions':
1115
        w = readit()
1116
        print ' '.join(map(str, w.inclusions([int(argv[3])])))
1117
1118
    elif cmd == 'parents':
1119
        w = readit()
944 by Martin Pool
- refactor member names in Weave code
1120
        print ' '.join(map(str, w._parents[int(argv[3])]))
892 by Martin Pool
- weave stores only direct parents, and calculates and memoizes expansion as needed
1121
918 by Martin Pool
- start doing new weave-merge algorithm
1122
    elif cmd == 'plan-merge':
1123
        w = readit()
1124
        for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
919 by Martin Pool
- more development of weave-merge
1125
            if line:
1126
                print '%14s | %s' % (state, line),
918 by Martin Pool
- start doing new weave-merge algorithm
1127
871 by Martin Pool
- add command for merge-based weave
1128
    elif cmd == 'merge':
919 by Martin Pool
- more development of weave-merge
1129
        w = readit()
1130
        p = w.plan_merge(int(argv[3]), int(argv[4]))
1131
        sys.stdout.writelines(w.weave_merge(p))
1132
            
1133
    elif cmd == 'mash-merge':
871 by Martin Pool
- add command for merge-based weave
1134
        if len(argv) != 5:
1135
            usage()
1136
            return 1
1137
1138
        w = readit()
1139
        v1, v2 = map(int, argv[3:5])
1140
1141
        basis = w.inclusions([v1]).intersection(w.inclusions([v2]))
1142
1143
        base_lines = list(w.mash_iter(basis))
1144
        a_lines = list(w.get(v1))
1145
        b_lines = list(w.get(v2))
1146
1147
        from bzrlib.merge3 import Merge3
1148
        m3 = Merge3(base_lines, a_lines, b_lines)
1149
1150
        name_a = 'version %d' % v1
1151
        name_b = 'version %d' % v2
1152
        sys.stdout.writelines(m3.merge_lines(name_a=name_a, name_b=name_b))
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1153
    else:
1154
        raise ValueError('unknown command %r' % cmd)
1155
    
1156
1076 by Martin Pool
- add code to run weave utility under profiler
1157
1158
def profile_main(argv): 
1159
    import tempfile, hotshot, hotshot.stats
1160
1161
    prof_f = tempfile.NamedTemporaryFile()
1162
1163
    prof = hotshot.Profile(prof_f.name)
1164
1165
    ret = prof.runcall(main, argv)
1166
    prof.close()
1167
1168
    stats = hotshot.stats.load(prof_f.name)
1169
    #stats.strip_dirs()
1079 by Martin Pool
- weavefile can just use lists for read-in ancestry, not frozensets
1170
    stats.sort_stats('cumulative')
1076 by Martin Pool
- add code to run weave utility under profiler
1171
    ## XXX: Might like to write to stderr or the trace file instead but
1172
    ## print_stats seems hardcoded to stdout
1173
    stats.print_stats(20)
1174
            
1175
    return ret
1176
1177
0.1.62 by Martin Pool
Lame command-line client for reading and writing weaves.
1178
if __name__ == '__main__':
1179
    import sys
1081 by Martin Pool
- if weave tool is invoked with no arguments, show help
1180
    if '--profile' in sys.argv:
1078 by Martin Pool
- use psyco for weave if possible
1181
        args = sys.argv[:]
1081 by Martin Pool
- if weave tool is invoked with no arguments, show help
1182
        args.remove('--profile')
1078 by Martin Pool
- use psyco for weave if possible
1183
        sys.exit(profile_main(args))
1184
    else:
1185
        sys.exit(main(sys.argv))
1076 by Martin Pool
- add code to run weave utility under profiler
1186