~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/weave.py

  • Committer: abentley
  • Date: 2006-04-20 23:47:53 UTC
  • mfrom: (1681 +trunk)
  • mto: This revision was merged to the branch mainline in revision 1683.
  • Revision ID: abentley@lappy-20060420234753-6a6874b76f09f86d
Merge bzr.dev

Show diffs side-by-side

added added

removed removed

Lines of Context:
66
66
# be done fairly efficiently because the sequence numbers constrain
67
67
# the possible relationships.
68
68
 
 
69
# FIXME: the conflict markers should be *7* characters
69
70
 
70
71
from copy import copy
71
72
from cStringIO import StringIO
260
261
 
261
262
    __contains__ = has_version
262
263
 
 
264
    def get_delta(self, version_id):
 
265
        """See VersionedFile.get_delta."""
 
266
        return self.get_deltas([version_id])[version_id]
 
267
 
 
268
    def get_deltas(self, version_ids):
 
269
        """See VersionedFile.get_deltas."""
 
270
        version_ids = self.get_ancestry(version_ids)
 
271
        for version_id in version_ids:
 
272
            if not self.has_version(version_id):
 
273
                raise RevisionNotPresent(version_id, self)
 
274
        # try extracting all versions; parallel extraction is used
 
275
        nv = self.num_versions()
 
276
        sha1s = {}
 
277
        deltas = {}
 
278
        texts = {}
 
279
        inclusions = {}
 
280
        noeols = {}
 
281
        last_parent_lines = {}
 
282
        parents = {}
 
283
        parent_inclusions = {}
 
284
        parent_linenums = {}
 
285
        parent_noeols = {}
 
286
        current_hunks = {}
 
287
        diff_hunks = {}
 
288
        # its simplest to generate a full set of prepared variables.
 
289
        for i in range(nv):
 
290
            name = self._names[i]
 
291
            sha1s[name] = self.get_sha1(name)
 
292
            parents_list = self.get_parents(name)
 
293
            try:
 
294
                parent = parents_list[0]
 
295
                parents[name] = parent
 
296
                parent_inclusions[name] = inclusions[parent]
 
297
            except IndexError:
 
298
                parents[name] = None
 
299
                parent_inclusions[name] = set()
 
300
            # we want to emit start, finish, replacement_length, replacement_lines tuples.
 
301
            diff_hunks[name] = []
 
302
            current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
 
303
            parent_linenums[name] = 0
 
304
            noeols[name] = False
 
305
            parent_noeols[name] = False
 
306
            last_parent_lines[name] = None
 
307
            new_inc = set([name])
 
308
            for p in self._parents[i]:
 
309
                new_inc.update(inclusions[self._idx_to_name(p)])
 
310
            # debug only, known good so far.
 
311
            #assert set(new_inc) == set(self.get_ancestry(name)), \
 
312
            #    'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
 
313
            inclusions[name] = new_inc
 
314
 
 
315
        nlines = len(self._weave)
 
316
 
 
317
        for lineno, inserted, deletes, line in self._walk_internal():
 
318
            # a line is active in a version if:
 
319
            # insert is in the versions inclusions
 
320
            # and
 
321
            # deleteset & the versions inclusions is an empty set.
 
322
            # so - if we have a included by mapping - version is included by
 
323
            # children, we get a list of children to examine for deletes affect
 
324
            # ing them, which is less than the entire set of children.
 
325
            for version_id in version_ids:  
 
326
                # The active inclusion must be an ancestor,
 
327
                # and no ancestors must have deleted this line,
 
328
                # because we don't support resurrection.
 
329
                parent_inclusion = parent_inclusions[version_id]
 
330
                inclusion = inclusions[version_id]
 
331
                parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
 
332
                version_active = inserted in inclusion and not (deletes & inclusion)
 
333
                if not parent_active and not version_active:
 
334
                    # unrelated line of ancestry
 
335
                    continue
 
336
                elif parent_active and version_active:
 
337
                    # shared line
 
338
                    parent_linenum = parent_linenums[version_id]
 
339
                    if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
 
340
                        diff_hunks[version_id].append(tuple(current_hunks[version_id]))
 
341
                    parent_linenum += 1
 
342
                    current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
 
343
                    parent_linenums[version_id] = parent_linenum
 
344
                    try:
 
345
                        if line[-1] != '\n':
 
346
                            noeols[version_id] = True
 
347
                    except IndexError:
 
348
                        pass
 
349
                elif parent_active and not version_active:
 
350
                    # deleted line
 
351
                    current_hunks[version_id][1] += 1
 
352
                    parent_linenums[version_id] += 1
 
353
                    last_parent_lines[version_id] = line
 
354
                elif not parent_active and version_active:
 
355
                    # replacement line
 
356
                    # noeol only occurs at the end of a file because we 
 
357
                    # diff linewise. We want to show noeol changes as a
 
358
                    # empty diff unless the actual eol-less content changed.
 
359
                    theline = line
 
360
                    try:
 
361
                        if last_parent_lines[version_id][-1] != '\n':
 
362
                            parent_noeols[version_id] = True
 
363
                    except (TypeError, IndexError):
 
364
                        pass
 
365
                    try:
 
366
                        if theline[-1] != '\n':
 
367
                            noeols[version_id] = True
 
368
                    except IndexError:
 
369
                        pass
 
370
                    new_line = False
 
371
                    parent_should_go = False
 
372
 
 
373
                    if parent_noeols[version_id] == noeols[version_id]:
 
374
                        # no noeol toggle, so trust the weaves statement
 
375
                        # that this line is changed.
 
376
                        new_line = True
 
377
                        if parent_noeols[version_id]:
 
378
                            theline = theline + '\n'
 
379
                    elif parent_noeols[version_id]:
 
380
                        # parent has no eol, we do:
 
381
                        # our line is new, report as such..
 
382
                        new_line = True
 
383
                    elif noeols[version_id]:
 
384
                        # append a eol so that it looks like
 
385
                        # a normalised delta
 
386
                        theline = theline + '\n'
 
387
                        if parents[version_id] is not None:
 
388
                        #if last_parent_lines[version_id] is not None:
 
389
                            parent_should_go = True
 
390
                        if last_parent_lines[version_id] != theline:
 
391
                            # but changed anyway
 
392
                            new_line = True
 
393
                            #parent_should_go = False
 
394
                    if new_line:
 
395
                        current_hunks[version_id][2] += 1
 
396
                        current_hunks[version_id][3].append((inserted, theline))
 
397
                    if parent_should_go:
 
398
                        # last hunk last parent line is not eaten
 
399
                        current_hunks[version_id][1] -= 1
 
400
                    if current_hunks[version_id][1] < 0:
 
401
                        current_hunks[version_id][1] = 0
 
402
                        # import pdb;pdb.set_trace()
 
403
                    # assert current_hunks[version_id][1] >= 0
 
404
 
 
405
        # flush last hunk
 
406
        for i in range(nv):
 
407
            version = self._idx_to_name(i)
 
408
            if current_hunks[version] != [0, 0, 0, []]:
 
409
                diff_hunks[version].append(tuple(current_hunks[version]))
 
410
        result = {}
 
411
        for version_id in version_ids:
 
412
            result[version_id] = (
 
413
                                  parents[version_id],
 
414
                                  sha1s[version_id],
 
415
                                  noeols[version_id],
 
416
                                  diff_hunks[version_id],
 
417
                                  )
 
418
        return result
 
419
 
263
420
    def get_parents(self, version_id):
264
421
        """See VersionedFile.get_parent."""
265
422
        return map(self._idx_to_name, self._parents[self._lookup(version_id)])
280
437
        """Please use Weave.clone_text now."""
281
438
        return self.clone_text(new_rev_id, old_rev_id, parents)
282
439
 
283
 
    def _add_lines(self, version_id, parents, lines):
 
440
    def _add_lines(self, version_id, parents, lines, parent_texts):
284
441
        """See VersionedFile.add_lines."""
285
442
        return self._add(version_id, lines, map(self._lookup, parents))
286
443
 
306
463
        """
307
464
 
308
465
        assert isinstance(version_id, basestring)
 
466
        self._check_lines_not_unicode(lines)
 
467
        self._check_lines_are_lines(lines)
309
468
        if not sha1:
310
469
            sha1 = sha_strings(lines)
311
470
        if version_id in self._name_map:
526
685
        lineno = 0         # line of weave, 0-based
527
686
 
528
687
        for l in self._weave:
529
 
            if isinstance(l, tuple):
 
688
            if l.__class__ == tuple:
530
689
                c, v = l
531
690
                isactive = None
532
691
                if c == '{':
533
 
                    istack.append(self._idx_to_name(v))
 
692
                    istack.append(self._names[v])
534
693
                elif c == '}':
535
694
                    istack.pop()
536
695
                elif c == '[':
537
 
                    assert self._idx_to_name(v) not in dset
538
 
                    dset.add(self._idx_to_name(v))
 
696
                    assert self._names[v] not in dset
 
697
                    dset.add(self._names[v])
539
698
                elif c == ']':
540
 
                    dset.remove(self._idx_to_name(v))
 
699
                    dset.remove(self._names[v])
541
700
                else:
542
701
                    raise WeaveFormatError('unexpected instruction %r' % v)
543
702
            else:
544
 
                assert isinstance(l, basestring)
 
703
                assert l.__class__ in (str, unicode)
545
704
                assert istack
546
 
                yield lineno, istack[-1], dset.copy(), l
 
705
                yield lineno, istack[-1], frozenset(dset), l
547
706
            lineno += 1
548
707
 
549
708
        if istack:
553
712
            raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
554
713
                                   % dset)
555
714
 
 
715
    def plan_merge(self, ver_a, ver_b):
 
716
        """Return pseudo-annotation indicating how the two versions merge.
 
717
 
 
718
        This is computed between versions a and b and their common
 
719
        base.
 
720
 
 
721
        Weave lines present in none of them are skipped entirely.
 
722
        """
 
723
        inc_a = set(self.get_ancestry([ver_a]))
 
724
        inc_b = set(self.get_ancestry([ver_b]))
 
725
        inc_c = inc_a & inc_b
 
726
 
 
727
        for lineno, insert, deleteset, line in\
 
728
            self.walk([ver_a, ver_b]):
 
729
            if deleteset & inc_c:
 
730
                # killed in parent; can't be in either a or b
 
731
                # not relevant to our work
 
732
                yield 'killed-base', line
 
733
            elif insert in inc_c:
 
734
                # was inserted in base
 
735
                killed_a = bool(deleteset & inc_a)
 
736
                killed_b = bool(deleteset & inc_b)
 
737
                if killed_a and killed_b:
 
738
                    yield 'killed-both', line
 
739
                elif killed_a:
 
740
                    yield 'killed-a', line
 
741
                elif killed_b:
 
742
                    yield 'killed-b', line
 
743
                else:
 
744
                    yield 'unchanged', line
 
745
            elif insert in inc_a:
 
746
                if deleteset & inc_a:
 
747
                    yield 'ghost-a', line
 
748
                else:
 
749
                    # new in A; not in B
 
750
                    yield 'new-a', line
 
751
            elif insert in inc_b:
 
752
                if deleteset & inc_b:
 
753
                    yield 'ghost-b', line
 
754
                else:
 
755
                    yield 'new-b', line
 
756
            else:
 
757
                # not in either revision
 
758
                yield 'irrelevant', line
 
759
 
 
760
        yield 'unchanged', ''           # terminator
 
761
 
556
762
    def _extract(self, versions):
557
763
        """Yield annotation of lines in included set.
558
764
 
569
775
        included = self._inclusions(versions)
570
776
 
571
777
        istack = []
 
778
        iset = set()
572
779
        dset = set()
573
780
 
574
781
        lineno = 0         # line of weave, 0-based
579
786
 
580
787
        WFE = WeaveFormatError
581
788
 
 
789
        # wow. 
 
790
        #  449       0   4474.6820   2356.5590   bzrlib.weave:556(_extract)
 
791
        #  +285282   0   1676.8040   1676.8040   +<isinstance>
 
792
        # 1.6 seconds in 'isinstance'.
 
793
        # changing the first isinstance:
 
794
        #  449       0   2814.2660   1577.1760   bzrlib.weave:556(_extract)
 
795
        #  +140414   0    762.8050    762.8050   +<isinstance>
 
796
        # note that the inline time actually dropped (less function calls)
 
797
        # and total processing time was halved.
 
798
        # we're still spending ~1/4 of the method in isinstance though.
 
799
        # so lets hard code the acceptable string classes we expect:
 
800
        #  449       0   1202.9420    786.2930   bzrlib.weave:556(_extract)
 
801
        # +71352     0    377.5560    377.5560   +<method 'append' of 'list' 
 
802
        #                                          objects>
 
803
        # yay, down to ~1/4 the initial extract time, and our inline time
 
804
        # has shrunk again, with isinstance no longer dominating.
 
805
        # tweaking the stack inclusion test to use a set gives:
 
806
        #  449       0   1122.8030    713.0080   bzrlib.weave:556(_extract)
 
807
        # +71352     0    354.9980    354.9980   +<method 'append' of 'list' 
 
808
        #                                          objects>
 
809
        # - a 5% win, or possibly just noise. However with large istacks that
 
810
        # 'in' test could dominate, so I'm leaving this change in place -
 
811
        # when its fast enough to consider profiling big datasets we can review.
 
812
 
 
813
              
 
814
             
 
815
 
582
816
        for l in self._weave:
583
 
            if isinstance(l, tuple):
 
817
            if l.__class__ == tuple:
584
818
                c, v = l
585
819
                isactive = None
586
820
                if c == '{':
587
 
                    assert v not in istack
 
821
                    assert v not in iset
588
822
                    istack.append(v)
 
823
                    iset.add(v)
589
824
                elif c == '}':
590
 
                    istack.pop()
 
825
                    iset.remove(istack.pop())
591
826
                elif c == '[':
592
827
                    if v in included:
593
828
                        assert v not in dset
598
833
                        assert v in dset
599
834
                        dset.remove(v)
600
835
            else:
601
 
                assert isinstance(l, basestring)
 
836
                assert l.__class__ in (str, unicode)
602
837
                if isactive is None:
603
838
                    isactive = (not dset) and istack and (istack[-1] in included)
604
839
                if isactive:
618
853
        
619
854
        Please use get_lines now.
620
855
        """
621
 
        return self._get_iter(self._maybe_lookup(name_or_index))
 
856
        return iter(self.get_lines(self._maybe_lookup(name_or_index)))
622
857
 
623
858
    @deprecated_method(zero_eight)
624
859
    def maybe_lookup(self, name_or_index):
635
870
        else:
636
871
            return self._lookup(name_or_index)
637
872
 
638
 
    def _get_iter(self, version_id):
639
 
        """Yield lines for the specified version."""
640
 
        incls = [self._maybe_lookup(version_id)]
641
 
        if len(incls) == 1:
642
 
            index = incls[0]
643
 
            cur_sha = sha.new()
644
 
        else:
645
 
            # We don't have sha1 sums for multiple entries
646
 
            cur_sha = None
647
 
        for origin, lineno, line in self._extract(incls):
648
 
            if cur_sha:
649
 
                cur_sha.update(line)
650
 
            yield line
651
 
        if cur_sha:
652
 
            expected_sha1 = self._sha1s[index]
653
 
            measured_sha1 = cur_sha.hexdigest() 
654
 
            if measured_sha1 != expected_sha1:
655
 
                raise errors.WeaveInvalidChecksum(
656
 
                        'file %s, revision %s, expected: %s, measured %s' 
657
 
                        % (self._weave_name, self._names[index],
658
 
                           expected_sha1, measured_sha1))
659
 
 
660
873
    @deprecated_method(zero_eight)
661
874
    def get(self, version_id):
662
875
        """Please use either Weave.get_text or Weave.get_lines as desired."""
664
877
 
665
878
    def get_lines(self, version_id):
666
879
        """See VersionedFile.get_lines()."""
667
 
        return list(self._get_iter(version_id))
 
880
        int_index = self._maybe_lookup(version_id)
 
881
        result = [line for (origin, lineno, line) in self._extract([int_index])]
 
882
        expected_sha1 = self._sha1s[int_index]
 
883
        measured_sha1 = sha_strings(result)
 
884
        if measured_sha1 != expected_sha1:
 
885
            raise errors.WeaveInvalidChecksum(
 
886
                    'file %s, revision %s, expected: %s, measured %s' 
 
887
                    % (self._weave_name, version_id,
 
888
                       expected_sha1, measured_sha1))
 
889
        return result
668
890
 
669
 
    def get_sha1(self, name):
670
 
        """Get the stored sha1 sum for the given revision.
671
 
        
672
 
        :param name: The name of the version to lookup
673
 
        """
674
 
        return self._sha1s[self._lookup(name)]
 
891
    def get_sha1(self, version_id):
 
892
        """See VersionedFile.get_sha1()."""
 
893
        return self._sha1s[self._lookup(version_id)]
675
894
 
676
895
    @deprecated_method(zero_eight)
677
896
    def numversions(self):
691
910
 
692
911
    def check(self, progress_bar=None):
693
912
        # TODO evaluate performance hit of using string sets in this routine.
694
 
        # check no circular inclusions
 
913
        # TODO: check no circular inclusions
 
914
        # TODO: create a nested progress bar
695
915
        for version in range(self.num_versions()):
696
916
            inclusions = list(self._parents[version])
697
917
            if inclusions:
811
1031
        new_parents = []
812
1032
        for parent_idx in other._parents[other_idx]:
813
1033
            parent_name = other._names[parent_idx]
814
 
            if parent_name not in self._names:
 
1034
            if parent_name not in self._name_map:
815
1035
                # should not be possible
816
1036
                raise WeaveError("missing parent {%s} of {%s} in %r" 
817
1037
                                 % (parent_name, other._name_map[other_idx], self))
889
1109
            # new file, save it
890
1110
            self._save()
891
1111
 
892
 
    def _add_lines(self, version_id, parents, lines):
 
1112
    def _add_lines(self, version_id, parents, lines, parent_texts):
893
1113
        """Add a version and save the weave."""
894
 
        super(WeaveFile, self)._add_lines(version_id, parents, lines)
 
1114
        result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
 
1115
                                                   parent_texts)
895
1116
        self._save()
 
1117
        return result
896
1118
 
897
1119
    def _clone_text(self, new_version_id, old_version_id, parents):
898
1120
        """See VersionedFile.clone_text."""
1183
1405
        print ' '.join(map(str, w._parents[int(argv[3])]))
1184
1406
 
1185
1407
    elif cmd == 'plan-merge':
 
1408
        # replaced by 'bzr weave-plan-merge'
1186
1409
        w = readit()
1187
1410
        for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1188
1411
            if line:
1189
1412
                print '%14s | %s' % (state, line),
1190
 
 
1191
1413
    elif cmd == 'merge':
 
1414
        # replaced by 'bzr weave-merge-text'
1192
1415
        w = readit()
1193
1416
        p = w.plan_merge(int(argv[3]), int(argv[4]))
1194
1417
        sys.stdout.writelines(w.weave_merge(p))
1195
 
            
1196
1418
    else:
1197
1419
        raise ValueError('unknown command %r' % cmd)
1198
1420
    
1199
1421
 
1200
1422
 
1201
 
def profile_main(argv): 
 
1423
def profile_main(argv):
1202
1424
    import tempfile, hotshot, hotshot.stats
1203
1425
 
1204
1426
    prof_f = tempfile.NamedTemporaryFile()