~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/weave.py

[merge] robert's knit-performance work

Show diffs side-by-side

added added

removed removed

Lines of Context:
260
260
 
261
261
    __contains__ = has_version
262
262
 
 
263
    def get_delta(self, version_id):
 
264
        """See VersionedFile.get_delta."""
 
265
        return self.get_deltas([version_id])[version_id]
 
266
 
 
267
    def get_deltas(self, version_ids):
 
268
        """See VersionedFile.get_deltas."""
 
269
        version_ids = self.get_ancestry(version_ids)
 
270
        for version_id in version_ids:
 
271
            if not self.has_version(version_id):
 
272
                raise RevisionNotPresent(version_id, self)
 
273
        # try extracting all versions; parallel extraction is used
 
274
        nv = self.num_versions()
 
275
        sha1s = {}
 
276
        deltas = {}
 
277
        texts = {}
 
278
        inclusions = {}
 
279
        noeols = {}
 
280
        last_parent_lines = {}
 
281
        parents = {}
 
282
        parent_inclusions = {}
 
283
        parent_linenums = {}
 
284
        parent_noeols = {}
 
285
        current_hunks = {}
 
286
        diff_hunks = {}
 
287
        # its simplest to generate a full set of prepared variables.
 
288
        for i in range(nv):
 
289
            name = self._names[i]
 
290
            sha1s[name] = self.get_sha1(name)
 
291
            parents_list = self.get_parents(name)
 
292
            try:
 
293
                parent = parents_list[0]
 
294
                parents[name] = parent
 
295
                parent_inclusions[name] = inclusions[parent]
 
296
            except IndexError:
 
297
                parents[name] = None
 
298
                parent_inclusions[name] = set()
 
299
            # we want to emit start, finish, replacement_length, replacement_lines tuples.
 
300
            diff_hunks[name] = []
 
301
            current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
 
302
            parent_linenums[name] = 0
 
303
            noeols[name] = False
 
304
            parent_noeols[name] = False
 
305
            last_parent_lines[name] = None
 
306
            new_inc = set([name])
 
307
            for p in self._parents[i]:
 
308
                new_inc.update(inclusions[self._idx_to_name(p)])
 
309
            # debug only, known good so far.
 
310
            #assert set(new_inc) == set(self.get_ancestry(name)), \
 
311
            #    'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
 
312
            inclusions[name] = new_inc
 
313
 
 
314
        nlines = len(self._weave)
 
315
 
 
316
        for lineno, inserted, deletes, line in self._walk_internal():
 
317
            # a line is active in a version if:
 
318
            # insert is in the versions inclusions
 
319
            # and
 
320
            # deleteset & the versions inclusions is an empty set.
 
321
            # so - if we have a included by mapping - version is included by
 
322
            # children, we get a list of children to examine for deletes affect
 
323
            # ing them, which is less than the entire set of children.
 
324
            for version_id in version_ids:  
 
325
                # The active inclusion must be an ancestor,
 
326
                # and no ancestors must have deleted this line,
 
327
                # because we don't support resurrection.
 
328
                parent_inclusion = parent_inclusions[version_id]
 
329
                inclusion = inclusions[version_id]
 
330
                parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
 
331
                version_active = inserted in inclusion and not (deletes & inclusion)
 
332
                if not parent_active and not version_active:
 
333
                    # unrelated line of ancestry
 
334
                    continue
 
335
                elif parent_active and version_active:
 
336
                    # shared line
 
337
                    parent_linenum = parent_linenums[version_id]
 
338
                    if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
 
339
                        diff_hunks[version_id].append(tuple(current_hunks[version_id]))
 
340
                    parent_linenum += 1
 
341
                    current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
 
342
                    parent_linenums[version_id] = parent_linenum
 
343
                    try:
 
344
                        if line[-1] != '\n':
 
345
                            noeols[version_id] = True
 
346
                    except IndexError:
 
347
                        pass
 
348
                elif parent_active and not version_active:
 
349
                    # deleted line
 
350
                    current_hunks[version_id][1] += 1
 
351
                    parent_linenums[version_id] += 1
 
352
                    last_parent_lines[version_id] = line
 
353
                elif not parent_active and version_active:
 
354
                    # replacement line
 
355
                    # noeol only occurs at the end of a file because we 
 
356
                    # diff linewise. We want to show noeol changes as a
 
357
                    # empty diff unless the actual eol-less content changed.
 
358
                    theline = line
 
359
                    try:
 
360
                        if last_parent_lines[version_id][-1] != '\n':
 
361
                            parent_noeols[version_id] = True
 
362
                    except (TypeError, IndexError):
 
363
                        pass
 
364
                    try:
 
365
                        if theline[-1] != '\n':
 
366
                            noeols[version_id] = True
 
367
                    except IndexError:
 
368
                        pass
 
369
                    new_line = False
 
370
                    parent_should_go = False
 
371
 
 
372
                    if parent_noeols[version_id] == noeols[version_id]:
 
373
                        # no noeol toggle, so trust the weaves statement
 
374
                        # that this line is changed.
 
375
                        new_line = True
 
376
                        if parent_noeols[version_id]:
 
377
                            theline = theline + '\n'
 
378
                    elif parent_noeols[version_id]:
 
379
                        # parent has no eol, we do:
 
380
                        # our line is new, report as such..
 
381
                        new_line = True
 
382
                    elif noeols[version_id]:
 
383
                        # append a eol so that it looks like
 
384
                        # a normalised delta
 
385
                        theline = theline + '\n'
 
386
                        if parents[version_id] is not None:
 
387
                        #if last_parent_lines[version_id] is not None:
 
388
                            parent_should_go = True
 
389
                        if last_parent_lines[version_id] != theline:
 
390
                            # but changed anyway
 
391
                            new_line = True
 
392
                            #parent_should_go = False
 
393
                    if new_line:
 
394
                        current_hunks[version_id][2] += 1
 
395
                        current_hunks[version_id][3].append((inserted, theline))
 
396
                    if parent_should_go:
 
397
                        # last hunk last parent line is not eaten
 
398
                        current_hunks[version_id][1] -= 1
 
399
                    if current_hunks[version_id][1] < 0:
 
400
                        current_hunks[version_id][1] = 0
 
401
                        # import pdb;pdb.set_trace()
 
402
                    # assert current_hunks[version_id][1] >= 0
 
403
 
 
404
        # flush last hunk
 
405
        for i in range(nv):
 
406
            version = self._idx_to_name(i)
 
407
            if current_hunks[version] != [0, 0, 0, []]:
 
408
                diff_hunks[version].append(tuple(current_hunks[version]))
 
409
        result = {}
 
410
        for version_id in version_ids:
 
411
            result[version_id] = (
 
412
                                  parents[version_id],
 
413
                                  sha1s[version_id],
 
414
                                  noeols[version_id],
 
415
                                  diff_hunks[version_id],
 
416
                                  )
 
417
        return result
 
418
 
263
419
    def get_parents(self, version_id):
264
420
        """See VersionedFile.get_parent."""
265
421
        return map(self._idx_to_name, self._parents[self._lookup(version_id)])
280
436
        """Please use Weave.clone_text now."""
281
437
        return self.clone_text(new_rev_id, old_rev_id, parents)
282
438
 
283
 
    def _add_lines(self, version_id, parents, lines):
 
439
    def _add_lines(self, version_id, parents, lines, parent_texts):
284
440
        """See VersionedFile.add_lines."""
285
441
        return self._add(version_id, lines, map(self._lookup, parents))
286
442
 
526
682
        lineno = 0         # line of weave, 0-based
527
683
 
528
684
        for l in self._weave:
529
 
            if isinstance(l, tuple):
 
685
            if l.__class__ == tuple:
530
686
                c, v = l
531
687
                isactive = None
532
688
                if c == '{':
533
 
                    istack.append(self._idx_to_name(v))
 
689
                    istack.append(self._names[v])
534
690
                elif c == '}':
535
691
                    istack.pop()
536
692
                elif c == '[':
537
 
                    assert self._idx_to_name(v) not in dset
538
 
                    dset.add(self._idx_to_name(v))
 
693
                    assert self._names[v] not in dset
 
694
                    dset.add(self._names[v])
539
695
                elif c == ']':
540
 
                    dset.remove(self._idx_to_name(v))
 
696
                    dset.remove(self._names[v])
541
697
                else:
542
698
                    raise WeaveFormatError('unexpected instruction %r' % v)
543
699
            else:
544
 
                assert isinstance(l, basestring)
 
700
                assert l.__class__ in (str, unicode)
545
701
                assert istack
546
 
                yield lineno, istack[-1], dset.copy(), l
 
702
                yield lineno, istack[-1], frozenset(dset), l
547
703
            lineno += 1
548
704
 
549
705
        if istack:
569
725
        included = self._inclusions(versions)
570
726
 
571
727
        istack = []
 
728
        iset = set()
572
729
        dset = set()
573
730
 
574
731
        lineno = 0         # line of weave, 0-based
579
736
 
580
737
        WFE = WeaveFormatError
581
738
 
 
739
        # wow. 
 
740
        #  449       0   4474.6820   2356.5590   bzrlib.weave:556(_extract)
 
741
        #  +285282   0   1676.8040   1676.8040   +<isinstance>
 
742
        # 1.6 seconds in 'isinstance'.
 
743
        # changing the first isinstance:
 
744
        #  449       0   2814.2660   1577.1760   bzrlib.weave:556(_extract)
 
745
        #  +140414   0    762.8050    762.8050   +<isinstance>
 
746
        # note that the inline time actually dropped (less function calls)
 
747
        # and total processing time was halved.
 
748
        # we're still spending ~1/4 of the method in isinstance though.
 
749
        # so lets hard code the acceptable string classes we expect:
 
750
        #  449       0   1202.9420    786.2930   bzrlib.weave:556(_extract)
 
751
        # +71352     0    377.5560    377.5560   +<method 'append' of 'list' 
 
752
        #                                          objects>
 
753
        # yay, down to ~1/4 the initial extract time, and our inline time
 
754
        # has shrunk again, with isinstance no longer dominating.
 
755
        # tweaking the stack inclusion test to use a set gives:
 
756
        #  449       0   1122.8030    713.0080   bzrlib.weave:556(_extract)
 
757
        # +71352     0    354.9980    354.9980   +<method 'append' of 'list' 
 
758
        #                                          objects>
 
759
        # - a 5% win, or possibly just noise. However with large istacks that
 
760
        # 'in' test could dominate, so I'm leaving this change in place -
 
761
        # when its fast enough to consider profiling big datasets we can review.
 
762
 
 
763
              
 
764
             
 
765
 
582
766
        for l in self._weave:
583
 
            if isinstance(l, tuple):
 
767
            if l.__class__ == tuple:
584
768
                c, v = l
585
769
                isactive = None
586
770
                if c == '{':
587
 
                    assert v not in istack
 
771
                    assert v not in iset
588
772
                    istack.append(v)
 
773
                    iset.add(v)
589
774
                elif c == '}':
590
 
                    istack.pop()
 
775
                    iset.remove(istack.pop())
591
776
                elif c == '[':
592
777
                    if v in included:
593
778
                        assert v not in dset
598
783
                        assert v in dset
599
784
                        dset.remove(v)
600
785
            else:
601
 
                assert isinstance(l, basestring)
 
786
                assert l.__class__ in (str, unicode)
602
787
                if isactive is None:
603
788
                    isactive = (not dset) and istack and (istack[-1] in included)
604
789
                if isactive:
618
803
        
619
804
        Please use get_lines now.
620
805
        """
621
 
        return self._get_iter(self._maybe_lookup(name_or_index))
 
806
        return iter(self.get_lines(self._maybe_lookup(name_or_index)))
622
807
 
623
808
    @deprecated_method(zero_eight)
624
809
    def maybe_lookup(self, name_or_index):
635
820
        else:
636
821
            return self._lookup(name_or_index)
637
822
 
638
 
    def _get_iter(self, version_id):
639
 
        """Yield lines for the specified version."""
640
 
        incls = [self._maybe_lookup(version_id)]
641
 
        if len(incls) == 1:
642
 
            index = incls[0]
643
 
            cur_sha = sha.new()
644
 
        else:
645
 
            # We don't have sha1 sums for multiple entries
646
 
            cur_sha = None
647
 
        for origin, lineno, line in self._extract(incls):
648
 
            if cur_sha:
649
 
                cur_sha.update(line)
650
 
            yield line
651
 
        if cur_sha:
652
 
            expected_sha1 = self._sha1s[index]
653
 
            measured_sha1 = cur_sha.hexdigest() 
654
 
            if measured_sha1 != expected_sha1:
655
 
                raise errors.WeaveInvalidChecksum(
656
 
                        'file %s, revision %s, expected: %s, measured %s' 
657
 
                        % (self._weave_name, self._names[index],
658
 
                           expected_sha1, measured_sha1))
659
 
 
660
823
    @deprecated_method(zero_eight)
661
824
    def get(self, version_id):
662
825
        """Please use either Weave.get_text or Weave.get_lines as desired."""
664
827
 
665
828
    def get_lines(self, version_id):
666
829
        """See VersionedFile.get_lines()."""
667
 
        return list(self._get_iter(version_id))
 
830
        int_index = self._maybe_lookup(version_id)
 
831
        result = [line for (origin, lineno, line) in self._extract([int_index])]
 
832
        expected_sha1 = self._sha1s[int_index]
 
833
        measured_sha1 = sha_strings(result)
 
834
        if measured_sha1 != expected_sha1:
 
835
            raise errors.WeaveInvalidChecksum(
 
836
                    'file %s, revision %s, expected: %s, measured %s' 
 
837
                    % (self._weave_name, version_id,
 
838
                       expected_sha1, measured_sha1))
 
839
        return result
668
840
 
669
841
    def get_sha1(self, name):
670
842
        """Get the stored sha1 sum for the given revision.
811
983
        new_parents = []
812
984
        for parent_idx in other._parents[other_idx]:
813
985
            parent_name = other._names[parent_idx]
814
 
            if parent_name not in self._names:
 
986
            if parent_name not in self._name_map:
815
987
                # should not be possible
816
988
                raise WeaveError("missing parent {%s} of {%s} in %r" 
817
989
                                 % (parent_name, other._name_map[other_idx], self))
889
1061
            # new file, save it
890
1062
            self._save()
891
1063
 
892
 
    def _add_lines(self, version_id, parents, lines):
 
1064
    def _add_lines(self, version_id, parents, lines, parent_texts):
893
1065
        """Add a version and save the weave."""
894
 
        super(WeaveFile, self)._add_lines(version_id, parents, lines)
 
1066
        result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
 
1067
                                                   parent_texts)
895
1068
        self._save()
 
1069
        return result
896
1070
 
897
1071
    def _clone_text(self, new_version_id, old_version_id, parents):
898
1072
        """See VersionedFile.clone_text."""