~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/weave.py

Merge bzr.ab.integration

Show diffs side-by-side

added added

removed removed

Lines of Context:
66
66
# be done fairly efficiently because the sequence numbers constrain
67
67
# the possible relationships.
68
68
 
 
69
# FIXME: the conflict markers should be *7* characters
69
70
 
70
71
from copy import copy
71
72
from cStringIO import StringIO
260
261
 
261
262
    __contains__ = has_version
262
263
 
 
264
    def get_delta(self, version_id):
 
265
        """See VersionedFile.get_delta."""
 
266
        return self.get_deltas([version_id])[version_id]
 
267
 
 
268
    def get_deltas(self, version_ids):
 
269
        """See VersionedFile.get_deltas."""
 
270
        version_ids = self.get_ancestry(version_ids)
 
271
        for version_id in version_ids:
 
272
            if not self.has_version(version_id):
 
273
                raise RevisionNotPresent(version_id, self)
 
274
        # try extracting all versions; parallel extraction is used
 
275
        nv = self.num_versions()
 
276
        sha1s = {}
 
277
        deltas = {}
 
278
        texts = {}
 
279
        inclusions = {}
 
280
        noeols = {}
 
281
        last_parent_lines = {}
 
282
        parents = {}
 
283
        parent_inclusions = {}
 
284
        parent_linenums = {}
 
285
        parent_noeols = {}
 
286
        current_hunks = {}
 
287
        diff_hunks = {}
 
288
        # its simplest to generate a full set of prepared variables.
 
289
        for i in range(nv):
 
290
            name = self._names[i]
 
291
            sha1s[name] = self.get_sha1(name)
 
292
            parents_list = self.get_parents(name)
 
293
            try:
 
294
                parent = parents_list[0]
 
295
                parents[name] = parent
 
296
                parent_inclusions[name] = inclusions[parent]
 
297
            except IndexError:
 
298
                parents[name] = None
 
299
                parent_inclusions[name] = set()
 
300
            # we want to emit start, finish, replacement_length, replacement_lines tuples.
 
301
            diff_hunks[name] = []
 
302
            current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
 
303
            parent_linenums[name] = 0
 
304
            noeols[name] = False
 
305
            parent_noeols[name] = False
 
306
            last_parent_lines[name] = None
 
307
            new_inc = set([name])
 
308
            for p in self._parents[i]:
 
309
                new_inc.update(inclusions[self._idx_to_name(p)])
 
310
            # debug only, known good so far.
 
311
            #assert set(new_inc) == set(self.get_ancestry(name)), \
 
312
            #    'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
 
313
            inclusions[name] = new_inc
 
314
 
 
315
        nlines = len(self._weave)
 
316
 
 
317
        for lineno, inserted, deletes, line in self._walk_internal():
 
318
            # a line is active in a version if:
 
319
            # insert is in the versions inclusions
 
320
            # and
 
321
            # deleteset & the versions inclusions is an empty set.
 
322
            # so - if we have a included by mapping - version is included by
 
323
            # children, we get a list of children to examine for deletes affect
 
324
            # ing them, which is less than the entire set of children.
 
325
            for version_id in version_ids:  
 
326
                # The active inclusion must be an ancestor,
 
327
                # and no ancestors must have deleted this line,
 
328
                # because we don't support resurrection.
 
329
                parent_inclusion = parent_inclusions[version_id]
 
330
                inclusion = inclusions[version_id]
 
331
                parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
 
332
                version_active = inserted in inclusion and not (deletes & inclusion)
 
333
                if not parent_active and not version_active:
 
334
                    # unrelated line of ancestry
 
335
                    continue
 
336
                elif parent_active and version_active:
 
337
                    # shared line
 
338
                    parent_linenum = parent_linenums[version_id]
 
339
                    if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
 
340
                        diff_hunks[version_id].append(tuple(current_hunks[version_id]))
 
341
                    parent_linenum += 1
 
342
                    current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
 
343
                    parent_linenums[version_id] = parent_linenum
 
344
                    try:
 
345
                        if line[-1] != '\n':
 
346
                            noeols[version_id] = True
 
347
                    except IndexError:
 
348
                        pass
 
349
                elif parent_active and not version_active:
 
350
                    # deleted line
 
351
                    current_hunks[version_id][1] += 1
 
352
                    parent_linenums[version_id] += 1
 
353
                    last_parent_lines[version_id] = line
 
354
                elif not parent_active and version_active:
 
355
                    # replacement line
 
356
                    # noeol only occurs at the end of a file because we 
 
357
                    # diff linewise. We want to show noeol changes as a
 
358
                    # empty diff unless the actual eol-less content changed.
 
359
                    theline = line
 
360
                    try:
 
361
                        if last_parent_lines[version_id][-1] != '\n':
 
362
                            parent_noeols[version_id] = True
 
363
                    except (TypeError, IndexError):
 
364
                        pass
 
365
                    try:
 
366
                        if theline[-1] != '\n':
 
367
                            noeols[version_id] = True
 
368
                    except IndexError:
 
369
                        pass
 
370
                    new_line = False
 
371
                    parent_should_go = False
 
372
 
 
373
                    if parent_noeols[version_id] == noeols[version_id]:
 
374
                        # no noeol toggle, so trust the weaves statement
 
375
                        # that this line is changed.
 
376
                        new_line = True
 
377
                        if parent_noeols[version_id]:
 
378
                            theline = theline + '\n'
 
379
                    elif parent_noeols[version_id]:
 
380
                        # parent has no eol, we do:
 
381
                        # our line is new, report as such..
 
382
                        new_line = True
 
383
                    elif noeols[version_id]:
 
384
                        # append a eol so that it looks like
 
385
                        # a normalised delta
 
386
                        theline = theline + '\n'
 
387
                        if parents[version_id] is not None:
 
388
                        #if last_parent_lines[version_id] is not None:
 
389
                            parent_should_go = True
 
390
                        if last_parent_lines[version_id] != theline:
 
391
                            # but changed anyway
 
392
                            new_line = True
 
393
                            #parent_should_go = False
 
394
                    if new_line:
 
395
                        current_hunks[version_id][2] += 1
 
396
                        current_hunks[version_id][3].append((inserted, theline))
 
397
                    if parent_should_go:
 
398
                        # last hunk last parent line is not eaten
 
399
                        current_hunks[version_id][1] -= 1
 
400
                    if current_hunks[version_id][1] < 0:
 
401
                        current_hunks[version_id][1] = 0
 
402
                        # import pdb;pdb.set_trace()
 
403
                    # assert current_hunks[version_id][1] >= 0
 
404
 
 
405
        # flush last hunk
 
406
        for i in range(nv):
 
407
            version = self._idx_to_name(i)
 
408
            if current_hunks[version] != [0, 0, 0, []]:
 
409
                diff_hunks[version].append(tuple(current_hunks[version]))
 
410
        result = {}
 
411
        for version_id in version_ids:
 
412
            result[version_id] = (
 
413
                                  parents[version_id],
 
414
                                  sha1s[version_id],
 
415
                                  noeols[version_id],
 
416
                                  diff_hunks[version_id],
 
417
                                  )
 
418
        return result
 
419
 
263
420
    def get_parents(self, version_id):
264
421
        """See VersionedFile.get_parent."""
265
422
        return map(self._idx_to_name, self._parents[self._lookup(version_id)])
280
437
        """Please use Weave.clone_text now."""
281
438
        return self.clone_text(new_rev_id, old_rev_id, parents)
282
439
 
283
 
    def _add_lines(self, version_id, parents, lines):
 
440
    def _add_lines(self, version_id, parents, lines, parent_texts):
284
441
        """See VersionedFile.add_lines."""
285
442
        return self._add(version_id, lines, map(self._lookup, parents))
286
443
 
526
683
        lineno = 0         # line of weave, 0-based
527
684
 
528
685
        for l in self._weave:
529
 
            if isinstance(l, tuple):
 
686
            if l.__class__ == tuple:
530
687
                c, v = l
531
688
                isactive = None
532
689
                if c == '{':
533
 
                    istack.append(self._idx_to_name(v))
 
690
                    istack.append(self._names[v])
534
691
                elif c == '}':
535
692
                    istack.pop()
536
693
                elif c == '[':
537
 
                    assert self._idx_to_name(v) not in dset
538
 
                    dset.add(self._idx_to_name(v))
 
694
                    assert self._names[v] not in dset
 
695
                    dset.add(self._names[v])
539
696
                elif c == ']':
540
 
                    dset.remove(self._idx_to_name(v))
 
697
                    dset.remove(self._names[v])
541
698
                else:
542
699
                    raise WeaveFormatError('unexpected instruction %r' % v)
543
700
            else:
544
 
                assert isinstance(l, basestring)
 
701
                assert l.__class__ in (str, unicode)
545
702
                assert istack
546
 
                yield lineno, istack[-1], dset.copy(), l
 
703
                yield lineno, istack[-1], frozenset(dset), l
547
704
            lineno += 1
548
705
 
549
706
        if istack:
569
726
        included = self._inclusions(versions)
570
727
 
571
728
        istack = []
 
729
        iset = set()
572
730
        dset = set()
573
731
 
574
732
        lineno = 0         # line of weave, 0-based
579
737
 
580
738
        WFE = WeaveFormatError
581
739
 
 
740
        # wow. 
 
741
        #  449       0   4474.6820   2356.5590   bzrlib.weave:556(_extract)
 
742
        #  +285282   0   1676.8040   1676.8040   +<isinstance>
 
743
        # 1.6 seconds in 'isinstance'.
 
744
        # changing the first isinstance:
 
745
        #  449       0   2814.2660   1577.1760   bzrlib.weave:556(_extract)
 
746
        #  +140414   0    762.8050    762.8050   +<isinstance>
 
747
        # note that the inline time actually dropped (less function calls)
 
748
        # and total processing time was halved.
 
749
        # we're still spending ~1/4 of the method in isinstance though.
 
750
        # so lets hard code the acceptable string classes we expect:
 
751
        #  449       0   1202.9420    786.2930   bzrlib.weave:556(_extract)
 
752
        # +71352     0    377.5560    377.5560   +<method 'append' of 'list' 
 
753
        #                                          objects>
 
754
        # yay, down to ~1/4 the initial extract time, and our inline time
 
755
        # has shrunk again, with isinstance no longer dominating.
 
756
        # tweaking the stack inclusion test to use a set gives:
 
757
        #  449       0   1122.8030    713.0080   bzrlib.weave:556(_extract)
 
758
        # +71352     0    354.9980    354.9980   +<method 'append' of 'list' 
 
759
        #                                          objects>
 
760
        # - a 5% win, or possibly just noise. However with large istacks that
 
761
        # 'in' test could dominate, so I'm leaving this change in place -
 
762
        # when its fast enough to consider profiling big datasets we can review.
 
763
 
 
764
              
 
765
             
 
766
 
582
767
        for l in self._weave:
583
 
            if isinstance(l, tuple):
 
768
            if l.__class__ == tuple:
584
769
                c, v = l
585
770
                isactive = None
586
771
                if c == '{':
587
 
                    assert v not in istack
 
772
                    assert v not in iset
588
773
                    istack.append(v)
 
774
                    iset.add(v)
589
775
                elif c == '}':
590
 
                    istack.pop()
 
776
                    iset.remove(istack.pop())
591
777
                elif c == '[':
592
778
                    if v in included:
593
779
                        assert v not in dset
598
784
                        assert v in dset
599
785
                        dset.remove(v)
600
786
            else:
601
 
                assert isinstance(l, basestring)
 
787
                assert l.__class__ in (str, unicode)
602
788
                if isactive is None:
603
789
                    isactive = (not dset) and istack and (istack[-1] in included)
604
790
                if isactive:
618
804
        
619
805
        Please use get_lines now.
620
806
        """
621
 
        return self._get_iter(self._maybe_lookup(name_or_index))
 
807
        return iter(self.get_lines(self._maybe_lookup(name_or_index)))
622
808
 
623
809
    @deprecated_method(zero_eight)
624
810
    def maybe_lookup(self, name_or_index):
635
821
        else:
636
822
            return self._lookup(name_or_index)
637
823
 
638
 
    def _get_iter(self, version_id):
639
 
        """Yield lines for the specified version."""
640
 
        incls = [self._maybe_lookup(version_id)]
641
 
        if len(incls) == 1:
642
 
            index = incls[0]
643
 
            cur_sha = sha.new()
644
 
        else:
645
 
            # We don't have sha1 sums for multiple entries
646
 
            cur_sha = None
647
 
        for origin, lineno, line in self._extract(incls):
648
 
            if cur_sha:
649
 
                cur_sha.update(line)
650
 
            yield line
651
 
        if cur_sha:
652
 
            expected_sha1 = self._sha1s[index]
653
 
            measured_sha1 = cur_sha.hexdigest() 
654
 
            if measured_sha1 != expected_sha1:
655
 
                raise errors.WeaveInvalidChecksum(
656
 
                        'file %s, revision %s, expected: %s, measured %s' 
657
 
                        % (self._weave_name, self._names[index],
658
 
                           expected_sha1, measured_sha1))
659
 
 
660
824
    @deprecated_method(zero_eight)
661
825
    def get(self, version_id):
662
826
        """Please use either Weave.get_text or Weave.get_lines as desired."""
664
828
 
665
829
    def get_lines(self, version_id):
666
830
        """See VersionedFile.get_lines()."""
667
 
        return list(self._get_iter(version_id))
 
831
        int_index = self._maybe_lookup(version_id)
 
832
        result = [line for (origin, lineno, line) in self._extract([int_index])]
 
833
        expected_sha1 = self._sha1s[int_index]
 
834
        measured_sha1 = sha_strings(result)
 
835
        if measured_sha1 != expected_sha1:
 
836
            raise errors.WeaveInvalidChecksum(
 
837
                    'file %s, revision %s, expected: %s, measured %s' 
 
838
                    % (self._weave_name, version_id,
 
839
                       expected_sha1, measured_sha1))
 
840
        return result
668
841
 
669
842
    def get_sha1(self, name):
670
843
        """Get the stored sha1 sum for the given revision.
691
864
 
692
865
    def check(self, progress_bar=None):
693
866
        # TODO evaluate performance hit of using string sets in this routine.
694
 
        # check no circular inclusions
 
867
        # TODO: check no circular inclusions
 
868
        # TODO: create a nested progress bar
695
869
        for version in range(self.num_versions()):
696
870
            inclusions = list(self._parents[version])
697
871
            if inclusions:
777
951
                         version in version_ids]
778
952
        for name in topo_sort(pending_graph):
779
953
            other_idx = other._name_map[name]
780
 
            self._check_version_consistent(other, other_idx, name)
781
 
            sha1 = other._sha1s[other_idx]
782
 
 
 
954
            # returns True if we have it, False if we need it.
 
955
            if not self._check_version_consistent(other, other_idx, name):
 
956
                names_to_join.append((other_idx, name))
783
957
            processed += 1
784
958
 
785
 
            if name in self._name_map:
786
 
                idx = self._lookup(name)
787
 
                n1 = set(map(other._idx_to_name, other._parents[other_idx]))
788
 
                n2 = set(map(self._idx_to_name, self._parents[idx]))
789
 
                if sha1 ==  self._sha1s[idx] and n1 == n2:
790
 
                        continue
791
 
 
792
 
            names_to_join.append((other_idx, name))
793
959
 
794
960
        if pb and not msg:
795
961
            msg = 'weave join'
819
985
        new_parents = []
820
986
        for parent_idx in other._parents[other_idx]:
821
987
            parent_name = other._names[parent_idx]
822
 
            if parent_name not in self._names:
 
988
            if parent_name not in self._name_map:
823
989
                # should not be possible
824
990
                raise WeaveError("missing parent {%s} of {%s} in %r" 
825
991
                                 % (parent_name, other._name_map[other_idx], self))
897
1063
            # new file, save it
898
1064
            self._save()
899
1065
 
900
 
    def _add_lines(self, version_id, parents, lines):
 
1066
    def _add_lines(self, version_id, parents, lines, parent_texts):
901
1067
        """Add a version and save the weave."""
902
 
        super(WeaveFile, self)._add_lines(version_id, parents, lines)
 
1068
        result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
 
1069
                                                   parent_texts)
903
1070
        self._save()
 
1071
        return result
904
1072
 
905
1073
    def _clone_text(self, new_version_id, old_version_id, parents):
906
1074
        """See VersionedFile.clone_text."""
1191
1359
        print ' '.join(map(str, w._parents[int(argv[3])]))
1192
1360
 
1193
1361
    elif cmd == 'plan-merge':
 
1362
        # replaced by 'bzr weave-plan-merge'
1194
1363
        w = readit()
1195
1364
        for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1196
1365
            if line:
1197
1366
                print '%14s | %s' % (state, line),
1198
 
 
1199
1367
    elif cmd == 'merge':
 
1368
        # replaced by 'bzr weave-merge-text'
1200
1369
        w = readit()
1201
1370
        p = w.plan_merge(int(argv[3]), int(argv[4]))
1202
1371
        sys.stdout.writelines(w.weave_merge(p))
1203
 
            
1204
1372
    else:
1205
1373
        raise ValueError('unknown command %r' % cmd)
1206
1374
    
1207
1375
 
1208
1376
 
1209
 
def profile_main(argv): 
 
1377
def profile_main(argv):
1210
1378
    import tempfile, hotshot, hotshot.stats
1211
1379
 
1212
1380
    prof_f = tempfile.NamedTemporaryFile()