~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

  • Committer: Robert Collins
  • Date: 2006-03-10 22:19:08 UTC
  • mto: (1615.1.2 bzr.mbp.integration)
  • mto: This revision was merged to the branch mainline in revision 1616.
  • Revision ID: robertc@robertcollins.net-20060310221908-7c6d446b9aa88eb1
More micro-optimisations on index reading, now down to 16000 records/second.

Show diffs side-by-side

added added

removed removed

Lines of Context:
719
719
 
720
720
    HEADER = "# bzr knit index 7\n"
721
721
 
 
722
    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
 
723
    # __slots__ = ['_cache', '_history', '_transport', '_filename']
 
724
 
722
725
    def _cache_version(self, version_id, options, pos, size, parents):
723
 
        val = (version_id, options, pos, size, parents)
 
726
        """Cache a version record in the history array and index cache.
 
727
        
 
728
        This is inlined into __init__ for performance. KEEP IN SYNC.
 
729
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
 
730
         indexes).
 
731
        """
724
732
        # only want the _history index to reference the 1st index entry
725
733
        # for version_id
726
 
        if not version_id in self._cache:
 
734
        if version_id not in self._cache:
727
735
            self._history.append(version_id)
728
 
        self._cache[version_id] = val
729
 
 
730
 
    def _iter_index(self, fp):
731
 
        l = fp.readline()
732
 
        while l != '':
733
 
            yield l.split()
734
 
            l = fp.readline()
735
 
        #lines = fp.read()
736
 
        #for l in lines.splitlines(False):
737
 
        #    yield l.split()
 
736
        self._cache[version_id] = (version_id, options, pos, size, parents)
738
737
 
739
738
    def __init__(self, transport, filename, mode, create=False):
740
739
        _KnitComponentFile.__init__(self, transport, filename, mode)
752
751
                pb.update('read knit index', count, total)
753
752
                fp = self._transport.get(self._filename)
754
753
                self.check_header(fp)
755
 
                for rec in self._iter_index(fp):
 
754
                # readlines reads the whole file at once:
 
755
                # bad for transports like http, good for local disk
 
756
                # we save 60 ms doing this one change (
 
757
                # from calling readline each time to calling
 
758
                # readlines once).
 
759
                # probably what we want for nice behaviour on
 
760
                # http is an incremental readlines that yields, or
 
761
                # a check for local vs non local indexes,
 
762
                for l in fp.readlines():
 
763
                    rec = l.split()
756
764
                    count += 1
757
765
                    total += 1
758
 
                    pb.update('read knit index', count, total)
759
 
                    parents = self._parse_parents(rec[4:])
760
 
                    self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),
761
 
                        parents)
 
766
                    #pb.update('read knit index', count, total)
 
767
                    # See self._parse_parents
 
768
                    parents = []
 
769
                    for value in rec[4:]:
 
770
                        if '.' == value[-1]:
 
771
                            # uncompressed reference
 
772
                            parents.append(value[1:])
 
773
                        else:
 
774
                            # this is 15/4000ms faster than isinstance,
 
775
                            # (in lsprof)
 
776
                            # this function is called thousands of times a 
 
777
                            # second so small variations add up.
 
778
                            assert value.__class__ is str
 
779
                            parents.append(self._history[int(value)])
 
780
                    # end self._parse_parents
 
781
                    # self._cache_version(rec[0], 
 
782
                    #                     rec[1].split(','),
 
783
                    #                     int(rec[2]),
 
784
                    #                     int(rec[3]),
 
785
                    #                     parents)
 
786
                    # --- self._cache_version
 
787
                    # only want the _history index to reference the 1st 
 
788
                    # index entry for version_id
 
789
                    version_id = rec[0]
 
790
                    if version_id not in self._cache:
 
791
                        self._history.append(version_id)
 
792
                    self._cache[version_id] = (version_id,
 
793
                                               rec[1].split(','),
 
794
                                               int(rec[2]),
 
795
                                               int(rec[3]),
 
796
                                               parents)
 
797
                    # --- self._cache_version 
762
798
            except NoSuchFile, e:
763
799
                if mode != 'w' or not create:
764
800
                    raise
772
808
 
773
809
        ints are looked up in the index.
774
810
        .FOO values are ghosts and converted into FOO.
 
811
 
 
812
        NOTE: the function is retained here for clarity, and for possible
 
813
              use in partial index reads. However bulk processing now has
 
814
              it inlined in __init__ for inner-loop optimisation.
775
815
        """
776
816
        result = []
777
817
        for value in compressed_parents:
778
818
            if value[-1] == '.':
 
819
                # uncompressed reference
779
820
                result.append(value[1:])
780
821
            else:
781
822
                # this is 15/4000ms faster than isinstance,