~bzr-pqm/bzr/bzr.dev


Viewing changes to bzrlib/groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-06-17 17:57:15 UTC
  • mfrom: (4454 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4460.
  • Revision ID: john@arbash-meinel.com-20090617175715-p9ebpwx5rhc0qin1
Merge bzr.dev 4454 in preparation for NEWS entry.


@@ -324,7 +324,11 @@
                 raise ValueError('invalid content_len %d for record @ pos %d'
                                  % (content_len, pos - len_len - 1))
             if kind == 'f': # Fulltext
-                result.append(('f', content_len))
+                if include_text:
+                    text = self._content[pos:pos+content_len]
+                    result.append(('f', content_len, text))
+                else:
+                    result.append(('f', content_len))
             elif kind == 'd': # Delta
                 delta_content = self._content[pos:pos+content_len]
                 delta_info = []
@@ -339,7 +343,11 @@
                         (offset, length,
                          delta_pos) = decode_copy_instruction(delta_content, c,
                                                               delta_pos)
-                        delta_info.append(('c', offset, length))
+                        if include_text:
+                            text = self._content[offset:offset+length]
+                            delta_info.append(('c', offset, length, text))
+                        else:
+                            delta_info.append(('c', offset, length))
                         measured_len += length
                     else: # Insert
                         if include_text:
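The new include_text flag only changes the shape of the tuples this dump
helper yields, for both the fulltext and the copy-instruction cases. A
minimal plain-Python sketch of the two shapes, using made-up data rather
than bzrlib code:

    # Stand-ins for self._content and one decoded fulltext record.
    content = 'hello world'
    pos, content_len = 0, 5
    for include_text in (False, True):
        result = []
        if include_text:
            text = content[pos:pos + content_len]
            result.append(('f', content_len, text))
        else:
            result.append(('f', content_len))
        print result
    # [('f', 5)]
    # [('f', 5, 'hello')]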
@@ -746,6 +754,14 @@
 
         After calling this, the compressor should no longer be used
         """
+        # TODO: this causes us to 'bloat' to 2x the size of content in the
+        #       group. This has an impact for 'commit' of large objects.
+        #       One possibility is to use self._content_chunks, and be lazy and
+        #       only fill out self._content as a full string when we actually
+        #       need it. That would at least drop the peak memory consumption
+        #       for 'commit' down to ~1x the size of the largest file, at a
+        #       cost of increased complexity within this code. 2x is still <<
+        #       3x the size of the largest file, so we are doing ok.
         content = ''.join(self.chunks)
         self.chunks = None
         self._delta_index = None
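A rough sketch of what the TODO above describes (hypothetical class, not
bzrlib code): ''.join() keeps the chunk list and the joined copy alive at
its peak, and the suggested fix is to hold the chunks and join on demand:

    # Eager join: while ''.join(chunks) runs, the chunk list and the new
    # joined string are both alive, so peak memory is ~2x the content.
    chunks = ['x' * 1024] * 1024
    content = ''.join(chunks)
    chunks = None

    # Lazy alternative sketched in the TODO: keep the chunks and only
    # build the full string when a caller actually needs it.
    class LazyContent(object):
        def __init__(self, chunks):
            self._content_chunks = chunks
            self._content = None
        def content(self):
            if self._content is None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
            return self._content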
@@ -1006,23 +1022,15 @@
         if not parent_map:
             raise errors.RevisionNotPresent(key, self)
         if parent_map[key] is not None:
-            search = graph._make_breadth_first_searcher([key])
-            keys = set()
-            while True:
-                try:
-                    present, ghosts = search.next_with_ghosts()
-                except StopIteration:
-                    break
-                keys.update(present)
-            parent_map = self.get_parent_map(keys)
+            parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
+                              if v is not None)
+            keys = parent_map.keys()
         else:
             keys = [key]
             parent_map = {key:()}
-        # So we used Graph(self) to load the parent_map, but now that we have
-        # it, we can just query the parent map directly, so create a new Graph
-        # object
-        graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
-        head_cache = _mod_graph.FrozenHeadsCache(graph)
+        # We used Graph(self) to load the parent_map, but now that we have it,
+        # we can just query the parent map directly, so create a KnownGraph
+        heads_provider = _mod_graph.KnownGraph(parent_map)
         parent_cache = {}
         reannotate = annotate.reannotate
         for record in self.get_record_stream(keys, 'topological', True):
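The hand-rolled breadth-first search above is replaced by a single
iter_ancestry() call; ghosts come back with None for their parents, which
the generator expression filters out. A plain-Python stand-in for that
contract (hypothetical helper, not the bzrlib implementation):

    # Stand-in for graph.iter_ancestry([key]): yields (key, parents),
    # where parents is None for a ghost (named as a parent, but absent).
    def iter_ancestry(parents_by_key, keys):
        seen = set()
        pending = set(keys)
        while pending:
            key = pending.pop()
            if key in seen:
                continue
            seen.add(key)
            parents = parents_by_key.get(key)   # None -> ghost
            yield key, parents
            if parents:
                pending.update(parents)

    ancestry = {'B': ('A',), 'A': ('ghost',)}
    parent_map = dict((k, v) for k, v in iter_ancestry(ancestry, ['B'])
                      if v is not None)
    # parent_map == {'B': ('A',), 'A': ('ghost',)}; the ghost itself
    # yielded ('ghost', None) and was dropped by the filter.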
@@ -1030,7 +1038,7 @@
             lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
             parent_lines = [parent_cache[parent] for parent in parent_map[key]]
             parent_cache[key] = list(
-                reannotate(parent_lines, lines, key, None, head_cache))
+                reannotate(parent_lines, lines, key, None, heads_provider))
         return parent_cache[key]
 
     def check(self, progress_bar=None):
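Both the old FrozenHeadsCache and the new KnownGraph act as the heads
provider that reannotate() consults when two revisions both claim a line;
KnownGraph is simply built straight from the plain parent_map dict. A
small sketch of the heads() question it answers, assuming bzrlib is
importable as in this file:

    from bzrlib import graph as _mod_graph

    # rev-2 descends from rev-1, so only rev-2 is a head of the pair.
    parent_map = {('rev-1',): (), ('rev-2',): (('rev-1',),)}
    heads_provider = _mod_graph.KnownGraph(parent_map)
    print heads_provider.heads([('rev-1',), ('rev-2',)])
    # expected: frozenset([('rev-2',)])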