~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/groupcompress.py

  • Committer: Robert J. Tanner
  • Date: 2009-06-10 03:56:49 UTC
  • mfrom: (4423 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4425.
  • Revision ID: tanner@real-time.com-20090610035649-7rfx4cls4550zc3c
Merge 1.15.1 back to trunk

=== modified file 'bzrlib/groupcompress.py'
--- bzrlib/groupcompress.py
+++ bzrlib/groupcompress.py
@@ -31,13 +31,13 @@
     diff,
     errors,
     graph as _mod_graph,
+    knit,
     osutils,
     pack,
     patiencediff,
     trace,
     )
 from bzrlib.graph import Graph
-from bzrlib.knit import _DirectPackAccess
 from bzrlib.btree_index import BTreeBuilder
 from bzrlib.lru_cache import LRUSizeCache
 from bzrlib.tsort import topo_sort
@@ -324,7 +324,11 @@
                 raise ValueError('invalid content_len %d for record @ pos %d'
                                  % (content_len, pos - len_len - 1))
             if kind == 'f': # Fulltext
-                result.append(('f', content_len))
+                if include_text:
+                    text = self._content[pos:pos+content_len]
+                    result.append(('f', content_len, text))
+                else:
+                    result.append(('f', content_len))
             elif kind == 'd': # Delta
                 delta_content = self._content[pos:pos+content_len]
                 delta_info = []
@@ -339,7 +343,11 @@
                         (offset, length,
                          delta_pos) = decode_copy_instruction(delta_content, c,
                                                               delta_pos)
-                        delta_info.append(('c', offset, length))
+                        if include_text:
+                            text = self._content[offset:offset+length]
+                            delta_info.append(('c', offset, length, text))
+                        else:
+                            delta_info.append(('c', offset, length))
                         measured_len += length
                     else: # Insert
                         if include_text:
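
The two hunks above extend the debugging dump of a group-compress block so that, when include_text is passed, each fulltext ('f', ...) record and each copy ('c', ...) instruction also carries the bytes it refers to. A minimal standalone sketch of the two record shapes (plain Python for illustration; dump_fulltext is an invented name, not bzrlib API):

    # Same include_text branching as the diff, applied to a simple buffer.
    def dump_fulltext(content, pos, content_len, include_text=False):
        if include_text:
            text = content[pos:pos + content_len]
            return ('f', content_len, text)   # record carries the bytes
        return ('f', content_len)             # old, length-only record

    print(dump_fulltext('hello world', 0, 5))        # ('f', 5)
    print(dump_fulltext('hello world', 0, 5, True))  # ('f', 5, 'hello')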
@@ -746,6 +754,14 @@
 
         After calling this, the compressor should no longer be used
         """
+        # TODO: this causes us to 'bloat' to 2x the size of content in the
+        #       group. This has an impact for 'commit' of large objects.
+        #       One possibility is to use self._content_chunks, and be lazy and
+        #       only fill out self._content as a full string when we actually
+        #       need it. That would at least drop the peak memory consumption
+        #       for 'commit' down to ~1x the size of the largest file, at a
+        #       cost of increased complexity within this code. 2x is still <<
+        #       3x the size of the largest file, so we are doing ok.
         content = ''.join(self.chunks)
         self.chunks = None
         self._delta_index = None
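
The TODO added above records a known memory cost: joining self.chunks into one string holds roughly 2x the group's content in memory at once. A rough sketch of the lazy-join idea it proposes (the class and attribute names here are assumptions for illustration, not bzrlib's eventual implementation):

    # Keep the chunk list and build the flat string only on demand, so
    # peak memory stays near 1x until a caller really needs the string.
    class LazyContent(object):
        def __init__(self, chunks):
            self._content_chunks = chunks
            self._content = None

        def _ensure_content(self):
            if self._content is None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None  # drop the duplicate copy
            return self._content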
@@ -911,7 +927,7 @@
         writer.begin()
         index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
             add_callback=graph_index.add_nodes)
-        access = _DirectPackAccess({})
+        access = knit._DirectPackAccess({})
         access.set_writer(writer, graph_index, (transport, 'newpack'))
         result = GroupCompressVersionedFiles(index, access, delta)
         result.stream = stream
@@ -1018,15 +1034,19 @@
         else:
             keys = [key]
             parent_map = {key:()}
+        # So we used Graph(self) to load the parent_map, but now that we have
+        # it, we can just query the parent map directly, so create a new Graph
+        # object
+        graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
         head_cache = _mod_graph.FrozenHeadsCache(graph)
         parent_cache = {}
         reannotate = annotate.reannotate
         for record in self.get_record_stream(keys, 'topological', True):
             key = record.key
-            chunks = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
+            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
             parent_lines = [parent_cache[parent] for parent in parent_map[key]]
             parent_cache[key] = list(
-                reannotate(parent_lines, chunks, key, None, head_cache))
+                reannotate(parent_lines, lines, key, None, head_cache))
         return parent_cache[key]
 
     def check(self, progress_bar=None):
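
The hunk above is part of annotate(): parent_map is already loaded, so the new code wraps it in a DictParentsProvider and builds a Graph over the in-memory dict instead of querying the store again. A hedged standalone example of that pattern (assumes a bzrlib source tree is importable; the revision keys are made up):

    from bzrlib import graph as _mod_graph

    # Key -> tuple-of-parent-keys, as annotate() would have loaded it.
    parent_map = {('rev-1',): (),
                  ('rev-2',): (('rev-1',),)}
    graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
    # heads() now resolves from the dict: ('rev-2',) descends from
    # ('rev-1',), so only ('rev-2',) is a head.
    print(graph.heads([('rev-1',), ('rev-2',)]))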
@@ -1543,7 +1563,7 @@
     """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""
 
     def __init__(self, graph_index, is_locked, parents=True,
-        add_callback=None):
+        add_callback=None, track_external_parent_refs=False):
         """Construct a _GCGraphIndex on a graph_index.
 
         :param graph_index: An implementation of bzrlib.index.GraphIndex.
@@ -1554,12 +1574,19 @@
         :param add_callback: If not None, allow additions to the index and call
             this callback with a list of added GraphIndex nodes:
             [(node, value, node_refs), ...]
+        :param track_external_parent_refs: As keys are added, keep track of the
+            keys they reference, so that we can query get_missing_parents(),
+            etc.
         """
         self._add_callback = add_callback
         self._graph_index = graph_index
         self._parents = parents
         self.has_graph = parents
         self._is_locked = is_locked
+        if track_external_parent_refs:
+            self._key_dependencies = knit._KeyRefs()
+        else:
+            self._key_dependencies = None
 
     def add_records(self, records, random_id=False):
         """Add multiple records to the index.
@@ -1610,6 +1637,11 @@
                 for key, (value, node_refs) in keys.iteritems():
                     result.append((key, value))
             records = result
+        key_dependencies = self._key_dependencies
+        if key_dependencies is not None and self._parents:
+            for key, value, refs in records:
+                parents = refs[0]
+                key_dependencies.add_references(key, parents)
         self._add_callback(records)
 
     def _check_read(self):
@@ -1664,6 +1696,14 @@
                 result[node[1]] = None
         return result
 
+    def get_missing_parents(self):
+        """Return the keys of missing parents."""
+        # Copied from _KnitGraphIndex.get_missing_parents
+        # We may have false positives, so filter those out.
+        self._key_dependencies.add_keys(
+            self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))
+        return frozenset(self._key_dependencies.get_unsatisfied_refs())
+
     def get_build_details(self, keys):
         """Get the various build details for keys.
 
@@ -1715,6 +1755,23 @@
         delta_end = int(bits[3])
         return node[0], start, stop, basis_end, delta_end
 
+    def scan_unvalidated_index(self, graph_index):
+        """Inform this _GCGraphIndex that there is an unvalidated index.
+
+        This allows this _GCGraphIndex to keep track of any missing
+        compression parents we may want to have filled in to make those
+        indices valid.
+
+        :param graph_index: A GraphIndex
+        """
+        if self._key_dependencies is not None:
+            # Add parent refs from graph_index (and discard parent refs that
+            # the graph_index has).
+            add_refs = self._key_dependencies.add_references
+            for node in graph_index.iter_all_entries():
+                add_refs(node[1], node[3][0])
+
+
 
 from bzrlib._groupcompress_py import (
     apply_delta,
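
Taken together, the new methods give pack-handling code a way to fill in missing compression parents: scan_unvalidated_index() registers what an unvalidated pack references, and get_missing_parents() reports what still has to be fetched. A hedged sketch of the caller-side flow (index and new_graph_index are assumed setup objects, not shown here):

    # Register a pack whose compression parents may not be present yet.
    index.scan_unvalidated_index(new_graph_index)
    # Ask which referenced keys are still absent; false positives are
    # filtered via get_parent_map(), as get_missing_parents() does above.
    missing = index.get_missing_parents()
    # Fetch/insert 'missing' before treating the new pack as valid.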