~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/pack_repo.py

  • Committer: John Arbash Meinel
  • Date: 2010-05-11 10:45:26 UTC
  • mto: This revision was merged to the branch mainline in revision 5225.
  • Revision ID: john@arbash-meinel.com-20100511104526-zxnstcxta22hzw2n
Implement a compiled extension for parsing the text key out of a CHKInventory value.

Related to bug #562666. This seems to shave 5-10% off the time spent doing a complete
branch of bzr.dev, launchpad, etc.
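
For context, the "text key" being extracted is the (file_id, revision) pair that names a file text in the repository; every CHKInventory entry value carries it alongside the other inventory fields. A minimal sketch of the pure-Python parsing that the compiled extension is meant to speed up, assuming an entry layout of 'kind: file-id' on the first line followed by parent id, name and revision on the next lines (the field positions here are illustrative, not taken from this diff):

    def _bytes_to_text_key_py(data):
        # data is one CHKInventory entry value, e.g.
        #   "file: <file-id>\n<parent-id>\n<name>\n<revision>\n..."
        # (assumed layout; only the file id and the revision are needed)
        sections = data.split('\n')
        kind, file_id = sections[0].split(': ', 1)
        return (intern(file_id), intern(sections[3]))

A compiled version can pick out the two fields without splitting the whole value into a list for every entry, which is presumably where the 5-10% saving during a full branch comes from.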

=== modified file 'bzrlib/repofmt/pack_repo.py'
@@ -1,4 +1,4 @@
-# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
+# Copyright (C) 2007-2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -24,6 +24,7 @@
 
 from bzrlib import (
     chk_map,
+    cleanup,
     debug,
     graph,
     osutils,
@@ -54,7 +55,7 @@
     revision as _mod_revision,
     )
 
-from bzrlib.decorators import needs_write_lock
+from bzrlib.decorators import needs_write_lock, only_raises
 from bzrlib.btree_index import (
     BTreeGraphIndex,
     BTreeBuilder,
@@ -73,6 +74,7 @@
     )
 from bzrlib.trace import (
     mutter,
+    note,
     warning,
     )
 
@@ -224,10 +226,14 @@
         return self.index_name('text', name)
 
     def _replace_index_with_readonly(self, index_type):
+        unlimited_cache = False
+        if index_type == 'chk':
+            unlimited_cache = True
         setattr(self, index_type + '_index',
             self.index_class(self.index_transport,
                 self.index_name(index_type, self.name),
-                self.index_sizes[self.index_offset(index_type)]))
+                self.index_sizes[self.index_offset(index_type)],
+                unlimited_cache=unlimited_cache))
 
 
 class ExistingPack(Pack):
@@ -422,6 +428,8 @@
         self._writer.begin()
         # what state is the pack in? (open, finished, aborted)
         self._state = 'open'
+        # no name until we finish writing the content
+        self.name = None
 
     def abort(self):
         """Cancel creating this pack."""
@@ -448,6 +456,14 @@
             self.signature_index.key_count() or
             (self.chk_index is not None and self.chk_index.key_count()))
 
+    def finish_content(self):
+        if self.name is not None:
+            return
+        self._writer.end()
+        if self._buffer[1]:
+            self._write_data('', flush=True)
+        self.name = self._hash.hexdigest()
+
     def finish(self, suspend=False):
         """Finish the new pack.
 
@@ -459,10 +475,7 @@
          - stores the index size tuple for the pack in the index_sizes
            attribute.
         """
-        self._writer.end()
-        if self._buffer[1]:
-            self._write_data('', flush=True)
-        self.name = self._hash.hexdigest()
+        self.finish_content()
         if not suspend:
             self._check_references()
         # write indices
@@ -574,26 +587,6 @@
                                              flush_func=flush_func)
         self.add_callback = None
 
-    def replace_indices(self, index_to_pack, indices):
-        """Replace the current mappings with fresh ones.
-
-        This should probably not be used eventually, rather incremental add and
-        removal of indices. It has been added during refactoring of existing
-        code.
-
-        :param index_to_pack: A mapping from index objects to
-            (transport, name) tuples for the pack file data.
-        :param indices: A list of indices.
-        """
-        # refresh the revision pack map dict without replacing the instance.
-        self.index_to_pack.clear()
-        self.index_to_pack.update(index_to_pack)
-        # XXX: API break - clearly a 'replace' method would be good?
-        self.combined_index._indices[:] = indices
-        # the current add nodes callback for the current writable index if
-        # there is one.
-        self.add_callback = None
-
     def add_index(self, index, pack):
         """Add index to the aggregate, which is an index for Pack pack.
 
@@ -606,7 +599,7 @@
         # expose it to the index map
         self.index_to_pack[index] = pack.access_tuple()
         # put it at the front of the linear index list
-        self.combined_index.insert_index(0, index)
+        self.combined_index.insert_index(0, index, pack.name)
 
     def add_writable_index(self, index, pack):
         """Add an index which is able to have data added to it.
@@ -632,16 +625,18 @@
         self.data_access.set_writer(None, None, (None, None))
         self.index_to_pack.clear()
         del self.combined_index._indices[:]
+        del self.combined_index._index_names[:]
         self.add_callback = None
 
-    def remove_index(self, index, pack):
+    def remove_index(self, index):
         """Remove index from the indices used to answer queries.
 
         :param index: An index from the pack parameter.
-        :param pack: A Pack instance.
         """
         del self.index_to_pack[index]
-        self.combined_index._indices.remove(index)
+        pos = self.combined_index._indices.index(index)
+        del self.combined_index._indices[pos]
+        del self.combined_index._index_names[pos]
         if (self.add_callback is not None and
             getattr(index, 'add_nodes', None) == self.add_callback):
             self.add_callback = None
@@ -1105,7 +1100,7 @@
             iterator is a tuple with:
             index, readv_vector, node_vector. readv_vector is a list ready to
             hand to the transport readv method, and node_vector is a list of
-            (key, eol_flag, references) for the the node retrieved by the
+            (key, eol_flag, references) for the node retrieved by the
             matching readv_vector.
         """
         # group by pack so we do one readv per pack
@@ -1403,14 +1398,26 @@
         self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
         self.text_index = AggregateIndex(self.reload_pack_names, flush)
         self.signature_index = AggregateIndex(self.reload_pack_names, flush)
+        all_indices = [self.revision_index, self.inventory_index,
+                self.text_index, self.signature_index]
         if use_chk_index:
             self.chk_index = AggregateIndex(self.reload_pack_names, flush)
+            all_indices.append(self.chk_index)
         else:
             # used to determine if we're using a chk_index elsewhere.
             self.chk_index = None
+        # Tell all the CombinedGraphIndex objects about each other, so they can
+        # share hints about which pack names to search first.
+        all_combined = [agg_idx.combined_index for agg_idx in all_indices]
+        for combined_idx in all_combined:
+            combined_idx.set_sibling_indices(
+                set(all_combined).difference([combined_idx]))
         # resumed packs
         self._resumed_packs = []
 
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, self.repo)
+
     def add_pack_to_memory(self, pack):
         """Make a Pack object available to the repository to satisfy queries.
 
@@ -1530,10 +1537,11 @@
                 self._remove_pack_from_memory(pack)
         # record the newly available packs and stop advertising the old
         # packs
-        result = self._save_pack_names(clear_obsolete_packs=True)
-        # Move the old packs out of the way now they are no longer referenced.
-        for revision_count, packs in pack_operations:
-            self._obsolete_packs(packs)
+        to_be_obsoleted = []
+        for _, packs in pack_operations:
+            to_be_obsoleted.extend(packs)
+        result = self._save_pack_names(clear_obsolete_packs=True,
+                                       obsolete_packs=to_be_obsoleted)
         return result
 
     def _flush_new_pack(self):
@@ -1552,7 +1560,7 @@
         """Is the collection already packed?"""
         return not (self.repo._format.pack_compresses or (len(self._names) > 1))
 
-    def pack(self, hint=None):
+    def pack(self, hint=None, clean_obsolete_packs=False):
         """Pack the pack collection totally."""
         self.ensure_loaded()
         total_packs = len(self._names)
@@ -1567,11 +1575,16 @@
         # determine which packs need changing
         pack_operations = [[0, []]]
         for pack in self.all_packs():
-            if not hint or pack.name in hint:
+            if hint is None or pack.name in hint:
+                # Either no hint was provided (so we are packing everything),
+                # or this pack was included in the hint.
                 pack_operations[-1][0] += pack.get_revision_count()
                 pack_operations[-1][1].append(pack)
         self._execute_pack_operations(pack_operations, OptimisingPacker)
 
+        if clean_obsolete_packs:
+            self._clear_obsolete_packs()
+
     def plan_autopack_combinations(self, existing_packs, pack_distribution):
         """Plan a pack operation.
 
@@ -1665,7 +1678,7 @@
             txt_index = self._make_index(name, '.tix')
             sig_index = self._make_index(name, '.six')
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix')
+                chk_index = self._make_index(name, '.cix', unlimited_cache=True)
             else:
                 chk_index = None
             result = ExistingPack(self._pack_transport, name, rev_index,
@@ -1690,7 +1703,8 @@
             txt_index = self._make_index(name, '.tix', resume=True)
             sig_index = self._make_index(name, '.six', resume=True)
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix', resume=True)
+                chk_index = self._make_index(name, '.cix', resume=True,
+                                             unlimited_cache=True)
             else:
                 chk_index = None
             result = self.resumed_pack_factory(name, rev_index, inv_index,
@@ -1726,7 +1740,7 @@
         return self._index_class(self.transport, 'pack-names', None
                 ).iter_all_entries()
 
-    def _make_index(self, name, suffix, resume=False):
+    def _make_index(self, name, suffix, resume=False, unlimited_cache=False):
         size_offset = self._suffix_offsets[suffix]
         index_name = name + suffix
         if resume:
@@ -1735,7 +1749,8 @@
         else:
             transport = self._index_transport
             index_size = self._names[name][size_offset]
-        return self._index_class(transport, index_name, index_size)
+        return self._index_class(transport, index_name, index_size,
+                                 unlimited_cache=unlimited_cache)
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.
@@ -1769,8 +1784,13 @@
         :param return: None.
         """
         for pack in packs:
-            pack.pack_transport.rename(pack.file_name(),
-                '../obsolete_packs/' + pack.file_name())
+            try:
+                pack.pack_transport.rename(pack.file_name(),
+                    '../obsolete_packs/' + pack.file_name())
+            except (errors.PathError, errors.TransportError), e:
+                # TODO: Should these be warnings or mutters?
+                mutter("couldn't rename obsolete pack, skipping it:\n%s"
+                       % (e,))
             # TODO: Probably needs to know all possible indices for this pack
             # - or maybe list the directory and move all indices matching this
             # name whether we recognize it or not?
@@ -1778,8 +1798,12 @@
             if self.chk_index is not None:
                 suffixes.append('.cix')
             for suffix in suffixes:
-                self._index_transport.rename(pack.name + suffix,
-                    '../obsolete_packs/' + pack.name + suffix)
+                try:
+                    self._index_transport.rename(pack.name + suffix,
+                        '../obsolete_packs/' + pack.name + suffix)
+                except (errors.PathError, errors.TransportError), e:
+                    mutter("couldn't rename obsolete index, skipping it:\n%s"
+                           % (e,))
 
     def pack_distribution(self, total_revisions):
         """Generate a list of the number of revisions to put in each pack.
@@ -1811,14 +1835,22 @@
         self._remove_pack_indices(pack)
         self.packs.remove(pack)
 
-    def _remove_pack_indices(self, pack):
-        """Remove the indices for pack from the aggregated indices."""
-        self.revision_index.remove_index(pack.revision_index, pack)
-        self.inventory_index.remove_index(pack.inventory_index, pack)
-        self.text_index.remove_index(pack.text_index, pack)
-        self.signature_index.remove_index(pack.signature_index, pack)
-        if self.chk_index is not None:
-            self.chk_index.remove_index(pack.chk_index, pack)
+    def _remove_pack_indices(self, pack, ignore_missing=False):
+        """Remove the indices for pack from the aggregated indices.
+
+        :param ignore_missing: Suppress KeyErrors from calling remove_index.
+        """
+        for index_type in Pack.index_definitions.keys():
+            attr_name = index_type + '_index'
+            aggregate_index = getattr(self, attr_name)
+            if aggregate_index is not None:
+                pack_index = getattr(pack, attr_name)
+                try:
+                    aggregate_index.remove_index(pack_index)
+                except KeyError:
+                    if ignore_missing:
+                        continue
+                    raise
 
     def reset(self):
         """Clear all cached data."""
@@ -1857,6 +1889,7 @@
         disk_nodes = set()
         for index, key, value in self._iter_disk_pack_index():
             disk_nodes.add((key, value))
+        orig_disk_nodes = set(disk_nodes)
 
         # do a two-way diff against our original content
         current_nodes = set()
@@ -1875,7 +1908,7 @@
         disk_nodes.difference_update(deleted_nodes)
         disk_nodes.update(new_nodes)
 
-        return disk_nodes, deleted_nodes, new_nodes
+        return disk_nodes, deleted_nodes, new_nodes, orig_disk_nodes
 
     def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
         """Given the correct set of pack files, update our saved info.
@@ -1921,7 +1954,7 @@
                 added.append(name)
         return removed, added, modified
 
-    def _save_pack_names(self, clear_obsolete_packs=False):
+    def _save_pack_names(self, clear_obsolete_packs=False, obsolete_packs=None):
         """Save the list of packs.
 
         This will take out the mutex around the pack names list for the
@@ -1931,12 +1964,16 @@
 
         :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
+        :param obsolete_packs: Packs that are obsolete once the new pack-names
+            file has been written.
         :return: A list of the names saved that were not previously on disk.
         """
+        already_obsolete = []
        self.lock_names()
         try:
             builder = self._index_builder_class()
-            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
+            (disk_nodes, deleted_nodes, new_nodes,
+             orig_disk_nodes) = self._diff_pack_names()
             # TODO: handle same-name, index-size-changes here -
             # e.g. use the value from disk, not ours, *unless* we're the one
             # changing it.
@@ -1944,14 +1981,25 @@
                 builder.add_node(key, value)
             self.transport.put_file('pack-names', builder.finish(),
                 mode=self.repo.bzrdir._get_file_mode())
-            # move the baseline forward
             self._packs_at_load = disk_nodes
             if clear_obsolete_packs:
-                self._clear_obsolete_packs()
+                to_preserve = None
+                if obsolete_packs:
+                    to_preserve = set([o.name for o in obsolete_packs])
+                already_obsolete = self._clear_obsolete_packs(to_preserve)
         finally:
             self._unlock_names()
         # synchronise the memory packs list with what we just wrote:
         self._syncronize_pack_names_from_disk_nodes(disk_nodes)
+        if obsolete_packs:
+            # TODO: We could add one more condition here. "if o.name not in
+            #       orig_disk_nodes and o != the new_pack we haven't written to
+            #       disk yet. However, the new pack object is not easily
+            #       accessible here (it would have to be passed through the
+            #       autopacking code, etc.)
+            obsolete_packs = [o for o in obsolete_packs
+                              if o.name not in already_obsolete]
+            self._obsolete_packs(obsolete_packs)
         return [new_node[0][0] for new_node in new_nodes]
 
     def reload_pack_names(self):
@@ -1972,8 +2020,12 @@
         if first_read:
             return True
         # out the new value.
-        disk_nodes, _, _ = self._diff_pack_names()
-        self._packs_at_load = disk_nodes
+        (disk_nodes, deleted_nodes, new_nodes,
+         orig_disk_nodes) = self._diff_pack_names()
+        # _packs_at_load is meant to be the explicit list of names in
+        # 'pack-names' at then start. As such, it should not contain any
+        # pending names that haven't been written out yet.
+        self._packs_at_load = orig_disk_nodes
         (removed, added,
          modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
         if removed or added or modified:
@@ -1988,15 +2040,28 @@
             raise
         raise errors.RetryAutopack(self.repo, False, sys.exc_info())
 
-    def _clear_obsolete_packs(self):
+    def _clear_obsolete_packs(self, preserve=None):
         """Delete everything from the obsolete-packs directory.
+
+        :return: A list of pack identifiers (the filename without '.pack') that
+            were found in obsolete_packs.
         """
+        found = []
         obsolete_pack_transport = self.transport.clone('obsolete_packs')
+        if preserve is None:
+            preserve = set()
         for filename in obsolete_pack_transport.list_dir('.'):
+            name, ext = osutils.splitext(filename)
+            if ext == '.pack':
+                found.append(name)
+            if name in preserve:
+                continue
             try:
                 obsolete_pack_transport.delete(filename)
             except (errors.PathError, errors.TransportError), e:
-                warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
+                warning("couldn't delete obsolete pack, skipping it:\n%s"
+                        % (e,))
+        return found
 
     def _start_write_group(self):
         # Do not permit preparation for writing if we're not in a 'write lock'.
@@ -2029,24 +2094,21 @@
         # FIXME: just drop the transient index.
         # forget what names there are
         if self._new_pack is not None:
-            try:
-                self._new_pack.abort()
-            finally:
-                # XXX: If we aborted while in the middle of finishing the write
-                # group, _remove_pack_indices can fail because the indexes are
-                # already gone.  If they're not there we shouldn't fail in this
-                # case.  -- mbp 20081113
-                self._remove_pack_indices(self._new_pack)
-                self._new_pack = None
+            operation = cleanup.OperationWithCleanups(self._new_pack.abort)
+            operation.add_cleanup(setattr, self, '_new_pack', None)
+            # If we aborted while in the middle of finishing the write
+            # group, _remove_pack_indices could fail because the indexes are
+            # already gone.  But they're not there we shouldn't fail in this
+            # case, so we pass ignore_missing=True.
+            operation.add_cleanup(self._remove_pack_indices, self._new_pack,
+                ignore_missing=True)
+            operation.run_simple()
         for resumed_pack in self._resumed_packs:
-            try:
-                resumed_pack.abort()
-            finally:
-                # See comment in previous finally block.
-                try:
-                    self._remove_pack_indices(resumed_pack)
-                except KeyError:
-                    pass
+            operation = cleanup.OperationWithCleanups(resumed_pack.abort)
+            # See comment in previous finally block.
+            operation.add_cleanup(self._remove_pack_indices, resumed_pack,
+                ignore_missing=True)
+            operation.run_simple()
         del self._resumed_packs[:]
 
     def _remove_resumed_pack_indices(self):
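
The hunk above swaps hand-rolled try/finally nesting for bzrlib's cleanup helper (hence the new 'cleanup' import at the top of the file). Judging from the calls visible here, the pattern is: wrap the primary call in an OperationWithCleanups, register follow-up actions with add_cleanup(), and let run_simple() execute the call and then every registered cleanup. A rough sketch of the old and new shapes, using stand-in names ('pack', 'collection'); the helper's exact error-reporting behaviour is not shown in this diff:

    # Old shape: each extra cleanup means another level of try/finally.
    try:
        pack.abort()
    finally:
        collection._remove_pack_indices(pack)

    # New shape: cleanups are registered up front and run after the main call.
    operation = cleanup.OperationWithCleanups(pack.abort)
    operation.add_cleanup(collection._remove_pack_indices, pack,
                          ignore_missing=True)
    operation.run_simple()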
@@ -2054,6 +2116,16 @@
             self._remove_pack_indices(resumed_pack)
         del self._resumed_packs[:]
 
+    def _check_new_inventories(self):
+        """Detect missing inventories in this write group.
+
+        :returns: list of strs, summarising any problems found.  If the list is
+            empty no problems were found.
+        """
+        # The base implementation does no checks.  GCRepositoryPackCollection
+        # overrides this.
+        return []
+
     def _commit_write_group(self):
         all_missing = set()
         for prefix, versioned_file in (
@@ -2068,14 +2140,19 @@
             raise errors.BzrCheckError(
                 "Repository %s has missing compression parent(s) %r "
                  % (self.repo, sorted(all_missing)))
+        problems = self._check_new_inventories()
+        if problems:
+            problems_summary = '\n'.join(problems)
+            raise errors.BzrCheckError(
+                "Cannot add revision(s) to repository: " + problems_summary)
         self._remove_pack_indices(self._new_pack)
-        should_autopack = False
+        any_new_content = False
         if self._new_pack.data_inserted():
             # get all the data to disk and read to use
             self._new_pack.finish()
             self.allocate(self._new_pack)
             self._new_pack = None
-            should_autopack = True
+            any_new_content = True
         else:
             self._new_pack.abort()
             self._new_pack = None
@@ -2086,13 +2163,16 @@
             self._remove_pack_from_memory(resumed_pack)
             resumed_pack.finish()
             self.allocate(resumed_pack)
-            should_autopack = True
+            any_new_content = True
         del self._resumed_packs[:]
-        if should_autopack:
-            if not self.autopack():
+        if any_new_content:
+            result = self.autopack()
+            if not result:
                 # when autopack takes no steps, the names list is still
                 # unsaved.
                 return self._save_pack_names()
+            return result
+        return []
 
     def _suspend_write_group(self):
         tokens = [pack.name for pack in self._resumed_packs]
@@ -2200,67 +2280,15 @@
         self._reconcile_fixes_text_parents = True
         self._reconcile_backsup_inventory = False
 
-    def _warn_if_deprecated(self):
+    def _warn_if_deprecated(self, branch=None):
         # This class isn't deprecated, but one sub-format is
         if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
-            from bzrlib import repository
-            if repository._deprecation_warning_done:
-                return
-            repository._deprecation_warning_done = True
-            warning("Format %s for %s is deprecated - please use"
-                    " 'bzr upgrade --1.6.1-rich-root'"
-                    % (self._format, self.bzrdir.transport.base))
+            super(KnitPackRepository, self)._warn_if_deprecated(branch)
 
     def _abort_write_group(self):
-        self.revisions._index._key_dependencies.refs.clear()
+        self.revisions._index._key_dependencies.clear()
         self._pack_collection._abort_write_group()
 
-    def _find_inconsistent_revision_parents(self):
-        """Find revisions with incorrectly cached parents.
-
-        :returns: an iterator yielding tuples of (revison-id, parents-in-index,
-            parents-in-revision).
-        """
-        if not self.is_locked():
-            raise errors.ObjectNotLocked(self)
-        pb = ui.ui_factory.nested_progress_bar()
-        result = []
-        try:
-            revision_nodes = self._pack_collection.revision_index \
-                .combined_index.iter_all_entries()
-            index_positions = []
-            # Get the cached index values for all revisions, and also the
-            # location in each index of the revision text so we can perform
-            # linear IO.
-            for index, key, value, refs in revision_nodes:
-                node = (index, key, value, refs)
-                index_memo = self.revisions._index._node_to_position(node)
-                if index_memo[0] != index:
-                    raise AssertionError('%r != %r' % (index_memo[0], index))
-                index_positions.append((index_memo, key[0],
-                                       tuple(parent[0] for parent in refs[0])))
-                pb.update("Reading revision index", 0, 0)
-            index_positions.sort()
-            batch_size = 1000
-            pb.update("Checking cached revision graph", 0,
-                      len(index_positions))
-            for offset in xrange(0, len(index_positions), 1000):
-                pb.update("Checking cached revision graph", offset)
-                to_query = index_positions[offset:offset + batch_size]
-                if not to_query:
-                    break
-                rev_ids = [item[1] for item in to_query]
-                revs = self.get_revisions(rev_ids)
-                for revision, item in zip(revs, to_query):
-                    index_parents = item[2]
-                    rev_parents = tuple(revision.parent_ids)
-                    if index_parents != rev_parents:
-                        result.append((revision.revision_id, index_parents,
-                                       rev_parents))
-        finally:
-            pb.finished()
-        return result
-
     def _get_source(self, to_format):
         if to_format.network_name() == self._format.network_name():
             return KnitPackStreamSource(self, to_format)
@@ -2278,13 +2306,14 @@
         self._pack_collection._start_write_group()
 
     def _commit_write_group(self):
-        self.revisions._index._key_dependencies.refs.clear()
-        return self._pack_collection._commit_write_group()
+        hint = self._pack_collection._commit_write_group()
+        self.revisions._index._key_dependencies.clear()
+        return hint
 
     def suspend_write_group(self):
         # XXX check self._write_group is self.get_transaction()?
         tokens = self._pack_collection._suspend_write_group()
-        self.revisions._index._key_dependencies.refs.clear()
+        self.revisions._index._key_dependencies.clear()
         self._write_group = None
         return tokens
 
@@ -2318,6 +2347,9 @@
         if self._write_lock_count == 1:
             self._transaction = transactions.WriteTransaction()
         if not locked:
+            if 'relock' in debug.debug_flags and self._prev_lock == 'w':
+                note('%r was write locked again', self)
+            self._prev_lock = 'w'
             for repo in self._fallback_repositories:
                 # Writes don't affect fallback repos
                 repo.lock_read()
@@ -2330,6 +2362,9 @@
         else:
             self.control_files.lock_read()
         if not locked:
+            if 'relock' in debug.debug_flags and self._prev_lock == 'r':
+                note('%r was read locked again', self)
+            self._prev_lock = 'r'
             for repo in self._fallback_repositories:
                 repo.lock_read()
             self._refresh_data()
@@ -2343,13 +2378,13 @@
         raise NotImplementedError(self.dont_leave_lock_in_place)
 
     @needs_write_lock
-    def pack(self, hint=None):
+    def pack(self, hint=None, clean_obsolete_packs=False):
         """Compress the data within the repository.
 
         This will pack all the data to a single pack. In future it may
         recompress deltas or do other such expensive operations.
         """
-        self._pack_collection.pack(hint=hint)
+        self._pack_collection.pack(hint=hint, clean_obsolete_packs=clean_obsolete_packs)
 
     @needs_write_lock
     def reconcile(self, other=None, thorough=False):
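
Taken together with the collection-level change earlier, this lets a caller ask for the obsolete_packs directory to be purged as part of a full repack. A small usage sketch ('repo' stands for any already-opened pack-format repository):

    # pack() is decorated with @needs_write_lock (visible in the hunk above),
    # so it takes and releases the repository lock itself.
    repo.pack(clean_obsolete_packs=True)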
@@ -2363,6 +2398,7 @@
         packer = ReconcilePacker(collection, packs, extension, revs)
         return packer.pack(pb)
 
+    @only_raises(errors.LockNotHeld, errors.LockBroken)
     def unlock(self):
         if self._write_lock_count == 1 and self._write_group is not None:
             self.abort_write_group()
@@ -2510,7 +2546,9 @@
         utf8_files = [('format', self.get_format_string())]
 
         self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
-        return self.open(a_bzrdir=a_bzrdir, _found=True)
+        repository = self.open(a_bzrdir=a_bzrdir, _found=True)
+        self._run_post_repo_init_hooks(repository, a_bzrdir, shared)
+        return repository
 
     def open(self, a_bzrdir, _found=False, _override_transport=None):
         """See RepositoryFormat.open().
@@ -2565,9 +2603,6 @@
         """See RepositoryFormat.get_format_description()."""
         return "Packs containing knits without subtree support"
 
-    def check_conversion_target(self, target_format):
-        pass
-
 
 class RepositoryFormatKnitPack3(RepositoryFormatPack):
     """A subtrees parameterized Pack repository.
@@ -2582,6 +2617,7 @@
     repository_class = KnitPackRepository
     _commit_builder_class = PackRootCommitBuilder
     rich_root_data = True
+    experimental = True
     supports_tree_reference = True
     @property
     def _serializer(self):
@@ -2599,14 +2635,6 @@
 
     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
-    def check_conversion_target(self, target_format):
-        if not target_format.rich_root_data:
-            raise errors.BadConversionTarget(
-                'Does not support rich root data.', target_format)
-        if not getattr(target_format, 'supports_tree_reference', False):
-            raise errors.BadConversionTarget(
-                'Does not support nested trees', target_format)
-
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
@@ -2645,11 +2673,6 @@
 
     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
-    def check_conversion_target(self, target_format):
-        if not target_format.rich_root_data:
-            raise errors.BadConversionTarget(
-                'Does not support rich root data.', target_format)
-
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ("Bazaar pack repository format 1 with rich root"
@@ -2696,9 +2719,6 @@
         """See RepositoryFormat.get_format_description()."""
         return "Packs 5 (adds stacking support, requires bzr 1.6)"
 
-    def check_conversion_target(self, target_format):
-        pass
-
 
 class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
     """A repository with rich roots and stacking.
@@ -2731,11 +2751,6 @@
 
     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
-    def check_conversion_target(self, target_format):
-        if not target_format.rich_root_data:
-            raise errors.BadConversionTarget(
-                'Does not support rich root data.', target_format)
-
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
@@ -2782,11 +2797,6 @@
 
     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
-    def check_conversion_target(self, target_format):
-        if not target_format.rich_root_data:
-            raise errors.BadConversionTarget(
-                'Does not support rich root data.', target_format)
-
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
@@ -2830,9 +2840,6 @@
         """See RepositoryFormat.get_format_description()."""
         return "Packs 6 (uses btree indexes, requires bzr 1.9)"
 
-    def check_conversion_target(self, target_format):
-        pass
-
 
 class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
     """A repository with rich roots, no subtrees, stacking and btree indexes.
@@ -2862,11 +2869,6 @@
 
     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
-    def check_conversion_target(self, target_format):
-        if not target_format.rich_root_data:
-            raise errors.BadConversionTarget(
-                'Does not support rich root data.', target_format)
-
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"
@@ -2889,6 +2891,7 @@
     repository_class = KnitPackRepository
     _commit_builder_class = PackRootCommitBuilder
     rich_root_data = True
+    experimental = True
     supports_tree_reference = True
     supports_external_lookups = True
     # What index classes to use
@@ -2908,14 +2911,6 @@
 
     _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
-    def check_conversion_target(self, target_format):
-        if not target_format.rich_root_data:
-            raise errors.BadConversionTarget(
-                'Does not support rich root data.', target_format)
-        if not getattr(target_format, 'supports_tree_reference', False):
-            raise errors.BadConversionTarget(
-                'Does not support nested trees', target_format)
-
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ("Bazaar development format 2 with subtree support "