~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/pack_repo.py

(gz) Backslash escape selftest output when printing to non-unicode consoles (Martin [gz])

=== modified file 'bzrlib/repofmt/pack_repo.py'
@@ -24,6 +24,7 @@
 
 from bzrlib import (
     chk_map,
+    cleanup,
     debug,
     graph,
     osutils,
@@ -48,6 +49,7 @@
 """)
 from bzrlib import (
     bzrdir,
+    btree_index,
     errors,
     lockable_files,
     lockdir,
@@ -55,19 +57,17 @@
     )
 
 from bzrlib.decorators import needs_write_lock, only_raises
-from bzrlib.btree_index import (
-    BTreeGraphIndex,
-    BTreeBuilder,
-    )
 from bzrlib.index import (
     GraphIndex,
     InMemoryGraphIndex,
     )
+from bzrlib.lock import LogicalLockResult
 from bzrlib.repofmt.knitrepo import KnitRepository
 from bzrlib.repository import (
     CommitBuilder,
     MetaDirRepositoryFormat,
     RepositoryFormat,
+    RepositoryWriteLockResult,
     RootCommitBuilder,
     StreamSource,
     )
@@ -228,11 +228,13 @@
         unlimited_cache = False
         if index_type == 'chk':
             unlimited_cache = True
-        setattr(self, index_type + '_index',
-            self.index_class(self.index_transport,
-                self.index_name(index_type, self.name),
-                self.index_sizes[self.index_offset(index_type)],
-                unlimited_cache=unlimited_cache))
+        index = self.index_class(self.index_transport,
+                    self.index_name(index_type, self.name),
+                    self.index_sizes[self.index_offset(index_type)],
+                    unlimited_cache=unlimited_cache)
+        if index_type == 'chk':
+            index._leaf_factory = btree_index._gcchk_factory
+        setattr(self, index_type + '_index', index)
 
 
 class ExistingPack(Pack):
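
The rewritten hunk above builds the index object completely before publishing it, so the chk index can be given the GroupCompress leaf factory (btree_index._gcchk_factory) before it becomes visible as an attribute. A minimal sketch of this build-then-publish pattern, using hypothetical stand-in classes rather than the real bzrlib index types:

    class FakeIndex(object):
        """Hypothetical stand-in for self.index_class above."""
        def __init__(self, name, unlimited_cache=False):
            self.name = name
            self.unlimited_cache = unlimited_cache
            self._leaf_factory = None

    class PackLike(object):
        def load_index(self, index_type):
            # Build and fully configure the index first...
            index = FakeIndex(index_type, unlimited_cache=(index_type == 'chk'))
            if index_type == 'chk':
                index._leaf_factory = 'gcchk'  # stands in for _gcchk_factory
            # ...and only then publish it as e.g. self.chk_index.
            setattr(self, index_type + '_index', index)

    p = PackLike()
    p.load_index('chk')
    assert p.chk_index._leaf_factory == 'gcchk'
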
@@ -586,26 +588,6 @@
                                              flush_func=flush_func)
         self.add_callback = None
 
-    def replace_indices(self, index_to_pack, indices):
-        """Replace the current mappings with fresh ones.
-
-        This should probably not be used eventually, rather incremental add and
-        removal of indices. It has been added during refactoring of existing
-        code.
-
-        :param index_to_pack: A mapping from index objects to
-            (transport, name) tuples for the pack file data.
-        :param indices: A list of indices.
-        """
-        # refresh the revision pack map dict without replacing the instance.
-        self.index_to_pack.clear()
-        self.index_to_pack.update(index_to_pack)
-        # XXX: API break - clearly a 'replace' method would be good?
-        self.combined_index._indices[:] = indices
-        # the current add nodes callback for the current writable index if
-        # there is one.
-        self.add_callback = None
-
     def add_index(self, index, pack):
         """Add index to the aggregate, which is an index for Pack pack.
 
@@ -618,7 +600,7 @@
         # expose it to the index map
         self.index_to_pack[index] = pack.access_tuple()
         # put it at the front of the linear index list
-        self.combined_index.insert_index(0, index)
+        self.combined_index.insert_index(0, index, pack.name)
 
     def add_writable_index(self, index, pack):
         """Add an index which is able to have data added to it.
@@ -644,16 +626,18 @@
         self.data_access.set_writer(None, None, (None, None))
         self.index_to_pack.clear()
         del self.combined_index._indices[:]
+        del self.combined_index._index_names[:]
         self.add_callback = None
 
-    def remove_index(self, index, pack):
+    def remove_index(self, index):
         """Remove index from the indices used to answer queries.
 
         :param index: An index from the pack parameter.
-        :param pack: A Pack instance.
         """
         del self.index_to_pack[index]
-        self.combined_index._indices.remove(index)
+        pos = self.combined_index._indices.index(index)
+        del self.combined_index._indices[pos]
+        del self.combined_index._index_names[pos]
         if (self.add_callback is not None and
             getattr(index, 'add_nodes', None) == self.add_callback):
             self.add_callback = None
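
The new remove_index has to keep CombinedGraphIndex._indices and the new parallel _index_names list in step, so it deletes by position instead of calling list.remove on one list only. A small self-contained sketch of that parallel-list invariant:

    indices = ['rev-a', 'rev-b', 'rev-c']
    index_names = ['pack-a', 'pack-b', 'pack-c']

    def remove_index(index):
        # Find the position once, then delete from both lists so they
        # cannot drift out of sync.
        pos = indices.index(index)
        del indices[pos]
        del index_names[pos]

    remove_index('rev-b')
    assert indices == ['rev-a', 'rev-c']
    assert index_names == ['pack-a', 'pack-c']
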
@@ -738,7 +722,7 @@
         :return: A Pack object, or None if nothing was copied.
         """
         # open a pack - using the same name as the last temporary file
-        # - which has already been flushed, so its safe.
+        # - which has already been flushed, so it's safe.
         # XXX: - duplicate code warning with start_write_group; fix before
         #      considering 'done'.
         if self._pack_collection._new_pack is not None:
@@ -1308,7 +1292,7 @@
         # reinserted, and if d3 has incorrect parents it will also be
         # reinserted. If we insert d3 first, d2 is present (as it was bulk
         # copied), so we will try to delta, but d2 is not currently able to be
-        # extracted because it's basis d1 is not present. Topologically sorting
+        # extracted because its basis d1 is not present. Topologically sorting
         # addresses this. The following generates a sort for all the texts that
         # are being inserted without having to reference the entire text key
         # space (we only topo sort the revisions, which is smaller).
@@ -1415,11 +1399,20 @@
         self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
         self.text_index = AggregateIndex(self.reload_pack_names, flush)
         self.signature_index = AggregateIndex(self.reload_pack_names, flush)
+        all_indices = [self.revision_index, self.inventory_index,
+                self.text_index, self.signature_index]
         if use_chk_index:
             self.chk_index = AggregateIndex(self.reload_pack_names, flush)
+            all_indices.append(self.chk_index)
         else:
             # used to determine if we're using a chk_index elsewhere.
             self.chk_index = None
+        # Tell all the CombinedGraphIndex objects about each other, so they can
+        # share hints about which pack names to search first.
+        all_combined = [agg_idx.combined_index for agg_idx in all_indices]
+        for combined_idx in all_combined:
+            combined_idx.set_sibling_indices(
+                set(all_combined).difference([combined_idx]))
         # resumed packs
         self._resumed_packs = []
 
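
Each CombinedGraphIndex is told about all of its siblings so that a pack-ordering hint discovered during one lookup can steer the others. The wiring is the usual "everyone except me" construction; a sketch with strings standing in for the real index objects:

    all_combined = ['revisions', 'inventories', 'texts', 'signatures']
    for combined_idx in all_combined:
        siblings = set(all_combined).difference([combined_idx])
        # the real code passes this set to combined_idx.set_sibling_indices()
        assert combined_idx not in siblings
        assert len(siblings) == len(all_combined) - 1
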
@@ -1568,7 +1561,7 @@
         """Is the collection already packed?"""
         return not (self.repo._format.pack_compresses or (len(self._names) > 1))
 
-    def pack(self, hint=None):
+    def pack(self, hint=None, clean_obsolete_packs=False):
         """Pack the pack collection totally."""
         self.ensure_loaded()
         total_packs = len(self._names)
@@ -1590,6 +1583,9 @@
                 pack_operations[-1][1].append(pack)
         self._execute_pack_operations(pack_operations, OptimisingPacker)
 
+        if clean_obsolete_packs:
+            self._clear_obsolete_packs()
+
     def plan_autopack_combinations(self, existing_packs, pack_distribution):
         """Plan a pack operation.
 
@@ -1604,9 +1600,9 @@
         pack_operations = [[0, []]]
         # plan out what packs to keep, and what to reorganise
         while len(existing_packs):
-            # take the largest pack, and if its less than the head of the
+            # take the largest pack, and if it's less than the head of the
             # distribution chart we will include its contents in the new pack
-            # for that position. If its larger, we remove its size from the
+            # for that position. If it's larger, we remove its size from the
             # distribution chart
             next_pack_rev_count, next_pack = existing_packs.pop(0)
             if next_pack_rev_count >= pack_distribution[0]:
@@ -1647,7 +1643,7 @@
 
         :return: True if the disk names had not been previously read.
         """
-        # NB: if you see an assertion error here, its probably access against
+        # NB: if you see an assertion error here, it's probably access against
         # an unlocked repo. Naughty.
         if not self.repo.is_locked():
             raise errors.ObjectNotLocked(self.repo)
@@ -1683,7 +1679,7 @@
             txt_index = self._make_index(name, '.tix')
             sig_index = self._make_index(name, '.six')
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix', unlimited_cache=True)
+                chk_index = self._make_index(name, '.cix', is_chk=True)
             else:
                 chk_index = None
             result = ExistingPack(self._pack_transport, name, rev_index,
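
The is_chk flag replaces unlimited_cache=True at the .cix call sites here and below; the _make_index hunks further down show what it expands to: the unlimited cache is kept, and the GroupCompress leaf factory is installed only when the index class is really a BTreeGraphIndex. A minimal sketch of that conditional configuration, with hypothetical stand-in classes:

    class PlainIndex(object):
        """Hypothetical stand-in for GraphIndex (older pack formats)."""
        def __init__(self, name, unlimited_cache=False):
            self.unlimited_cache = unlimited_cache

    class BTreeLikeIndex(PlainIndex):
        """Hypothetical stand-in for btree_index.BTreeGraphIndex."""
        _leaf_factory = None

    def make_index(index_class, name, is_chk=False):
        # is_chk drives the cache policy: chk pages are numerous and hot.
        index = index_class(name, unlimited_cache=is_chk)
        # Only the B+Tree index has a leaf factory, so guard on the class.
        if is_chk and index_class is BTreeLikeIndex:
            index._leaf_factory = 'gcchk'  # stands in for _gcchk_factory
        return index

    assert make_index(BTreeLikeIndex, 'a.cix', is_chk=True)._leaf_factory == 'gcchk'
    assert make_index(PlainIndex, 'a.cix', is_chk=True).unlimited_cache is True
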
@@ -1709,7 +1705,7 @@
             sig_index = self._make_index(name, '.six', resume=True)
             if self.chk_index is not None:
                 chk_index = self._make_index(name, '.cix', resume=True,
-                                             unlimited_cache=True)
+                                             is_chk=True)
             else:
                 chk_index = None
             result = self.resumed_pack_factory(name, rev_index, inv_index,
@@ -1745,7 +1741,7 @@
         return self._index_class(self.transport, 'pack-names', None
                 ).iter_all_entries()
 
-    def _make_index(self, name, suffix, resume=False, unlimited_cache=False):
+    def _make_index(self, name, suffix, resume=False, is_chk=False):
         size_offset = self._suffix_offsets[suffix]
         index_name = name + suffix
         if resume:
@@ -1754,8 +1750,11 @@
         else:
             transport = self._index_transport
             index_size = self._names[name][size_offset]
-        return self._index_class(transport, index_name, index_size,
-                                 unlimited_cache=unlimited_cache)
+        index = self._index_class(transport, index_name, index_size,
+                                  unlimited_cache=is_chk)
+        if is_chk and self._index_class is btree_index.BTreeGraphIndex:
+            index._leaf_factory = btree_index._gcchk_factory
+        return index
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.
@@ -1840,14 +1839,22 @@
         self._remove_pack_indices(pack)
         self.packs.remove(pack)
 
-    def _remove_pack_indices(self, pack):
-        """Remove the indices for pack from the aggregated indices."""
-        self.revision_index.remove_index(pack.revision_index, pack)
-        self.inventory_index.remove_index(pack.inventory_index, pack)
-        self.text_index.remove_index(pack.text_index, pack)
-        self.signature_index.remove_index(pack.signature_index, pack)
-        if self.chk_index is not None:
-            self.chk_index.remove_index(pack.chk_index, pack)
+    def _remove_pack_indices(self, pack, ignore_missing=False):
+        """Remove the indices for pack from the aggregated indices.
+
+        :param ignore_missing: Suppress KeyErrors from calling remove_index.
+        """
+        for index_type in Pack.index_definitions.keys():
+            attr_name = index_type + '_index'
+            aggregate_index = getattr(self, attr_name)
+            if aggregate_index is not None:
+                pack_index = getattr(pack, attr_name)
+                try:
+                    aggregate_index.remove_index(pack_index)
+                except KeyError:
+                    if ignore_missing:
+                        continue
+                    raise
 
     def reset(self):
         """Clear all cached data."""
@@ -1939,7 +1946,7 @@
                     # disk index because the set values are the same, unless
                     # the only index shows up as deleted by the set difference
                     # - which it may. Until there is a specific test for this,
-                    # assume its broken. RBC 20071017.
+                    # assume it's broken. RBC 20071017.
                     self._remove_pack_from_memory(self.get_pack_by_name(name))
                     self._names[name] = sizes
                     self.get_pack_by_name(name)
@@ -2010,9 +2017,9 @@
         """
         # The ensure_loaded call is to handle the case where the first call
         # made involving the collection was to reload_pack_names, where we
-        # don't have a view of disk contents. Its a bit of a bandaid, and
-        # causes two reads of pack-names, but its a rare corner case not struck
-        # with regular push/pull etc.
+        # don't have a view of disk contents. It's a bit of a bandaid, and
+        # causes two reads of pack-names, but it's a rare corner case not
+        # struck with regular push/pull etc.
         first_read = self.ensure_loaded()
         if first_read:
             return True
@@ -2091,24 +2098,21 @@
         # FIXME: just drop the transient index.
         # forget what names there are
         if self._new_pack is not None:
-            try:
-                self._new_pack.abort()
-            finally:
-                # XXX: If we aborted while in the middle of finishing the write
-                # group, _remove_pack_indices can fail because the indexes are
-                # already gone.  If they're not there we shouldn't fail in this
-                # case.  -- mbp 20081113
-                self._remove_pack_indices(self._new_pack)
-                self._new_pack = None
+            operation = cleanup.OperationWithCleanups(self._new_pack.abort)
+            operation.add_cleanup(setattr, self, '_new_pack', None)
+            # If we aborted while in the middle of finishing the write
+            # group, _remove_pack_indices could fail because the indexes are
+            # already gone.  But if they're not there we shouldn't fail in
+            # this case, so we pass ignore_missing=True.
+            operation.add_cleanup(self._remove_pack_indices, self._new_pack,
+                ignore_missing=True)
+            operation.run_simple()
         for resumed_pack in self._resumed_packs:
-            try:
-                resumed_pack.abort()
-            finally:
-                # See comment in previous finally block.
-                try:
-                    self._remove_pack_indices(resumed_pack)
-                except KeyError:
-                    pass
+            operation = cleanup.OperationWithCleanups(resumed_pack.abort)
+            # See comment in previous finally block.
+            operation.add_cleanup(self._remove_pack_indices, resumed_pack,
+                ignore_missing=True)
+            operation.run_simple()
         del self._resumed_packs[:]
 
     def _remove_resumed_pack_indices(self):
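
cleanup.OperationWithCleanups replaces the nested try/finally blocks: the cleanups are registered up front and still run if the main callable raises, without a cleanup failure masking the original exception. A simplified model of the helper (the real one in bzrlib.cleanup also reports cleanup failures rather than silently swallowing them):

    class OperationWithCleanups(object):
        """Simplified model of bzrlib.cleanup.OperationWithCleanups."""

        def __init__(self, func):
            self.func = func
            self.cleanups = []

        def add_cleanup(self, func, *args, **kwargs):
            self.cleanups.append((func, args, kwargs))

        def run_simple(self):
            try:
                return self.func()
            finally:
                # Cleanups run whether or not self.func raised.
                for func, args, kwargs in self.cleanups:
                    try:
                        func(*args, **kwargs)
                    except Exception:
                        pass  # the real helper logs this instead
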
@@ -2340,6 +2344,10 @@
         return self._write_lock_count
 
     def lock_write(self, token=None):
+        """Lock the repository for writes.
+
+        :return: A bzrlib.repository.RepositoryWriteLockResult.
+        """
         locked = self.is_locked()
         if not self._write_lock_count and locked:
             raise errors.ReadOnlyError(self)
@@ -2354,8 +2362,13 @@
                 # Writes don't affect fallback repos
                 repo.lock_read()
             self._refresh_data()
+        return RepositoryWriteLockResult(self.unlock, None)
 
     def lock_read(self):
+        """Lock the repository for reads.
+
+        :return: A bzrlib.lock.LogicalLockResult.
+        """
         locked = self.is_locked()
         if self._write_lock_count:
             self._write_lock_count += 1
@@ -2368,6 +2381,7 @@
             for repo in self._fallback_repositories:
                 repo.lock_read()
             self._refresh_data()
+        return LogicalLockResult(self.unlock)
 
     def leave_lock_in_place(self):
         # not supported - raise an error
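
lock_write and lock_read now return result objects whose unlock attribute is the matching unlock call, so callers can chain locking and cleanup registration in one expression (e.g. self.add_cleanup(repo.lock_read().unlock)). A runnable sketch of the shape of that API:

    class LogicalLockResult(object):
        """Simplified model of bzrlib.lock.LogicalLockResult."""
        def __init__(self, unlock):
            self.unlock = unlock

    class RepoLike(object):
        def __init__(self):
            self.lock_count = 0
        def lock_read(self):
            self.lock_count += 1
            return LogicalLockResult(self.unlock)
        def unlock(self):
            self.lock_count -= 1

    repo = RepoLike()
    repo.lock_read().unlock()  # lock and unlock chained through the result
    assert repo.lock_count == 0
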
@@ -2378,13 +2392,13 @@
         raise NotImplementedError(self.dont_leave_lock_in_place)
 
     @needs_write_lock
-    def pack(self, hint=None):
+    def pack(self, hint=None, clean_obsolete_packs=False):
         """Compress the data within the repository.
 
         This will pack all the data to a single pack. In future it may
         recompress deltas or do other such expensive operations.
         """
-        self._pack_collection.pack(hint=hint)
+        self._pack_collection.pack(hint=hint, clean_obsolete_packs=clean_obsolete_packs)
 
     @needs_write_lock
     def reconcile(self, other=None, thorough=False):
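
The new keyword threads straight through from Repository.pack to the pack collection, so one call can both repack and empty the obsolete_packs directory. A sketch of the pass-through, with stand-in classes:

    class CollectionLike(object):
        def __init__(self):
            self.cleared = False
        def pack(self, hint=None, clean_obsolete_packs=False):
            # ... repack everything ...
            if clean_obsolete_packs:
                self._clear_obsolete_packs()
        def _clear_obsolete_packs(self):
            self.cleared = True

    class RepoLike(object):
        def __init__(self):
            self._pack_collection = CollectionLike()
        def pack(self, hint=None, clean_obsolete_packs=False):
            # the keyword just threads through to the collection
            self._pack_collection.pack(hint=hint,
                clean_obsolete_packs=clean_obsolete_packs)

    repo = RepoLike()
    repo.pack(clean_obsolete_packs=True)
    assert repo._pack_collection.cleared
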
@@ -2546,7 +2560,9 @@
         utf8_files = [('format', self.get_format_string())]
 
         self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
-        return self.open(a_bzrdir=a_bzrdir, _found=True)
+        repository = self.open(a_bzrdir=a_bzrdir, _found=True)
+        self._run_post_repo_init_hooks(repository, a_bzrdir, shared)
+        return repository
 
     def open(self, a_bzrdir, _found=False, _override_transport=None):
         """See RepositoryFormat.open().
@@ -2615,6 +2631,7 @@
     repository_class = KnitPackRepository
     _commit_builder_class = PackRootCommitBuilder
     rich_root_data = True
+    experimental = True
     supports_tree_reference = True
     @property
     def _serializer(self):
@@ -2814,8 +2831,8 @@
     _commit_builder_class = PackCommitBuilder
     supports_external_lookups = True
     # What index classes to use
-    index_builder_class = BTreeBuilder
-    index_class = BTreeGraphIndex
+    index_builder_class = btree_index.BTreeBuilder
+    index_class = btree_index.BTreeGraphIndex
 
     @property
     def _serializer(self):
@@ -2850,8 +2867,8 @@
     supports_tree_reference = False # no subtrees
     supports_external_lookups = True
     # What index classes to use
-    index_builder_class = BTreeBuilder
-    index_class = BTreeGraphIndex
+    index_builder_class = btree_index.BTreeBuilder
+    index_class = btree_index.BTreeGraphIndex
 
     @property
     def _serializer(self):
@@ -2877,22 +2894,21 @@
 class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
     """A subtrees development repository.
 
-    This format should be retained until the second release after bzr 1.7.
+    This format should be retained in 2.3, to provide an upgrade path from this
+    to RepositoryFormat2aSubtree.  It can be removed in later releases.
 
     1.6.1-subtree[as it might have been] with B+Tree indices.
-
-    This is [now] retained until we have a CHK based subtree format in
-    development.
     """
 
     repository_class = KnitPackRepository
     _commit_builder_class = PackRootCommitBuilder
     rich_root_data = True
+    experimental = True
     supports_tree_reference = True
     supports_external_lookups = True
     # What index classes to use
-    index_builder_class = BTreeBuilder
-    index_class = BTreeGraphIndex
+    index_builder_class = btree_index.BTreeBuilder
+    index_class = btree_index.BTreeGraphIndex
 
     @property
     def _serializer(self):
@@ -2900,7 +2916,7 @@
 
     def _get_matching_bzrdir(self):
         return bzrdir.format_registry.make_bzrdir(
-            'development-subtree')
+            'development5-subtree')
 
     def _ignore_setting_bzrdir(self, format):
         pass