~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/pack_repo.py

merge 2.0 branch rev 4647

Show diffs side-by-side

added

removed

Lines of Context:
36
36
    )
37
37
from bzrlib.index import (
38
38
    CombinedGraphIndex,
39
 
    GraphIndex,
40
 
    GraphIndexBuilder,
41
39
    GraphIndexPrefixAdapter,
42
 
    InMemoryGraphIndex,
43
40
    )
44
41
from bzrlib.knit import (
45
42
    KnitPlainFactory,
55
52
    lockable_files,
56
53
    lockdir,
57
54
    revision as _mod_revision,
58
 
    symbol_versioning,
59
55
    )
60
56
 
61
57
from bzrlib.decorators import needs_write_lock
73
69
    MetaDirRepositoryFormat,
74
70
    RepositoryFormat,
75
71
    RootCommitBuilder,
 
72
    StreamSource,
76
73
    )
77
 
import bzrlib.revision as _mod_revision
78
74
from bzrlib.trace import (
79
75
    mutter,
80
76
    warning,
312
308
 
313
309
    def finish(self):
314
310
        self._check_references()
315
 
        new_name = '../packs/' + self.file_name()
316
 
        self.upload_transport.rename(self.file_name(), new_name)
317
311
        index_types = ['revision', 'inventory', 'text', 'signature']
318
312
        if self.chk_index is not None:
319
313
            index_types.append('chk')
322
316
            new_name = '../indices/' + old_name
323
317
            self.upload_transport.rename(old_name, new_name)
324
318
            self._replace_index_with_readonly(index_type)
 
319
        new_name = '../packs/' + self.file_name()
 
320
        self.upload_transport.rename(self.file_name(), new_name)
325
321
        self._state = 'finished'
326
322
 
327
323
    def _get_external_refs(self, index):
426
422
        self._writer.begin()
427
423
        # what state is the pack in? (open, finished, aborted)
428
424
        self._state = 'open'
 
425
        # no name until we finish writing the content
 
426
        self.name = None
429
427
 
430
428
    def abort(self):
431
429
        """Cancel creating this pack."""
452
450
            self.signature_index.key_count() or
453
451
            (self.chk_index is not None and self.chk_index.key_count()))
454
452
 
 
453
    def finish_content(self):
 
454
        if self.name is not None:
 
455
            return
 
456
        self._writer.end()
 
457
        if self._buffer[1]:
 
458
            self._write_data('', flush=True)
 
459
        self.name = self._hash.hexdigest()
 
460
 
455
461
    def finish(self, suspend=False):
456
462
        """Finish the new pack.
457
463
 
463
469
         - stores the index size tuple for the pack in the index_sizes
464
470
           attribute.
465
471
        """
466
 
        self._writer.end()
467
 
        if self._buffer[1]:
468
 
            self._write_data('', flush=True)
469
 
        self.name = self._hash.hexdigest()
 
472
        self.finish_content()
470
473
        if not suspend:
471
474
            self._check_references()
472
475
        # write indices
1458
1461
        in synchronisation with certain steps. Otherwise the names collection
1459
1462
        is not flushed.
1460
1463
 
1461
 
        :return: True if packing took place.
 
1464
        :return: Something evaluating true if packing took place.
1462
1465
        """
1463
1466
        while True:
1464
1467
            try:
1465
1468
                return self._do_autopack()
1466
 
            except errors.RetryAutopack, e:
 
1469
            except errors.RetryAutopack:
1467
1470
                # If we get a RetryAutopack exception, we should abort the
1468
1471
                # current action, and retry.
1469
1472
                pass
1473
1476
        total_revisions = self.revision_index.combined_index.key_count()
1474
1477
        total_packs = len(self._names)
1475
1478
        if self._max_pack_count(total_revisions) >= total_packs:
1476
 
            return False
 
1479
            return None
1477
1480
        # determine which packs need changing
1478
1481
        pack_distribution = self.pack_distribution(total_revisions)
1479
1482
        existing_packs = []
1501
1504
            'containing %d revisions. Packing %d files into %d affecting %d'
1502
1505
            ' revisions', self, total_packs, total_revisions, num_old_packs,
1503
1506
            num_new_packs, num_revs_affected)
1504
 
        self._execute_pack_operations(pack_operations,
 
1507
        result = self._execute_pack_operations(pack_operations,
1505
1508
                                      reload_func=self._restart_autopack)
1506
1509
        mutter('Auto-packing repository %s completed', self)
1507
 
        return True
 
1510
        return result
1508
1511
 
1509
1512
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
1510
1513
                                 reload_func=None):
1512
1515
 
1513
1516
        :param pack_operations: A list of [revision_count, packs_to_combine].
1514
1517
        :param _packer_class: The class of packer to use (default: Packer).
1515
 
        :return: None.
 
1518
        :return: The new pack names.
1516
1519
        """
1517
1520
        for revision_count, packs in pack_operations:
1518
1521
            # we may have no-ops from the setup logic
1534
1537
                self._remove_pack_from_memory(pack)
1535
1538
        # record the newly available packs and stop advertising the old
1536
1539
        # packs
1537
 
        self._save_pack_names(clear_obsolete_packs=True)
 
1540
        result = self._save_pack_names(clear_obsolete_packs=True)
1538
1541
        # Move the old packs out of the way now they are no longer referenced.
1539
1542
        for revision_count, packs in pack_operations:
1540
1543
            self._obsolete_packs(packs)
 
1544
        return result
1541
1545
 
1542
1546
    def _flush_new_pack(self):
1543
1547
        if self._new_pack is not None:
1553
1557
 
1554
1558
    def _already_packed(self):
1555
1559
        """Is the collection already packed?"""
1556
 
        return len(self._names) < 2
 
1560
        return not (self.repo._format.pack_compresses or (len(self._names) > 1))
1557
1561
 
1558
 
    def pack(self):
 
1562
    def pack(self, hint=None):
1559
1563
        """Pack the pack collection totally."""
1560
1564
        self.ensure_loaded()
1561
1565
        total_packs = len(self._names)
1562
1566
        if self._already_packed():
1563
 
            # This is arguably wrong because we might not be optimal, but for
1564
 
            # now lets leave it in. (e.g. reconcile -> one pack. But not
1565
 
            # optimal.
1566
1567
            return
1567
1568
        total_revisions = self.revision_index.combined_index.key_count()
1568
1569
        # XXX: the following may want to be a class, to pack with a given
1569
1570
        # policy.
1570
1571
        mutter('Packing repository %s, which has %d pack files, '
1571
 
            'containing %d revisions into 1 packs.', self, total_packs,
1572
 
            total_revisions)
 
1572
            'containing %d revisions with hint %r.', self, total_packs,
 
1573
            total_revisions, hint)
1573
1574
        # determine which packs need changing
1574
 
        pack_distribution = [1]
1575
1575
        pack_operations = [[0, []]]
1576
1576
        for pack in self.all_packs():
1577
 
            pack_operations[-1][0] += pack.get_revision_count()
1578
 
            pack_operations[-1][1].append(pack)
 
1577
            if hint is None or pack.name in hint:
 
1578
                # Either no hint was provided (so we are packing everything),
 
1579
                # or this pack was included in the hint.
 
1580
                pack_operations[-1][0] += pack.get_revision_count()
 
1581
                pack_operations[-1][1].append(pack)
1579
1582
        self._execute_pack_operations(pack_operations, OptimisingPacker)
1580
1583
 
1581
1584
    def plan_autopack_combinations(self, existing_packs, pack_distribution):
1937
1940
 
1938
1941
        :param clear_obsolete_packs: If True, clear out the contents of the
1939
1942
            obsolete_packs directory.
 
1943
        :return: A list of the names saved that were not previously on disk.
1940
1944
        """
1941
1945
        self.lock_names()
1942
1946
        try:
1957
1961
            self._unlock_names()
1958
1962
        # synchronise the memory packs list with what we just wrote:
1959
1963
        self._syncronize_pack_names_from_disk_nodes(disk_nodes)
 
1964
        return [new_node[0][0] for new_node in new_nodes]
1960
1965
 
1961
1966
    def reload_pack_names(self):
1962
1967
        """Sync our pack listing with what is present in the repository.
2096
2101
            if not self.autopack():
2097
2102
                # when autopack takes no steps, the names list is still
2098
2103
                # unsaved.
2099
 
                self._save_pack_names()
 
2104
                return self._save_pack_names()
 
2105
        return []
2100
2106
 
2101
2107
    def _suspend_write_group(self):
2102
2108
        tokens = [pack.name for pack in self._resumed_packs]
2219
2225
        self.revisions._index._key_dependencies.refs.clear()
2220
2226
        self._pack_collection._abort_write_group()
2221
2227
 
2222
 
    def _find_inconsistent_revision_parents(self):
2223
 
        """Find revisions with incorrectly cached parents.
2224
 
 
2225
 
        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
2226
 
            parents-in-revision).
2227
 
        """
2228
 
        if not self.is_locked():
2229
 
            raise errors.ObjectNotLocked(self)
2230
 
        pb = ui.ui_factory.nested_progress_bar()
2231
 
        result = []
2232
 
        try:
2233
 
            revision_nodes = self._pack_collection.revision_index \
2234
 
                .combined_index.iter_all_entries()
2235
 
            index_positions = []
2236
 
            # Get the cached index values for all revisions, and also the
2237
 
            # location in each index of the revision text so we can perform
2238
 
            # linear IO.
2239
 
            for index, key, value, refs in revision_nodes:
2240
 
                node = (index, key, value, refs)
2241
 
                index_memo = self.revisions._index._node_to_position(node)
2242
 
                if index_memo[0] != index:
2243
 
                    raise AssertionError('%r != %r' % (index_memo[0], index))
2244
 
                index_positions.append((index_memo, key[0],
2245
 
                                       tuple(parent[0] for parent in refs[0])))
2246
 
                pb.update("Reading revision index", 0, 0)
2247
 
            index_positions.sort()
2248
 
            batch_size = 1000
2249
 
            pb.update("Checking cached revision graph", 0,
2250
 
                      len(index_positions))
2251
 
            for offset in xrange(0, len(index_positions), 1000):
2252
 
                pb.update("Checking cached revision graph", offset)
2253
 
                to_query = index_positions[offset:offset + batch_size]
2254
 
                if not to_query:
2255
 
                    break
2256
 
                rev_ids = [item[1] for item in to_query]
2257
 
                revs = self.get_revisions(rev_ids)
2258
 
                for revision, item in zip(revs, to_query):
2259
 
                    index_parents = item[2]
2260
 
                    rev_parents = tuple(revision.parent_ids)
2261
 
                    if index_parents != rev_parents:
2262
 
                        result.append((revision.revision_id, index_parents,
2263
 
                                       rev_parents))
2264
 
        finally:
2265
 
            pb.finished()
2266
 
        return result
 
2228
    def _get_source(self, to_format):
 
2229
        if to_format.network_name() == self._format.network_name():
 
2230
            return KnitPackStreamSource(self, to_format)
 
2231
        return super(KnitPackRepository, self)._get_source(to_format)
2267
2232
 
2268
2233
    def _make_parents_provider(self):
2269
2234
        return graph.CachingParentsProvider(self)
2342
2307
        raise NotImplementedError(self.dont_leave_lock_in_place)
2343
2308
 
2344
2309
    @needs_write_lock
2345
 
    def pack(self):
 
2310
    def pack(self, hint=None):
2346
2311
        """Compress the data within the repository.
2347
2312
 
2348
2313
        This will pack all the data to a single pack. In future it may
2349
2314
        recompress deltas or do other such expensive operations.
2350
2315
        """
2351
 
        self._pack_collection.pack()
 
2316
        self._pack_collection.pack(hint=hint)
2352
2317
 
2353
2318
    @needs_write_lock
2354
2319
    def reconcile(self, other=None, thorough=False):
2384
2349
                repo.unlock()
2385
2350
 
2386
2351
 
 
2352
class KnitPackStreamSource(StreamSource):
 
2353
    """A StreamSource used to transfer data between same-format KnitPack repos.
 
2354
 
 
2355
    This source assumes:
 
2356
        1) Same serialization format for all objects
 
2357
        2) Same root information
 
2358
        3) XML format inventories
 
2359
        4) Atomic inserts (so we can stream inventory texts before text
 
2360
           content)
 
2361
        5) No chk_bytes
 
2362
    """
 
2363
 
 
2364
    def __init__(self, from_repository, to_format):
 
2365
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
 
2366
        self._text_keys = None
 
2367
        self._text_fetch_order = 'unordered'
 
2368
 
 
2369
    def _get_filtered_inv_stream(self, revision_ids):
 
2370
        from_repo = self.from_repository
 
2371
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
 
2372
        parent_keys = [(p,) for p in parent_ids]
 
2373
        find_text_keys = from_repo._find_text_key_references_from_xml_inventory_lines
 
2374
        parent_text_keys = set(find_text_keys(
 
2375
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
 
2376
        content_text_keys = set()
 
2377
        knit = KnitVersionedFiles(None, None)
 
2378
        factory = KnitPlainFactory()
 
2379
        def find_text_keys_from_content(record):
 
2380
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
 
2381
                raise ValueError("Unknown content storage kind for"
 
2382
                    " inventory text: %s" % (record.storage_kind,))
 
2383
            # It's a knit record, it has a _raw_record field (even if it was
 
2384
            # reconstituted from a network stream).
 
2385
            raw_data = record._raw_record
 
2386
            # read the entire thing
 
2387
            revision_id = record.key[-1]
 
2388
            content, _ = knit._parse_record(revision_id, raw_data)
 
2389
            if record.storage_kind == 'knit-delta-gz':
 
2390
                line_iterator = factory.get_linedelta_content(content)
 
2391
            elif record.storage_kind == 'knit-ft-gz':
 
2392
                line_iterator = factory.get_fulltext_content(content)
 
2393
            content_text_keys.update(find_text_keys(
 
2394
                [(line, revision_id) for line in line_iterator]))
 
2395
        revision_keys = [(r,) for r in revision_ids]
 
2396
        def _filtered_inv_stream():
 
2397
            source_vf = from_repo.inventories
 
2398
            stream = source_vf.get_record_stream(revision_keys,
 
2399
                                                 'unordered', False)
 
2400
            for record in stream:
 
2401
                if record.storage_kind == 'absent':
 
2402
                    raise errors.NoSuchRevision(from_repo, record.key)
 
2403
                find_text_keys_from_content(record)
 
2404
                yield record
 
2405
            self._text_keys = content_text_keys - parent_text_keys
 
2406
        return ('inventories', _filtered_inv_stream())
 
2407
 
 
2408
    def _get_text_stream(self):
 
2409
        # Note: We know we don't have to handle adding root keys, because both
 
2410
        # the source and target are the identical network name.
 
2411
        text_stream = self.from_repository.texts.get_record_stream(
 
2412
                        self._text_keys, self._text_fetch_order, False)
 
2413
        return ('texts', text_stream)
 
2414
 
 
2415
    def get_stream(self, search):
 
2416
        revision_ids = search.get_keys()
 
2417
        for stream_info in self._fetch_revision_texts(revision_ids):
 
2418
            yield stream_info
 
2419
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
 
2420
        yield self._get_filtered_inv_stream(revision_ids)
 
2421
        yield self._get_text_stream()
 
2422
 
 
2423
 
 
2424
 
2387
2425
class RepositoryFormatPack(MetaDirRepositoryFormat):
2388
2426
    """Format logic for pack structured repositories.
2389
2427
 
2491
2529
        """See RepositoryFormat.get_format_description()."""
2492
2530
        return "Packs containing knits without subtree support"
2493
2531
 
2494
 
    def check_conversion_target(self, target_format):
2495
 
        pass
2496
 
 
2497
2532
 
2498
2533
class RepositoryFormatKnitPack3(RepositoryFormatPack):
2499
2534
    """A subtrees parameterized Pack repository.
2525
2560
 
2526
2561
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2527
2562
 
2528
 
    def check_conversion_target(self, target_format):
2529
 
        if not target_format.rich_root_data:
2530
 
            raise errors.BadConversionTarget(
2531
 
                'Does not support rich root data.', target_format)
2532
 
        if not getattr(target_format, 'supports_tree_reference', False):
2533
 
            raise errors.BadConversionTarget(
2534
 
                'Does not support nested trees', target_format)
2535
 
 
2536
2563
    def get_format_string(self):
2537
2564
        """See RepositoryFormat.get_format_string()."""
2538
2565
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
2571
2598
 
2572
2599
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2573
2600
 
2574
 
    def check_conversion_target(self, target_format):
2575
 
        if not target_format.rich_root_data:
2576
 
            raise errors.BadConversionTarget(
2577
 
                'Does not support rich root data.', target_format)
2578
 
 
2579
2601
    def get_format_string(self):
2580
2602
        """See RepositoryFormat.get_format_string()."""
2581
2603
        return ("Bazaar pack repository format 1 with rich root"
2622
2644
        """See RepositoryFormat.get_format_description()."""
2623
2645
        return "Packs 5 (adds stacking support, requires bzr 1.6)"
2624
2646
 
2625
 
    def check_conversion_target(self, target_format):
2626
 
        pass
2627
 
 
2628
2647
 
2629
2648
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
2630
2649
    """A repository with rich roots and stacking.
2657
2676
 
2658
2677
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2659
2678
 
2660
 
    def check_conversion_target(self, target_format):
2661
 
        if not target_format.rich_root_data:
2662
 
            raise errors.BadConversionTarget(
2663
 
                'Does not support rich root data.', target_format)
2664
 
 
2665
2679
    def get_format_string(self):
2666
2680
        """See RepositoryFormat.get_format_string()."""
2667
2681
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
2708
2722
 
2709
2723
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2710
2724
 
2711
 
    def check_conversion_target(self, target_format):
2712
 
        if not target_format.rich_root_data:
2713
 
            raise errors.BadConversionTarget(
2714
 
                'Does not support rich root data.', target_format)
2715
 
 
2716
2725
    def get_format_string(self):
2717
2726
        """See RepositoryFormat.get_format_string()."""
2718
2727
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
2756
2765
        """See RepositoryFormat.get_format_description()."""
2757
2766
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"
2758
2767
 
2759
 
    def check_conversion_target(self, target_format):
2760
 
        pass
2761
 
 
2762
2768
 
2763
2769
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
2764
2770
    """A repository with rich roots, no subtrees, stacking and btree indexes.
2788
2794
 
2789
2795
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2790
2796
 
2791
 
    def check_conversion_target(self, target_format):
2792
 
        if not target_format.rich_root_data:
2793
 
            raise errors.BadConversionTarget(
2794
 
                'Does not support rich root data.', target_format)
2795
 
 
2796
2797
    def get_format_string(self):
2797
2798
        """See RepositoryFormat.get_format_string()."""
2798
2799
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"
2834
2835
 
2835
2836
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2836
2837
 
2837
 
    def check_conversion_target(self, target_format):
2838
 
        if not target_format.rich_root_data:
2839
 
            raise errors.BadConversionTarget(
2840
 
                'Does not support rich root data.', target_format)
2841
 
        if not getattr(target_format, 'supports_tree_reference', False):
2842
 
            raise errors.BadConversionTarget(
2843
 
                'Does not support nested trees', target_format)
2844
 
 
2845
2838
    def get_format_string(self):
2846
2839
        """See RepositoryFormat.get_format_string()."""
2847
2840
        return ("Bazaar development format 2 with subtree support "