~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/pack_repo.py

Committer: Jelmer Vernooij
Date: 2011-05-10 07:46:15 UTC
mfrom: (5844 +trunk)
mto: This revision was merged to the branch mainline in revision 5845.
Revision ID: jelmer@samba.org-20110510074615-eptod049ndjxc4i7

Merge bzr.dev.

files added:
bzrlib/plugins/changelog_merge

bzrlib/plugins/changelog_merge/__init__.py

bzrlib/plugins/changelog_merge/changelog_merge.py

bzrlib/plugins/changelog_merge/tests

bzrlib/plugins/changelog_merge/tests/__init__.py

bzrlib/plugins/changelog_merge/tests/test_changelog_merge.py

bzrlib/repofmt/knitpack_repo.py

bzrlib/tests/per_controldir/test_format.py

bzrlib/tests/per_interbranch/test_fetch.py

bzrlib/tests/per_repository_reference/test__make_parents_provider.py

bzrlib/tests/per_repository_vf/test_check.py

bzrlib/tests/per_repository_vf/test_fetch.py

bzrlib/tests/per_repository_vf/test_reconcile.py

bzrlib/tests/per_tree/test_export.py

bzrlib/vf_repository.py

bzrlib/workingtree_3.py

doc/developers/configuration.txt

files removed:
bzrlib/tests/per_interbranch/test_update_revisions.py

files renamed:
bzrlib/tests/per_repository/helpers.py => bzrlib/tests/per_repository_vf/helpers.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py => bzrlib/tests/per_repository_vf/test_add_inventory_by_delta.py

files modified:
Makefile

bzrlib/__init__.py

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/branch.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/controldir.py

bzrlib/crash.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fetch.py

bzrlib/filters/__init__.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/conflict-types.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/library_state.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/news_merge/__init__.py

bzrlib/plugins/weave_fmt/__init__.py

bzrlib/plugins/weave_fmt/branch.py

bzrlib/plugins/weave_fmt/bzrdir.py

bzrlib/plugins/weave_fmt/repository.py

bzrlib/plugins/weave_fmt/test_bzrdir.py

bzrlib/plugins/weave_fmt/workingtree.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisiontree.py

bzrlib/rules.py

bzrlib/sign_my_commits.py

bzrlib/smart/branch.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/status.py

bzrlib/switch.py

bzrlib/testament.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_repair_workingtree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/doc_generate/builders/test_texinfo.py

bzrlib/tests/features.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_controldir/__init__.py

bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/per_controldir_colo/test_supported.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_vf/__init__.py

bzrlib/tests/per_repository_vf/test_repository.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_check_state.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_symlinks.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_controldir.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_export.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_mergetools.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_script.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_store.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_treeshape.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/transport_util.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/bug-handling.txt

doc/developers/contribution-quickstart.txt

doc/developers/dirstate.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/_static/en/bzr-en-quick-reference.png

doc/en/_static/en/bzr-en-quick-reference.svg

doc/en/admin-guide/simple-setups.txt

doc/en/mini-tutorial/index.txt

doc/en/release-notes/bzr-2.3.txt

doc/en/release-notes/bzr-2.4.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/using_checkouts.txt

doc/en/whats-new/whats-new-in-2.4.txt

tools/win32/py2exe_boot_common.py

Show diffs side-by-side

added added

removed removed

bzrlib/repofmt/pack_repo.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

osutils,

pack,

transactions,

tsort,

ui,

xml5,

xml6,

xml7,

)

from bzrlib.index import (

CombinedGraphIndex,

GraphIndexPrefixAdapter,

)

from bzrlib.knit import (

KnitPlainFactory,

KnitVersionedFiles,

_KnitGraphIndex,

_DirectPackAccess,

)

from bzrlib import tsort

""")

from bzrlib import (

bzrdir,

btree_index,

errors,

lockable_files,

lockdir,

revision as _mod_revision,

)

from bzrlib.decorators import needs_write_lock, only_raises

from bzrlib.index import (

GraphIndex,

InMemoryGraphIndex,

from bzrlib.decorators import (

needs_read_lock,

needs_write_lock,

only_raises,

)

from bzrlib.lock import LogicalLockResult

from bzrlib.repofmt.knitrepo import KnitRepository

from bzrlib.repository import (

CommitBuilder,

MetaDirRepositoryFormat,

_LazyListJoin,

MetaDirRepository,

RepositoryFormat,

RepositoryWriteLockResult,

RootCommitBuilder,

StreamSource,

)

from bzrlib.vf_repository import (

MetaDirVersionedFileRepository,

MetaDirVersionedFileRepositoryFormat,

VersionedFileCommitBuilder,

VersionedFileRootCommitBuilder,

)

from bzrlib.trace import (

mutter,

)

class PackCommitBuilder(CommitBuilder):

"""A subclass of CommitBuilder to add texts with pack semantics.

class PackCommitBuilder(VersionedFileCommitBuilder):

"""Subclass of VersionedFileCommitBuilder to add texts with pack semantics.

Specifically this uses one knit object rather than one knit object per

added text, reducing memory and object pressure.

def __init__(self, repository, parents, config, timestamp=None,

timezone=None, committer=None, revprops=None,

revision_id=None):

CommitBuilder.__init__(self, repository, parents, config,

revision_id=None, lossy=False):

VersionedFileCommitBuilder.__init__(self, repository, parents, config,

timestamp=timestamp, timezone=timezone, committer=committer,

revprops=revprops, revision_id=revision_id)

revprops=revprops, revision_id=revision_id, lossy=lossy)

self._file_graph = graph.Graph(

repository._pack_collection.text_index.combined_index)

return set([key[1] for key in self._file_graph.heads(keys)])

100

101

102

class PackRootCommitBuilder(RootCommitBuilder):

class PackRootCommitBuilder(VersionedFileRootCommitBuilder):

103

"""A subclass of RootCommitBuilder to add texts with pack semantics.

104

105

Specifically this uses one knit object rather than one knit object per

108

100

109

101

def __init__(self, repository, parents, config, timestamp=None,

110

102

timezone=None, committer=None, revprops=None,

111

revision_id=None):

112

CommitBuilder.__init__(self, repository, parents, config,

113

timestamp=timestamp, timezone=timezone, committer=committer,

114

revprops=revprops, revision_id=revision_id)

103

revision_id=None, lossy=False):

104

super(PackRootCommitBuilder, self).__init__(repository, parents,

105

config, timestamp=timestamp, timezone=timezone,

106

committer=committer, revprops=revprops, revision_id=revision_id,

107

lossy=lossy)

115

108

self._file_graph = graph.Graph(

116

109

repository._pack_collection.text_index.combined_index)

117

110

673

666

# What text keys to copy. None for 'all texts'. This is set by

674

667

# _copy_inventory_texts

675

668

self._text_filter = None

676

self._extra_init()

677

678

def _extra_init(self):

679

"""A template hook to allow extending the constructor trivially."""

680

681

def _pack_map_and_index_list(self, index_attribute):

682

"""Convert a list of packs to an index pack map and index list.

683

684

:param index_attribute: The attribute that the desired index is found

685

on.

686

:return: A tuple (map, list) where map contains the dict from

687

index:pack_tuple, and list contains the indices in the preferred

688

access order.

689

"""

690

indices = []

691

pack_map = {}

692

for pack_obj in self.packs:

693

index = getattr(pack_obj, index_attribute)

694

indices.append(index)

695

pack_map[index] = pack_obj

696

return pack_map, indices

697

698

def _index_contents(self, indices, key_filter=None):

699

"""Get an iterable of the index contents from a pack_map.

700

701

:param indices: The list of indices to query

702

:param key_filter: An optional filter to limit the keys returned.

703

"""

704

all_index = CombinedGraphIndex(indices)

705

if key_filter is None:

706

return all_index.iter_all_entries()

707

else:

708

return all_index.iter_entries(key_filter)

709

669

710

670

def pack(self, pb=None):

711

671

"""Create a new pack by reading data from other packs.

760

720

new_pack.signature_index.set_optimize(combine_backing_indices=False)

761

721

return new_pack

762

722

763

def _update_pack_order(self, entries, index_to_pack_map):

764

"""Determine how we want our packs to be ordered.

765

766

This changes the sort order of the self.packs list so that packs unused

767

by 'entries' will be at the end of the list, so that future requests

768

can avoid probing them. Used packs will be at the front of the

769

self.packs list, in the order of their first use in 'entries'.

770

771

:param entries: A list of (index, ...) tuples

772

:param index_to_pack_map: A mapping from index objects to pack objects.

773

"""

774

packs = []

775

seen_indexes = set()

776

for entry in entries:

777

index = entry[0]

778

if index not in seen_indexes:

779

packs.append(index_to_pack_map[index])

780

seen_indexes.add(index)

781

if len(packs) == len(self.packs):

782

if 'pack' in debug.debug_flags:

783

mutter('Not changing pack list, all packs used.')

784

return

785

seen_packs = set(packs)

786

for pack in self.packs:

787

if pack not in seen_packs:

788

packs.append(pack)

789

seen_packs.add(pack)

790

if 'pack' in debug.debug_flags:

791

old_names = [p.access_tuple()[1] for p in self.packs]

792

new_names = [p.access_tuple()[1] for p in packs]

793

mutter('Reordering packs\nfrom: %s\n to: %s',

794

old_names, new_names)

795

self.packs = packs

796

797

723

def _copy_revision_texts(self):

798

724

"""Copy revision data to the new pack."""

799

# select revisions

800

if self.revision_ids:

801

revision_keys = [(revision_id,) for revision_id in self.revision_ids]

802

else:

803

revision_keys = None

804

# select revision keys

805

revision_index_map, revision_indices = self._pack_map_and_index_list(

806

'revision_index')

807

revision_nodes = self._index_contents(revision_indices, revision_keys)

808

revision_nodes = list(revision_nodes)

809

self._update_pack_order(revision_nodes, revision_index_map)

810

# copy revision keys and adjust values

811

self.pb.update("Copying revision texts", 1)

812

total_items, readv_group_iter = self._revision_node_readv(revision_nodes)

813

list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,

814

self.new_pack.revision_index, readv_group_iter, total_items))

815

if 'pack' in debug.debug_flags:

816

mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',

817

time.ctime(), self._pack_collection._upload_transport.base,

818

self.new_pack.random_name,

819

self.new_pack.revision_index.key_count(),

820

time.time() - self.new_pack.start_time)

821

self._revision_keys = revision_keys

725

raise NotImplementedError(self._copy_revision_texts)

822

726

823

727

def _copy_inventory_texts(self):

824

728

"""Copy the inventory texts to the new pack.

827

731

828

732

Sets self._text_filter appropriately.

829

733

"""

830

# select inventory keys

831

inv_keys = self._revision_keys # currently the same keyspace, and note that

832

# querying for keys here could introduce a bug where an inventory item

833

# is missed, so do not change it to query separately without cross

834

# checking like the text key check below.

835

inventory_index_map, inventory_indices = self._pack_map_and_index_list(

836

'inventory_index')

837

inv_nodes = self._index_contents(inventory_indices, inv_keys)

838

# copy inventory keys and adjust values

839

# XXX: Should be a helper function to allow different inv representation

840

# at this point.

841

self.pb.update("Copying inventory texts", 2)

842

total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)

843

# Only grab the output lines if we will be processing them

844

output_lines = bool(self.revision_ids)

845

inv_lines = self._copy_nodes_graph(inventory_index_map,

846

self.new_pack._writer, self.new_pack.inventory_index,

847

readv_group_iter, total_items, output_lines=output_lines)

848

if self.revision_ids:

849

self._process_inventory_lines(inv_lines)

850

else:

851

# eat the iterator to cause it to execute.

852

list(inv_lines)

853

self._text_filter = None

854

if 'pack' in debug.debug_flags:

855

mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',

856

time.ctime(), self._pack_collection._upload_transport.base,

857

self.new_pack.random_name,

858

self.new_pack.inventory_index.key_count(),

859

time.time() - self.new_pack.start_time)

734

raise NotImplementedError(self._copy_inventory_texts)

860

735

861

736

def _copy_text_texts(self):

862

# select text keys

863

text_index_map, text_nodes = self._get_text_nodes()

864

if self._text_filter is not None:

865

# We could return the keys copied as part of the return value from

866

# _copy_nodes_graph but this doesn't work all that well with the

867

# need to get line output too, so we check separately, and as we're

868

# going to buffer everything anyway, we check beforehand, which

869

# saves reading knit data over the wire when we know there are

870

# missing records.

871

text_nodes = set(text_nodes)

872

present_text_keys = set(_node[1] for _node in text_nodes)

873

missing_text_keys = set(self._text_filter) - present_text_keys

874

if missing_text_keys:

875

# TODO: raise a specific error that can handle many missing

876

# keys.

877

mutter("missing keys during fetch: %r", missing_text_keys)

878

a_missing_key = missing_text_keys.pop()

879

raise errors.RevisionNotPresent(a_missing_key[1],

880

a_missing_key[0])

881

# copy text keys and adjust values

882

self.pb.update("Copying content texts", 3)

883

total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)

884

list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,

885

self.new_pack.text_index, readv_group_iter, total_items))

886

self._log_copied_texts()

737

raise NotImplementedError(self._copy_text_texts)

887

738

888

739

def _create_pack_from_packs(self):

889

self.pb.update("Opening pack", 0, 5)

890

self.new_pack = self.open_pack()

891

new_pack = self.new_pack

892

# buffer data - we won't be reading-back during the pack creation and

893

# this makes a significant difference on sftp pushes.

894

new_pack.set_write_cache_size(1024*1024)

895

if 'pack' in debug.debug_flags:

896

plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)

897

for a_pack in self.packs]

898

if self.revision_ids is not None:

899

rev_count = len(self.revision_ids)

900

else:

901

rev_count = 'all'

902

mutter('%s: create_pack: creating pack from source packs: '

903

'%s%s %s revisions wanted %s t=0',

904

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

905

plain_pack_list, rev_count)

906

self._copy_revision_texts()

907

self._copy_inventory_texts()

908

self._copy_text_texts()

909

# select signature keys

910

signature_filter = self._revision_keys # same keyspace

911

signature_index_map, signature_indices = self._pack_map_and_index_list(

912

'signature_index')

913

signature_nodes = self._index_contents(signature_indices,

914

signature_filter)

915

# copy signature keys and adjust values

916

self.pb.update("Copying signature texts", 4)

917

self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,

918

new_pack.signature_index)

919

if 'pack' in debug.debug_flags:

920

mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',

921

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

922

new_pack.signature_index.key_count(),

923

time.time() - new_pack.start_time)

924

# copy chk contents

925

# NB XXX: how to check CHK references are present? perhaps by yielding

926

# the items? How should that interact with stacked repos?

927

if new_pack.chk_index is not None:

928

self._copy_chks()

929

if 'pack' in debug.debug_flags:

930

mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',

931

time.ctime(), self._pack_collection._upload_transport.base,

932

new_pack.random_name,

933

new_pack.chk_index.key_count(),

934

time.time() - new_pack.start_time)

935

new_pack._check_references()

936

if not self._use_pack(new_pack):

937

new_pack.abort()

938

return None

939

self.pb.update("Finishing pack", 5)

940

new_pack.finish()

941

self._pack_collection.allocate(new_pack)

942

return new_pack

943

944

def _copy_chks(self, refs=None):

945

# XXX: Todo, recursive follow-pointers facility when fetching some

946

# revisions only.

947

chk_index_map, chk_indices = self._pack_map_and_index_list(

948

'chk_index')

949

chk_nodes = self._index_contents(chk_indices, refs)

950

new_refs = set()

951

# TODO: This isn't strictly tasteful as we are accessing some private

952

# variables (_serializer). Perhaps a better way would be to have

953

# Repository._deserialise_chk_node()

954

search_key_func = chk_map.search_key_registry.get(

955

self._pack_collection.repo._serializer.search_key_name)

956

def accumlate_refs(lines):

957

# XXX: move to a generic location

958

# Yay mismatch:

959

bytes = ''.join(lines)

960

node = chk_map._deserialise(bytes, ("unknown",), search_key_func)

961

new_refs.update(node.refs())

962

self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,

963

self.new_pack.chk_index, output_lines=accumlate_refs)

964

return new_refs

965

966

def _copy_nodes(self, nodes, index_map, writer, write_index,

967

output_lines=None):

968

"""Copy knit nodes between packs with no graph references.

969

970

:param output_lines: Output full texts of copied items.

971

"""

972

pb = ui.ui_factory.nested_progress_bar()

973

try:

974

return self._do_copy_nodes(nodes, index_map, writer,

975

write_index, pb, output_lines=output_lines)

976

finally:

977

pb.finished()

978

979

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,

980

output_lines=None):

981

# for record verification

982

knit = KnitVersionedFiles(None, None)

983

# plan a readv on each source pack:

984

# group by pack

985

nodes = sorted(nodes)

986

# how to map this into knit.py - or knit.py into this?

987

# we don't want the typical knit logic, we want grouping by pack

988

# at this point - perhaps a helper library for the following code

989

# duplication points?

990

request_groups = {}

991

for index, key, value in nodes:

992

if index not in request_groups:

993

request_groups[index] = []

994

request_groups[index].append((key, value))

995

record_index = 0

996

pb.update("Copied record", record_index, len(nodes))

997

for index, items in request_groups.iteritems():

998

pack_readv_requests = []

999

for key, value in items:

1000

# ---- KnitGraphIndex.get_position

1001

bits = value[1:].split(' ')

1002

offset, length = int(bits[0]), int(bits[1])

1003

pack_readv_requests.append((offset, length, (key, value[0])))

1004

# linear scan up the pack

1005

pack_readv_requests.sort()

1006

# copy the data

1007

pack_obj = index_map[index]

1008

transport, path = pack_obj.access_tuple()

1009

try:

1010

reader = pack.make_readv_reader(transport, path,

1011

[offset[0:2] for offset in pack_readv_requests])

1012

except errors.NoSuchFile:

1013

if self._reload_func is not None:

1014

self._reload_func()

1015

raise

1016

for (names, read_func), (_1, _2, (key, eol_flag)) in \

1017

izip(reader.iter_records(), pack_readv_requests):

1018

raw_data = read_func(None)

1019

# check the header only

1020

if output_lines is not None:

1021

output_lines(knit._parse_record(key[-1], raw_data)[0])

1022

else:

1023

df, _ = knit._parse_record_header(key, raw_data)

1024

df.close()

1025

pos, size = writer.add_bytes_record(raw_data, names)

1026

write_index.add_node(key, eol_flag + "%d %d" % (pos, size))

1027

pb.update("Copied record", record_index)

1028

record_index += 1

1029

1030

def _copy_nodes_graph(self, index_map, writer, write_index,

1031

readv_group_iter, total_items, output_lines=False):

1032

"""Copy knit nodes between packs.

1033

1034

:param output_lines: Return lines present in the copied data as

1035

an iterator of line,version_id.

1036

"""

1037

pb = ui.ui_factory.nested_progress_bar()

1038

try:

1039

for result in self._do_copy_nodes_graph(index_map, writer,

1040

write_index, output_lines, pb, readv_group_iter, total_items):

1041

yield result

1042

except Exception:

1043

# Python 2.4 does not permit try:finally: in a generator.

1044

pb.finished()

1045

raise

1046

else:

1047

pb.finished()

1048

1049

def _do_copy_nodes_graph(self, index_map, writer, write_index,

1050

output_lines, pb, readv_group_iter, total_items):

1051

# for record verification

1052

knit = KnitVersionedFiles(None, None)

1053

# for line extraction when requested (inventories only)

1054

if output_lines:

1055

factory = KnitPlainFactory()

1056

record_index = 0

1057

pb.update("Copied record", record_index, total_items)

1058

for index, readv_vector, node_vector in readv_group_iter:

1059

# copy the data

1060

pack_obj = index_map[index]

1061

transport, path = pack_obj.access_tuple()

1062

try:

1063

reader = pack.make_readv_reader(transport, path, readv_vector)

1064

except errors.NoSuchFile:

1065

if self._reload_func is not None:

1066

self._reload_func()

1067

raise

1068

for (names, read_func), (key, eol_flag, references) in \

1069

izip(reader.iter_records(), node_vector):

1070

raw_data = read_func(None)

1071

if output_lines:

1072

# read the entire thing

1073

content, _ = knit._parse_record(key[-1], raw_data)

1074

if len(references[-1]) == 0:

1075

line_iterator = factory.get_fulltext_content(content)

1076

else:

1077

line_iterator = factory.get_linedelta_content(content)

1078

for line in line_iterator:

1079

yield line, key

1080

else:

1081

# check the header only

1082

df, _ = knit._parse_record_header(key, raw_data)

1083

df.close()

1084

pos, size = writer.add_bytes_record(raw_data, names)

1085

write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)

1086

pb.update("Copied record", record_index)

1087

record_index += 1

1088

1089

def _get_text_nodes(self):

1090

text_index_map, text_indices = self._pack_map_and_index_list(

1091

'text_index')

1092

return text_index_map, self._index_contents(text_indices,

1093

self._text_filter)

1094

1095

def _least_readv_node_readv(self, nodes):

1096

"""Generate request groups for nodes using the least readv's.

1097

1098

:param nodes: An iterable of graph index nodes.

1099

:return: Total node count and an iterator of the data needed to perform

1100

readvs to obtain the data for nodes. Each item yielded by the

1101

iterator is a tuple with:

1102

index, readv_vector, node_vector. readv_vector is a list ready to

1103

hand to the transport readv method, and node_vector is a list of

1104

(key, eol_flag, references) for the node retrieved by the

1105

matching readv_vector.

1106

"""

1107

# group by pack so we do one readv per pack

1108

nodes = sorted(nodes)

1109

total = len(nodes)

1110

request_groups = {}

1111

for index, key, value, references in nodes:

1112

if index not in request_groups:

1113

request_groups[index] = []

1114

request_groups[index].append((key, value, references))

1115

result = []

1116

for index, items in request_groups.iteritems():

1117

pack_readv_requests = []

1118

for key, value, references in items:

1119

# ---- KnitGraphIndex.get_position

1120

bits = value[1:].split(' ')

1121

offset, length = int(bits[0]), int(bits[1])

1122

pack_readv_requests.append(

1123

((offset, length), (key, value[0], references)))

1124

# linear scan up the pack to maximum range combining.

1125

pack_readv_requests.sort()

1126

# split out the readv and the node data.

1127

pack_readv = [readv for readv, node in pack_readv_requests]

1128

node_vector = [node for readv, node in pack_readv_requests]

1129

result.append((index, pack_readv, node_vector))

1130

return total, result

740

raise NotImplementedError(self._create_pack_from_packs)

1131

741

1132

742

def _log_copied_texts(self):

1133

743

if 'pack' in debug.debug_flags:

1137

747

self.new_pack.text_index.key_count(),

1138

748

time.time() - self.new_pack.start_time)

1139

749

1140

def _process_inventory_lines(self, inv_lines):

1141

"""Use up the inv_lines generator and setup a text key filter."""

1142

repo = self._pack_collection.repo

1143

fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(

1144

inv_lines, self.revision_keys)

1145

text_filter = []

1146

for fileid, file_revids in fileid_revisions.iteritems():

1147

text_filter.extend([(fileid, file_revid) for file_revid in file_revids])

1148

self._text_filter = text_filter

1149

1150

def _revision_node_readv(self, revision_nodes):

1151

"""Return the total revisions and the readv's to issue.

1152

1153

:param revision_nodes: The revision index contents for the packs being

1154

incorporated into the new pack.

1155

:return: As per _least_readv_node_readv.

1156

"""

1157

return self._least_readv_node_readv(revision_nodes)

1158

1159

750

def _use_pack(self, new_pack):

1160

751

"""Return True if new_pack should be used.

1161

752

1165

756

return new_pack.data_inserted()

1166

757

1167

758

1168

class OptimisingPacker(Packer):
    """A packer that trades extra packing time for a better disk layout."""

    def _revision_node_readv(self, revision_nodes):
        """Return the revision count and the readvs to issue.

        The requests are produced in reverse topological order, so
        children come before their ancestors.

        :param revision_nodes: The revision index contents for the packs
            being incorporated into the new pack.
        :return: (total, requests) where each request is
            (index, [(offset, length)], [(key, eol_flag, references)]).
        """
        # Collect the parent graph and remember each node's details.
        parent_map = {}
        node_details = {}
        for index, key, value, references in revision_nodes:
            parent_map[key] = references[0]
            node_details[key] = (index, value, references)
        sorted_keys = tsort.topo_sort(parent_map)
        revision_count = len(sorted_keys)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(sorted_keys):
            index, value, references = node_details[key]
            # ---- KnitGraphIndex.get_position
            fields = value[1:].split(' ')
            span = (int(fields[0]), int(fields[1]))
            requests.append((index, [span], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending
        # order.
        return revision_count, requests

    def open_pack(self):
        """Open the pack being created, with size-optimised indices."""
        new_pack = super(OptimisingPacker, self).open_pack()
        # Every index builder gets the size optimisation switched on, in
        # this order.
        for index_name in ('revision_index', 'inventory_index',
                           'text_index', 'signature_index'):
            getattr(new_pack, index_name).set_optimize(for_size=True)
        return new_pack

1210

1211

1212

class ReconcilePacker(Packer):
    """A packer that regenerates index data while it copies.

    ``bzr reconcile`` uses this to cause parent text pointers to be
    regenerated.
    """

    def _extra_init(self):
        """Start out with nothing recorded as changed."""
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Build the text key reference map used for reconciling."""
        serializer = self._pack_collection.repo._serializer
        self._text_refs = serializer._find_text_key_references(inv_lines)
        # During reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full
        #    texts
        #  - copy all others as-is but with corrected parents.
        # At this point there is not enough information to decide what
        # becomes a full text, so no filter is applied.
        self._text_filter = None

    def _copy_text_texts(self):
        """Work out which texts the new pack should hold, then copy them."""
        self.pb.update("Copying content texts", 3)
        repo = self._pack_collection.repo
        # 1) Generate the ideal index.  It is derived from the recorded
        # text references plus a map of revision id -> parent revision
        # ids read from the new pack's revision index.
        ancestors = {}
        revision_entries = self.new_pack.revision_index.iter_all_entries()
        for _index, rev_key, _value, rev_refs in revision_entries:
            ancestors[rev_key[0]] = tuple(ref[0] for ref in rev_refs[0])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) Split the existing text nodes into those whose stored data
        # can be reused (possibly with corrected parents), those that must
        # be reinserted, and those that are dropped.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        null_parents = (_mod_revision.NULL_REVISION,)
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # node is (index, key, value, refs)
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                # Not part of the ideal index at all.
                discarded_nodes.append(node)
                self._data_changed = True
                continue
            if ideal_parents == null_parents:
                ideal_parents = ()
            recorded_parents = node[3][0]
            if ideal_parents == recorded_parents:
                # Nothing to correct.
                ok_nodes.append(node)
                continue
            # Anything that reaches this point alters the stored data.
            self._data_changed = True
            if ideal_parents[0:1] == recorded_parents[0:1]:
                # The left most parent is the same, or there are no
                # parents today.  Either way the representation can be
                # preserved as long as the inserted refs are changed.
                ok_nodes.append((node[0], node[1], node[2],
                    (ideal_parents, node[3][1])))
            else:
                # Reinsert this text completely.
                bad_texts.append((node[1], ideal_parents))
        # These are not needed any more.
        del ideal_index, text_nodes
        # 3) Bulk copy the ok data.
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        copier = self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items)
        # list() exhausts whatever _copy_nodes_graph returns; the items
        # themselves are discarded.
        list(copier)
        # 4) Adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail
        # to reconcile when parts of a single delta chain are preserved
        # intact, and other parts are not. E.g. Discarded->d1->d2->d3. d1
        # will be reinserted, and if d3 has incorrect parents it will also
        # be reinserted. If we insert d3 first, d2 is present (as it was
        # bulk copied), so we will try to delta, but d2 is not currently
        # able to be extracted because its basis d1 is not present.
        # Topologically sorting addresses this. The following generates a
        # sort for all the texts that are being inserted without having to
        # reference the entire text key space (we only topo sort the
        # revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict((rev, pos) for pos, rev in enumerate(topo_order))

        def revision_position(entry):
            # entry is (text_key, ideal_parents); unknown revisions sort
            # as position 0.
            return rev_order.get(entry[0][1], 0)

        bad_texts.sort(key=revision_position)
        # NOTE(review): neither `transaction` nor `file_id_index` is read
        # again in this method; kept in case the calls matter - confirm.
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        graph_index = _KnitGraphIndex(self.new_pack.text_index,
            add_callback=self.new_pack.text_index.add_nodes,
            deltas=True, parents=True, is_locked=repo.is_locked)
        output_texts = KnitVersionedFiles(graph_index,
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short
            # reads and only flush then.
            self.new_pack.flush()
            # NOTE(review): `parents` is filled in but never read below.
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            record = repo.texts.get_record_stream(
                [key], 'unordered', True).next()
            text_lines = osutils.split_lines(record.get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) Check that nothing inserted has a reference outside the
        # keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Use new_pack only if it has data and reconcile changed something.

        A difference between the inventory keys of the collection's
        combined inventory index and those of new_pack also counts as a
        change.
        """
        # XXX: we might be better checking this at the copy time.
        combined = self._pack_collection.inventory_index.combined_index
        keys_before = set(entry[1] for entry in combined.iter_all_entries())
        keys_after = set(
            entry[1] for entry in new_pack.inventory_index.iter_all_entries())
        if keys_after != keys_before:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed

1351

1352

1353

759

class RepositoryPackCollection(object):

1354

760

"""Management of packs within a repository.

1355

761

1356

762

:ivar _names: map of {pack_name: (index_size,)}

1357

763

"""

1358

764

1359

pack_factory = NewPack

1360

resumed_pack_factory = ResumedPack

765

pack_factory = None

766

resumed_pack_factory = None

767

normal_packer_class = None

768

optimising_packer_class = None

1361

769

1362

770

def __init__(self, repo, transport, index_transport, upload_transport,

1363

771

pack_transport, index_builder_class, index_class,

1504

912

'containing %d revisions. Packing %d files into %d affecting %d'

1505

913

' revisions', self, total_packs, total_revisions, num_old_packs,

1506

914

num_new_packs, num_revs_affected)

1507

result = self._execute_pack_operations(pack_operations,

915

result = self._execute_pack_operations(pack_operations, packer_class=self.normal_packer_class,

1508

916

reload_func=self._restart_autopack)

1509

917

mutter('Auto-packing repository %s completed', self)

1510

918

return result

1511

919

1512

def _execute_pack_operations(self, pack_operations, _packer_class=Packer,

1513

reload_func=None):

920

def _execute_pack_operations(self, pack_operations, packer_class,

921

reload_func=None):

1514

922

"""Execute a series of pack operations.

1515

923

1516

924

:param pack_operations: A list of [revision_count, packs_to_combine].

1517

:param _packer_class: The class of packer to use (default: Packer).

925

:param packer_class: The class of packer to use

1518

926

:return: The new pack names.

1519

927

"""

1520

928

for revision_count, packs in pack_operations:

1521

929

# we may have no-ops from the setup logic

1522

930

if len(packs) == 0:

1523

931

continue

1524

packer = _packer_class(self, packs, '.autopack',

932

packer = packer_class(self, packs, '.autopack',

1525

933

reload_func=reload_func)

1526

934

try:

1527

packer.pack()

935

result = packer.pack()

1528

936

except errors.RetryWithNewPacks:

1529

937

# An exception is propagating out of this context, make sure

1530

938

# this packer has cleaned up. Packer() doesn't set its new_pack

1533

941

if packer.new_pack is not None:

1534

942

packer.new_pack.abort()

1535

943

raise

944

if result is None:

945

return

1536

946

for pack in packs:

1537

947

self._remove_pack_from_memory(pack)

1538

948

# record the newly available packs and stop advertising the old

1594

1004

# or this pack was included in the hint.

1595

1005

pack_operations[-1][0] += pack.get_revision_count()

1596

1006

pack_operations[-1][1].append(pack)

1597

self._execute_pack_operations(pack_operations, OptimisingPacker,

1007

self._execute_pack_operations(pack_operations,

1008

packer_class=self.optimising_packer_class,

1598

1009

reload_func=self._restart_pack_operations)

1599

1010

1600

1011

def plan_autopack_combinations(self, existing_packs, pack_distribution):

2216

1627

self._resume_pack(token)

2217

1628

2218

1629

2219

class KnitPackRepository(KnitRepository):

1630

class PackRepository(MetaDirVersionedFileRepository):

2220

1631

"""Repository with knit objects stored inside pack containers.

2221

1632

2222

1633

The layering for a KnitPackRepository is:

2225

1636

===================================================

2226

1637

Tuple based apis below, string based, and key based apis above

2227

1638

---------------------------------------------------

2228

KnitVersionedFiles

1639

VersionedFiles

2229

1640

Provides .texts, .revisions etc

2230

1641

This adapts the N-tuple keys to physical knit records which only have a

2231

1642

single string identifier (for historical reasons), which in older formats

2241

1652

2242

1653

"""

2243

1654

1655

# These attributes are inherited from the Repository base class. Setting

1656

# them to None ensures that if the constructor is changed to not initialize

1657

# them, or a subclass fails to call the constructor, that an error will

1658

# occur rather than the system working but generating incorrect data.

1659

_commit_builder_class = None

1660

_serializer = None

1661

2244

1662

def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,

2245

1663

_serializer):

2246

KnitRepository.__init__(self, _format, a_bzrdir, control_files,

2247

_commit_builder_class, _serializer)

2248

index_transport = self._transport.clone('indices')

2249

self._pack_collection = RepositoryPackCollection(self, self._transport,

2250

index_transport,

2251

self._transport.clone('upload'),

2252

self._transport.clone('packs'),

2253

_format.index_builder_class,

2254

_format.index_class,

2255

use_chk_index=self._format.supports_chks,

2256

)

2257

self.inventories = KnitVersionedFiles(

2258

_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,

2259

add_callback=self._pack_collection.inventory_index.add_callback,

2260

deltas=True, parents=True, is_locked=self.is_locked),

2261

data_access=self._pack_collection.inventory_index.data_access,

2262

max_delta_chain=200)

2263

self.revisions = KnitVersionedFiles(

2264

_KnitGraphIndex(self._pack_collection.revision_index.combined_index,

2265

add_callback=self._pack_collection.revision_index.add_callback,

2266

deltas=False, parents=True, is_locked=self.is_locked,

2267

track_external_parent_refs=True),

2268

data_access=self._pack_collection.revision_index.data_access,

2269

max_delta_chain=0)

2270

self.signatures = KnitVersionedFiles(

2271

_KnitGraphIndex(self._pack_collection.signature_index.combined_index,

2272

add_callback=self._pack_collection.signature_index.add_callback,

2273

deltas=False, parents=False, is_locked=self.is_locked),

2274

data_access=self._pack_collection.signature_index.data_access,

2275

max_delta_chain=0)

2276

self.texts = KnitVersionedFiles(

2277

_KnitGraphIndex(self._pack_collection.text_index.combined_index,

2278

add_callback=self._pack_collection.text_index.add_callback,

2279

deltas=True, parents=True, is_locked=self.is_locked),

2280

data_access=self._pack_collection.text_index.data_access,

2281

max_delta_chain=200)

2282

if _format.supports_chks:

2283

# No graph, no compression:- references from chks are between

2284

# different objects not temporal versions of the same; and without

2285

# some sort of temporal structure knit compression will just fail.

2286

self.chk_bytes = KnitVersionedFiles(

2287

_KnitGraphIndex(self._pack_collection.chk_index.combined_index,

2288

add_callback=self._pack_collection.chk_index.add_callback,

2289

deltas=False, parents=False, is_locked=self.is_locked),

2290

data_access=self._pack_collection.chk_index.data_access,

2291

max_delta_chain=0)

2292

else:

2293

self.chk_bytes = None

2294

# True when the repository object is 'write locked' (as opposed to the

2295

# physical lock only taken out around changes to the pack-names list.)

2296

# Another way to represent this would be a decorator around the control

2297

# files object that presents logical locks as physical ones - if this

2298

# gets ugly consider that alternative design. RBC 20071011

2299

self._write_lock_count = 0

2300

self._transaction = None

2301

# for tests

2302

self._reconcile_does_inventory_gc = True

1664

MetaDirRepository.__init__(self, _format, a_bzrdir, control_files)

1665

self._commit_builder_class = _commit_builder_class

1666

self._serializer = _serializer

2303

1667

self._reconcile_fixes_text_parents = True

2304

self._reconcile_backsup_inventory = False

1668

if self._format.supports_external_lookups:

1669

self._unstacked_provider = graph.CachingParentsProvider(

1670

self._make_parents_provider_unstacked())

1671

else:

1672

self._unstacked_provider = graph.CachingParentsProvider(self)

1673

self._unstacked_provider.disable_cache()

1674

1675

@needs_read_lock

1676

def _all_revision_ids(self):

1677

"""See Repository.all_revision_ids()."""

1678

return [key[0] for key in self.revisions.keys()]

2305

1679

2306

1680

def _abort_write_group(self):

2307

1681

self.revisions._index._key_dependencies.clear()

2308

1682

self._pack_collection._abort_write_group()

2309

1683

2310

def _get_source(self, to_format):

2311

if to_format.network_name() == self._format.network_name():

2312

return KnitPackStreamSource(self, to_format)

2313

return super(KnitPackRepository, self)._get_source(to_format)

2314

2315

1684

def _make_parents_provider(self):

2316

return graph.CachingParentsProvider(self)

1685

if not self._format.supports_external_lookups:

1686

return self._unstacked_provider

1687

return graph.StackedParentsProvider(_LazyListJoin(

1688

[self._unstacked_provider], self._fallback_repositories))

2317

1689

2318

1690

def _refresh_data(self):

2319

1691

if not self.is_locked():

2320

1692

return

2321

1693

self._pack_collection.reload_pack_names()

1694

self._unstacked_provider.disable_cache()

1695

self._unstacked_provider.enable_cache()

2322

1696

2323

1697

def _start_write_group(self):

2324

1698

self._pack_collection._start_write_group()

2326

1700

def _commit_write_group(self):

2327

1701

hint = self._pack_collection._commit_write_group()

2328

1702

self.revisions._index._key_dependencies.clear()

1703

# The commit may have added keys that were previously cached as

1704

# missing, so reset the cache.

1705

self._unstacked_provider.disable_cache()

1706

self._unstacked_provider.enable_cache()

2329

1707

return hint

2330

1708

2331

1709

def suspend_write_group(self):

2372

1750

if 'relock' in debug.debug_flags and self._prev_lock == 'w':

2373

1751

note('%r was write locked again', self)

2374

1752

self._prev_lock = 'w'

1753

self._unstacked_provider.enable_cache()

2375

1754

for repo in self._fallback_repositories:

2376

1755

# Writes don't affect fallback repos

2377

1756

repo.lock_read()

2392

1771

if 'relock' in debug.debug_flags and self._prev_lock == 'r':

2393

1772

note('%r was read locked again', self)

2394

1773

self._prev_lock = 'r'

1774

self._unstacked_provider.enable_cache()

2395

1775

for repo in self._fallback_repositories:

2396

1776

repo.lock_read()

2397

1777

self._refresh_data()

2423

1803

return reconciler

2424

1804

2425

1805

def _reconcile_pack(self, collection, packs, extension, revs, pb):

2426

packer = ReconcilePacker(collection, packs, extension, revs)

2427

return packer.pack(pb)

1806

raise NotImplementedError(self._reconcile_pack)

2428

1807

2429

1808

@only_raises(errors.LockNotHeld, errors.LockBroken)

2430

1809

def unlock(self):

2431

1810

if self._write_lock_count == 1 and self._write_group is not None:

2432

1811

self.abort_write_group()

1812

self._unstacked_provider.disable_cache()

2433

1813

self._transaction = None

2434

1814

self._write_lock_count = 0

2435

1815

raise errors.BzrError(

2445

1825

self.control_files.unlock()

2446

1826

2447

1827

if not self.is_locked():

1828

self._unstacked_provider.disable_cache()

2448

1829

for repo in self._fallback_repositories:

2449

1830

repo.unlock()

2450

1831

2451

1832

2452

class KnitPackStreamSource(StreamSource):
    """StreamSource for transfers between KnitPack repos of one format.

    This source assumes:
      1) Same serialization format for all objects
      2) Same root information
      3) XML format inventories
      4) Atomic inserts (so we can stream inventory texts before text
         content)
      5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        """Set up the source; no text keys are known yet."""
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        """Return ('inventories', stream) for revision_ids.

        While the stream is consumed the text keys referenced by each
        inventory record are collected.  Once it has been exhausted,
        self._text_keys holds those keys minus the ones referenced by the
        parent inventories.
        """
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(parent_id,) for parent_id in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        # How to turn parsed content into lines, per storage kind.
        line_extractors = {
            'knit-delta-gz': factory.get_linedelta_content,
            'knit-ft-gz': factory.get_fulltext_content,
        }

        def find_text_keys_from_content(record):
            kind = record.storage_kind
            if kind not in line_extractors:
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (kind,))
            # It's a knit record, it has a _raw_record field (even if it
            # was reconstituted from a network stream).
            raw_data = record._raw_record
            # Parse the whole record.
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            extract_lines = line_extractors[kind]
            keyed_lines = [(line, revision_id)
                           for line in extract_lines(content)]
            content_text_keys.update(find_text_keys(keyed_lines))

        revision_keys = [(revision_id,) for revision_id in revision_ids]

        def _filtered_inv_stream():
            records = from_repo.inventories.get_record_stream(
                revision_keys, 'unordered', False)
            for record in records:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            # Only reached once every record has been handed out.
            self._text_keys = content_text_keys - parent_text_keys

        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        """Return ('texts', stream) for the keys in self._text_keys."""
        # Note: We know we don't have to handle adding root keys, because
        # both the source and target are the identical network name.
        texts = self.from_repository.texts
        text_stream = texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        """Yield the revision text streams, then inventories, then texts."""
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()

2522

2523

2524

2525

class RepositoryFormatPack(MetaDirRepositoryFormat):

1833

class RepositoryFormatPack(MetaDirVersionedFileRepositoryFormat):

2526

1834

"""Format logic for pack structured repositories.

2527

1835

2528

1836

This repository format has:

2558

1866

index_class = None

2559

1867

_fetch_uses_deltas = True

2560

1868

fast_deltas = False

2561

supports_full_versioned_files = True

2562

1869

supports_funky_characters = True

1870

revision_graph_can_have_wrong_parents = True

2563

1871

2564

1872

def initialize(self, a_bzrdir, shared=False):

2565

1873

"""Create a pack based repository.

2602

1910

_serializer=self._serializer)

2603

1911

2604

1912

2605

class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A parameterized Pack repository without subtrees.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The inventory serializer for this format (xml5)."""
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Build the 'pack-0.92' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = "Bazaar pack repository format 1 (needs bzr 0.92)\n"
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = "Packs containing knits without subtree support"
        return description

2635

2636

2637

class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A parameterized Pack repository with subtrees.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The inventory serializer for this format (xml7)."""
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        """Build the 'pack-0.92-subtree' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = "Packs containing knits with subtree support\n"
        return description

2675

2676

2677

class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A parameterized Pack repository with rich roots but no subtrees.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The inventory serializer for this format (xml6)."""
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Build the 'rich-root-pack' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = "Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n"
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = "Packs containing knits with rich root support\n"
        return description

2714

2715

2716

class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository format with external references, which allows stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The inventory serializer for this format (xml5)."""
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        """Build the '1.6' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = "Packs 5 (adds stacking support, requires bzr 1.6)"
        return description

2751

2752

2753

class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository format with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        """The inventory serializer for this format (xml6)."""
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        """Build the '1.6.1-rich-root' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
        return description

2790

2791

2792

class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """Deprecated pack format with rich roots and external references.

    Introduced in release 1.6.

    Because external lookups are supported, ghosts are not truncated after
    a reconcile, unlike with the pack-0.92 formats.

    The format is deprecated: the serializer it uses accidentally supported
    subtrees although the format was never meant to, so someone could
    accidentally fetch from an incorrect repository.
    """

    # Index implementations used by this format.
    index_class = GraphIndex
    index_builder_class = InMemoryGraphIndex

    # Repository and commit-builder implementations.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder

    # Format capabilities.
    rich_root_data = True
    supports_external_lookups = True
    supports_tree_reference = False # no subtrees

    def is_deprecated(self):
        """Report that this format is deprecated (always True)."""
        return True

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = (
            "Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
            " (deprecated)")
        return description

    @property
    def _serializer(self):
        """Return the serializer of this format (xml7.serializer_v7)."""
        serializer = xml7.serializer_v7
        return serializer

    def _get_matching_bzrdir(self):
        """Return a '1.6.1-rich-root' bzrdir format bound to this format.

        The bzrdir format built by the registry has its repository_format
        attribute replaced with this (deprecated) repository format before
        it is returned.
        """
        registry = bzrdir.format_registry
        matching = registry.make_bzrdir('1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    # Reads build a fresh bzrdir format each time; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

2840

2841

2842

class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """Pack repository format with stacking and btree indexes.

    It has neither rich roots nor subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    # Index implementations used by this format.
    index_class = btree_index.BTreeGraphIndex
    index_builder_class = btree_index.BTreeBuilder

    # Repository and commit-builder implementations.
    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    # Format capabilities.
    supports_external_lookups = True

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"

    @property
    def _serializer(self):
        """Return the serializer of this format (xml5.serializer_v5)."""
        serializer = xml5.serializer_v5
        return serializer

    def _get_matching_bzrdir(self):
        """Return the '1.9' bzrdir format built by the format registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    # Reads build a fresh bzrdir format each time; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

2875

2876

2877

class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """Pack format with rich roots, stacking and btree indexes, no subtrees.

    1.6-rich-root with B+Tree indices.
    """

    # Index implementations used by this format.
    index_class = btree_index.BTreeGraphIndex
    index_builder_class = btree_index.BTreeBuilder

    # Repository and commit-builder implementations.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder

    # Format capabilities.
    rich_root_data = True
    supports_external_lookups = True
    supports_tree_reference = False # no subtrees

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"

    @property
    def _serializer(self):
        """Return the serializer of this format (xml6.serializer_v6)."""
        serializer = xml6.serializer_v6
        return serializer

    def _get_matching_bzrdir(self):
        """Return the '1.9-rich-root' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    # Reads build a fresh bzrdir format each time; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

2911

2912

2913

class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """Development pack repository format with subtree support.

    This format should be retained in 2.3, to provide an upgrade path from
    this to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    # Index implementations used by this format.
    index_class = btree_index.BTreeGraphIndex
    index_builder_class = btree_index.BTreeBuilder

    # Repository and commit-builder implementations.
    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder

    # Format capabilities.
    experimental = True
    rich_root_data = True
    supports_external_lookups = True
    supports_tree_reference = True

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = (
            "Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = (
            "Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")
        return description

    @property
    def _serializer(self):
        """Return the serializer of this format (xml7.serializer_v7)."""
        serializer = xml7.serializer_v7
        return serializer

    def _get_matching_bzrdir(self):
        """Return the 'development5-subtree' bzrdir format from the registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""

    # Reads build a fresh bzrdir format each time; writes are ignored.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

2954

2955

2956

1913

class RetryPackOperations(errors.RetryWithNewPacks):

2957

1914

"""Raised when we are packing and we find a missing file.

2958

1915

2966

1923

" context: %(context)s %(orig_error)s")

2967

1924

2968

1925

1926

class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation.

    Records are written through a container writer installed with
    set_writer() and read back through the transport/path registered for
    the index named in each record's memo.
    """

    def __init__(self, index_to_packs, reload_func=None, flush_func=None):
        """Create a _DirectPackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param reload_func: A function to call if we determine that the pack
            files have moved and we need to reload our caches. See
            bzrlib.repofmt.pack_repo.AggregateIndex for more details.
        :param flush_func: An optional callable taking no arguments; when
            it is not None it is invoked by flush().
        """
        # No writer is available until set_writer() is called.
        self._container_writer = None
        self._write_index = None
        self._indices = index_to_packs
        self._reload_func = reload_func
        self._flush_func = flush_func

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param key_sizes: An iterable of tuples containing the key and size
            of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _DirectPackAccess the memo is (index, pos,
            length), where the index field is the write_index object supplied
            to the PackAccess object.
        :raises AssertionError: If raw_data is not exactly of type str.
        """
        # Exact type check: subclasses of str are rejected as well.
        if type(raw_data) is not str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        result = []
        offset = 0
        # The key of each pair is not used here; only the size determines
        # how raw_data is sliced into consecutive records.
        for key, size in key_sizes:
            p_offset, p_length = self._container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self._write_index, p_offset, p_length))
        return result

    def flush(self):
        """Flush pending writes on this access object.

        This will flush any buffered writes to a NewPack.
        Nothing happens when no flush_func was supplied.
        """
        if self._flush_func is not None:
            self._flush_func()

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the given records.

        This is a generator: nothing is read until it is iterated.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        :raises errors.RetryWithNewPacks: If a reload_func is set and either
            the index is no longer in the index-to-pack map or the pack file
            is missing (NoSuchFile). Without a reload_func the original
            KeyError or NoSuchFile propagates unchanged.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            # Only consecutive memos with an equal index are merged, so the
            # order of the requests is preserved.
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                # current_list is first bound here, when a new run starts.
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            try:
                transport, path = self._indices[index]
            except KeyError:
                # A KeyError here indicates that someone has triggered an index
                # reload, and this index has gone missing, we need to start
                # over.
                if self._reload_func is None:
                    # If we don't have a _reload_func there is nothing that can
                    # be done
                    raise
                # exc_info is captured inside the handler so that
                # reload_or_raise() can re-raise the original KeyError.
                raise errors.RetryWithNewPacks(index,
                                               reload_occurred=True,
                                               exc_info=sys.exc_info())
            try:
                reader = pack.make_readv_reader(transport, path, offsets)
                for names, read_func in reader.iter_records():
                    # NOTE(review): read_func(None) presumably returns the
                    # whole record body -- confirm against bzrlib.pack.
                    yield read_func(None)
            except errors.NoSuchFile:
                # A NoSuchFile error indicates that a pack file has gone
                # missing on disk, we need to trigger a reload, and start over.
                if self._reload_func is None:
                    raise
                raise errors.RetryWithNewPacks(transport.abspath(path),
                                               reload_occurred=False,
                                               exc_info=sys.exc_info())

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer used by add_raw_records().
        :param index: The index object recorded in the memos returned by
            add_raw_records(). When it is not None it is also registered in
            the index-to-pack map.
        :param transport_packname: The value stored in the index-to-pack map
            for index; get_raw_records() unpacks such values as
            (transport, path).
        """
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index

    def reload_or_raise(self, retry_exc):
        """Try calling the reload function, or re-raise the original exception.

        This should be called after _DirectPackAccess raises a
        RetryWithNewPacks exception. This function will handle the common logic
        of determining when the error is fatal versus being temporary.
        It will also make sure that the original exception is raised, rather
        than the RetryWithNewPacks exception.

        If this function returns, then the calling function should retry
        whatever operation was being performed. Otherwise an exception will
        be raised.

        :param retry_exc: A RetryWithNewPacks exception.
        """
        is_error = False
        if self._reload_func is None:
            # Without a reload function a retry cannot help.
            is_error = True
        elif not self._reload_func():
            # The reload claimed that nothing changed
            if not retry_exc.reload_occurred:
                # If there wasn't an earlier reload, then we really were
                # expecting to find changes. We didn't find them, so this is a
                # hard error
                is_error = True
        if is_error:
            exc_class, exc_value, exc_traceback = retry_exc.exc_info
            # Python 2 three-expression raise: re-raises the original
            # exception together with its original traceback.
            raise exc_class, exc_value, exc_traceback

2063

2064

2065

Older »