~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: John Arbash Meinel
Date: 2010-01-13 16:23:07 UTC
mto: (4634.119.7 2.0)
mto: This revision was merged to the branch mainline in revision 4959.
Revision ID: john@arbash-meinel.com-20100113162307-0bs82td16gzih827

Update the MANIFEST.in file.

files added:
MANIFEST.in

README_BDIST_RPM

bzrlib/crash.py

bzrlib/doc_generate/sphinx_conf.py

bzrlib/tests/features.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_patches_data/binary-after-normal.patch

bzrlib/tests/test_patches_data/binary.patch

doc/Bazaar-Logo-For-Manuals.png

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/apport.txt

doc/developers/check.txt

doc/developers/conf.py

doc/developers/content-filtering.txt

doc/developers/implementation-notes.txt

doc/developers/index-plain.txt

doc/developers/miscellaneous-notes.txt

doc/developers/plans.txt

doc/developers/process.txt

doc/developers/specifications.txt

doc/en/Makefile

doc/en/_static

doc/en/_static/bzr icon 16.png

doc/en/_static/bzr.ico

doc/en/_static/en

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/quick-reference/index.txt

doc/es/user-guide/index-plain.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/ja

doc/ja/_static

doc/ja/_static/bzr icon 16.png

doc/ja/_static/bzr.ico

doc/ja/_templates

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/index.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide

doc/ja/upgrade-guide/data_migration.txt

doc/ja/upgrade-guide/index.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/upgrade-guide/tips_and_tricks.txt

doc/ja/user-guide

doc/ja/user-guide/adv_merging.txt

doc/ja/user-guide/annotating_changes.txt

doc/ja/user-guide/bazaar_workflows.txt

doc/ja/user-guide/branching_a_project.txt

doc/ja/user-guide/browsing_history.txt

doc/ja/user-guide/bug_trackers.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/central_intro.txt

doc/ja/user-guide/configuring_bazaar.txt

doc/ja/user-guide/controlling_registration.txt

doc/ja/user-guide/core_concepts.txt

doc/ja/user-guide/distributed_intro.txt

doc/ja/user-guide/entering_commands.txt

doc/ja/user-guide/filtered_views.txt

doc/ja/user-guide/getting_help.txt

doc/ja/user-guide/hooks.txt

doc/ja/user-guide/http_smart_server.txt

doc/ja/user-guide/images

doc/ja/user-guide/images/workflows_centralized.png

doc/ja/user-guide/images/workflows_centralized.svg

doc/ja/user-guide/images/workflows_gatekeeper.png

doc/ja/user-guide/images/workflows_gatekeeper.svg

doc/ja/user-guide/images/workflows_localcommit.png

doc/ja/user-guide/images/workflows_localcommit.svg

doc/ja/user-guide/images/workflows_peer.png

doc/ja/user-guide/images/workflows_peer.svg

doc/ja/user-guide/images/workflows_pqm.png

doc/ja/user-guide/images/workflows_pqm.svg

doc/ja/user-guide/images/workflows_shared.png

doc/ja/user-guide/images/workflows_shared.svg

doc/ja/user-guide/images/workflows_single.png

doc/ja/user-guide/images/workflows_single.svg

doc/ja/user-guide/index.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/merging_changes.txt

doc/ja/user-guide/organizing_branches.txt

doc/ja/user-guide/organizing_your_workspace.txt

doc/ja/user-guide/part2_intro.txt

doc/ja/user-guide/partner_intro.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/publishing_a_branch.txt

doc/ja/user-guide/recording_changes.txt

doc/ja/user-guide/releasing_a_project.txt

doc/ja/user-guide/resolving_conflicts.txt

doc/ja/user-guide/reusing_a_checkout.txt

doc/ja/user-guide/reviewing_changes.txt

doc/ja/user-guide/sending_changes.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/setting_up_email.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/shelving_changes.txt

doc/ja/user-guide/solo_intro.txt

doc/ja/user-guide/specifying_revisions.txt

doc/ja/user-guide/stacked.txt

doc/ja/user-guide/starting_a_project.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/undoing_mistakes.txt

doc/ja/user-guide/using_aliases.txt

doc/ja/user-guide/using_checkouts.txt

doc/ja/user-guide/using_gatekeepers.txt

doc/ja/user-guide/version_info.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-guide/working_offline_central.txt

doc/ja/user-guide/writing_a_plugin.txt

doc/ja/user-guide/zen.txt

doc/ja/user-reference

doc/ja/user-reference/index.txt

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/quick-reference/index.txt

doc/ru/user-guide/index-plain.txt

tools/generate_release_notes.py

tools/package_docs.py

files removed:
doc/bazaar-vcs.org.kid

doc/en/developer-guide

doc/en/user-guide/index.txt

doc/es/developer-guide

doc/es/release-notes

doc/es/user-reference

files renamed:
bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

doc/en/developer-guide/HACKING.txt => doc/developers/HACKING.txt

doc/en/quick-reference/Makefile => doc/en/_static/en/Makefile

doc/en/quick-reference/quick-start-summary.pdf => doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/quick-reference/quick-start-summary.png => doc/en/_static/en/bzr-en-quick-reference.png

doc/en/quick-reference/quick-start-summary.svg => doc/en/_static/en/bzr-en-quick-reference.svg

doc/es/quick-reference/Makefile => doc/es/_static/es/Makefile

doc/es/quick-reference/quick-start-summary.pdf => doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/quick-reference/quick-start-summary.png => doc/es/_static/es/bzr-es-quick-reference.png

doc/es/quick-reference/quick-start-summary.svg => doc/es/_static/es/bzr-es-quick-reference.svg

doc/index.es.txt => doc/es/index.txt

doc/ru/quick-reference/Makefile => doc/ru/_static/ru/Makefile

doc/ru/quick-reference/quick-start-summary.pdf => doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/quick-reference/quick-start-summary.png => doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/quick-reference/quick-start-summary.svg => doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/index.ru.txt => doc/ru/index.txt

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/branch.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/counted_lock.py

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/dir_exporter.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/debug-flags.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/inventory_delta.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/patches.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/smart/branch.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_xml.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui/text.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

doc/developers/bug-handling.txt

doc/developers/cycle.txt

doc/developers/dirstate.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/overview.txt

doc/developers/plugin-api.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/es/mini-tutorial/index.txt

doc/es/user-guide/index.txt

doc/index.txt

setup.py

tools/win32/build_release.py

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

gpg,

graph,

inventory,

inventory_delta,

lazy_regex,

lockable_files,

lockdir,

464

465

if content_summary[2] is None:

465

466

raise ValueError("Files must not have executable = None")

466

467

if not store:

467

if (# if the file length changed we have to store:

468

parent_entry.text_size != content_summary[1] or

469

# if the exec bit has changed we have to store:

468

# We can't trust a check of the file length because of content

469

# filtering...

470

if (# if the exec bit has changed we have to store:

470

471

parent_entry.executable != content_summary[2]):

471

472

store = True

472

473

elif parent_entry.text_sha1 == content_summary[3]:

539

540

ie.revision = parent_entry.revision

540

541

return self._get_delta(ie, basis_inv, path), False, None

541

542

ie.reference_revision = content_summary[3]

543

if ie.reference_revision is None:

544

raise AssertionError("invalid content_summary for nested tree: %r"

545

% (content_summary,))

542

546

self._add_text_to_weave(ie.file_id, '', heads, None)

543

547

else:

544

548

raise NotImplementedError('unknown kind')

806

810

seen_root = True

807

811

self.new_inventory = None

808

812

if len(inv_delta):

813

# This should perhaps be guarded by a check that the basis we

814

# commit against is the basis for the commit and if not do a delta

815

# against the basis.

809

816

self._any_changes = True

810

817

if not seen_root:

811

818

# housekeeping root entry changes do not affect no-change commits.

848

855

######################################################################

849

856

# Repositories

850

857

858

851

859

class Repository(object):

852

860

"""Repository holding history for one or more branches.

853

861

923

931

"""

924

932

if self._write_group is not self.get_transaction():

925

933

# has an unlock or relock occurred ?

934

if suppress_errors:

935

mutter(

936

'(suppressed) mismatched lock context and write group. %r, %r',

937

self._write_group, self.get_transaction())

938

return

926

939

raise errors.BzrError(

927

940

'mismatched lock context and write group. %r, %r' %

928

941

(self._write_group, self.get_transaction()))

1062

1075

check_content=True):

1063

1076

"""Store lines in inv_vf and return the sha1 of the inventory."""

1064

1077

parents = [(parent,) for parent in parents]

1065

return self.inventories.add_lines((revision_id,), parents, lines,

1078

result = self.inventories.add_lines((revision_id,), parents, lines,

1066

1079

check_content=check_content)[0]

1080

self.inventories._access.flush()

1081

return result

1067

1082

1068

1083

def add_revision(self, revision_id, rev, inv=None, config=None):

1069

1084

"""Add rev to the revision store as revision_id.

1145

1160

# The old API returned a list, should this actually be a set?

1146

1161

return parent_map.keys()

1147

1162

1163

def _check_inventories(self, checker):

1164

"""Check the inventories found from the revision scan.

1165

1166

This is responsible for verifying the sha1 of inventories and

1167

creating a pending_keys set that covers data referenced by inventories.

1168

"""

1169

bar = ui.ui_factory.nested_progress_bar()

1170

try:

1171

self._do_check_inventories(checker, bar)

1172

finally:

1173

bar.finished()

1174

1175

def _do_check_inventories(self, checker, bar):

1176

"""Helper for _check_inventories."""

1177

revno = 0

1178

keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}

1179

kinds = ['chk_bytes', 'texts']

1180

count = len(checker.pending_keys)

1181

bar.update("inventories", 0, 2)

1182

current_keys = checker.pending_keys

1183

checker.pending_keys = {}

1184

# Accumulate current checks.

1185

for key in current_keys:

1186

if key[0] != 'inventories' and key[0] not in kinds:

1187

checker._report_items.append('unknown key type %r' % (key,))

1188

keys[key[0]].add(key[1:])

1189

if keys['inventories']:

1190

# NB: output order *should* be roughly sorted - topo or

1191

# inverse topo depending on repository - either way decent

1192

# to just delta against. However, pre-CHK formats didn't

1193

# try to optimise inventory layout on disk. As such the

1194

# pre-CHK code path does not use inventory deltas.

1195

last_object = None

1196

for record in self.inventories.check(keys=keys['inventories']):

1197

if record.storage_kind == 'absent':

1198

checker._report_items.append(

1199

'Missing inventory {%s}' % (record.key,))

1200

else:

1201

last_object = self._check_record('inventories', record,

1202

checker, last_object,

1203

current_keys[('inventories',) + record.key])

1204

del keys['inventories']

1205

else:

1206

return

1207

bar.update("texts", 1)

1208

while (checker.pending_keys or keys['chk_bytes']

1209

or keys['texts']):

1210

# Something to check.

1211

current_keys = checker.pending_keys

1212

checker.pending_keys = {}

1213

# Accumulate current checks.

1214

for key in current_keys:

1215

if key[0] not in kinds:

1216

checker._report_items.append('unknown key type %r' % (key,))

1217

keys[key[0]].add(key[1:])

1218

# Check the outermost kind only - inventories || chk_bytes || texts

1219

for kind in kinds:

1220

if keys[kind]:

1221

last_object = None

1222

for record in getattr(self, kind).check(keys=keys[kind]):

1223

if record.storage_kind == 'absent':

1224

checker._report_items.append(

1225

'Missing %s {%s}' % (kind, record.key,))

1226

else:

1227

last_object = self._check_record(kind, record,

1228

checker, last_object, current_keys[(kind,) + record.key])

1229

keys[kind] = set()

1230

break

1231

1232

def _check_record(self, kind, record, checker, last_object, item_data):

1233

"""Check a single text from this repository."""

1234

if kind == 'inventories':

1235

rev_id = record.key[0]

1236

inv = self.deserialise_inventory(rev_id,

1237

record.get_bytes_as('fulltext'))

1238

if last_object is not None:

1239

delta = inv._make_delta(last_object)

1240

for old_path, path, file_id, ie in delta:

1241

if ie is None:

1242

continue

1243

ie.check(checker, rev_id, inv)

1244

else:

1245

for path, ie in inv.iter_entries():

1246

ie.check(checker, rev_id, inv)

1247

if self._format.fast_deltas:

1248

return inv

1249

elif kind == 'chk_bytes':

1250

# No code written to check chk_bytes for this repo format.

1251

checker._report_items.append(

1252

'unsupported key type chk_bytes for %s' % (record.key,))

1253

elif kind == 'texts':

1254

self._check_text(record, checker, item_data)

1255

else:

1256

checker._report_items.append(

1257

'unknown key type %s for %s' % (kind, record.key))

1258

1259

def _check_text(self, record, checker, item_data):

1260

"""Check a single text."""

1261

# Check it is extractable.

1262

# TODO: check length.

1263

if record.storage_kind == 'chunked':

1264

chunks = record.get_bytes_as(record.storage_kind)

1265

sha1 = osutils.sha_strings(chunks)

1266

length = sum(map(len, chunks))

1267

else:

1268

content = record.get_bytes_as('fulltext')

1269

sha1 = osutils.sha_string(content)

1270

length = len(content)

1271

if item_data and sha1 != item_data[1]:

1272

checker._report_items.append(

1273

'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %

1274

(record.key, sha1, item_data[1], item_data[2]))

1275

1148

1276

@staticmethod

1149

1277

def create(a_bzrdir):

1150

1278

"""Construct the current default format repository in a_bzrdir."""

1183

1311

self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)

1184

1312

1185

1313

def __repr__(self):

1186

return '%s(%r)' % (self.__class__.__name__,

1187

self.base)

1314

if self._fallback_repositories:

1315

return '%s(%r, fallback_repositories=%r)' % (

1316

self.__class__.__name__,

1317

self.base,

1318

self._fallback_repositories)

1319

else:

1320

return '%s(%r)' % (self.__class__.__name__,

1321

self.base)

1322

1323

def _has_same_fallbacks(self, other_repo):

1324

"""Returns true if the repositories have the same fallbacks."""

1325

my_fb = self._fallback_repositories

1326

other_fb = other_repo._fallback_repositories

1327

if len(my_fb) != len(other_fb):

1328

return False

1329

for f, g in zip(my_fb, other_fb):

1330

if not f.has_same_location(g):

1331

return False

1332

return True

1188

1333

1189

1334

def has_same_location(self, other):

1190

1335

"""Returns a boolean indicating if this repository is at the same

1398

1543

"""Commit the contents accrued within the current write group.

1399

1544

1400

1545

:seealso: start_write_group.

1546

1547

:return: it may return an opaque hint that can be passed to 'pack'.

1401

1548

"""

1402

1549

if self._write_group is not self.get_transaction():

1403

1550

# has an unlock or relock occurred ?

1457

1604

# but at the moment we're only checking for texts referenced by

1458

1605

# inventories at the graph's edge.

1459

1606

key_deps = self.revisions._index._key_dependencies

1460

key_deps.add_keys(present_inventories)

1607

key_deps.satisfy_refs_for_keys(present_inventories)

1461

1608

referrers = frozenset(r[0] for r in key_deps.get_referrers())

1462

1609

file_ids = self.fileids_altered_by_revision_ids(referrers)

1463

1610

missing_texts = set()

1529

1676

raise errors.InternalBzrError(

1530

1677

"May not fetch while in a write group.")

1531

1678

# fast path same-url fetch operations

1532

if self.has_same_location(source) and fetch_spec is None:

1679

# TODO: lift out to somewhere common with RemoteRepository

1680

# <https://bugs.edge.launchpad.net/bzr/+bug/401646>

1681

if (self.has_same_location(source)

1682

and fetch_spec is None

1683

and self._has_same_fallbacks(source)):

1533

1684

# check that last_revision is in 'from' and then return a

1534

1685

# no-operation.

1535

1686

if (revision_id is not None and

1560

1711

:param revprops: Optional dictionary of revision properties.

1561

1712

:param revision_id: Optional revision id.

1562

1713

"""

1714

if self._fallback_repositories:

1715

raise errors.BzrError("Cannot commit from a lightweight checkout "

1716

"to a stacked branch. See "

1717

"https://bugs.launchpad.net/bzr/+bug/375013 for details.")

1563

1718

result = self._commit_builder_class(self, parents, config,

1564

1719

timestamp, timezone, committer, revprops, revision_id)

1565

1720

self.start_write_group()

1692

1847

1693

1848

@needs_read_lock

1694

1849

def get_revisions(self, revision_ids):

1695

"""Get many revisions at once."""

1850

"""Get many revisions at once.

1851

1852

Repositories that need to check data on every revision read should

1853

subclass this method.

1854

"""

1696

1855

return self._get_revisions(revision_ids)

1697

1856

1698

1857

@needs_read_lock

1699

1858

def _get_revisions(self, revision_ids):

1700

1859

"""Core work logic to get many revisions without sanity checks."""

1701

for rev_id in revision_ids:

1702

if not rev_id or not isinstance(rev_id, basestring):

1703

raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)

1860

revs = {}

1861

for revid, rev in self._iter_revisions(revision_ids):

1862

if rev is None:

1863

raise errors.NoSuchRevision(self, revid)

1864

revs[revid] = rev

1865

return [revs[revid] for revid in revision_ids]

1866

1867

def _iter_revisions(self, revision_ids):

1868

"""Iterate over revision objects.

1869

1870

:param revision_ids: An iterable of revisions to examine. None may be

1871

passed to request all revisions known to the repository. Note that

1872

not all repositories can find unreferenced revisions; for those

1873

repositories only referenced ones will be returned.

1874

:return: An iterator of (revid, revision) tuples. Absent revisions (

1875

those asked for but not available) are returned as (revid, None).

1876

"""

1877

if revision_ids is None:

1878

revision_ids = self.all_revision_ids()

1879

else:

1880

for rev_id in revision_ids:

1881

if not rev_id or not isinstance(rev_id, basestring):

1882

raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)

1704

1883

keys = [(key,) for key in revision_ids]

1705

1884

stream = self.revisions.get_record_stream(keys, 'unordered', True)

1706

revs = {}

1707

1885

for record in stream:

1886

revid = record.key[0]

1708

1887

if record.storage_kind == 'absent':

1709

raise errors.NoSuchRevision(self, record.key[0])

1710

text = record.get_bytes_as('fulltext')

1711

rev = self._serializer.read_revision_from_string(text)

1712

revs[record.key[0]] = rev

1713

return [revs[revid] for revid in revision_ids]

1888

yield (revid, None)

1889

else:

1890

text = record.get_bytes_as('fulltext')

1891

rev = self._serializer.read_revision_from_string(text)

1892

yield (revid, rev)

1714

1893

1715

1894

@needs_read_lock

1716

1895

def get_revision_xml(self, revision_id):

2071

2250

batch_size]

2072

2251

if not to_query:

2073

2252

break

2074

for rev_tree in self.revision_trees(to_query):

2075

revision_id = rev_tree.get_revision_id()

2253

for revision_id in to_query:

2076

2254

parent_ids = ancestors[revision_id]

2077

2255

for text_key in revision_keys[revision_id]:

2078

2256

pb.update("Calculating text parents", processed_texts)

2178

2356

"""Get Inventory object by revision id."""

2179

2357

return self.iter_inventories([revision_id]).next()

2180

2358

2181

def iter_inventories(self, revision_ids):

2359

def iter_inventories(self, revision_ids, ordering=None):

2182

2360

"""Get many inventories by revision_ids.

2183

2361

2184

2362

This will buffer some or all of the texts used in constructing the

2186

2364

time.

2187

2365

2188

2366

:param revision_ids: The expected revision ids of the inventories.

2367

:param ordering: optional ordering, e.g. 'topological'. If not

2368

specified, the order of revision_ids will be preserved (by

2369

buffering if necessary).

2189

2370

:return: An iterator of inventories.

2190

2371

"""

2191

2372

if ((None in revision_ids)

2192

2373

or (_mod_revision.NULL_REVISION in revision_ids)):

2193

2374

raise ValueError('cannot get null revision inventory')

2194

return self._iter_inventories(revision_ids)

2375

return self._iter_inventories(revision_ids, ordering)

2195

2376

2196

def _iter_inventories(self, revision_ids):

2377

def _iter_inventories(self, revision_ids, ordering):

2197

2378

"""single-document based inventory iteration."""

2198

for text, revision_id in self._iter_inventory_xmls(revision_ids):

2379

inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)

2380

for text, revision_id in inv_xmls:

2199

2381

yield self.deserialise_inventory(revision_id, text)

2200

2382

2201

def _iter_inventory_xmls(self, revision_ids):

2383

def _iter_inventory_xmls(self, revision_ids, ordering):

2384

if ordering is None:

2385

order_as_requested = True

2386

ordering = 'unordered'

2387

else:

2388

order_as_requested = False

2202

2389

keys = [(revision_id,) for revision_id in revision_ids]

2203

stream = self.inventories.get_record_stream(keys, 'unordered', True)

2390

if not keys:

2391

return

2392

if order_as_requested:

2393

key_iter = iter(keys)

2394

next_key = key_iter.next()

2395

stream = self.inventories.get_record_stream(keys, ordering, True)

2204

2396

text_chunks = {}

2205

2397

for record in stream:

2206

2398

if record.storage_kind != 'absent':

2207

text_chunks[record.key] = record.get_bytes_as('chunked')

2399

chunks = record.get_bytes_as('chunked')

2400

if order_as_requested:

2401

text_chunks[record.key] = chunks

2402

else:

2403

yield ''.join(chunks), record.key[-1]

2208

2404

else:

2209

2405

raise errors.NoSuchRevision(self, record.key)

2210

for key in keys:

2211

chunks = text_chunks.pop(key)

2212

yield ''.join(chunks), key[-1]

2406

if order_as_requested:

2407

# Yield as many results as we can while preserving order.

2408

while next_key in text_chunks:

2409

chunks = text_chunks.pop(next_key)

2410

yield ''.join(chunks), next_key[-1]

2411

try:

2412

next_key = key_iter.next()

2413

except StopIteration:

2414

# We still want to fully consume the get_record_stream,

2415

# just in case it is not actually finished at this point

2416

next_key = None

2417

break

2213

2418

2214

2419

def deserialise_inventory(self, revision_id, xml):

2215

2420

"""Transform the xml into an inventory object.

2236

2441

@needs_read_lock

2237

2442

def get_inventory_xml(self, revision_id):

2238

2443

"""Get inventory XML as a file object."""

2239

texts = self._iter_inventory_xmls([revision_id])

2444

texts = self._iter_inventory_xmls([revision_id], 'unordered')

2240

2445

try:

2241

2446

text, revision_id = texts.next()

2242

2447

except StopIteration:

2474

2679

[parents_provider, other_repository._make_parents_provider()])

2475

2680

return graph.Graph(parents_provider)

2476

2681

2477

def _get_versioned_file_checker(self, text_key_references=None):

2682

def _get_versioned_file_checker(self, text_key_references=None,

2683

ancestors=None):

2478

2684

"""Return an object suitable for checking versioned files.

2479

2685

2480

2686

:param text_key_references: if non-None, an already built

2482

2688

to whether they were referred to by the inventory of the

2483

2689

revision_id that they contain. If None, this will be

2484

2690

calculated.

2691

:param ancestors: Optional result from

2692

self.get_graph().get_parent_map(self.all_revision_ids()) if already

2693

available.

2485

2694

"""

2486

2695

return _VersionedFileChecker(self,

2487

text_key_references=text_key_references)

2696

text_key_references=text_key_references, ancestors=ancestors)

2488

2697

2489

2698

def revision_ids_to_search_result(self, result_set):

2490

2699

"""Convert a set of revision ids to a graph SearchResult."""

2540

2749

return record.get_bytes_as('fulltext')

2541

2750

2542

2751

@needs_read_lock

2543

def check(self, revision_ids=None):

2752

def check(self, revision_ids=None, callback_refs=None, check_repo=True):

2544

2753

"""Check consistency of all history of given revision_ids.

2545

2754

2546

2755

Different repository implementations should override _check().

2547

2756

2548

2757

:param revision_ids: A non-empty list of revision_ids whose ancestry

2549

2758

will be checked. Typically the last revision_id of a branch.

2759

:param callback_refs: A dict of check-refs to resolve and callback

2760

the check/_check method on the items listed as wanting the ref.

2761

see bzrlib.check.

2762

:param check_repo: If False do not check the repository contents, just

2763

calculate the data callback_refs requires and call them back.

2550

2764

"""

2551

return self._check(revision_ids)

2765

return self._check(revision_ids, callback_refs=callback_refs,

2766

check_repo=check_repo)

2552

2767

2553

def _check(self, revision_ids):

2554

result = check.Check(self)

2555

result.check()

2768

def _check(self, revision_ids, callback_refs, check_repo):

2769

result = check.Check(self, check_repo=check_repo)

2770

result.check(callback_refs)

2556

2771

return result

2557

2772

2558

2773

def _warn_if_deprecated(self):

2848

3063

# help), and for fetching when data won't have come from the same

2849

3064

# compressor.

2850

3065

pack_compresses = False

3066

# Does the repository inventory storage understand references to trees?

3067

supports_tree_reference = None

2851

3068

2852

3069

def __str__(self):

2853

3070

return "<%s>" % self.__class__.__name__

2957

3174

raise NotImplementedError(self.network_name)

2958

3175

2959

3176

def check_conversion_target(self, target_format):

2960

raise NotImplementedError(self.check_conversion_target)

3177

if self.rich_root_data and not target_format.rich_root_data:

3178

raise errors.BadConversionTarget(

3179

'Does not support rich root data.', target_format,

3180

from_format=self)

3181

if (self.supports_tree_reference and

3182

not getattr(target_format, 'supports_tree_reference', False)):

3183

raise errors.BadConversionTarget(

3184

'Does not support nested trees', target_format,

3185

from_format=self)

2961

3186

2962

3187

def open(self, a_bzrdir, _found=False):

2963

3188

"""Return an instance of this format for the bzrdir a_bzrdir.

3492

3717

# This is redundant with format.check_conversion_target(), however that

3493

3718

# raises an exception, and we just want to say "False" as in we won't

3494

3719

# support converting between these formats.

3720

if 'IDS_never' in debug.debug_flags:

3721

return False

3495

3722

if source.supports_rich_root() and not target.supports_rich_root():

3496

3723

return False

3497

3724

if (source._format.supports_tree_reference

3498

3725

and not target._format.supports_tree_reference):

3499

3726

return False

3727

if target._fallback_repositories and target._format.supports_chks:

3728

# IDS doesn't know how to copy CHKs for the parent inventories it

3729

# adds to stacked repos.

3730

return False

3731

if 'IDS_always' in debug.debug_flags:

3732

return True

3733

# Only use this code path for local source and target. IDS does far

3734

# too much IO (both bandwidth and roundtrips) over a network.

3735

if not source.bzrdir.transport.base.startswith('file:///'):

3736

return False

3737

if not target.bzrdir.transport.base.startswith('file:///'):

3738

return False

3500

3739

return True

3501

3740

3502

def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):

3741

def _get_trees(self, revision_ids, cache):

3742

possible_trees = []

3743

for rev_id in revision_ids:

3744

if rev_id in cache:

3745

possible_trees.append((rev_id, cache[rev_id]))

3746

else:

3747

# Not cached, but inventory might be present anyway.

3748

try:

3749

tree = self.source.revision_tree(rev_id)

3750

except errors.NoSuchRevision:

3751

# Nope, parent is ghost.

3752

pass

3753

else:

3754

cache[rev_id] = tree

3755

possible_trees.append((rev_id, tree))

3756

return possible_trees

3757

3758

def _get_delta_for_revision(self, tree, parent_ids, possible_trees):

3503

3759

"""Get the best delta and base for this revision.

3504

3760

3505

3761

:return: (basis_id, delta)

3506

3762

"""

3507

possible_trees = [(parent_id, cache[parent_id])

3508

for parent_id in parent_ids

3509

if parent_id in cache]

3510

if len(possible_trees) == 0:

3511

# There either aren't any parents, or the parents aren't in the

3512

# cache, so just use the last converted tree

3513

possible_trees.append((basis_id, cache[basis_id]))

3514

3763

deltas = []

3764

# Generate deltas against each tree, to find the shortest.

3765

texts_possibly_new_in_tree = set()

3515

3766

for basis_id, basis_tree in possible_trees:

3516

3767

delta = tree.inventory._make_delta(basis_tree.inventory)

3768

for old_path, new_path, file_id, new_entry in delta:

3769

if new_path is None:

3770

# This file_id isn't present in the new rev, so we don't

3771

# care about it.

3772

continue

3773

if not new_path:

3774

# Rich roots are handled elsewhere...

3775

continue

3776

kind = new_entry.kind

3777

if kind != 'directory' and kind != 'file':

3778

# No text record associated with this inventory entry.

3779

continue

3780

# This is a directory or file that has changed somehow.

3781

texts_possibly_new_in_tree.add((file_id, new_entry.revision))

3517

3782

deltas.append((len(delta), basis_id, delta))

3518

3783

deltas.sort()

3519

3784

return deltas[0][1:]

3520

3785

3521

def _get_parent_keys(self, root_key, parent_map):

3522

"""Get the parent keys for a given root id."""

3523

root_id, rev_id = root_key

3524

# Include direct parents of the revision, but only if they used

3525

# the same root_id and are heads.

3526

parent_keys = []

3527

for parent_id in parent_map[rev_id]:

3528

if parent_id == _mod_revision.NULL_REVISION:

3529

continue

3530

if parent_id not in self._revision_id_to_root_id:

3531

# We probably didn't read this revision, go spend the

3532

# extra effort to actually check

3533

try:

3534

tree = self.source.revision_tree(parent_id)

3535

except errors.NoSuchRevision:

3536

# Ghost, fill out _revision_id_to_root_id in case we

3537

# encounter this again.

3538

# But set parent_root_id to None since we don't really know

3539

parent_root_id = None

3540

else:

3541

parent_root_id = tree.get_root_id()

3542

self._revision_id_to_root_id[parent_id] = None

3543

else:

3544

parent_root_id = self._revision_id_to_root_id[parent_id]

3545

if root_id == parent_root_id:

3546

# With stacking we _might_ want to refer to a non-local

3547

# revision, but this code path only applies when we have the

3548

# full content available, so ghosts really are ghosts, not just

3549

# the edge of local data.

3550

parent_keys.append((parent_id,))

3551

else:

3552

# root_id may be in the parent anyway.

3553

try:

3554

tree = self.source.revision_tree(parent_id)

3555

except errors.NoSuchRevision:

3556

# ghost, can't refer to it.

3557

pass

3558

else:

3559

try:

3560

parent_keys.append((tree.inventory[root_id].revision,))

3561

except errors.NoSuchId:

3562

# not in the tree

3563

pass

3564

g = graph.Graph(self.source.revisions)

3565

heads = g.heads(parent_keys)

3566

selected_keys = []

3567

for key in parent_keys:

3568

if key in heads and key not in selected_keys:

3569

selected_keys.append(key)

3570

return tuple([(root_id,)+ key for key in selected_keys])

3786

def _fetch_parent_invs_for_stacking(self, parent_map, cache):

3787

"""Find all parent revisions that are absent, but for which the

3788

inventory is present, and copy those inventories.

3571

3789

3572

def _new_root_data_stream(self, root_keys_to_create, parent_map):

3573

for root_key in root_keys_to_create:

3574

parent_keys = self._get_parent_keys(root_key, parent_map)

3575

yield versionedfile.FulltextContentFactory(root_key,

3576

parent_keys, None, '')

3790

This is necessary to preserve correctness when the source is stacked

3791

without fallbacks configured. (Note that in cases like upgrade the

3792

source may not have _fallback_repositories even though it is

3793

stacked.)

3794

"""

3795

parent_revs = set()

3796

for parents in parent_map.values():

3797

parent_revs.update(parents)

3798

present_parents = self.source.get_parent_map(parent_revs)

3799

absent_parents = set(parent_revs).difference(present_parents)

3800

parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(

3801

(rev_id,) for rev_id in absent_parents)

3802

parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]

3803

for parent_tree in self.source.revision_trees(parent_inv_ids):

3804

current_revision_id = parent_tree.get_revision_id()

3805

parents_parents_keys = parent_invs_keys_for_stacking[

3806

(current_revision_id,)]

3807

parents_parents = [key[-1] for key in parents_parents_keys]

3808

basis_id = _mod_revision.NULL_REVISION

3809

basis_tree = self.source.revision_tree(basis_id)

3810

delta = parent_tree.inventory._make_delta(basis_tree.inventory)

3811

self.target.add_inventory_by_delta(

3812

basis_id, delta, current_revision_id, parents_parents)

3813

cache[current_revision_id] = parent_tree

3577

3814

3578

3815

def _fetch_batch(self, revision_ids, basis_id, cache):

3579

3816

"""Fetch across a few revisions.

3593

3830

pending_deltas = []

3594

3831

pending_revisions = []

3595

3832

parent_map = self.source.get_parent_map(revision_ids)

3833

self._fetch_parent_invs_for_stacking(parent_map, cache)

3596

3834

for tree in self.source.revision_trees(revision_ids):

3835

# Find an inventory delta for this revision.

3836

# Find text entries that need to be copied, too.

3597

3837

current_revision_id = tree.get_revision_id()

3598

3838

parent_ids = parent_map.get(current_revision_id, ())

3839

parent_trees = self._get_trees(parent_ids, cache)

3840

possible_trees = list(parent_trees)

3841

if len(possible_trees) == 0:

3842

# There either aren't any parents, or the parents are ghosts,

3843

# so just use the last converted tree.

3844

possible_trees.append((basis_id, cache[basis_id]))

3599

3845

basis_id, delta = self._get_delta_for_revision(tree, parent_ids,

3600

basis_id, cache)

3846

possible_trees)

3847

revision = self.source.get_revision(current_revision_id)

3848

pending_deltas.append((basis_id, delta,

3849

current_revision_id, revision.parent_ids))

3601

3850

if self._converting_to_rich_root:

3602

3851

self._revision_id_to_root_id[current_revision_id] = \

3603

3852

tree.get_root_id()

3604

# Find text entries that need to be copied

3853

# Determine which texts are present in this revision but not in

3854

# any of the available parents.

3855

texts_possibly_new_in_tree = set()

3605

3856

for old_path, new_path, file_id, entry in delta:

3606

if new_path is not None:

3607

if not new_path:

3608

# This is the root

3609

if not self.target.supports_rich_root():

3610

# The target doesn't support rich root, so we don't

3611

# copy

3612

continue

3613

if self._converting_to_rich_root:

3614

# This can't be copied normally, we have to insert

3615

# it specially

3616

root_keys_to_create.add((file_id, entry.revision))

3617

continue

3618

text_keys.add((file_id, entry.revision))

3619

revision = self.source.get_revision(current_revision_id)

3620

pending_deltas.append((basis_id, delta,

3621

current_revision_id, revision.parent_ids))

3857

if new_path is None:

3858

# This file_id isn't present in the new rev

3859

continue

3860

if not new_path:

3861

# This is the root

3862

if not self.target.supports_rich_root():

3863

# The target doesn't support rich root, so we don't

3864

# copy

3865

continue

3866

if self._converting_to_rich_root:

3867

# This can't be copied normally, we have to insert

3868

# it specially

3869

root_keys_to_create.add((file_id, entry.revision))

3870

continue

3871

kind = entry.kind

3872

texts_possibly_new_in_tree.add((file_id, entry.revision))

3873

for basis_id, basis_tree in possible_trees:

3874

basis_inv = basis_tree.inventory

3875

for file_key in list(texts_possibly_new_in_tree):

3876

file_id, file_revision = file_key

3877

try:

3878

entry = basis_inv[file_id]

3879

except errors.NoSuchId:

3880

continue

3881

if entry.revision == file_revision:

3882

texts_possibly_new_in_tree.remove(file_key)

3883

text_keys.update(texts_possibly_new_in_tree)

3622

3884

pending_revisions.append(revision)

3623

3885

cache[current_revision_id] = tree

3624

3886

basis_id = current_revision_id

3626

3888

from_texts = self.source.texts

3627

3889

to_texts = self.target.texts

3628

3890

if root_keys_to_create:

3629

root_stream = self._new_root_data_stream(root_keys_to_create,

3630

parent_map)

3891

from bzrlib.fetch import _new_root_data_stream

3892

root_stream = _new_root_data_stream(

3893

root_keys_to_create, self._revision_id_to_root_id, parent_map,

3894

self.source)

3631

3895

to_texts.insert_record_stream(root_stream)

3632

3896

to_texts.insert_record_stream(from_texts.get_record_stream(

3633

3897

text_keys, self.target._format._fetch_order,

3640

3904

# for the new revisions that we are about to insert. We do this

3641

3905

# before adding the revisions so that no revision is added until

3642

3906

# all the inventories it may depend on are added.

3907

# Note that this is overzealous, as we may have fetched these in an

3908

# earlier batch.

3643

3909

parent_ids = set()

3644

3910

revision_ids = set()

3645

3911

for revision in pending_revisions:

3648

3914

parent_ids.difference_update(revision_ids)

3649

3915

parent_ids.discard(_mod_revision.NULL_REVISION)

3650

3916

parent_map = self.source.get_parent_map(parent_ids)

3651

for parent_tree in self.source.revision_trees(parent_ids):

3652

basis_id, delta = self._get_delta_for_revision(tree, parent_ids, basis_id, cache)

3917

# we iterate over parent_map and not parent_ids because we don't

3918

# want to try copying any revision which is a ghost

3919

for parent_tree in self.source.revision_trees(parent_map):

3653

3920

current_revision_id = parent_tree.get_revision_id()

3654

3921

parents_parents = parent_map[current_revision_id]

3922

possible_trees = self._get_trees(parents_parents, cache)

3923

if len(possible_trees) == 0:

3924

# There either aren't any parents, or the parents are

3925

# ghosts, so just use the last converted tree.

3926

possible_trees.append((basis_id, cache[basis_id]))

3927

basis_id, delta = self._get_delta_for_revision(parent_tree,

3928

parents_parents, possible_trees)

3655

3929

self.target.add_inventory_by_delta(

3656

3930

basis_id, delta, current_revision_id, parents_parents)

3657

3931

# insert signatures and revisions

3671

3945

3672

3946

:param revision_ids: The list of revisions to fetch. Must be in

3673

3947

topological order.

3674

:param pb: A ProgressBar

3948

:param pb: A ProgressTask

3675

3949

:return: None

3676

3950

"""

3677

3951

basis_id, basis_tree = self._get_basis(revision_ids[0])

3722

3996

# Walk through all revisions; get inventory deltas, copy referenced

3723

3997

# texts that delta references, insert the delta, revision and

3724

3998

# signature.

3725

first_rev = self.source.get_revision(revision_ids[0])

3726

3999

if pb is None:

3727

4000

my_pb = ui.ui_factory.nested_progress_bar()

3728

4001

pb = my_pb

3851

4124

3852

4125

class _VersionedFileChecker(object):

3853

4126

3854

def __init__(self, repository, text_key_references=None):

4127

def __init__(self, repository, text_key_references=None, ancestors=None):

3855

4128

self.repository = repository

3856

4129

self.text_index = self.repository._generate_text_key_index(

3857

text_key_references=text_key_references)

4130

text_key_references=text_key_references, ancestors=ancestors)

3858

4131

3859

4132

def calculate_file_version_parents(self, text_key):

3860

4133

"""Calculate the correct parents for a file version according to

3878

4151

revision_id) tuples for versions that are present in this versioned

3879

4152

file, but not used by the corresponding inventory.

3880

4153

"""

4154

local_progress = None

4155

if progress_bar is None:

4156

local_progress = ui.ui_factory.nested_progress_bar()

4157

progress_bar = local_progress

4158

try:

4159

return self._check_file_version_parents(texts, progress_bar)

4160

finally:

4161

if local_progress:

4162

local_progress.finished()

4163

4164

def _check_file_version_parents(self, texts, progress_bar):

4165

"""See check_file_version_parents."""

3881

4166

wrong_parents = {}

3882

4167

self.file_ids = set([file_id for file_id, _ in

3883

4168

self.text_index.iterkeys()])

3884

4169

# text keys is now grouped by file_id

3885

n_weaves = len(self.file_ids)

3886

files_in_revisions = {}

3887

revisions_of_files = {}

3888

4170

n_versions = len(self.text_index)

3889

4171

progress_bar.update('loading text store', 0, n_versions)

3890

4172

parent_map = self.repository.texts.get_parent_map(self.text_index)

3892

4174

text_keys = self.repository.texts.keys()

3893

4175

unused_keys = frozenset(text_keys) - set(self.text_index)

3894

4176

for num, key in enumerate(self.text_index.iterkeys()):

3895

if progress_bar is not None:

3896

progress_bar.update('checking text graph', num, n_versions)

4177

progress_bar.update('checking text graph', num, n_versions)

3897

4178

correct_parents = self.calculate_file_version_parents(key)

3898

4179

try:

3899

4180

knit_parents = parent_map[key]

3984

4265

else:

3985

4266

new_pack.set_write_cache_size(1024*1024)

3986

4267

for substream_type, substream in stream:

4268

if 'stream' in debug.debug_flags:

4269

mutter('inserting substream: %s', substream_type)

3987

4270

if substream_type == 'texts':

3988

4271

self.target_repo.texts.insert_record_stream(substream)

3989

4272

elif substream_type == 'inventories':

3993

4276

else:

3994

4277

self._extract_and_insert_inventories(

3995

4278

substream, src_serializer)

4279

elif substream_type == 'inventory-deltas':

4280

self._extract_and_insert_inventory_deltas(

4281

substream, src_serializer)

3996

4282

elif substream_type == 'chk_bytes':

3997

4283

# XXX: This doesn't support conversions, as it assumes the

3998

4284

# conversion was done in the fetch code.

4049

4335

self.target_repo.pack(hint=hint)

4050

4336

return [], set()

4051

4337

4052

def _extract_and_insert_inventories(self, substream, serializer):

4338

def _extract_and_insert_inventory_deltas(self, substream, serializer):

4339

target_rich_root = self.target_repo._format.rich_root_data

4340

target_tree_refs = self.target_repo._format.supports_tree_reference

4341

for record in substream:

4342

# Insert the delta directly

4343

inventory_delta_bytes = record.get_bytes_as('fulltext')

4344

deserialiser = inventory_delta.InventoryDeltaDeserializer()

4345

try:

4346

parse_result = deserialiser.parse_text_bytes(

4347

inventory_delta_bytes)

4348

except inventory_delta.IncompatibleInventoryDelta, err:

4349

trace.mutter("Incompatible delta: %s", err.msg)

4350

raise errors.IncompatibleRevision(self.target_repo._format)

4351

basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result

4352

revision_id = new_id

4353

parents = [key[0] for key in record.parents]

4354

self.target_repo.add_inventory_by_delta(

4355

basis_id, inv_delta, revision_id, parents)

4356

4357

def _extract_and_insert_inventories(self, substream, serializer,

4358

parse_delta=None):

4053

4359

"""Generate a new inventory versionedfile in target, converting data.

4054

4360

4055

4361

The inventory is retrieved from the source, (deserializing it), and

4056

4362

stored in the target (reserializing it in a different format).

4057

4363

"""

4364

target_rich_root = self.target_repo._format.rich_root_data

4365

target_tree_refs = self.target_repo._format.supports_tree_reference

4058

4366

for record in substream:

4367

# It's not a delta, so it must be a fulltext in the source

4368

# serializer's format.

4059

4369

bytes = record.get_bytes_as('fulltext')

4060

4370

revision_id = record.key[0]

4061

4371

inv = serializer.read_inventory_from_string(bytes, revision_id)

4062

4372

parents = [key[0] for key in record.parents]

4063

4373

self.target_repo.add_inventory(revision_id, inv, parents)

4374

# No need to keep holding this full inv in memory when the rest of

4375

# the substream is likely to be all deltas.

4376

del inv

4064

4377

4065

4378

def _extract_and_insert_revisions(self, substream, serializer):

4066

4379

for record in substream:

4115

4428

return [('signatures', signatures), ('revisions', revisions)]

4116

4429

4117

4430

def _generate_root_texts(self, revs):

4118

"""This will be called by __fetch between fetching weave texts and

4431

"""This will be called by get_stream between fetching weave texts and

4119

4432

fetching the inventory weave.

4120

4121

Subclasses should override this if they need to generate root texts

4122

after fetching weave texts.

4123

4433

"""

4124

4434

if self._rich_root_upgrade():

4125

4435

import bzrlib.fetch

4132

4442

phase = 'file'

4133

4443

revs = search.get_keys()

4134

4444

graph = self.from_repository.get_graph()

4135

revs = list(graph.iter_topo_order(revs))

4445

revs = tsort.topo_sort(graph.get_parent_map(revs))

4136

4446

data_to_fetch = self.from_repository.item_keys_introduced_by(revs)

4137

4447

text_keys = []

4138

4448

for knit_kind, file_id, revisions in data_to_fetch:

4157

4467

# will be valid.

4158

4468

for _ in self._generate_root_texts(revs):

4159

4469

yield _

4160

# NB: This currently reopens the inventory weave in source;

4161

# using a single stream interface instead would avoid this.

4162

from_weave = self.from_repository.inventories

4163

4470

# we fetch only the referenced inventories because we do not

4164

4471

# know for unselected inventories whether all their required

4165

4472

# texts are present in the other repository - it could be

4204

4511

if not keys:

4205

4512

# No need to stream something we don't have

4206

4513

continue

4514

if substream_kind == 'inventories':

4515

# Some missing keys are genuinely ghosts, filter those out.

4516

present = self.from_repository.inventories.get_parent_map(keys)

4517

revs = [key[0] for key in present]

4518

# Get the inventory stream more-or-less as we do for the

4519

# original stream; there's no reason to assume that records

4520

# direct from the source will be suitable for the sink. (Think

4521

# e.g. 2a -> 1.9-rich-root).

4522

for info in self._get_inventory_stream(revs, missing=True):

4523

yield info

4524

continue

4525

4207

4526

# Ask for full texts always so that we don't need more round trips

4208

4527

# after this stream.

4209

4528

# Some of the missing keys are genuinely ghosts, so filter absent

4224

4543

return (not self.from_repository._format.rich_root_data and

4225

4544

self.to_format.rich_root_data)

4226

4545

4227

def _get_inventory_stream(self, revision_ids):

4546

def _get_inventory_stream(self, revision_ids, missing=False):

4228

4547

from_format = self.from_repository._format

4229

if (from_format.supports_chks and self.to_format.supports_chks

4230

and (from_format._serializer == self.to_format._serializer)):

4231

# Both sides support chks, and they use the same serializer, so it

4232

# is safe to transmit the chk pages and inventory pages across

4233

# as-is.

4234

return self._get_chk_inventory_stream(revision_ids)

4235

elif (not from_format.supports_chks):

4236

# Source repository doesn't support chks. So we can transmit the

4237

# inventories 'as-is' and either they are just accepted on the

4238

# target, or the Sink will properly convert it.

4239

return self._get_simple_inventory_stream(revision_ids)

4548

if (from_format.supports_chks and self.to_format.supports_chks and

4549

from_format.network_name() == self.to_format.network_name()):

4550

raise AssertionError(

4551

"this case should be handled by GroupCHKStreamSource")

4552

elif 'forceinvdeltas' in debug.debug_flags:

4553

return self._get_convertable_inventory_stream(revision_ids,

4554

delta_versus_null=missing)

4555

elif from_format.network_name() == self.to_format.network_name():

4556

# Same format.

4557

return self._get_simple_inventory_stream(revision_ids,

4558

missing=missing)

4559

elif (not from_format.supports_chks and not self.to_format.supports_chks

4560

and from_format._serializer == self.to_format._serializer):

4561

# Essentially the same format.

4562

return self._get_simple_inventory_stream(revision_ids,

4563

missing=missing)

4240

4564

else:

4241

# XXX: Hack to make not-chk->chk fetch: copy the inventories as

4242

# inventories. Note that this should probably be done somehow

4243

# as part of bzrlib.repository.StreamSink. Except JAM couldn't

4244

# figure out how a non-chk repository could possibly handle

4245

# deserializing an inventory stream from a chk repo, as it

4246

# doesn't have a way to understand individual pages.

4247

return self._get_convertable_inventory_stream(revision_ids)

4565

# Any time we switch serializations, we want to use an

4566

# inventory-delta based approach.

4567

return self._get_convertable_inventory_stream(revision_ids,

4568

delta_versus_null=missing)

4248

4569

4249

def _get_simple_inventory_stream(self, revision_ids):

4570

def _get_simple_inventory_stream(self, revision_ids, missing=False):

4571

# NB: This currently reopens the inventory weave in source;

4572

# using a single stream interface instead would avoid this.

4250

4573

from_weave = self.from_repository.inventories

4574

if missing:

4575

delta_closure = True

4576

else:

4577

delta_closure = not self.delta_on_metadata()

4251

4578

yield ('inventories', from_weave.get_record_stream(

4252

4579

[(rev_id,) for rev_id in revision_ids],

4253

self.inventory_fetch_order(),

4254

not self.delta_on_metadata()))

4255

4256

def _get_chk_inventory_stream(self, revision_ids):

4257

"""Fetch the inventory texts, along with the associated chk maps."""

4258

# We want an inventory outside of the search set, so that we can filter

4259

# out uninteresting chk pages. For now we use

4260

# _find_revision_outside_set, but if we had a Search with cut_revs, we

4261

# could use that instead.

4262

start_rev_id = self.from_repository._find_revision_outside_set(

4263

revision_ids)

4264

start_rev_key = (start_rev_id,)

4265

inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]

4266

if start_rev_id != _mod_revision.NULL_REVISION:

4267

inv_keys_to_fetch.append((start_rev_id,))

4268

# Any repo that supports chk_bytes must also support out-of-order

4269

# insertion. At least, that is how we expect it to work

4270

# We use get_record_stream instead of iter_inventories because we want

4271

# to be able to insert the stream as well. We could instead fetch

4272

# allowing deltas, and then iter_inventories, but we don't know whether

4273

# source or target is more 'local' anyway.

4274

inv_stream = self.from_repository.inventories.get_record_stream(

4275

inv_keys_to_fetch, 'unordered',

4276

True) # We need them as full-texts so we can find their references

4277

uninteresting_chk_roots = set()

4278

interesting_chk_roots = set()

4279

def filter_inv_stream(inv_stream):

4280

for idx, record in enumerate(inv_stream):

4281

### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))

4282

bytes = record.get_bytes_as('fulltext')

4283

chk_inv = inventory.CHKInventory.deserialise(

4284

self.from_repository.chk_bytes, bytes, record.key)

4285

if record.key == start_rev_key:

4286

uninteresting_chk_roots.add(chk_inv.id_to_entry.key())

4287

p_id_map = chk_inv.parent_id_basename_to_file_id

4288

if p_id_map is not None:

4289

uninteresting_chk_roots.add(p_id_map.key())

4290

else:

4291

yield record

4292

interesting_chk_roots.add(chk_inv.id_to_entry.key())

4293

p_id_map = chk_inv.parent_id_basename_to_file_id

4294

if p_id_map is not None:

4295

interesting_chk_roots.add(p_id_map.key())

4296

### pb.update('fetch inventory', 0, 2)

4297

yield ('inventories', filter_inv_stream(inv_stream))

4298

# Now that we have worked out all of the interesting root nodes, grab

4299

# all of the interesting pages and insert them

4300

### pb.update('fetch inventory', 1, 2)

4301

interesting = chk_map.iter_interesting_nodes(

4302

self.from_repository.chk_bytes, interesting_chk_roots,

4303

uninteresting_chk_roots)

4304

def to_stream_adapter():

4305

"""Adapt the iter_interesting_nodes result to a single stream.

4306

4307

iter_interesting_nodes returns records as it processes them, along

4308

with keys. However, we only want to return the records themselves.

4309

"""

4310

for record, items in interesting:

4311

if record is not None:

4312

yield record

4313

# XXX: We could instead call get_record_stream(records.keys())

4314

# ATM, this will always insert the records as fulltexts, and

4315

# requires that you can hang on to records once you have gone

4316

# on to the next one. Further, it causes the target to

4317

# recompress the data. Testing shows it to be faster than

4318

# requesting the records again, though.

4319

yield ('chk_bytes', to_stream_adapter())

4320

### pb.update('fetch inventory', 2, 2)

4321

4322

def _get_convertable_inventory_stream(self, revision_ids):

4323

# XXX: One of source or target is using chks, and they don't have

4324

# compatible serializations. The StreamSink code expects to be

4325

# able to convert on the target, so we need to put

4326

# bytes-on-the-wire that can be converted

4327

yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))

4328

4329

def _stream_invs_as_fulltexts(self, revision_ids):

4580

self.inventory_fetch_order(), delta_closure))

4581

4582

def _get_convertable_inventory_stream(self, revision_ids,

4583

delta_versus_null=False):

4584

# The source is using CHKs, but the target either doesn't or it has a

4585

# different serializer. The StreamSink code expects to be able to

4586

# convert on the target, so we need to put bytes-on-the-wire that can

4587

# be converted. That means inventory deltas (if the remote is <1.19,

4588

# RemoteStreamSink will fall back to VFS to insert the deltas).

4589

yield ('inventory-deltas',

4590

self._stream_invs_as_deltas(revision_ids,

4591

delta_versus_null=delta_versus_null))

4592

4593

def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
    """Return a stream of inventory-deltas for the given rev ids.

    Each yielded item is a versionedfile.FulltextContentFactory whose bytes
    are the serialized delta lines (as produced by
    InventoryDeltaSerializer.delta_to_lines) joined into one string.

    :param revision_ids: The list of inventories to transmit
    :param delta_versus_null: Don't try to find a minimal delta for this
        entry, instead compute the delta versus the NULL_REVISION. This
        effectively streams a complete inventory. Used for stuff like
        filling in missing parents, etc.
    """
    from_repo = self.from_repository
    revision_keys = [(rev_id,) for rev_id in revision_ids]
    parent_map = from_repo.inventories.get_parent_map(revision_keys)
    # XXX: possibly repos could implement a more efficient iter_inv_deltas
    # method...
    inventories = from_repo.iter_inventories(revision_ids, 'topological')
    repo_format = from_repo._format
    # Only inventories already emitted by this stream (plus NULL) are used
    # as delta bases.
    invs_sent_so_far = set([_mod_revision.NULL_REVISION])
    inventory_cache = lru_cache.LRUCache(50)
    null_inventory = from_repo.revision_tree(
        _mod_revision.NULL_REVISION).inventory
    # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
    # per-repo (e.g. streaming a non-rich-root revision out of a rich-root
    # repo back into a non-rich-root repo ought to be allowed)
    serializer = inventory_delta.InventoryDeltaSerializer(
        versioned_root=repo_format.rich_root_data,
        tree_references=repo_format.supports_tree_reference)
    for inv in inventories:
        key = (inv.revision_id,)
        parent_keys = parent_map.get(key, ())
        delta = None
        if not delta_versus_null and parent_keys:
            # The caller did not ask for complete inventories and we have
            # some parents that we can delta against. Make a delta against
            # each parent so that we can find the smallest.
            parent_ids = [parent_key[0] for parent_key in parent_keys]
            for parent_id in parent_ids:
                if parent_id not in invs_sent_so_far:
                    # We don't know that the remote side has this basis, so
                    # we can't use it.
                    continue
                if parent_id == _mod_revision.NULL_REVISION:
                    parent_inv = null_inventory
                else:
                    parent_inv = inventory_cache.get(parent_id, None)
                    if parent_inv is None:
                        # Not (or no longer) in the LRU cache; load it from
                        # the source repository instead.
                        parent_inv = from_repo.get_inventory(parent_id)
                candidate_delta = inv._make_delta(parent_inv)
                if (delta is None or
                        len(delta) > len(candidate_delta)):
                    delta = candidate_delta
                    basis_id = parent_id
        if delta is None:
            # Either none of the parents ended up being suitable, or we
            # were asked to delta against NULL
            basis_id = _mod_revision.NULL_REVISION
            delta = inv._make_delta(null_inventory)
        invs_sent_so_far.add(inv.revision_id)
        inventory_cache[inv.revision_id] = inv
        delta_serialized = ''.join(
            serializer.delta_to_lines(basis_id, key[-1], delta))
        yield versionedfile.FulltextContentFactory(
            key, parent_keys, None, delta_serialized)

4350

4656

4351

4657

4352

4658

def _iter_for_revno(repo, partial_history_cache, stop_index=None,

Older »