1
# Copyright (C) 2005-2010 Canonical Ltd
1
# Copyright (C) 2005, 2006, 2007, 2008, 2009 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
860
856
# versioned roots do not change unless the tree found a change.
863
class RepositoryWriteLockResult(object):
864
"""The result of write locking a repository.
866
:ivar repository_token: The token obtained from the underlying lock, or
868
:ivar unlock: A callable which will unlock the lock.
871
def __init__(self, unlock, repository_token):
872
self.repository_token = repository_token
876
return "RepositoryWriteLockResult(%s, %s)" % (self.repository_token,
880
859
######################################################################
884
class Repository(_RelockDebugMixin, bzrdir.ControlComponent):
863
class Repository(_RelockDebugMixin):
885
864
"""Repository holding history for one or more branches.
887
866
The repository holds and retrieves historical information including
1046
1025
:seealso: add_inventory, for the contract.
1048
inv_lines = self._serializer.write_inventory_to_lines(inv)
1027
inv_lines = self._serialise_inventory_to_lines(inv)
1049
1028
return self._inventory_add_lines(revision_id, parents,
1050
1029
inv_lines, check_content=False)
1258
1237
"""Check a single text from this repository."""
1259
1238
if kind == 'inventories':
1260
1239
rev_id = record.key[0]
1261
inv = self._deserialise_inventory(rev_id,
1240
inv = self.deserialise_inventory(rev_id,
1262
1241
record.get_bytes_as('fulltext'))
1263
1242
if last_object is not None:
1264
1243
delta = inv._make_delta(last_object)
1309
1288
:param _format: The format of the repository on disk.
1310
1289
:param a_bzrdir: The BzrDir of the repository.
1291
In the future we will have a single api for all stores for
1292
getting file texts, inventories and revisions, then
1293
this construct will accept instances of those things.
1312
# In the future we will have a single api for all stores for
1313
# getting file texts, inventories and revisions, then
1314
# this construct will accept instances of those things.
1315
1295
super(Repository, self).__init__()
1316
1296
self._format = _format
1317
1297
# the following are part of the public API for Repository:
1323
1303
self._reconcile_does_inventory_gc = True
1324
1304
self._reconcile_fixes_text_parents = False
1325
1305
self._reconcile_backsup_inventory = True
1306
# not right yet - should be more semantically clear ?
1308
# TODO: make sure to construct the right store classes, etc, depending
1309
# on whether escaping is required.
1310
self._warn_if_deprecated()
1326
1311
self._write_group = None
1327
1312
# Additional places to query for data.
1328
1313
self._fallback_repositories = []
1329
1314
# An InventoryEntry cache, used during deserialization
1330
1315
self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
1331
# Is it safe to return inventory entries directly from the entry cache,
1332
# rather than copying them?
1333
self._safe_to_return_from_cache = False
1336
def user_transport(self):
1337
return self.bzrdir.user_transport
1340
def control_transport(self):
1341
return self._transport
1343
1317
def __repr__(self):
1344
1318
if self._fallback_repositories:
1393
1367
data during reads, and allows a 'write_group' to be obtained. Write
1394
1368
groups must be used for actual data insertion.
1396
A token should be passed in if you know that you have locked the object
1397
some other way, and need to synchronise this object's state with that
1400
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1402
1370
:param token: if this is already locked, then lock_write will fail
1403
1371
unless the token matches the existing lock.
1404
1372
:returns: a token if this instance supports tokens, otherwise None.
1407
1375
:raises MismatchedToken: if the specified token doesn't match the token
1408
1376
of the existing lock.
1409
1377
:seealso: start_write_group.
1410
:return: A RepositoryWriteLockResult.
1379
A token should be passed in if you know that you have locked the object
1380
some other way, and need to synchronise this object's state with that
1383
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1412
1385
locked = self.is_locked()
1413
token = self.control_files.lock_write(token=token)
1386
result = self.control_files.lock_write(token=token)
1415
self._warn_if_deprecated()
1416
1388
self._note_lock('w')
1417
1389
for repo in self._fallback_repositories:
1418
1390
# Writes don't affect fallback repos
1419
1391
repo.lock_read()
1420
1392
self._refresh_data()
1421
return RepositoryWriteLockResult(self.unlock, token)
1423
1395
def lock_read(self):
1424
"""Lock the repository for read operations.
1426
:return: An object with an unlock method which will release the lock
1429
1396
locked = self.is_locked()
1430
1397
self.control_files.lock_read()
1432
self._warn_if_deprecated()
1433
1399
self._note_lock('r')
1434
1400
for repo in self._fallback_repositories:
1435
1401
repo.lock_read()
1436
1402
self._refresh_data()
1439
1404
def get_physical_lock_status(self):
1440
1405
return self.control_files.get_physical_lock_status()
1501
1466
# now gather global repository information
1502
1467
# XXX: This is available for many repos regardless of listability.
1503
if self.user_transport.listable():
1468
if self.bzrdir.root_transport.listable():
1504
1469
# XXX: do we want to define __len__() ?
1505
1470
# Maybe the versionedfiles object should provide a different
1506
1471
# method to get the number of keys.
1516
1481
:param using: If True, list only branches using this repository.
1518
1483
if using and not self.is_shared():
1519
return self.bzrdir.list_branches()
1485
return [self.bzrdir.open_branch()]
1486
except errors.NotBranchError:
1520
1488
class Evaluator(object):
1522
1490
def __init__(self):
1531
1499
except errors.NoRepositoryPresent:
1534
return False, ([], repository)
1502
return False, (None, repository)
1535
1503
self.first_call = False
1536
value = (bzrdir.list_branches(), None)
1505
value = (bzrdir.open_branch(), None)
1506
except errors.NotBranchError:
1507
value = (None, None)
1537
1508
return True, value
1540
for branches, repository in bzrdir.BzrDir.find_bzrdirs(
1541
self.user_transport, evaluate=Evaluator()):
1542
if branches is not None:
1543
ret.extend(branches)
1511
for branch, repository in bzrdir.BzrDir.find_bzrdirs(
1512
self.bzrdir.root_transport, evaluate=Evaluator()):
1513
if branch is not None:
1514
branches.append(branch)
1544
1515
if not using and repository is not None:
1545
ret.extend(repository.find_branches())
1516
branches.extend(repository.find_branches())
1548
1519
@needs_read_lock
1549
1520
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
1927
1898
rev = self._serializer.read_revision_from_string(text)
1928
1899
yield (revid, rev)
1902
def get_revision_xml(self, revision_id):
1903
# TODO: jam 20070210 This shouldn't be necessary since get_revision
1904
# would have already done it.
1905
# TODO: jam 20070210 Just use _serializer.write_revision_to_string()
1906
# TODO: this can't just be replaced by:
1907
# return self._serializer.write_revision_to_string(
1908
# self.get_revision(revision_id))
1909
# as cStringIO preserves the encoding unlike write_revision_to_string
1910
# or some other call down the path.
1911
rev = self.get_revision(revision_id)
1912
rev_tmp = cStringIO.StringIO()
1913
# the current serializer..
1914
self._serializer.write_revision(rev, rev_tmp)
1916
return rev_tmp.getvalue()
1930
1918
def get_deltas_for_revisions(self, revisions, specific_fileids=None):
1931
1919
"""Produce a generator of revision deltas.
2175
2163
selected_keys = set((revid,) for revid in revision_ids)
2176
2164
w = _inv_weave or self.inventories
2177
return self._find_file_ids_from_xml_inventory_lines(
2178
w.iter_lines_added_or_present_in_keys(
2179
selected_keys, pb=None),
2165
pb = ui.ui_factory.nested_progress_bar()
2167
return self._find_file_ids_from_xml_inventory_lines(
2168
w.iter_lines_added_or_present_in_keys(
2169
selected_keys, pb=pb),
2182
2174
def iter_files_bytes(self, desired_files):
2183
2175
"""Iterate through file versions.
2393
2385
"""single-document based inventory iteration."""
2394
2386
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2395
2387
for text, revision_id in inv_xmls:
2396
yield self._deserialise_inventory(revision_id, text)
2388
yield self.deserialise_inventory(revision_id, text)
2398
2390
def _iter_inventory_xmls(self, revision_ids, ordering):
2399
2391
if ordering is None:
2431
2423
next_key = None
2434
def _deserialise_inventory(self, revision_id, xml):
2426
def deserialise_inventory(self, revision_id, xml):
2435
2427
"""Transform the xml into an inventory object.
2437
2429
:param revision_id: The expected revision id of the inventory.
2438
2430
:param xml: A serialised inventory.
2440
2432
result = self._serializer.read_inventory_from_string(xml, revision_id,
2441
entry_cache=self._inventory_entry_cache,
2442
return_from_cache=self._safe_to_return_from_cache)
2433
entry_cache=self._inventory_entry_cache)
2443
2434
if result.revision_id != revision_id:
2444
2435
raise AssertionError('revision id mismatch %s != %s' % (
2445
2436
result.revision_id, revision_id))
2439
def serialise_inventory(self, inv):
2440
return self._serializer.write_inventory_to_string(inv)
2442
def _serialise_inventory_to_lines(self, inv):
2443
return self._serializer.write_inventory_to_lines(inv)
2448
2445
def get_serializer_format(self):
2449
2446
return self._serializer.format_num
2451
2448
@needs_read_lock
2452
def _get_inventory_xml(self, revision_id):
2453
"""Get serialized inventory as a string."""
2449
def get_inventory_xml(self, revision_id):
2450
"""Get inventory XML as a file object."""
2454
2451
texts = self._iter_inventory_xmls([revision_id], 'unordered')
2456
2453
text, revision_id = texts.next()
2458
2455
raise errors.HistoryMissing(self, 'inventory', revision_id)
2459
def get_inventory_sha1(self, revision_id):
2460
"""Return the sha1 hash of the inventory entry
2462
return self.get_revision(revision_id).inventory_sha1
2461
2464
def get_rev_id_for_revno(self, revno, known_pair):
2462
2465
"""Return the revision id of a revno, given a later (revno, revid)
2463
2466
pair in the same history.
2515
2518
next_id = parents[0]
2521
def get_revision_inventory(self, revision_id):
2522
"""Return inventory of a past revision."""
2523
# TODO: Unify this with get_inventory()
2524
# bzr 0.0.6 and later imposes the constraint that the inventory_id
2525
# must be the same as its revision, so this is trivial.
2526
if revision_id is None:
2527
# This does not make sense: if there is no revision,
2528
# then it is the current tree inventory surely ?!
2529
# and thus get_root_id() is something that looks at the last
2530
# commit on the branch, and the get_root_id is an inventory check.
2531
raise NotImplementedError
2532
# return Inventory(self.get_root_id())
2534
return self.get_inventory(revision_id)
2517
2536
def is_shared(self):
2518
2537
"""Return True if this repository is flagged as a shared repository."""
2519
2538
raise NotImplementedError(self.is_shared)
2553
2572
return RevisionTree(self, Inventory(root_id=None),
2554
2573
_mod_revision.NULL_REVISION)
2556
inv = self.get_inventory(revision_id)
2575
inv = self.get_revision_inventory(revision_id)
2557
2576
return RevisionTree(self, inv, revision_id)
2559
2578
def revision_trees(self, revision_ids):
2612
2631
keys = tsort.topo_sort(parent_map)
2613
2632
return [None] + list(keys)
2615
def pack(self, hint=None, clean_obsolete_packs=False):
2634
def pack(self, hint=None):
2616
2635
"""Compress the data within the repository.
2618
2637
This operation only makes sense for some repository types. For other
2628
2647
obtained from the result of commit_write_group(). Out of
2629
2648
date hints are simply ignored, because concurrent operations
2630
2649
can obsolete them rapidly.
2632
:param clean_obsolete_packs: Clean obsolete packs immediately after
2636
2652
def get_transaction(self):
2652
2668
for ((revision_id,), parent_keys) in \
2653
2669
self.revisions.get_parent_map(query_keys).iteritems():
2654
2670
if parent_keys:
2655
result[revision_id] = tuple([parent_revid
2656
for (parent_revid,) in parent_keys])
2671
result[revision_id] = tuple(parent_revid
2672
for (parent_revid,) in parent_keys)
2658
2674
result[revision_id] = (_mod_revision.NULL_REVISION,)
2661
2677
def _make_parents_provider(self):
2665
def get_known_graph_ancestry(self, revision_ids):
2666
"""Return the known graph for a set of revision ids and their ancestors.
2668
st = static_tuple.StaticTuple
2669
revision_keys = [st(r_id).intern() for r_id in revision_ids]
2670
known_graph = self.revisions.get_known_graph_ancestry(revision_keys)
2671
return graph.GraphThunkIdsToKeys(known_graph)
2673
2680
def get_graph(self, other_repository=None):
2674
2681
"""Return the graph walker for this repository format"""
2675
2682
parents_provider = self._make_parents_provider()
2770
2777
result.check(callback_refs)
2773
def _warn_if_deprecated(self, branch=None):
2780
def _warn_if_deprecated(self):
2774
2781
global _deprecation_warning_done
2775
2782
if _deprecation_warning_done:
2779
conf = config.GlobalConfig()
2781
conf = branch.get_config()
2782
if conf.suppress_warning('format_deprecation'):
2784
warning("Format %s for %s is deprecated -"
2785
" please use 'bzr upgrade' to get better performance"
2786
% (self._format, self.bzrdir.transport.base))
2788
_deprecation_warning_done = True
2784
_deprecation_warning_done = True
2785
warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
2786
% (self._format, self.bzrdir.transport.base))
2790
2788
def supports_rich_root(self):
2791
2789
return self._format.rich_root_data
3074
3072
pack_compresses = False
3075
3073
# Does the repository inventory storage understand references to trees?
3076
3074
supports_tree_reference = None
3077
# Is the format experimental ?
3078
experimental = False
3081
return "%s()" % self.__class__.__name__
3077
return "<%s>" % self.__class__.__name__
3083
3079
def __eq__(self, other):
3084
3080
# format objects are generally stateless
3099
3095
transport = a_bzrdir.get_repository_transport(None)
3100
format_string = transport.get_bytes("format")
3096
format_string = transport.get("format").read()
3101
3097
return format_registry.get(format_string)
3102
3098
except errors.NoSuchFile:
3103
3099
raise errors.NoRepositoryPresent(a_bzrdir)
3203
3199
raise NotImplementedError(self.open)
3205
def _run_post_repo_init_hooks(self, repository, a_bzrdir, shared):
3206
from bzrlib.bzrdir import BzrDir, RepoInitHookParams
3207
hooks = BzrDir.hooks['post_repo_init']
3210
params = RepoInitHookParams(repository, self, a_bzrdir, shared)
3215
3202
class MetaDirRepositoryFormat(RepositoryFormat):
3216
3203
"""Common base class for the new repositories using the metadir layout."""
3422
3409
:param revision_id: if None all content is copied, if NULL_REVISION no
3423
3410
content is copied.
3411
:param pb: optional progress bar to use for progress reports. If not
3412
provided a default one will be created.
3427
ui.ui_factory.warn_experimental_format_fetch(self)
3428
3415
from bzrlib.fetch import RepoFetcher
3429
# See <https://launchpad.net/bugs/456077> asking for a warning here
3430
if self.source._format.network_name() != self.target._format.network_name():
3431
ui.ui_factory.show_user_warning('cross_format_fetch',
3432
from_format=self.source._format,
3433
to_format=self.target._format)
3434
3416
f = RepoFetcher(to_repository=self.target,
3435
3417
from_repository=self.source,
3436
3418
last_revision=revision_id,
3437
3419
fetch_spec=fetch_spec,
3438
find_ghosts=find_ghosts)
3420
pb=pb, find_ghosts=find_ghosts)
3440
3422
def _walk_to_common_revisions(self, revision_ids):
3441
3423
"""Walk out from revision_ids in source to revisions target has.
3837
3819
basis_id, delta, current_revision_id, parents_parents)
3838
3820
cache[current_revision_id] = parent_tree
3840
def _fetch_batch(self, revision_ids, basis_id, cache, a_graph=None):
3822
def _fetch_batch(self, revision_ids, basis_id, cache):
3841
3823
"""Fetch across a few revisions.
3843
3825
:param revision_ids: The revisions to copy
3844
3826
:param basis_id: The revision_id of a tree that must be in cache, used
3845
3827
as a basis for delta when no other base is available
3846
3828
:param cache: A cache of RevisionTrees that we can use.
3847
:param a_graph: A Graph object to determine the heads() of the
3848
rich-root data stream.
3849
3829
:return: The revision_id of the last converted tree. The RevisionTree
3850
3830
for it will be in cache
3858
3838
pending_revisions = []
3859
3839
parent_map = self.source.get_parent_map(revision_ids)
3860
3840
self._fetch_parent_invs_for_stacking(parent_map, cache)
3861
self.source._safe_to_return_from_cache = True
3862
3841
for tree in self.source.revision_trees(revision_ids):
3863
3842
# Find an inventory delta for this revision.
3864
3843
# Find text entries that need to be copied, too.
3912
3891
pending_revisions.append(revision)
3913
3892
cache[current_revision_id] = tree
3914
3893
basis_id = current_revision_id
3915
self.source._safe_to_return_from_cache = False
3916
3894
# Copy file texts
3917
3895
from_texts = self.source.texts
3918
3896
to_texts = self.target.texts
3919
3897
if root_keys_to_create:
3920
root_stream = _mod_fetch._new_root_data_stream(
3898
from bzrlib.fetch import _new_root_data_stream
3899
root_stream = _new_root_data_stream(
3921
3900
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3922
self.source, graph=a_graph)
3923
3902
to_texts.insert_record_stream(root_stream)
3924
3903
to_texts.insert_record_stream(from_texts.get_record_stream(
3925
3904
text_keys, self.target._format._fetch_order,
3982
3961
cache[basis_id] = basis_tree
3983
3962
del basis_tree # We don't want to hang on to it here
3985
if self._converting_to_rich_root and len(revision_ids) > 100:
3986
a_graph = _mod_fetch._get_rich_root_heads_graph(self.source,
3991
3964
for offset in range(0, len(revision_ids), batch_size):
3992
3965
self.target.start_write_group()
3994
3967
pb.update('Transferring revisions', offset,
3995
3968
len(revision_ids))
3996
3969
batch = revision_ids[offset:offset+batch_size]
3997
basis_id = self._fetch_batch(batch, basis_id, cache,
3970
basis_id = self._fetch_batch(batch, basis_id, cache)
4000
self.source._safe_to_return_from_cache = False
4001
3972
self.target.abort_write_group()
4015
3986
"""See InterRepository.fetch()."""
4016
3987
if fetch_spec is not None:
4017
3988
raise AssertionError("Not implemented yet...")
4018
ui.ui_factory.warn_experimental_format_fetch(self)
4019
3989
if (not self.source.supports_rich_root()
4020
3990
and self.target.supports_rich_root()):
4021
3991
self._converting_to_rich_root = True
4022
3992
self._revision_id_to_root_id = {}
4024
3994
self._converting_to_rich_root = False
4025
# See <https://launchpad.net/bugs/456077> asking for a warning here
4026
if self.source._format.network_name() != self.target._format.network_name():
4027
ui.ui_factory.show_user_warning('cross_format_fetch',
4028
from_format=self.source._format,
4029
to_format=self.target._format)
4030
3995
revision_ids = self.target.search_missing_revision_ids(self.source,
4031
3996
revision_id, find_ghosts=find_ghosts).get_keys()
4032
3997
if not revision_ids:
4101
4066
:param to_convert: The disk object to convert.
4102
4067
:param pb: a progress bar to use for progress information.
4104
pb = ui.ui_factory.nested_progress_bar()
4107
4072
# this is only useful with metadir layouts - separated repo content.
4108
4073
# trigger an assertion if not such
4109
4074
repo._format.get_format_string()
4110
4075
self.repo_dir = repo.bzrdir
4111
pb.update('Moving repository to repository.backup')
4076
self.step('Moving repository to repository.backup')
4112
4077
self.repo_dir.transport.move('repository', 'repository.backup')
4113
4078
backup_transport = self.repo_dir.transport.clone('repository.backup')
4114
4079
repo._format.check_conversion_target(self.target_format)
4115
4080
self.source_repo = repo._format.open(self.repo_dir,
4117
4082
_override_transport=backup_transport)
4118
pb.update('Creating new repository')
4083
self.step('Creating new repository')
4119
4084
converted = self.target_format.initialize(self.repo_dir,
4120
4085
self.source_repo.is_shared())
4121
4086
converted.lock_write()
4123
pb.update('Copying content')
4088
self.step('Copying content')
4124
4089
self.source_repo.copy_content_into(converted)
4126
4091
converted.unlock()
4127
pb.update('Deleting old repository content')
4092
self.step('Deleting old repository content')
4128
4093
self.repo_dir.transport.delete_tree('repository.backup')
4129
4094
ui.ui_factory.note('repository converted')
4096
def step(self, message):
4097
"""Update the pb by a step."""
4099
self.pb.update(message, self.count, self.total)
4133
4102
_unescape_map = {
4477
4446
fetching the inventory weave.
4479
4448
if self._rich_root_upgrade():
4480
return _mod_fetch.Inter1and2Helper(
4450
return bzrlib.fetch.Inter1and2Helper(
4481
4451
self.from_repository).generate_root_texts(revs)
4626
4596
def _get_convertable_inventory_stream(self, revision_ids,
4627
4597
delta_versus_null=False):
4628
# The two formats are sufficiently different that there is no fast
4629
# path, so we need to send just inventorydeltas, which any
4630
# sufficiently modern client can insert into any repository.
4631
# The StreamSink code expects to be able to
4598
# The source is using CHKs, but the target either doesn't or it has a
4599
# different serializer. The StreamSink code expects to be able to
4632
4600
# convert on the target, so we need to put bytes-on-the-wire that can
4633
4601
# be converted. That means inventory deltas (if the remote is <1.19,
4634
4602
# RemoteStreamSink will fallback to VFS to insert the deltas).