from bzrlib import (
    revision as _mod_revision,
    )
from bzrlib.decorators import needs_write_lock, only_raises
from bzrlib.btree_index import (
    BTreeBuilder,
    BTreeGraphIndex,
    )
from bzrlib.index import (
    InMemoryGraphIndex,
    )
from bzrlib.lock import LogicalLockResult
from bzrlib.repofmt.knitrepo import KnitRepository
from bzrlib.repository import (
    MetaDirRepositoryFormat,
    RepositoryWriteLockResult,
    )
from bzrlib.trace import (
            texts/deltas (via (fileid, revisionid) tuples).
        :param signature_index: A GraphIndex for determining what signatures are
            present in the Pack and accessing the locations of their texts.
        :param chk_index: A GraphIndex for accessing content by CHK, if the
            pack has one.
        """
        self.revision_index = revision_index
        self.inventory_index = inventory_index
        self.text_index = text_index
        self.signature_index = signature_index
        self.chk_index = chk_index

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content."""
        return self.index_name('text', name)

    def _replace_index_with_readonly(self, index_type):
        unlimited_cache = False
        if index_type == 'chk':
            unlimited_cache = True
        index = self.index_class(self.index_transport,
            self.index_name(index_type, self.name),
            self.index_sizes[self.index_offset(index_type)],
            unlimited_cache=unlimited_cache)
        if index_type == 'chk':
            index._leaf_factory = btree_index._gcchk_factory
        setattr(self, index_type + '_index', index)
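
# --- Illustrative aside (not from pack_repo.py; names prefixed "example_" are
# made up). index_name()/index_offset() above map an index type to a file
# suffix and a slot in index_sizes; the suffixes appear in _make_index and the
# _obsolete_packs cleanup later in this file, and the slot order in the
# index_sizes comment (rev, inv, text, sigs, chk_if_in_use). A sketch of that
# bookkeeping, mirroring the Pack.index_definitions mapping referenced later:
EXAMPLE_INDEX_DEFINITIONS = {
    'revision': ('.rix', 0),
    'inventory': ('.iix', 1),
    'text': ('.tix', 2),
    'signature': ('.six', 3),
    'chk': ('.cix', 4),
    }

def example_index_name(index_type, pack_name):
    # example_index_name('text', 'abc123') -> 'abc123.tix'
    return pack_name + EXAMPLE_INDEX_DEFINITIONS[index_type][0]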

class ExistingPack(Pack):
    """An in memory proxy for an existing .pack and its disk indices."""

    def __init__(self, pack_transport, name, revision_index, inventory_index,
        text_index, signature_index, chk_index=None):
        """Create an ExistingPack object.

        :param pack_transport: The transport where the pack file resides.
        :param name: The name of the pack on disk in the pack_transport.
        """
        Pack.__init__(self, revision_index, inventory_index, text_index,
            signature_index, chk_index)
        self.name = name
        self.pack_transport = pack_transport
        if None in (revision_index, inventory_index, text_index,

class ResumedPack(ExistingPack):

    def __init__(self, name, revision_index, inventory_index, text_index,
        signature_index, upload_transport, pack_transport, index_transport,
        pack_collection, chk_index=None):
        """Create a ResumedPack object."""
        ExistingPack.__init__(self, pack_transport, name, revision_index,
            inventory_index, text_index, signature_index,
            chk_index=chk_index)
        self.upload_transport = upload_transport
        self.index_transport = index_transport
        self.index_sizes = [None, None, None, None]
        self.upload_transport.delete(self.file_name())
        indices = [self.revision_index, self.inventory_index, self.text_index,
            self.signature_index]
        if self.chk_index is not None:
            indices.append(self.chk_index)
        for index in indices:
            index._transport.delete(index._name)
    def finish(self):
        self._check_references()
        index_types = ['revision', 'inventory', 'text', 'signature']
        if self.chk_index is not None:
            index_types.append('chk')
        for index_type in index_types:
            old_name = self.index_name(index_type, self.name)
            new_name = '../indices/' + old_name
            self.upload_transport.rename(old_name, new_name)
            self._replace_index_with_readonly(index_type)
        new_name = '../packs/' + self.file_name()
        self.upload_transport.rename(self.file_name(), new_name)
        self._state = 'finished'
    def _get_external_refs(self, index):
        """Return compression parents for this index that are not present.

        This returns any compression parents that are referenced by this index,
        which are not contained *in* this index. They may be present elsewhere.
        """
        return index.external_references(1)
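
# --- Illustrative aside (not from pack_repo.py). external_references(1)
# answers: which compression (delta) parents does this index reference that it
# does not itself contain? A self-contained sketch of the same set algebra:
def example_external_compression_parents(records):
    # records maps key -> compression parent key (or None for fulltexts).
    present = set(records)
    return set(parent for parent in records.values()
               if parent is not None and parent not in present)

# example_external_compression_parents({'B': 'A', 'C': 'B'}) == set(['A']):
# B delta-compresses against A, which must live in some other pack.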
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = osutils.md5()
        # a tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs, chk_if_in_use)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        return bool(self.get_revision_count() or
            self.inventory_index.key_count() or
            self.text_index.key_count() or
            self.signature_index.key_count() or
            (self.chk_index is not None and self.chk_index.key_count()))

    def finish_content(self):
        if self.name is not None:
            return
        self._write_data('', flush=True)
        self.name = self._hash.hexdigest()
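
# --- Illustrative aside (not from pack_repo.py). As finish_content() shows, a
# pack's final name is the hex md5 of every byte written to it, so equal
# content always produces an equal name; that is why the comment in finish()
# below about clobbering same-name packs is mostly harmless.
import hashlib

def example_pack_name(content_bytes):
    return hashlib.md5(content_bytes).hexdigest()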
    def finish(self, suspend=False):
        """Finish the new pack.
        """
        self._write_index('text', self.text_index, 'file texts', suspend)
        self._write_index('signature', self.signature_index,
            'revision signatures', suspend)
        if self.chk_index is not None:
            self.index_sizes.append(None)
            self._write_index('chk', self.chk_index,
                'content hash bytes', suspend)
        self.write_stream.close()
        # Note that this will clobber an existing pack with the same name,
        # without checking for hash collisions. While this is undesirable this
            flush_func=flush_func)
        self.add_callback = None

    def replace_indices(self, index_to_pack, indices):
        """Replace the current mappings with fresh ones.

        This should probably not be used eventually, rather incremental add and
        removal of indices. It has been added during refactoring of existing
        code.

        :param index_to_pack: A mapping from index objects to
            (transport, name) tuples for the pack file data.
        :param indices: A list of indices.
        """
        # refresh the revision pack map dict without replacing the instance.
        self.index_to_pack.clear()
        self.index_to_pack.update(index_to_pack)
        # XXX: API break - clearly a 'replace' method would be good?
        self.combined_index._indices[:] = indices
        # the current add nodes callback for the current writable index if
        # there is one.
        self.add_callback = None

    def add_index(self, index, pack):
        """Add index to the aggregate, which is an index for Pack pack.
        self.data_access.set_writer(None, None, (None, None))
        self.index_to_pack.clear()
        del self.combined_index._indices[:]
        del self.combined_index._index_names[:]
        self.add_callback = None
    def remove_index(self, index):
        """Remove index from the indices used to answer queries.

        :param index: An index from the pack parameter.
        """
        del self.index_to_pack[index]
        pos = self.combined_index._indices.index(index)
        del self.combined_index._indices[pos]
        del self.combined_index._index_names[pos]
        if (self.add_callback is not None and
            getattr(index, 'add_nodes', None) == self.add_callback):
            self.add_callback = None
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        # NB XXX: how to check CHK references are present? perhaps by yielding
        # the items? How should that interact with stacked repos?
        if new_pack.chk_index is not None:
            self._copy_chks()
            if 'pack' in debug.debug_flags:
                mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
                    time.ctime(), self._pack_collection._upload_transport.base,
                    new_pack.random_name,
                    new_pack.chk_index.key_count(),
                    time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self._pack_collection.allocate(new_pack)
    def _copy_chks(self, refs=None):
        # XXX: Todo, recursive follow-pointers facility when fetching some
        # revisions only.
        chk_index_map, chk_indices = self._pack_map_and_index_list(
            'chk_index')
        chk_nodes = self._index_contents(chk_indices, refs)
        new_refs = set()
        # TODO: This isn't strictly tasteful as we are accessing some private
        #       variables (_serializer). Perhaps a better way would be to have
        #       Repository._deserialise_chk_node()
        search_key_func = chk_map.search_key_registry.get(
            self._pack_collection.repo._serializer.search_key_name)
        def accumlate_refs(lines):
            # XXX: move to a generic location
            bytes = ''.join(lines)
            node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
            new_refs.update(node.refs())
        self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
            self.new_pack.chk_index, output_lines=accumlate_refs)
        return new_refs
    def _copy_nodes(self, nodes, index_map, writer, write_index,
        output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
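
# --- Illustrative aside (not from pack_repo.py). The topological sort above
# guarantees a delta's basis is reinserted before anything compressed against
# it. A self-contained equivalent of that parents-first ordering:
def example_topo_sort(ancestors):
    # ancestors maps node -> list of parents; returns parents-first order.
    order, visited = [], set()
    def visit(node):
        if node in visited:
            return
        visited.add(node)
        for parent in ancestors.get(node, ()):
            visit(parent)
        order.append(node)
    for node in ancestors:
        visit(node)
    return order

# example_topo_sort({'d3': ['d2'], 'd2': ['d1'], 'd1': []})
# -> ['d1', 'd2', 'd3']: d1 is available before d2 needs it as a basis.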
    :ivar _names: map of {pack_name: (index_size,)}
    """

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack

    def __init__(self, repo, transport, index_transport, upload_transport,
            pack_transport, index_builder_class, index_class,
            use_chk_index):
        """Create a new RepositoryPackCollection.

        :param transport: Addresses the repository base directory
        self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
        self.text_index = AggregateIndex(self.reload_pack_names, flush)
        self.signature_index = AggregateIndex(self.reload_pack_names, flush)
        all_indices = [self.revision_index, self.inventory_index,
                self.text_index, self.signature_index]
        if use_chk_index:
            self.chk_index = AggregateIndex(self.reload_pack_names, flush)
            all_indices.append(self.chk_index)
        else:
            # used to determine if we're using a chk_index elsewhere.
            self.chk_index = None
        # Tell all the CombinedGraphIndex objects about each other, so they can
        # share hints about which pack names to search first.
        all_combined = [agg_idx.combined_index for agg_idx in all_indices]
        for combined_idx in all_combined:
            combined_idx.set_sibling_indices(
                set(all_combined).difference([combined_idx]))
        # resumed packs
        self._resumed_packs = []

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.repo)

    def add_pack_to_memory(self, pack):
        """Make a Pack object available to the repository to satisfy queries.
                'containing %d revisions. Packing %d files into %d affecting %d'
                ' revisions', self, total_packs, total_revisions, num_old_packs,
                num_new_packs, num_revs_affected)
        result = self._execute_pack_operations(pack_operations,
                                      reload_func=self._restart_autopack)
        mutter('Auto-packing repository %s completed', self)
        return result
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs as well as committing them.
        to_be_obsoleted = []
        for _, packs in pack_operations:
            to_be_obsoleted.extend(packs)
        result = self._save_pack_names(clear_obsolete_packs=True,
                                       obsolete_packs=to_be_obsoleted)
        return result
    def _flush_new_pack(self):
        if self._new_pack is not None:
            self._new_pack.flush()

    def lock_names(self):
        self.repo.control_files.lock_write()
    def _already_packed(self):
        """Is the collection already packed?"""
        return not (self.repo._format.pack_compresses or (len(self._names) > 1))
    def pack(self, hint=None, clean_obsolete_packs=False):
        """Pack the pack collection totally."""
        self.ensure_loaded()
        total_packs = len(self._names)
        if self._already_packed():
            return
        total_revisions = self.revision_index.combined_index.key_count()
        # XXX: the following may want to be a class, to pack with a given
        # policy.
        mutter('Packing repository %s, which has %d pack files, '
            'containing %d revisions with hint %r.', self, total_packs,
            total_revisions, hint)
        # determine which packs need changing
        pack_operations = [[0, []]]
        for pack in self.all_packs():
            if hint is None or pack.name in hint:
                # Either no hint was provided (so we are packing everything),
                # or this pack was included in the hint.
                pack_operations[-1][0] += pack.get_revision_count()
                pack_operations[-1][1].append(pack)
        self._execute_pack_operations(pack_operations, OptimisingPacker)

        if clean_obsolete_packs:
            self._clear_obsolete_packs()
    def plan_autopack_combinations(self, existing_packs, pack_distribution):
        """Plan a pack operation.
        """
        pack_operations = [[0, []]]
        # plan out what packs to keep, and what to reorganise
        while len(existing_packs):
            # take the largest pack, and if it's less than the head of the
            # distribution chart we will include its contents in the new pack
            # for that position. If it's larger, we remove its size from the
            # distribution chart
            next_pack_rev_count, next_pack = existing_packs.pop(0)
            if next_pack_rev_count >= pack_distribution[0]:
            inv_index = self._make_index(name, '.iix')
            txt_index = self._make_index(name, '.tix')
            sig_index = self._make_index(name, '.six')
            if self.chk_index is not None:
                chk_index = self._make_index(name, '.cix', is_chk=True)
            else:
                chk_index = None
            result = ExistingPack(self._pack_transport, name, rev_index,
                inv_index, txt_index, sig_index, chk_index)
            self.add_pack_to_memory(result)
            inv_index = self._make_index(name, '.iix', resume=True)
            txt_index = self._make_index(name, '.tix', resume=True)
            sig_index = self._make_index(name, '.six', resume=True)
            if self.chk_index is not None:
                chk_index = self._make_index(name, '.cix', resume=True,
                    is_chk=True)
            else:
                chk_index = None
            result = self.resumed_pack_factory(name, rev_index, inv_index,
                txt_index, sig_index, self._upload_transport,
                self._pack_transport, self._index_transport, self,
                chk_index=chk_index)
        except errors.NoSuchFile, e:
            raise errors.UnresumableWriteGroup(self.repo, [name], str(e))
        self.add_pack_to_memory(result)
        transport = self._index_transport
        index_size = self._names[name][size_offset]
        index = self._index_class(transport, index_name, index_size,
                                  unlimited_cache=is_chk)
        if is_chk and self._index_class is btree_index.BTreeGraphIndex:
            index._leaf_factory = btree_index._gcchk_factory
        return index
    def _max_pack_count(self, total_revisions):
        """Return the maximum number of packs to use for total revisions.
        :return: None.
        """
        for pack in packs:
            try:
                pack.pack_transport.rename(pack.file_name(),
                    '../obsolete_packs/' + pack.file_name())
            except (errors.PathError, errors.TransportError), e:
                # TODO: Should these be warnings or mutters?
                mutter("couldn't rename obsolete pack, skipping it:\n%s"
                       % (e,))
            # TODO: Probably needs to know all possible indices for this pack
            # - or maybe list the directory and move all indices matching this
            # name whether we recognize it or not?
            suffixes = ['.iix', '.six', '.tix', '.rix']
            if self.chk_index is not None:
                suffixes.append('.cix')
            for suffix in suffixes:
                try:
                    self._index_transport.rename(pack.name + suffix,
                        '../obsolete_packs/' + pack.name + suffix)
                except (errors.PathError, errors.TransportError), e:
                    mutter("couldn't rename obsolete index, skipping it:\n%s"
                           % (e,))
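
# --- Illustrative aside (not from pack_repo.py). Obsoletion above is
# best-effort: a rename that fails is logged and skipped so one stale file
# cannot wedge the whole cleanup. The shape of that pattern, standalone:
import os

def example_move_all(names, src_dir, dest_dir, log=lambda msg: None):
    for name in names:
        try:
            os.rename(os.path.join(src_dir, name),
                      os.path.join(dest_dir, name))
        except OSError, e:
            log("couldn't move %s, skipping it: %s" % (name, e))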
    def pack_distribution(self, total_revisions):
        """Generate a list of the number of revisions to put in each pack.
        self._remove_pack_indices(pack)
        self.packs.remove(pack)

    def _remove_pack_indices(self, pack, ignore_missing=False):
        """Remove the indices for pack from the aggregated indices.

        :param ignore_missing: Suppress KeyErrors from calling remove_index.
        """
        for index_type in Pack.index_definitions.keys():
            attr_name = index_type + '_index'
            aggregate_index = getattr(self, attr_name)
            if aggregate_index is not None:
                pack_index = getattr(pack, attr_name)
                try:
                    aggregate_index.remove_index(pack_index)
                except KeyError:
                    if ignore_missing:
                        continue
                    raise
    def reset(self):
        """Clear all cached data."""
        # cached revision data
        self.revision_index.clear()
        # cached signature data
        self.signature_index.clear()
        # cached file text data
        self.text_index.clear()
        # cached inventory data
        self.inventory_index.clear()
        if self.chk_index is not None:
            self.chk_index.clear()
        # remove the open pack
        self._new_pack = None
        # information about packs.
        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        :param obsolete_packs: Packs that are obsolete once the new pack-names
            file has been written.
        :return: A list of the names saved that were not previously on disk.
        """
        already_obsolete = []
        self.lock_names()
        try:
            builder = self._index_builder_class()
            (disk_nodes, deleted_nodes, new_nodes,
             orig_disk_nodes) = self._diff_pack_names()
            # TODO: handle same-name, index-size-changes here -
            # e.g. use the value from disk, not ours, *unless* we're the one
            # changing it.
            for key, value in disk_nodes:
                builder.add_node(key, value)
            self.transport.put_file('pack-names', builder.finish(),
                mode=self.repo.bzrdir._get_file_mode())
            self._packs_at_load = disk_nodes
            if clear_obsolete_packs:
                to_preserve = set([o.name for o in obsolete_packs])
                already_obsolete = self._clear_obsolete_packs(to_preserve)
        finally:
            self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
        self._syncronize_pack_names_from_disk_nodes(disk_nodes)
        if obsolete_packs:
            # TODO: We could add one more condition here. "if o.name not in
            #       orig_disk_nodes and o != the new_pack we haven't written to
            #       disk yet. However, the new pack object is not easily
            #       accessible here (it would have to be passed through the
            #       autopacking code, etc.)
            obsolete_packs = [o for o in obsolete_packs
                              if o.name not in already_obsolete]
            self._obsolete_packs(obsolete_packs)
        return [new_node[0][0] for new_node in new_nodes]
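
# --- Illustrative aside (not from pack_repo.py). _diff_pack_names above
# reconciles what disk had at load time with what disk has now; at its core
# this is plain set arithmetic over {pack_name: index_sizes} mappings:
def example_diff_pack_names(at_load, on_disk_now):
    deleted = sorted(set(at_load) - set(on_disk_now))
    added = sorted(set(on_disk_now) - set(at_load))
    return deleted, added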
    def reload_pack_names(self):
        """Sync our pack listing with what is present in the repository.
        """
        # The ensure_loaded call is to handle the case where the first call
        # made involving the collection was to reload_pack_names, where we
        # don't have a view of disk contents. It's a bit of a bandaid, and
        # causes two reads of pack-names, but it's a rare corner case not
        # struck with regular push/pull etc.
        first_read = self.ensure_loaded()
        if first_read:
            return True
        # out the new value.
        (disk_nodes, deleted_nodes, new_nodes,
         orig_disk_nodes) = self._diff_pack_names()
        # _packs_at_load is meant to be the explicit list of names in
        # 'pack-names' at the start. As such, it should not contain any
        # pending names that haven't been written out yet.
        self._packs_at_load = orig_disk_nodes
        (removed, added,
         modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
        if removed or added or modified:
            raise errors.RetryAutopack(self.repo, False, sys.exc_info())

    def _clear_obsolete_packs(self, preserve=None):
        """Delete everything from the obsolete-packs directory.

        :return: A list of pack identifiers (the filename without '.pack') that
            were found in obsolete_packs.
        """
        found = []
        obsolete_pack_transport = self.transport.clone('obsolete_packs')
        if preserve is None:
            preserve = set()
        for filename in obsolete_pack_transport.list_dir('.'):
            name, ext = osutils.splitext(filename)
            if ext == '.pack':
                found.append(name)
            if name in preserve:
                continue
            try:
                obsolete_pack_transport.delete(filename)
            except (errors.PathError, errors.TransportError), e:
                warning("couldn't delete obsolete pack, skipping it:\n%s"
                        % (e,))
        return found
    def _start_write_group(self):
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self._new_pack.text_index.set_optimize(combine_backing_indices=False)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
            self._new_pack.chk_index.set_optimize(combine_backing_indices=False)

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        # FIXME: just drop the transient index.
        # forget what names there are
        if self._new_pack is not None:
            operation = cleanup.OperationWithCleanups(self._new_pack.abort)
            operation.add_cleanup(setattr, self, '_new_pack', None)
            # If we aborted while in the middle of finishing the write
            # group, _remove_pack_indices could fail because the indexes are
            # already gone. But if they're not there we shouldn't fail in this
            # case, so we pass ignore_missing=True.
            operation.add_cleanup(self._remove_pack_indices, self._new_pack,
                ignore_missing=True)
            operation.run_simple()
        for resumed_pack in self._resumed_packs:
            operation = cleanup.OperationWithCleanups(resumed_pack.abort)
            # See comment in previous finally block.
            operation.add_cleanup(self._remove_pack_indices, resumed_pack,
                ignore_missing=True)
            operation.run_simple()
        del self._resumed_packs[:]
    def _remove_resumed_pack_indices(self):
        for resumed_pack in self._resumed_packs:
            self._remove_pack_indices(resumed_pack)
        del self._resumed_packs[:]
    def _check_new_inventories(self):
        """Detect missing inventories in this write group.

        :returns: list of strs, summarising any problems found. If the list is
            empty no problems were found.
        """
        # The base implementation does no checks. GCRepositoryPackCollection
        # overrides this.
        return []
    def _commit_write_group(self):
        all_missing = set()
        for prefix, versioned_file in (

            raise errors.BzrCheckError(
                "Repository %s has missing compression parent(s) %r "
                % (self.repo, sorted(all_missing)))
        problems = self._check_new_inventories()
        if problems:
            problems_summary = '\n'.join(problems)
            raise errors.BzrCheckError(
                "Cannot add revision(s) to repository: " + problems_summary)
        self._remove_pack_indices(self._new_pack)
        any_new_content = False
        if self._new_pack.data_inserted():
            # get all the data to disk and ready to use
            self._new_pack.finish()
            self.allocate(self._new_pack)
            self._new_pack = None
            any_new_content = True
        else:
            self._new_pack.abort()
            self._new_pack = None
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(

            deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        if _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = KnitVersionedFiles(
                _KnitGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    deltas=False, parents=False, is_locked=self.is_locked),
                data_access=self._pack_collection.chk_index.data_access,
                max_delta_chain=0)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
    def _warn_if_deprecated(self, branch=None):
        # This class isn't deprecated, but one sub-format is
        if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
            super(KnitPackRepository, self)._warn_if_deprecated(branch)
    def _abort_write_group(self):
        self.revisions._index._key_dependencies.clear()
        self._pack_collection._abort_write_group()

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return super(KnitPackRepository, self)._get_source(to_format)
    def _find_inconsistent_revision_parents(self):
        """Find revisions with incorrectly cached parents.

        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
            parents-in-revision).
        """
        if not self.is_locked():
            raise errors.ObjectNotLocked(self)
        pb = ui.ui_factory.nested_progress_bar()
        result = []
        revision_nodes = self._pack_collection.revision_index \
            .combined_index.iter_all_entries()
        index_positions = []
        # Get the cached index values for all revisions, and also the location
        # in each index of the revision text so we can perform linear IO.
        for index, key, value, refs in revision_nodes:
            pos, length = value[1:].split(' ')
            index_positions.append((index, int(pos), key[0],
                tuple(parent[0] for parent in refs[0])))
            pb.update("Reading revision index", 0, 0)
        index_positions.sort()
        batch_count = len(index_positions) / 1000 + 1
        pb.update("Checking cached revision graph", 0, batch_count)
        for offset in xrange(batch_count):
            pb.update("Checking cached revision graph", offset)
            to_query = index_positions[offset * 1000:(offset + 1) * 1000]
            rev_ids = [item[2] for item in to_query]
            revs = self.get_revisions(rev_ids)
            for revision, item in zip(revs, to_query):
                index_parents = item[3]
                rev_parents = tuple(revision.parent_ids)
                if index_parents != rev_parents:
                    result.append((revision.revision_id, index_parents, rev_parents))
        return result
    def _make_parents_provider(self):
        return graph.CachingParentsProvider(self)
        self._pack_collection._start_write_group()

    def _commit_write_group(self):
        hint = self._pack_collection._commit_write_group()
        self.revisions._index._key_dependencies.clear()
        return hint
    def suspend_write_group(self):
        # XXX check self._write_group is self.get_transaction()?
        tokens = self._pack_collection._suspend_write_group()
        self.revisions._index._key_dependencies.clear()
        self._write_group = None
        return tokens
    def _resume_write_group(self, tokens):
        self._start_write_group()
        try:
            self._pack_collection._resume_write_group(tokens)
        except errors.UnresumableWriteGroup:
            self._abort_write_group()
            raise
        for pack in self._pack_collection._resumed_packs:
            self.revisions._index.scan_unvalidated_index(pack.revision_index)
    def get_transaction(self):
        if self._write_lock_count:
            return self._transaction

    def is_write_locked(self):
        return self._write_lock_count
    def lock_write(self, token=None):
        """Lock the repository for writes.

        :return: A bzrlib.repository.RepositoryWriteLockResult.
        """
        locked = self.is_locked()
        if not self._write_lock_count and locked:
            raise errors.ReadOnlyError(self)
        self._write_lock_count += 1
        if self._write_lock_count == 1:
            self._transaction = transactions.WriteTransaction()
        if not locked:
            if 'relock' in debug.debug_flags and self._prev_lock == 'w':
                note('%r was write locked again', self)
            self._prev_lock = 'w'
            for repo in self._fallback_repositories:
                # Writes don't affect fallback repos
                repo.lock_read()
            self._refresh_data()
        return RepositoryWriteLockResult(self.unlock, None)
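
# --- Illustrative aside (not from pack_repo.py). lock_write() above is
# reentrant by counting: only the 0 -> 1 and 1 -> 0 transitions touch real
# state (the transaction, fallback repos). The counting skeleton on its own:
class ExampleLockCounter(object):
    def __init__(self):
        self._write_lock_count = 0
    def lock_write(self):
        self._write_lock_count += 1
        if self._write_lock_count == 1:
            pass  # first lock: create the transaction, lock fallbacks, ...
    def unlock(self):
        self._write_lock_count -= 1
        if self._write_lock_count == 0:
            pass  # last unlock: finish the transaction, unlock fallbacks, ...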
    def lock_read(self):
        """Lock the repository for reads.

        :return: A bzrlib.lock.LogicalLockResult.
        """
        locked = self.is_locked()
        if self._write_lock_count:
            self._write_lock_count += 1
        else:
            self.control_files.lock_read()
        if not locked:
            if 'relock' in debug.debug_flags and self._prev_lock == 'r':
                note('%r was read locked again', self)
            self._prev_lock = 'r'
            for repo in self._fallback_repositories:
                repo.lock_read()
            self._refresh_data()
        return LogicalLockResult(self.unlock)
    def leave_lock_in_place(self):
        # not supported - raise an error
            transaction = self._transaction
            self._transaction = None
            transaction.finish()
        else:
            self.control_files.unlock()
        if not self.is_locked():
            for repo in self._fallback_repositories:
class KnitPackStreamSource(StreamSource):
2439
"""A StreamSource used to transfer data between same-format KnitPack repos.
2441
This source assumes:
2442
1) Same serialization format for all objects
2443
2) Same root information
2444
3) XML format inventories
2445
4) Atomic inserts (so we can stream inventory texts before text
2450
def __init__(self, from_repository, to_format):
2451
super(KnitPackStreamSource, self).__init__(from_repository, to_format)
2452
self._text_keys = None
2453
self._text_fetch_order = 'unordered'
2455
def _get_filtered_inv_stream(self, revision_ids):
2456
from_repo = self.from_repository
2457
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
2458
parent_keys = [(p,) for p in parent_ids]
2459
find_text_keys = from_repo._find_text_key_references_from_xml_inventory_lines
2460
parent_text_keys = set(find_text_keys(
2461
from_repo._inventory_xml_lines_for_keys(parent_keys)))
2462
content_text_keys = set()
2463
knit = KnitVersionedFiles(None, None)
2464
factory = KnitPlainFactory()
2465
def find_text_keys_from_content(record):
2466
if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
2467
raise ValueError("Unknown content storage kind for"
2468
" inventory text: %s" % (record.storage_kind,))
2469
# It's a knit record, it has a _raw_record field (even if it was
2470
# reconstituted from a network stream).
2471
raw_data = record._raw_record
2472
# read the entire thing
2473
revision_id = record.key[-1]
2474
content, _ = knit._parse_record(revision_id, raw_data)
2475
if record.storage_kind == 'knit-delta-gz':
2476
line_iterator = factory.get_linedelta_content(content)
2477
elif record.storage_kind == 'knit-ft-gz':
2478
line_iterator = factory.get_fulltext_content(content)
2479
content_text_keys.update(find_text_keys(
2480
[(line, revision_id) for line in line_iterator]))
2481
revision_keys = [(r,) for r in revision_ids]
2482
def _filtered_inv_stream():
2483
source_vf = from_repo.inventories
2484
stream = source_vf.get_record_stream(revision_keys,
2486
for record in stream:
2487
if record.storage_kind == 'absent':
2488
raise errors.NoSuchRevision(from_repo, record.key)
2489
find_text_keys_from_content(record)
2491
self._text_keys = content_text_keys - parent_text_keys
2492
return ('inventories', _filtered_inv_stream())
2494
def _get_text_stream(self):
2495
# Note: We know we don't have to handle adding root keys, because both
2496
# the source and target are the identical network name.
2497
text_stream = self.from_repository.texts.get_record_stream(
2498
self._text_keys, self._text_fetch_order, False)
2499
return ('texts', text_stream)
2501
def get_stream(self, search):
2502
revision_ids = search.get_keys()
2503
for stream_info in self._fetch_revision_texts(revision_ids):
2505
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
2506
yield self._get_filtered_inv_stream(revision_ids)
2507
yield self._get_text_stream()
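
# --- Illustrative aside (not from pack_repo.py). get_stream() above relies on
# an ordering contract: the inventory substream must be fully consumed before
# the text stream is used, because consuming it is what fills in
# self._text_keys. The same lazy-population shape, standalone:
def example_get_stream():
    state = {'text_keys': None}
    def inventories():
        for inv in ['inv-1', 'inv-2']:
            yield inv
        state['text_keys'] = ['text-a', 'text-b']  # learned during iteration
    def texts():
        # returns None unless the inventories generator was drained first
        return state['text_keys']
    yield ('inventories', inventories())
    yield ('texts', texts)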

class RepositoryFormatPack(MetaDirRepositoryFormat):
    """Format logic for pack structured repositories.
    """

        utf8_files = [('format', self.get_format_string())]

        self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
        repository = self.open(a_bzrdir=a_bzrdir, _found=True)
        self._run_post_repo_init_hooks(repository, a_bzrdir, shared)
        return repository

    def open(self, a_bzrdir, _found=False, _override_transport=None):
        """See RepositoryFormat.open().
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"

class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    This is pack-1.6.1 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    # Set to true to get the fast-commit code path tested until a really fast
    # format lands in trunk. Not actually fast in this format.

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('development2')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1 with B+Trees.\n")

    def check_conversion_target(self, target_format):

class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    1.6.1-subtree[as it might have been] with B+Tree indices.

    This is [now] retained until we have a CHK based subtree format in
    stable use.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "