        return not self.__eq__(other)

    def __repr__(self):
        return "<%s.%s object at 0x%x, %s, %s>" % (
            self.__class__.__module__, self.__class__.__name__, id(self),
            self.pack_transport, self.name)

class ResumedPack(ExistingPack):

    def __init__(self, name, revision_index, inventory_index, text_index,
        signature_index, upload_transport, pack_transport, index_transport,
        pack_collection, chk_index=None):
        """Create a ResumedPack object."""
        ExistingPack.__init__(self, pack_transport, name, revision_index,
            inventory_index, text_index, signature_index,
            chk_index=chk_index)
        self.upload_transport = upload_transport
        self.index_transport = index_transport
        self.index_sizes = [None, None, None, None]
        indices = [
            ('revision', revision_index),
            ('inventory', inventory_index),
            ('text', text_index),
            ('signature', signature_index),
            ]
        if chk_index is not None:
            indices.append(('chk', chk_index))
            self.index_sizes.append(None)
        for index_type, index in indices:
            offset = self.index_offset(index_type)
            self.index_sizes[offset] = index._size
        self.index_class = pack_collection._index_class
        self._pack_collection = pack_collection
        self._state = 'resumed'
        # XXX: perhaps check that the .pack file exists?

    def access_tuple(self):
        if self._state == 'finished':
            return Pack.access_tuple(self)
        elif self._state == 'resumed':
            return self.upload_transport, self.file_name()
        else:
            raise AssertionError(self._state)

    def abort(self):
        self.upload_transport.delete(self.file_name())
        indices = [self.revision_index, self.inventory_index, self.text_index,
            self.signature_index]
        if self.chk_index is not None:
            indices.append(self.chk_index)
        for index in indices:
            index._transport.delete(index._name)

    def finish(self):
        self._check_references()
        new_name = '../packs/' + self.file_name()
        self.upload_transport.rename(self.file_name(), new_name)
        index_types = ['revision', 'inventory', 'text', 'signature']
        if self.chk_index is not None:
            index_types.append('chk')
        for index_type in index_types:
            old_name = self.index_name(index_type, self.name)
            new_name = '../indices/' + old_name
            self.upload_transport.rename(old_name, new_name)
            self._replace_index_with_readonly(index_type)
        self._state = 'finished'

    def _get_external_refs(self, index):
        """Return compression parents for this index that are not present.

        This returns any compression parents that are referenced by this
        index, which are not contained *in* this index. They may be present
        elsewhere.
        """
        return index.external_references(1)
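
# Illustrative sketch (not bzrlib code): finish() above promotes a resumed
# pack out of the upload/ staging directory. file_name() is '<name>.pack'
# and each index file is '<name>' plus its type's extension, so the
# renames a finish() call performs, relative to upload/, look like this:

def _sketch_finish_renames(pack_name,
                           index_suffixes=('.rix', '.iix', '.tix', '.six')):
    """Return the (old, new) rename pairs a finish() call would perform."""
    renames = [(pack_name + '.pack', '../packs/' + pack_name + '.pack')]
    for suffix in index_suffixes:
        old_name = pack_name + suffix
        renames.append((old_name, '../indices/' + old_name))
    return renames

# _sketch_finish_renames('d41d8cd9')[:2] ->
#   [('d41d8cd9.pack', '../packs/d41d8cd9.pack'),
#    ('d41d8cd9.rix', '../indices/d41d8cd9.rix')]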

class NewPack(Pack):
    """An in memory proxy for a pack which is being created."""

    # A map of index 'type' to the file extension and position in the
    # index_sizes array.
    index_definitions = {
        'revision': ('.rix', 0),
        'inventory': ('.iix', 1),
        'text': ('.tix', 2),
        'signature': ('.six', 3),
        }

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: Unix permissions for newly created file.
        """
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if pack_collection.chk_index is not None:
            chk_index = index_builder_class(reference_lists=0)
        else:
            chk_index = None
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: We want to map compression only, but currently the
            # knit code hasn't been updated enough to understand that, so we
            # have a regular 2-list index giving parents and compression
            # source.
            index_builder_class(reference_lists=2),
            # Texts: compression and per file graph, for all fileids - so two
            # reference lists and two elements in the key tuple.
            index_builder_class(reference_lists=2, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # listing.
            index_builder_class(reference_lists=0),
            # CHK based storage - just blobs, no compression or parents.
            chk_index=chk_index
            )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = osutils.md5()
        # a tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs, chk_if_in_use)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not externally visible.

class RepositoryPackCollection(object):
    """Management of packs within a repository.

    :ivar _names: map of {pack_name: (index_size,)}
    """

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack

    def __init__(self, repo, transport, index_transport, upload_transport,
                 pack_transport, index_builder_class, index_class,
                 use_chk_index):
        """Create a new RepositoryPackCollection.

        :param transport: Addresses the repository base directory
            (typically .bzr/repository/).
        :param index_transport: Addresses the directory containing indices.
        :param upload_transport: Addresses the directory into which packs are
            written while they're being created.
        :param pack_transport: Addresses the directory of existing complete packs.
        :param index_builder_class: The index builder class to use.
        :param index_class: The index class to use.
        :param use_chk_index: Whether to setup and manage a CHK index.
        """
        # XXX: This should call self.reset()
        self.repo = repo
        self.transport = transport
        self._index_transport = index_transport
        self._upload_transport = upload_transport
        self._pack_transport = pack_transport
        self._index_builder_class = index_builder_class
        self._index_class = index_class
        self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3,
            '.cix': 4}
        self.packs = []
        # name:Pack mapping
        self._names = None
        self._packs_by_name = {}
        # the previous pack-names content
        self._packs_at_load = None
        # when a pack is being created by this object, the state of that pack.
        self._new_pack = None
        # aggregated revision index data
        flush = self._flush_new_pack
        self.revision_index = AggregateIndex(self.reload_pack_names, flush)
        self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
        self.text_index = AggregateIndex(self.reload_pack_names, flush)
        self.signature_index = AggregateIndex(self.reload_pack_names, flush)
        if use_chk_index:
            self.chk_index = AggregateIndex(self.reload_pack_names, flush)
        else:
            # used to determine if we're using a chk_index elsewhere.
            self.chk_index = None
        # resumed packs
        self._resumed_packs = []

    def add_pack_to_memory(self, pack):
        """Make a Pack object available to the repository to satisfy queries.

        :param pack: A Pack object.
        """
        if pack.name in self._packs_by_name:
            raise AssertionError(
                'pack %s already in _packs_by_name' % (pack.name,))
        self.packs.append(pack)
        self._packs_by_name[pack.name] = pack
        self.revision_index.add_index(pack.revision_index, pack)
        self.inventory_index.add_index(pack.inventory_index, pack)
        self.text_index.add_index(pack.text_index, pack)
        self.signature_index.add_index(pack.signature_index, pack)
        if self.chk_index is not None:
            self.chk_index.add_index(pack.chk_index, pack)

    def all_packs(self):
        """Return a list of all the Pack objects this repository has.

        Note that an in-progress pack being created is not returned.

        :return: A list of pack objects.
        """
        result = []
        for name in self.names():
            result.append(self.get_pack_by_name(name))
        return result

    def _unlock_names(self):
        """Release the mutex around the pack-names index."""
        self.repo.control_files.unlock()

    def _diff_pack_names(self):
        """Read the pack names from disk, and compare it to the one in memory.

        :return: (disk_nodes, deleted_nodes, new_nodes)
            disk_nodes    The final set of nodes that should be referenced
            deleted_nodes Nodes which have been removed from when we started
            new_nodes     Nodes that are newly introduced
        """
        # load the disk nodes across
        disk_nodes = set()
        for index, key, value in self._iter_disk_pack_index():
            disk_nodes.add((key, value))

        # do a two-way diff against our original content
        current_nodes = set()
        for name, sizes in self._names.iteritems():
            current_nodes.add(
                ((name, ), ' '.join(str(size) for size in sizes)))

        # Packs no longer present in the repository, which were present when we
        # locked the repository
        deleted_nodes = self._packs_at_load - current_nodes
        # Packs which this process is adding
        new_nodes = current_nodes - self._packs_at_load

        # Update the disk_nodes set to include the ones we are adding, and
        # remove the ones which were removed by someone else
        disk_nodes.difference_update(deleted_nodes)
        disk_nodes.update(new_nodes)

        return disk_nodes, deleted_nodes, new_nodes

    def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
        """Given the correct set of pack files, update our saved info.

        :return: (removed, added, modified)
            removed     pack names removed from self._names
            added       pack names added to self._names
            modified    pack names that had changed value
        """
        removed = []
        added = []
        modified = []
        ## self._packs_at_load = disk_nodes
        new_names = dict(disk_nodes)
        # drop no longer present nodes
        for pack in self.all_packs():
            if (pack.name,) not in new_names:
                removed.append(pack.name)
                self._remove_pack_from_memory(pack)
        # add new nodes/refresh existing ones
        for key, value in disk_nodes:
            name = key[0]
            sizes = self._parse_index_sizes(value)
            if name in self._names:
                # existing pack - its indices may have been replaced (rare
                # but possible); if so, reload it
                if sizes != self._names[name]:
                    self._remove_pack_from_memory(self.get_pack_by_name(name))
                    self._names[name] = sizes
                    self.get_pack_by_name(name)
                    modified.append(name)
            else:
                # new pack
                self._names[name] = sizes
                self.get_pack_by_name(name)
                added.append(name)
        return removed, added, modified

    def _save_pack_names(self, clear_obsolete_packs=False):
        """Save the list of packs.

        This will take out the mutex around the pack names list for the
        duration of the method call. If concurrent updates have been made, a
        three-way merge between the current list and the current in memory list
        is performed.

        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        """
        self.lock_names()
        try:
            builder = self._index_builder_class()
            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
            # TODO: handle same-name, index-size-changes here -
            # e.g. use the value from disk, not ours, *unless* we're the one
            # changing it.
            for key, value in disk_nodes:
                builder.add_node(key, value)
            self.transport.put_file('pack-names', builder.finish(),
                mode=self.repo.bzrdir._get_file_mode())
            # move the baseline forward
            self._packs_at_load = disk_nodes
            if clear_obsolete_packs:
                self._clear_obsolete_packs()
        finally:
            self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
        self._syncronize_pack_names_from_disk_nodes(disk_nodes)

    def reload_pack_names(self):
        """Sync our pack listing with what is present in the repository.

        This should be called when we find out that something we thought was
        present is now missing. This happens when another process re-packs the
        repository, etc.

        :return: True if the in-memory list of packs has been altered at all.
        """
        # The ensure_loaded call is to handle the case where the first call
        # made involving the collection was to reload_pack_names, where we
        # don't have a view of disk contents. It's a bit of a bandaid, and
        # causes two reads of pack-names, but it's a rare corner case not
        # struck with regular push/pull etc.
        first_read = self.ensure_loaded()
        if first_read:
            return True
        # out the new value.
        disk_nodes, _, _ = self._diff_pack_names()
        self._packs_at_load = disk_nodes
        (removed, added,
         modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
        if removed or added or modified:
            return True
        return False

    def _restart_autopack(self):
        """Reload the pack names list, and restart the autopack code."""
        if not self.reload_pack_names():
            # Re-raise the original exception, because something went missing
            # and a restart didn't find it
            raise
        raise errors.RetryAutopack(self.repo, False, sys.exc_info())

    def _clear_obsolete_packs(self):
        """Delete everything from the obsolete-packs directory."""
        obsolete_pack_transport = self.transport.clone('obsolete_packs')
        for filename in obsolete_pack_transport.list_dir('.'):
            try:
                obsolete_pack_transport.delete(filename)
            except (errors.PathError, errors.TransportError), e:
                warning("couldn't delete obsolete pack, skipping it:\n%s"
                    % (e,))

    def _start_write_group(self):
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self._new_pack.text_index.set_optimize(combine_backing_indices=False)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
            self._new_pack.chk_index.set_optimize(combine_backing_indices=False)

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _abort_write_group(self):
        # FIXME: just drop the transient index.
        # forget what names there are
        if self._new_pack is not None:
            try:
                self._new_pack.abort()
            finally:
                # XXX: If we aborted while in the middle of finishing the write
                # group, _remove_pack_indices can fail because the indexes are
                # already gone. If they're not there we shouldn't fail in this
                # case. -- mbp 20081113
                self._remove_pack_indices(self._new_pack)
                self._new_pack = None
        for resumed_pack in self._resumed_packs:
            try:
                resumed_pack.abort()
            finally:
                # See comment in previous finally block.
                try:
                    self._remove_pack_indices(resumed_pack)
                except KeyError:
                    pass
        del self._resumed_packs[:]

    def _remove_resumed_pack_indices(self):
        for resumed_pack in self._resumed_packs:
            self._remove_pack_indices(resumed_pack)
        del self._resumed_packs[:]

    def _commit_write_group(self):
        all_missing = set()
        for prefix, versioned_file in (
                ('revisions', self.repo.revisions),
                ('inventories', self.repo.inventories),
                ('texts', self.repo.texts),
                ('signatures', self.repo.signatures),
                ):
            missing = versioned_file.get_missing_compression_parent_keys()
            all_missing.update([(prefix,) + key for key in missing])
        if all_missing:
            raise errors.BzrCheckError(
                "Repository %s has missing compression parent(s) %r "
                % (self.repo, sorted(all_missing)))
        self._remove_pack_indices(self._new_pack)
        should_autopack = False
        if self._new_pack.data_inserted():
            # get all the data to disk and read to use
            self._new_pack.finish()
            self.allocate(self._new_pack)
            self._new_pack = None
            should_autopack = True
        else:
            self._new_pack.abort()
            self._new_pack = None
        for resumed_pack in self._resumed_packs:
            # XXX: this is a pretty ugly way to turn the resumed pack into a
            # properly committed pack.
            self._names[resumed_pack.name] = None
            self._remove_pack_from_memory(resumed_pack)
            resumed_pack.finish()
            self.allocate(resumed_pack)
            should_autopack = True
        del self._resumed_packs[:]
        if should_autopack:
            if not self.autopack():
                # when autopack takes no steps, the names list is still
                # unsaved.
                self._save_pack_names()

    def _suspend_write_group(self):
        tokens = [pack.name for pack in self._resumed_packs]
        self._remove_pack_indices(self._new_pack)
        if self._new_pack.data_inserted():
            # get all the data to disk and read to use
            self._new_pack.finish(suspend=True)
            tokens.append(self._new_pack.name)
            self._new_pack = None
        else:
            self._new_pack.abort()
            self._new_pack = None
        self._remove_resumed_pack_indices()
        return tokens

    def _resume_write_group(self, tokens):
        for token in tokens:
            self._resume_pack(token)
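
# Illustrative sketch (not bzrlib code): _diff_pack_names and
# _save_pack_names above resolve concurrent edits of the pack-names file
# with set arithmetic over (key, value) nodes, where key is ('<name>',)
# and value is the space-joined index sizes. Start from what is on disk
# now, drop what we deleted, add what we created, and leave other
# processes' changes untouched:

def _sketch_merge_pack_names(disk_nodes, packs_at_load, current_nodes):
    """Merge pack-names sets the way _diff_pack_names does."""
    merged = set(disk_nodes)
    deleted_nodes = packs_at_load - current_nodes   # packs we removed
    new_nodes = current_nodes - packs_at_load       # packs we added
    merged.difference_update(deleted_nodes)
    merged.update(new_nodes)
    return merged, deleted_nodes, new_nodes

# e.g. we replaced pack p1 with p2 while another process added p3:
#   packs_at_load = set([(('p1',), '10 20 30 40')])
#   current_nodes = set([(('p2',), '11 21 31 41')])
#   disk_nodes    = set([(('p1',), '10 20 30 40'), (('p3',), '5 6 7 8')])
# the merged result keeps p3, drops p1 and adds p2.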

class KnitPackRepository(KnitRepository):
    """Repository with knit objects stored inside pack containers.

    The layering for a KnitPackRepository is:

    Graph        |  HPSS    | Repository public layer |
    ===================================================
    Tuple based apis below, string based, and key based apis above
    ---------------------------------------------------
    KnitVersionedFiles
      Provides .texts, .revisions etc
      This adapts the N-tuple keys to physical knit records which only have a
      single string identifier (for historical reasons), which in older formats
      was always the revision_id, and in the mapped code for packs is always
      the last element of key tuples.
    ---------------------------------------------------
    GraphIndex
      A separate GraphIndex is used for each of the
      texts/inventories/revisions/signatures contained within each individual
      pack file. The GraphIndex layer works in N-tuples and is unaware of any
      semantic value of the tuples.
    ===================================================

    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        KnitRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        index_transport = self._transport.clone('indices')
        self._pack_collection = RepositoryPackCollection(self, self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=self._format.supports_chks,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        if _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = KnitVersionedFiles(
                _KnitGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    deltas=False, parents=False, is_locked=self.is_locked),
                data_access=self._pack_collection.chk_index.data_access,
                max_delta_chain=0)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011

    def _find_inconsistent_revision_parents(self):
        """Find revisions with incorrectly cached parents.

        :returns: an iterator yielding tuples of (revision-id,
            parents-in-index, parents-in-revision).
        """
        if not self.is_locked():
            raise errors.ObjectNotLocked(self)
        pb = ui.ui_factory.nested_progress_bar()
        result = []
        try:
            revision_nodes = self._pack_collection.revision_index \
                .combined_index.iter_all_entries()
            index_positions = []
            # Get the cached index values for all revisions, and also the
            # location in each index of the revision text so we can perform
            # linear IO.
            for index, key, value, refs in revision_nodes:
                node = (index, key, value, refs)
                index_memo = self.revisions._index._node_to_position(node)
                if index_memo[0] != index:
                    raise AssertionError('%r != %r' % (index_memo[0], index))
                index_positions.append((index_memo, key[0],
                    tuple(parent[0] for parent in refs[0])))
                pb.update("Reading revision index", 0, 0)
            index_positions.sort()
            batch_size = 1000
            pb.update("Checking cached revision graph", 0,
                len(index_positions))
            for offset in xrange(0, len(index_positions), batch_size):
                pb.update("Checking cached revision graph", offset)
                to_query = index_positions[offset:offset + batch_size]
                if not to_query:
                    break
                rev_ids = [item[1] for item in to_query]
                revs = self.get_revisions(rev_ids)
                for revision, item in zip(revs, to_query):
                    index_parents = item[2]
                    rev_parents = tuple(revision.parent_ids)
                    if index_parents != rev_parents:
                        result.append((revision.revision_id, index_parents,
                            rev_parents))
        finally:
            pb.finished()
        return result
    def _make_parents_provider(self):
        return graph.CachingParentsProvider(self)

    def _refresh_data(self):
        if not self.is_locked():
            return
        self._pack_collection.reload_pack_names()

    def _start_write_group(self):
        self._pack_collection._start_write_group()

    def _commit_write_group(self):
        self.revisions._index._key_dependencies.refs.clear()
        return self._pack_collection._commit_write_group()

    def suspend_write_group(self):
        # XXX check self._write_group is self.get_transaction()?
        tokens = self._pack_collection._suspend_write_group()
        self.revisions._index._key_dependencies.refs.clear()
        self._write_group = None
        return tokens

    def _resume_write_group(self, tokens):
        self._start_write_group()
        try:
            self._pack_collection._resume_write_group(tokens)
        except errors.UnresumableWriteGroup:
            self._abort_write_group()
            raise
        for pack in self._pack_collection._resumed_packs:
            self.revisions._index.scan_unvalidated_index(pack.revision_index)

    def get_transaction(self):
        if self._write_lock_count:
            return self._transaction
        else:
            return self.control_files.get_transaction()

class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"

    def check_conversion_target(self, target_format):
        pass
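
# Illustrative sketch (not bzrlib code, hypothetical names): each format's
# get_format_string() is the exact byte string stored in the repository's
# format file on disk, and opening a repository dispatches on it. A
# minimal version of that lookup:

_SKETCH_FORMATS = {
    "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n": RepositoryFormatKnitPack5,
}

def _sketch_find_format(format_string):
    """Return the format class registered for an on-disk format string."""
    try:
        return _SKETCH_FORMATS[format_string]
    except KeyError:
        raise KeyError('Unknown repository format: %r' % (format_string,))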

class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"

class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = bzrdir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"

    def check_conversion_target(self, target_format):
        pass

class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"

class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    1.6.1-subtree[as it might have been] with B+Tree indices.

    This is [now] retained until we have a CHK based subtree format in
    development.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass
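
# Illustrative sketch (not bzrlib code): the recurring
#   _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
# idiom above builds a property whose getter constructs the matching
# bzrdir format on demand and whose setter silently discards assignments,
# so generic format-copying code can 'set' the attribute without effect:

class _SketchFormat(object):

    def _get_matching(self):
        return 'a freshly built matching format'

    def _ignore_setting(self, value):
        pass

    _matching = property(_get_matching, _ignore_setting)

# fmt = _SketchFormat()
# fmt._matching = 'ignored value'   # setter discards this
# fmt._matching                     # -> 'a freshly built matching format'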