    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "<%s.%s object at 0x%x, %s, %s>" % (
            self.__class__.__module__, self.__class__.__name__, id(self),
            self.pack_transport, self.name)


class ResumedPack(ExistingPack):

    def __init__(self, name, revision_index, inventory_index, text_index,
        signature_index, upload_transport, pack_transport, index_transport,
        pack_collection):
        """Create a ResumedPack object."""
        ExistingPack.__init__(self, pack_transport, name, revision_index,
            inventory_index, text_index, signature_index)
        self.upload_transport = upload_transport
        self.index_transport = index_transport
        self.index_sizes = [None, None, None, None]
        indices = [
            ('revision', revision_index),
            ('inventory', inventory_index),
            ('text', text_index),
            ('signature', signature_index),
            ]
        for index_type, index in indices:
            offset = self.index_offset(index_type)
            self.index_sizes[offset] = index._size
        self.index_class = pack_collection._index_class
        self._pack_collection = pack_collection
        self._state = 'resumed'
        # XXX: perhaps check that the .pack file exists?

    def access_tuple(self):
        if self._state == 'finished':
            return Pack.access_tuple(self)
        elif self._state == 'resumed':
            return self.upload_transport, self.file_name()
        else:
            raise AssertionError(self._state)

    def abort(self):
        self.upload_transport.delete(self.file_name())
        indices = [self.revision_index, self.inventory_index, self.text_index,
            self.signature_index]
        for index in indices:
            index._transport.delete(index._name)

    def finish(self):
        self._check_references()
        new_name = '../packs/' + self.file_name()
        self.upload_transport.rename(self.file_name(), new_name)
        for index_type in ['revision', 'inventory', 'text', 'signature']:
            old_name = self.index_name(index_type, self.name)
            new_name = '../indices/' + old_name
            self.upload_transport.rename(old_name, new_name)
            self._replace_index_with_readonly(index_type)
        self._state = 'finished'

    def _get_external_refs(self, index):
        return index.external_references(1)
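
    # Example (illustrative sketch, not part of bzrlib): a ResumedPack is a
    # small state machine. While self._state == 'resumed' the .pack file
    # still lives in the upload directory, so access_tuple() returns
    # (upload_transport, file_name()); finish() renames the pack and its
    # indices into place and flips the state, after which access_tuple()
    # defers to Pack.access_tuple() and answers (pack_transport, file_name()).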


class NewPack(Pack):
    """An in memory proxy for a pack which is being created."""

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: Unix permissions for newly created file.
        """
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if pack_collection.chk_index is not None:
            chk_index = index_builder_class(reference_lists=0)
        else:
            chk_index = None
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: We want to map compression only, but currently the
            # knit code hasn't been updated enough to understand that, so we
            # have a regular 2-list index giving parents and compression
            # source.
            index_builder_class(reference_lists=2),
            # Texts: compression and per file graph, for all fileids - so two
            # reference lists and two elements in the key tuple.
            index_builder_class(reference_lists=2, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # list.
            index_builder_class(reference_lists=0),
            # CHK based storage - just blobs, no compression or parents.
            chk_index=chk_index
            )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = osutils.md5()
        # a tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs, chk_if_in_use)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so


class RepositoryPackCollection(object):
    """Management of packs within a repository.

    :ivar _names: map of {pack_name: (index_size,)}
    """

    pack_factory = NewPack

    def __init__(self, repo, transport, index_transport, upload_transport,
                 pack_transport, index_builder_class, index_class,
                 use_chk_index):
        """Create a new RepositoryPackCollection.

        :param transport: Addresses the repository base directory
            (typically .bzr/repository/).
        :param index_transport: Addresses the directory containing indices.
        :param upload_transport: Addresses the directory into which packs are
            written while they're being created.
        :param pack_transport: Addresses the directory of existing complete packs.
        :param index_builder_class: The index builder class to use.
        :param index_class: The index class to use.
        :param use_chk_index: Whether to setup and manage a CHK index.
        """
        # XXX: This should call self.reset()
        self.repo = repo
        self.transport = transport
        self._index_transport = index_transport
        self._upload_transport = upload_transport
        self._pack_transport = pack_transport
        self._index_builder_class = index_builder_class
        self._index_class = index_class
        self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3,
            '.cix': 4}
        self.packs = []
        # name:Pack mapping
        self._packs_by_name = {}
        # the previous pack-names content
        self._packs_at_load = None
        # when a pack is being created by this object, the state of that pack.
        self._new_pack = None
        # aggregated revision index data
        flush = self._flush_new_pack
        self.revision_index = AggregateIndex(self.reload_pack_names, flush)
        self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
        self.text_index = AggregateIndex(self.reload_pack_names, flush)
        self.signature_index = AggregateIndex(self.reload_pack_names, flush)
        if use_chk_index:
            self.chk_index = AggregateIndex(self.reload_pack_names, flush)
        else:
            # used to determine if we're using a chk_index elsewhere.
            self.chk_index = None
        self._resumed_packs = []

    def add_pack_to_memory(self, pack):
        """Make a Pack object available to the repository to satisfy queries.

        :param pack: A Pack object.
        """
        if pack.name in self._packs_by_name:
            raise AssertionError(
                'pack %s already in _packs_by_name' % (pack.name,))
        self.packs.append(pack)
        self._packs_by_name[pack.name] = pack
        self.revision_index.add_index(pack.revision_index, pack)
        self.inventory_index.add_index(pack.inventory_index, pack)
        self.text_index.add_index(pack.text_index, pack)
        self.signature_index.add_index(pack.signature_index, pack)
        if self.chk_index is not None:
            self.chk_index.add_index(pack.chk_index, pack)
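
    # Example (illustrative sketch, not part of bzrlib): each AggregateIndex
    # exposes one combined view over the matching index of every pack, so
    # after add_pack_to_memory(pack) a query such as
    #   collection.revision_index.combined_index.iter_entries(keys)
    # transparently consults pack.revision_index alongside all other packs.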

    def all_packs(self):
        """Return a list of all the Pack objects this repository has.

        self._packs_by_name = {}
        self._packs_at_load = None

    def _unlock_names(self):
        """Release the mutex around the pack-names index."""
        self.repo.control_files.unlock()

    def _diff_pack_names(self):
        """Read the pack names from disk, and compare it to the one in memory.

        :return: (disk_nodes, deleted_nodes, new_nodes)
            disk_nodes    The final set of nodes that should be referenced
            deleted_nodes Nodes which have been removed from when we started
            new_nodes     Nodes that are newly introduced
        """
        # load the disk nodes across
        disk_nodes = set()
        for index, key, value in self._iter_disk_pack_index():
            disk_nodes.add((key, value))
        # do a two-way diff against our original content
        current_nodes = set()
        for name, sizes in self._names.iteritems():
            current_nodes.add(
                ((name, ), ' '.join(str(size) for size in sizes)))
        # Packs no longer present in the repository, which were present when we
        # locked the repository
        deleted_nodes = self._packs_at_load - current_nodes
        # Packs which this process is adding
        new_nodes = current_nodes - self._packs_at_load
        # Update the disk_nodes set to include the ones we are adding, and
        # remove the ones which were removed by someone else
        disk_nodes.difference_update(deleted_nodes)
        disk_nodes.update(new_nodes)
        return disk_nodes, deleted_nodes, new_nodes
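
    # Example (illustrative sketch, not part of bzrlib): suppose we loaded
    # with _packs_at_load == set([(('a',), '1 2 3 4'), (('b',), '5 6 7 8')])
    # and locally repacked 'a' away, so _names now only holds 'b', while
    # another process meanwhile added pack 'c' on disk. Then:
    #   deleted_nodes == set([(('a',), '1 2 3 4')])
    #   new_nodes == set()
    #   disk_nodes == set([(('b',), '5 6 7 8'), (('c',), '2 4 6 8')])
    # i.e. our deletion wins while the concurrent addition of 'c' survives.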

    def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
        """Given the correct set of pack files, update our saved info.

        :return: (removed, added, modified)
            removed     pack names removed from self._names
            added       pack names added to self._names
            modified    pack names that had changed value
        """
        removed = []
        added = []
        modified = []
        ## self._packs_at_load = disk_nodes
        new_names = dict(disk_nodes)
        # drop no longer present nodes
        for pack in self.all_packs():
            if (pack.name,) not in new_names:
                removed.append(pack.name)
                self._remove_pack_from_memory(pack)
        # add new nodes/refresh existing ones
        for key, value in disk_nodes:
            name = key[0]
            sizes = self._parse_index_sizes(value)
            if name in self._names:
                # existing
                if sizes != self._names[name]:
                    # the pack for name has had its indices replaced - rare but
                    # important to handle. XXX: probably can never happen today
                    # because the three-way merge code above does not handle it
                    # - you may end up adding the same key twice to the new
                    # disk index because the set values are the same, unless
                    # the only index shows up as deleted by the set difference
                    # - which it may. Until there is a specific test for this,
                    # assume it's broken. RBC 20071017.
                    self._remove_pack_from_memory(self.get_pack_by_name(name))
                    self._names[name] = sizes
                    self.get_pack_by_name(name)
                    modified.append(name)
            else:
                # new
                self._names[name] = sizes
                self.get_pack_by_name(name)
                added.append(name)
        return removed, added, modified

    def _save_pack_names(self, clear_obsolete_packs=False):
        """Save the list of packs.

        This will take out the mutex around the pack names list for the
        duration of the method call. If concurrent updates have been made, a
        three-way merge between the current list and the current in memory list
        is performed.

        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        """
        self.lock_names()
        try:
            builder = self._index_builder_class()
            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
            # TODO: handle same-name, index-size-changes here -
            # e.g. use the value from disk, not ours, *unless* we're the one
            # changing it.
            for key, value in disk_nodes:
                builder.add_node(key, value)
            self.transport.put_file('pack-names', builder.finish(),
                mode=self.repo.bzrdir._get_file_mode())
            # move the baseline forward
            self._packs_at_load = disk_nodes
            if clear_obsolete_packs:
                self._clear_obsolete_packs()
        finally:
            self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
        self._syncronize_pack_names_from_disk_nodes(disk_nodes)
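
    # Example (illustrative sketch, not part of bzrlib): each node written
    # to the 'pack-names' index maps a one-tuple key to the pack's index
    # sizes joined by spaces, e.g. for a pack named 'a1b2' whose four
    # indices are 10, 20, 30 and 40 bytes long:
    #   builder.add_node(('a1b2',), '10 20 30 40')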

    def reload_pack_names(self):
        """Sync our pack listing with what is present in the repository.

        This should be called when we find out that something we thought was
        present is now missing. This happens when another process re-packs the
        repository, etc.

        :return: True if the in-memory list of packs has been altered at all.
        """
        # The ensure_loaded call is to handle the case where the first call
        # made involving the collection was to reload_pack_names, where we
        # don't have a view of disk contents. It's a bit of a bandaid, and
        # causes two reads of pack-names, but it's a rare corner case not
        # struck with regular push/pull etc.
        first_read = self.ensure_loaded()
        if first_read:
            return True
        # Work out the new value.
        disk_nodes, _, _ = self._diff_pack_names()
        self._packs_at_load = disk_nodes
        (removed, added,
         modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
        if removed or added or modified:
            return True
        return False

    def _restart_autopack(self):
        """Reload the pack names list, and restart the autopack code."""
        if not self.reload_pack_names():
            # Re-raise the original exception, because something went missing
            # and a restart didn't find it
            raise
        raise errors.RetryAutopack(self.repo, False, sys.exc_info())
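
    # Example (illustrative sketch, not part of bzrlib): _restart_autopack is
    # meant to run from inside an exception handler. If reload_pack_names()
    # reports no change, the bare raise re-raises the original exception;
    # otherwise RetryAutopack carries sys.exc_info() back to the caller so
    # the autopack can be retried against the refreshed pack list.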

    def _clear_obsolete_packs(self):
        """Delete everything from the obsolete-packs directory.

        # FIXME: just drop the transient index.
        # forget what names there are
        if self._new_pack is not None:
            try:
                self._new_pack.abort()
            finally:
                # XXX: If we aborted while in the middle of finishing the write
                # group, _remove_pack_indices can fail because the indexes are
                # already gone. If they're not there we shouldn't fail in this
                # case. -- mbp 20081113
                self._remove_pack_indices(self._new_pack)
                self._new_pack = None
        for resumed_pack in self._resumed_packs:
            try:
                resumed_pack.abort()
            finally:
                # See comment in previous finally block.
                self._remove_pack_indices(resumed_pack)
        del self._resumed_packs[:]
        self.repo._text_knit = None

    def _remove_resumed_pack_indices(self):
        for resumed_pack in self._resumed_packs:
            self._remove_pack_indices(resumed_pack)
        del self._resumed_packs[:]

    def _commit_write_group(self):
        all_missing = set()
        for prefix, versioned_file in (
                ('revisions', self.repo.revisions),
                ('inventories', self.repo.inventories),
                ('texts', self.repo.texts),
                ('signatures', self.repo.signatures),
                ):
            missing = versioned_file.get_missing_compression_parent_keys()
            all_missing.update([(prefix,) + key for key in missing])
        if all_missing:
            raise errors.BzrCheckError(
                "Repository %s has missing compression parent(s) %r "
                % (self.repo, sorted(all_missing)))
        self._remove_pack_indices(self._new_pack)
        should_autopack = False
        if self._new_pack.data_inserted():
            # get all the data to disk and ready to use
            self._new_pack.finish()
            self.allocate(self._new_pack)
            self._new_pack = None
            should_autopack = True
        else:
            self._new_pack.abort()
            self._new_pack = None
        for resumed_pack in self._resumed_packs:
            # XXX: this is a pretty ugly way to turn the resumed pack into a
            # properly committed pack.
            self._names[resumed_pack.name] = None
            self._remove_pack_from_memory(resumed_pack)
            resumed_pack.finish()
            self.allocate(resumed_pack)
            should_autopack = True
        del self._resumed_packs[:]
        if should_autopack:
            if not self.autopack():
                # when autopack takes no steps, the names list is still
                # unsaved.
                self._save_pack_names()
        self.repo._text_knit = None
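
    # Example (illustrative sketch, not part of bzrlib): each missing key is
    # prefixed with its store name, so a text whose compression parent never
    # arrived surfaces in all_missing as e.g.
    #   ('texts', 'intro-file-id', 'rev-1')
    # and the whole commit is refused with BzrCheckError rather than writing
    # an unreadable pack.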

    def _suspend_write_group(self):
        tokens = [pack.name for pack in self._resumed_packs]
        self._remove_pack_indices(self._new_pack)
        if self._new_pack.data_inserted():
            # get all the data to disk and ready to use
            self._new_pack.finish(suspend=True)
            tokens.append(self._new_pack.name)
            self._new_pack = None
        else:
            self._new_pack.abort()
            self._new_pack = None
        self._remove_resumed_pack_indices()
        self.repo._text_knit = None
        return tokens

    def _resume_write_group(self, tokens):
        for token in tokens:
            self._resume_pack(token)
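
    # Example (illustrative sketch, not part of bzrlib): suspend/resume moves
    # a write group between processes via opaque tokens, which here are just
    # pack names:
    #   tokens = collection._suspend_write_group()    # e.g. ['a1b2c3']
    #   # ... later, possibly in another process ...
    #   collection._resume_write_group(tokens)        # re-adopts those packs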


class KnitPackRepository(KnitRepository):
    """Repository with knit objects stored inside pack containers.

    The layering for a KnitPackRepository is:

    Graph | HPSS | Repository public layer |

        revision_nodes = self._pack_collection.revision_index \
            .combined_index.iter_all_entries()
        index_positions = []
        # Get the cached index values for all revisions, and also the
        # location in each index of the revision text so we can perform
        # linear IO.
        for index, key, value, refs in revision_nodes:
            node = (index, key, value, refs)
            index_memo = self.revisions._index._node_to_position(node)
            assert index_memo[0] == index
            index_positions.append((index_memo, key[0],
                tuple(parent[0] for parent in refs[0])))
            pb.update("Reading revision index", 0, 0)
        index_positions.sort()
        batch_size = 1000
        pb.update("Checking cached revision graph", 0,
            len(index_positions))
        for offset in xrange(0, len(index_positions), 1000):
            pb.update("Checking cached revision graph", offset)
            to_query = index_positions[offset:offset + batch_size]
            if not to_query:
                break
            rev_ids = [item[1] for item in to_query]
            revs = self.get_revisions(rev_ids)
            for revision, item in zip(revs, to_query):
                index_parents = item[2]
                rev_parents = tuple(revision.parent_ids)
                if index_parents != rev_parents:
                    result.append((revision.revision_id, index_parents,
                        rev_parents))

    def _make_parents_provider(self):
        return graph.CachingParentsProvider(self)

    def _refresh_data(self):
        if not self.is_locked():
            return
        self._pack_collection.reload_pack_names()

    def _start_write_group(self):
        self._pack_collection._start_write_group()


class CHKInventoryRepository(KnitPackRepository):
    """Subclass of KnitPackRepository that uses CHK based inventories."""

    def _add_inventory_checked(self, revision_id, inv, parents):
        """Add inv to the repository after checking the inputs.

        This function can be overridden to allow different inventory styles.

        :seealso: add_inventory, for the contract.
        """
        serializer = self._format._serializer
        result = CHKInventory.from_inventory(self.chk_bytes, inv,
            maximum_size=serializer.maximum_size,
            search_key_name=serializer.search_key_name)
        inv_lines = result.to_lines()
        return self._inventory_add_lines(revision_id, parents,
            inv_lines, check_content=False)

    def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                               parents, basis_inv=None, propagate_caches=False):
        """Add a new inventory expressed as a delta against another revision.

        :param basis_revision_id: The inventory id the delta was created
            against.
        :param delta: The inventory delta (see Inventory.apply_delta for
            details).
        :param new_revision_id: The revision id that the inventory is being
            added for.
        :param parents: The revision ids of the parents that revision_id is
            known to have and are in the repository already. These are supplied
            for repositories that depend on the inventory graph for revision
            graph access, as well as for those that pun ancestry with delta
            compression.
        :param basis_inv: The basis inventory if it is already known,
            otherwise None.
        :param propagate_caches: If True, the caches for this inventory are
            copied to and updated for the result if possible.

        :returns: (validator, new_inv)
            The validator (which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory, and the
            resulting inventory.
        """
        if basis_revision_id == _mod_revision.NULL_REVISION:
            return KnitPackRepository.add_inventory_by_delta(self,
                basis_revision_id, delta, new_revision_id, parents)
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(new_revision_id)
        basis_tree = self.revision_tree(basis_revision_id)
        basis_tree.lock_read()
        try:
            if basis_inv is None:
                basis_inv = basis_tree.inventory
            result = basis_inv.create_by_apply_delta(delta, new_revision_id,
                propagate_caches=propagate_caches)
            inv_lines = result.to_lines()
            return self._inventory_add_lines(new_revision_id, parents,
                inv_lines, check_content=False), result
        finally:
            basis_tree.unlock()
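
    # Example (illustrative sketch, not part of bzrlib; the delta entries
    # follow the (old_path, new_path, file_id, new_entry) convention of
    # Inventory.apply_delta):
    #   delta = [(None, 'docs/intro.txt', 'intro-file-id', new_entry)]
    #   validator, new_inv = repo.add_inventory_by_delta(
    #       basis_revision_id, delta, new_revision_id, parents)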

    def _iter_inventories(self, revision_ids):
        """Iterate over many inventory objects."""
        keys = [(revision_id,) for revision_id in revision_ids]
        stream = self.inventories.get_record_stream(keys, 'unordered', True)
        texts = {}
        for record in stream:
            if record.storage_kind != 'absent':
                texts[record.key] = record.get_bytes_as('fulltext')
            else:
                raise errors.NoSuchRevision(self, record.key)
        for key in keys:
            yield CHKInventory.deserialise(self.chk_bytes, texts[key], key)

    def _iter_inventory_xmls(self, revision_ids):
        # Without a native 'xml' inventory, this method doesn't make sense, so
        # make it raise to trap naughty direct users.
        raise NotImplementedError(self._iter_inventory_xmls)

    def _find_revision_outside_set(self, revision_ids):
        revision_set = frozenset(revision_ids)
        for revid in revision_ids:
            parent_ids = self.get_parent_map([revid]).get(revid, ())
            for parent in parent_ids:
                if parent in revision_set:
                    # Parent is not outside the set
                    continue
                if parent not in self.get_parent_map([parent]):
                    # Parent is a ghost
                    continue
                return parent
        return _mod_revision.NULL_REVISION
def _find_file_keys_to_fetch(self, revision_ids, pb):
2459
rich_root = self.supports_rich_root()
2460
revision_outside_set = self._find_revision_outside_set(revision_ids)
2461
if revision_outside_set == _mod_revision.NULL_REVISION:
2462
uninteresting_root_keys = set()
2464
uninteresting_inv = self.get_inventory(revision_outside_set)
2465
uninteresting_root_keys = set([uninteresting_inv.id_to_entry.key()])
2466
interesting_root_keys = set()
2467
for idx, inv in enumerate(self.iter_inventories(revision_ids)):
2468
interesting_root_keys.add(inv.id_to_entry.key())
2469
revision_ids = frozenset(revision_ids)
2470
file_id_revisions = {}
2471
bytes_to_info = CHKInventory._bytes_to_utf8name_key
2472
for records, items in chk_map.iter_interesting_nodes(self.chk_bytes,
2473
interesting_root_keys, uninteresting_root_keys,
2475
# This is cheating a bit to use the last grabbed 'inv', but it
2477
for name, bytes in items:
2478
(name_utf8, file_id, revision_id) = bytes_to_info(bytes)
2479
if not rich_root and name_utf8 == '':
2481
if revision_id in revision_ids:
2482
# Would we rather build this up into file_id => revision
2485
file_id_revisions[file_id].add(revision_id)
2487
file_id_revisions[file_id] = set([revision_id])
2488
for file_id, revisions in file_id_revisions.iteritems():
2489
yield ('file', file_id, revisions)

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        :param revisions: an iterable containing revision ids.
        :param _inv_weave: The inventory weave from this repository or None.
            If None, the inventory weave will be opened automatically.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        rich_roots = self.supports_rich_root()
        result = {}
        pb = ui.ui_factory.nested_progress_bar()
        try:
            total = len(revision_ids)
            for pos, inv in enumerate(self.iter_inventories(revision_ids)):
                pb.update("Finding text references", pos, total)
                for entry in inv.iter_just_entries():
                    if entry.revision != inv.revision_id:
                        continue
                    if not rich_roots and entry.file_id == inv.root_id:
                        continue
                    alterations = result.setdefault(entry.file_id, set([]))
                    alterations.add(entry.revision)
        finally:
            pb.finished()
        return result
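
    # Example (illustrative sketch, not part of bzrlib): per the contract
    # above, the result maps each altered file id to exactly the revisions
    # that changed it, e.g.
    #   {'intro-file-id': set(['rev-1', 'rev-3'])}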

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        # XXX: Slow version but correct: rewrite as a series of delta
        # examinations/direct tree traversal. Note that that will require care
        # as a common node is reachable both from the inventory that added it,
        # and others afterwards.
        revision_keys = self.revisions.keys()
        result = {}
        rich_roots = self.supports_rich_root()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            all_revs = self.all_revision_ids()
            total = len(all_revs)
            for pos, inv in enumerate(self.iter_inventories(all_revs)):
                pb.update("Finding text references", pos, total)
                for _, entry in inv.iter_entries():
                    if not rich_roots and entry.file_id == inv.root_id:
                        continue
                    key = (entry.file_id, entry.revision)
                    result.setdefault(key, False)
                    if entry.revision == inv.revision_id:
                        result[key] = True
            return result
        finally:
            pb.finished()
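
    # Example (illustrative sketch, not part of bzrlib): a key becomes True
    # only when the inventory of its own revision still references it. If
    # 'rev-2' carries the unchanged text ('file-id', 'rev-1') forward, then
    #   result[('file-id', 'rev-1')] is True   (referenced by rev-1 itself)
    # while a key only ever referenced by other revisions' inventories
    # remains False.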

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = CHKReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)


class CHKReconcilePacker(ReconcilePacker):
    """Subclass of ReconcilePacker for handling chk inventories."""

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        # XXX: This double-reads the inventories; but it works.
        refs = repo.find_text_key_references()
        self._text_refs = refs
        # during reconcile we:
        # - convert unreferenced texts to full texts
        # - correct texts which reference a text not copied to be full texts
        # - copy all others as-is but with corrected parents.
        # - so at this point we don't know enough to decide what becomes a full
        #   text.
        self._text_filter = None
        # Copy the selected inventory roots, extracting the CHK references
        # needed.
        pending_refs = set()
        for line, revid in inv_lines:
            if line.startswith('id_to_entry: '):
                pending_refs.add((line[13:],))
        while pending_refs:
            pending_refs = self._copy_chks(pending_refs)
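
    # Example (illustrative sketch, not part of bzrlib): an inventory line
    # such as
    #   "id_to_entry: sha1:36380ed8..."
    # names the root of that inventory's CHK map; line[13:] strips the
    # 13-character 'id_to_entry: ' prefix, so ('sha1:36380ed8...',) is
    # queued as a CHK key for the next _copy_chks() pass.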


class RepositoryFormatPack(MetaDirRepositoryFormat):
    """Format logic for pack structured repositories.

                " (deprecated)")


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes, without rich roots or
    subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    This is pack-1.6.1 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    # Set to true to get the fast-commit code path tested until a really fast
    # format lands in trunk. Not actually fast in this format.

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('development2')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1 with B+Trees.\n")

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.7.

    1.6.1-subtree [as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development2-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")


class RepositoryFormatPackDevelopment5(RepositoryFormatPack):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.13.

    This is pack-1.9 with CHKMap based inventories.
    """

    repository_class = CHKInventoryRepository
    _commit_builder_class = PackCommitBuilder
    _serializer = chk_serializer.chk_serializer_parent_id
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    supports_chks = True
    _commit_inv_deltas = True

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('development5')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # This will need to be updated (at least replacing 1.13 with the target
        # bzr release) once we merge brisbane-core into bzr.dev, I've used
        # 'merge-bbc-dev4-to-bzr.dev' into comments at relevant places to make
        # them easily greppable. -- vila 2009016
        return "Bazaar development format 5 (needs bzr.dev from before 1.13)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as"
            " 1.9 with B+Trees and chk support.\n")

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatPackDevelopment5Subtree(RepositoryFormatPack):
    # merge-bbc-dev4-to-bzr.dev
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.13.

    1.9-subtree [as it might have been] with CHKMap based inventories.
    """

    repository_class = CHKInventoryRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    _serializer = chk_serializer.chk_serializer_subtree_parent_id
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    supports_chks = True
    _commit_inv_deltas = True

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # merge-bbc-dev4-to-bzr.dev
        return ("Bazaar development format 5 with subtree support"
            " (needs bzr.dev from before 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as"
            " 1.9-subtree with B+Tree and chk support.\n")


class RepositoryFormatPackDevelopment5Hash16(RepositoryFormatPack):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.13.

    This is pack-1.9 with CHKMap based inventories with 16-way hash tries.
    """

    repository_class = CHKInventoryRepository
    _commit_builder_class = PackCommitBuilder
    _serializer = chk_serializer.chk_serializer_16_parent_id
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    supports_chks = True
    _commit_inv_deltas = True

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('development5-hash16')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 5 hash 16"
            " (needs bzr.dev from before 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as"
            " 1.9 with B+Trees and chk support and 16-way hash tries\n")

    def check_conversion_target(self, target_format):
        pass


class RepositoryFormatPackDevelopment5Hash255(RepositoryFormatPack):
    """A no-subtrees development repository.

    This format should be retained until the second release after bzr 1.13.

    This is pack-1.9 with CHKMap based inventories with 255-way hash tries.
    """

    repository_class = CHKInventoryRepository
    _commit_builder_class = PackCommitBuilder
    _serializer = chk_serializer.chk_serializer_255_parent_id
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    supports_chks = True
    _commit_inv_deltas = True

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('development5-hash255')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 5 hash 255"
            " (needs bzr.dev from before 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as"
            " 1.9 with B+Trees and chk support and 255-way hash tries\n")

    def check_conversion_target(self, target_format):
        pass