            texts/deltas (via (fileid, revisionid) tuples).
        :param signature_index: A GraphIndex for determining what signatures are
            present in the Pack and accessing the locations of their texts.
        :param chk_index: A GraphIndex for accessing content by CHK, if the
        self.revision_index = revision_index
        self.inventory_index = inventory_index
        self.text_index = text_index
        self.signature_index = signature_index
        self.chk_index = chk_index

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content."""
        return self.pack_transport, self.file_name()

    def _check_references(self):
        """Make sure our external references are present.

        Packs are allowed to have deltas whose base is not in the pack, but it
        must be present somewhere in this collection. It is not allowed to
        have deltas based on a fallback repository.
        (See <https://bugs.launchpad.net/bzr/+bug/288751>)
        for (index_name, external_refs, index) in [
            self._get_external_refs(self.text_index),
            self._pack_collection.text_index.combined_index),
            self._get_external_refs(self.inventory_index),
            self._pack_collection.inventory_index.combined_index),
            missing = external_refs.difference(
                k for (idx, k, v, r) in
                index.iter_entries(external_refs))
            missing_items[index_name] = sorted(list(missing))
            from pprint import pformat
            raise errors.BzrCheckError(
                "Newly created pack file %r has delta references to "
                "items not in its repository:\n%s"
                % (self, pformat(missing_items)))
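To make the check above concrete, here is a minimal, self-contained sketch (not taken from the bzrlib source) of what _check_references does: gather the compression parents referenced by a new pack's indices, subtract everything the wider collection can resolve, and report anything left over.

# Illustrative sketch only; plain sets stand in for GraphIndex objects.
def check_references(external_refs_by_index, resolvable_keys):
    """Return {index_name: sorted missing keys}; empty means all refs resolve."""
    missing_items = {}
    for index_name, external_refs in external_refs_by_index.items():
        missing = set(external_refs) - set(resolvable_keys)
        if missing:
            missing_items[index_name] = sorted(missing)
    return missing_items

# Example: a text delta references a base ('file-id', 'rev-old') that no pack
# in the collection provides, so the check reports it.
assert check_references(
    {'texts': {('file-id', 'rev-old')}, 'inventories': set()},
    resolvable_keys={('file-id', 'rev-new')},
) == {'texts': [('file-id', 'rev-old')]}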
    def file_name(self):
        """Get the file name for the pack on disk."""
        return self.name + '.pack'

        """The text index is the name + .tix."""
        return self.index_name('text', name)

    def _external_compression_parents_of_texts(self):
        for node in self.text_index.iter_all_entries():
            refs.update(node[3][1])

    def _replace_index_with_readonly(self, index_type):
        unlimited_cache = False
        if index_type == 'chk':
            unlimited_cache = True
        setattr(self, index_type + '_index',
            self.index_class(self.index_transport,
                self.index_name(index_type, self.name),
                self.index_sizes[self.index_offset(index_type)],
                unlimited_cache=unlimited_cache))
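_replace_index_with_readonly leans on the index_name/index_offset helpers and the index_sizes list. A hedged sketch of that naming scheme, assuming the '.rix'/'.iix'/'.tix'/'.six' extensions defined by index_definitions below and a '.cix' CHK index at offset 4 (the offsets for 'text' and 'chk' are implied rather than shown in this excerpt):

# Sketch of the name/offset bookkeeping; the real helpers live on Pack.
INDEX_SUFFIXES = {'revision': ('.rix', 0), 'inventory': ('.iix', 1),
                  'text': ('.tix', 2), 'signature': ('.six', 3),
                  'chk': ('.cix', 4)}

def index_name(index_type, pack_name):
    return pack_name + INDEX_SUFFIXES[index_type][0]

def index_offset(index_type):
    return INDEX_SUFFIXES[index_type][1]

# A pack keeps its text index in '<name>.tix', whose size is recorded at
# position 2 of the pack's index_sizes list.
assert index_name('text', 'deadbeef') == 'deadbeef.tix'
assert index_offset('signature') == 3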
class ExistingPack(Pack):
    """An in memory proxy for an existing .pack and its disk indices."""

    def __init__(self, pack_transport, name, revision_index, inventory_index,
        text_index, signature_index):
        text_index, signature_index, chk_index=None):
        """Create an ExistingPack object.

        :param pack_transport: The transport where the pack file resides.
        :param name: The name of the pack on disk in the pack_transport.
        Pack.__init__(self, revision_index, inventory_index, text_index,
            signature_index, chk_index)
        self.pack_transport = pack_transport
        if None in (revision_index, inventory_index, text_index,

        return not self.__eq__(other)

    def __repr__(self):
        return "<bzrlib.repofmt.pack_repo.Pack object at 0x%x, %s, %s" % (
            id(self), self.pack_transport, self.name)
        return "<%s.%s object at 0x%x, %s, %s" % (
            self.__class__.__module__, self.__class__.__name__, id(self),
            self.pack_transport, self.name)
class ResumedPack(ExistingPack):

    def __init__(self, name, revision_index, inventory_index, text_index,
        signature_index, upload_transport, pack_transport, index_transport,
        pack_collection, chk_index=None):
        """Create a ResumedPack object."""
        ExistingPack.__init__(self, pack_transport, name, revision_index,
            inventory_index, text_index, signature_index,
        self.upload_transport = upload_transport
        self.index_transport = index_transport
        self.index_sizes = [None, None, None, None]
            ('revision', revision_index),
            ('inventory', inventory_index),
            ('text', text_index),
            ('signature', signature_index),
        if chk_index is not None:
            indices.append(('chk', chk_index))
            self.index_sizes.append(None)
        for index_type, index in indices:
            offset = self.index_offset(index_type)
            self.index_sizes[offset] = index._size
        self.index_class = pack_collection._index_class
        self._pack_collection = pack_collection
        self._state = 'resumed'
        # XXX: perhaps check that the .pack file exists?

    def access_tuple(self):
        if self._state == 'finished':
            return Pack.access_tuple(self)
        elif self._state == 'resumed':
            return self.upload_transport, self.file_name()
            raise AssertionError(self._state)

        self.upload_transport.delete(self.file_name())
        indices = [self.revision_index, self.inventory_index, self.text_index,
            self.signature_index]
        if self.chk_index is not None:
            indices.append(self.chk_index)
        for index in indices:
            index._transport.delete(index._name)

        self._check_references()
        index_types = ['revision', 'inventory', 'text', 'signature']
        if self.chk_index is not None:
            index_types.append('chk')
        for index_type in index_types:
            old_name = self.index_name(index_type, self.name)
            new_name = '../indices/' + old_name
            self.upload_transport.rename(old_name, new_name)
            self._replace_index_with_readonly(index_type)
        new_name = '../packs/' + self.file_name()
        self.upload_transport.rename(self.file_name(), new_name)
        self._state = 'finished'

    def _get_external_refs(self, index):
        """Return compression parents for this index that are not present.

        This returns any compression parents that are referenced by this index,
        which are not contained *in* this index. They may be present elsewhere.
        return index.external_references(1)
class NewPack(Pack):
    """An in memory proxy for a pack which is being created."""

    # A map of index 'type' to the file extension and position in the
    index_definitions = {
        'revision': ('.rix', 0),
        'inventory': ('.iix', 1),
        'signature': ('.six', 3),

    def __init__(self, upload_transport, index_transport, pack_transport,
        upload_suffix='', file_mode=None):
    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param upload_transport: A writable transport for the pack to be
            incrementally uploaded to.
        :param index_transport: A writable transport for the pack's indices to
            be written to when the pack is finished.
        :param pack_transport: A writable transport for the pack to be renamed
            to when the upload is complete. This *must* be the same as
            upload_transport.clone('../packs').
        :param pack_collection: A PackCollection into which this is being inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        :param file_mode: Unix permissions for newly created file.
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if pack_collection.chk_index is not None:
            chk_index = index_builder_class(reference_lists=0)
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            InMemoryGraphIndex(reference_lists=1),
            index_builder_class(reference_lists=1),
            # Inventory: We want to map compression only, but currently the
            # knit code hasn't been updated enough to understand that, so we
            # have a regular 2-list index giving parents and compression
            InMemoryGraphIndex(reference_lists=2),
            index_builder_class(reference_lists=2),
            # Texts: compression and per file graph, for all fileids - so two
            # reference lists and two elements in the key tuple.
            InMemoryGraphIndex(reference_lists=2, key_elements=2),
            index_builder_class(reference_lists=2, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            InMemoryGraphIndex(reference_lists=0),
            index_builder_class(reference_lists=0),
            # CHK based storage - just blobs, no compression or parents.
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = upload_transport
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = index_transport
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_transport
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self._hash = osutils.md5()
        # a tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs, chk_if_in_use)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        - stores the index size tuple for the pack in the index_sizes
        self._write_data('', flush=True)
        self.name = self._hash.hexdigest()
        self.finish_content()
        self._check_references()
        # XXX: It'd be better to write them all to temporary names, then
        # rename them all into place, so that the window when only some are
        # visible is smaller. On the other hand none will be seen until
        # they're in the names list.
        self.index_sizes = [None, None, None, None]
        self._write_index('revision', self.revision_index, 'revision')
        self._write_index('inventory', self.inventory_index, 'inventory')
        self._write_index('text', self.text_index, 'file texts')
        self._write_index('revision', self.revision_index, 'revision', suspend)
        self._write_index('inventory', self.inventory_index, 'inventory',
        self._write_index('text', self.text_index, 'file texts', suspend)
        self._write_index('signature', self.signature_index,
            'revision signatures')
            'revision signatures', suspend)
        if self.chk_index is not None:
            self.index_sizes.append(None)
            self._write_index('chk', self.chk_index,
                'content hash bytes', suspend)
        self.write_stream.close()
        # Note that this will clobber an existing pack with the same name,
        # without checking for hash collisions. While this is undesirable this
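As finish() above shows, a new pack is named after the MD5 of the bytes written to it, and file_name() simply appends '.pack'. A small stand-alone sketch of that naming (hashlib stands in for osutils.md5):

import hashlib

def pack_name_for(content_bytes):
    # The pack's name is the hex digest of everything written to the .pack file.
    return hashlib.md5(content_bytes).hexdigest()

name = pack_name_for(b'example pack content')
pack_file = name + '.pack'            # what file_name() returns
index_files = [name + suffix for suffix in ('.rix', '.iix', '.tix', '.six')]
print(pack_file, index_files)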
    def open_pack(self):
        """Open a pack for the pack we are creating."""
        return NewPack(self._pack_collection._upload_transport,
            self._pack_collection._index_transport,
            self._pack_collection._pack_transport, upload_suffix=self.suffix,
            file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
        new_pack = self._pack_collection.pack_factory(self._pack_collection,
            upload_suffix=self.suffix,
            file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
        # We know that we will process all nodes in order, and don't need to
        # query, so don't combine any indices spilled to disk until we are done
        new_pack.revision_index.set_optimize(combine_backing_indices=False)
        new_pack.inventory_index.set_optimize(combine_backing_indices=False)
        new_pack.text_index.set_optimize(combine_backing_indices=False)
        new_pack.signature_index.set_optimize(combine_backing_indices=False)
    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        for entry in entries:
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                mutter('Not changing pack list, all packs used.')
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            mutter('Reordering packs\nfrom: %s\n to: %s',
                   old_names, new_names)
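The reordering described by _update_pack_order can be reproduced with plain lists: packs used by the entries come first, in order of first use, and unused packs are pushed to the end. A minimal sketch, assuming entries are (index, ...) tuples and a dict maps indices to packs:

def reorder_packs(packs, entries, index_to_pack_map):
    """Return packs used by 'entries' first (in first-use order), the rest after."""
    used = []
    seen_indexes = set()
    for entry in entries:
        index = entry[0]
        if index not in seen_indexes:
            used.append(index_to_pack_map[index])
            seen_indexes.add(index)
    unused = [pack for pack in packs if pack not in used]
    return used + unused

packs = ['pack-a', 'pack-b', 'pack-c']
index_to_pack = {'idx-b': 'pack-b', 'idx-c': 'pack-c'}
entries = [('idx-c', 'key1'), ('idx-b', 'key2'), ('idx-c', 'key3')]
assert reorder_packs(packs, entries, index_to_pack) == ['pack-c', 'pack-b', 'pack-a']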
    def _copy_revision_texts(self):
        """Copy revision data to the new pack."""

        self._pack_collection.allocate(new_pack)

    def _copy_nodes(self, nodes, index_map, writer, write_index):
        """Copy knit nodes between packs with no graph references."""
    def _copy_chks(self, refs=None):
        # XXX: Todo, recursive follow-pointers facility when fetching some
        chk_index_map, chk_indices = self._pack_map_and_index_list(
        chk_nodes = self._index_contents(chk_indices, refs)
        # TODO: This isn't strictly tasteful as we are accessing some private
        #       variables (_serializer). Perhaps a better way would be to have
        #       Repository._deserialise_chk_node()
        search_key_func = chk_map.search_key_registry.get(
            self._pack_collection.repo._serializer.search_key_name)
        def accumlate_refs(lines):
            # XXX: move to a generic location
            bytes = ''.join(lines)
            node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
            new_refs.update(node.refs())
        self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
            self.new_pack.chk_index, output_lines=accumlate_refs)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        pb = ui.ui_factory.nested_progress_bar()
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
            # linear scan up the pack
            pack_readv_requests.sort()
            transport, path = index_map[index]
            reader = pack.make_readv_reader(transport, path,
                [offset[0:2] for offset in pack_readv_requests])
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
                izip(reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                df, _ = knit._parse_record_header(key, raw_data)
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                    df, _ = knit._parse_record_header(key, raw_data)
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
class RepositoryPackCollection(object):
    """Management of packs within a repository.

    :ivar _names: map of {pack_name: (index_size,)}

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack

    def __init__(self, repo, transport, index_transport, upload_transport,
                 pack_transport, index_builder_class, index_class,
        """Create a new RepositoryPackCollection.

        :param transport: Addresses the repository base directory
            (typically .bzr/repository/).
        :param index_transport: Addresses the directory containing indices.
        :param upload_transport: Addresses the directory into which packs are written
            while they're being created.
        :param pack_transport: Addresses the directory of existing complete packs.
        :param index_builder_class: The index builder class to use.
        :param index_class: The index class to use.
        :param use_chk_index: Whether to setup and manage a CHK index.
        # XXX: This should call self.reset()
        self.repo = repo
        self.transport = transport
        self._index_transport = index_transport
        self._upload_transport = upload_transport
        self._pack_transport = pack_transport
        self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}
        self._index_builder_class = index_builder_class
        self._index_class = index_class
        self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3,
        self.packs = []
        # name:Pack mapping
        self._packs_by_name = {}
        # the previous pack-names content
        self._packs_at_load = None
        # when a pack is being created by this object, the state of that pack.
        self._new_pack = None
        # aggregated revision index data
        self.revision_index = AggregateIndex()
        self.inventory_index = AggregateIndex()
        self.text_index = AggregateIndex()
        self.signature_index = AggregateIndex()
        flush = self._flush_new_pack
        self.revision_index = AggregateIndex(self.reload_pack_names, flush)
        self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
        self.text_index = AggregateIndex(self.reload_pack_names, flush)
        self.signature_index = AggregateIndex(self.reload_pack_names, flush)
            self.chk_index = AggregateIndex(self.reload_pack_names, flush)
            # used to determine if we're using a chk_index elsewhere.
            self.chk_index = None
        self._resumed_packs = []

        return '%s(%r)' % (self.__class__.__name__, self.repo)
    def add_pack_to_memory(self, pack):
        """Make a Pack object available to the repository to satisfy queries.

        :param pack: A Pack object.
        if pack.name in self._packs_by_name:
            raise AssertionError()
            raise AssertionError(
                'pack %s already in _packs_by_name' % (pack.name,))
        self.packs.append(pack)
        self._packs_by_name[pack.name] = pack
        self.revision_index.add_index(pack.revision_index, pack)
        self.inventory_index.add_index(pack.inventory_index, pack)
        self.text_index.add_index(pack.text_index, pack)
        self.signature_index.add_index(pack.signature_index, pack)
        if self.chk_index is not None:
            self.chk_index.add_index(pack.chk_index, pack)

    def all_packs(self):
        """Return a list of all the Pack objects this repository has.

        # group their data with the relevant commit, and that may
        # involve rewriting ancient history - which autopack tries to
        # avoid. Alternatively we could not group the data but treat
        # each of these as having a single revision, and thus add
        # one revision for each to the total revision count, to get
        # a matching distribution.
        existing_packs.append((revision_count, pack))
        pack_operations = self.plan_autopack_combinations(
            existing_packs, pack_distribution)
        self._execute_pack_operations(pack_operations)
        num_new_packs = len(pack_operations)
        num_old_packs = sum([len(po[1]) for po in pack_operations])
        num_revs_affected = sum([po[0] for po in pack_operations])
        mutter('Auto-packing repository %s, which has %d pack files, '
            'containing %d revisions. Packing %d files into %d affecting %d'
            ' revisions', self, total_packs, total_revisions, num_old_packs,
            num_new_packs, num_revs_affected)
        result = self._execute_pack_operations(pack_operations,
                                      reload_func=self._restart_autopack)
        mutter('Auto-packing repository %s completed', self)
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer):
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :return: The new pack names.
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
            _packer_class(self, packs, '.autopack').pack()
            packer = _packer_class(self, packs, '.autopack',
                                   reload_func=reload_func)
            except errors.RetryWithNewPacks:
                # An exception is propagating out of this context, make sure
                # this packer has cleaned up. Packer() doesn't set its new_pack
                # state into the RepositoryPackCollection object, so we only
                # have access to it directly here.
                if packer.new_pack is not None:
                    packer.new_pack.abort()
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
        to_be_obsoleted = []
        for _, packs in pack_operations:
            to_be_obsoleted.extend(packs)
        result = self._save_pack_names(clear_obsolete_packs=True,
                                       obsolete_packs=to_be_obsoleted)

    def _flush_new_pack(self):
        if self._new_pack is not None:
            self._new_pack.flush()
    def lock_names(self):
        """Acquire the mutex around the pack-names index.

        This cannot be used in the middle of a read-only transaction on the
        self.repo.control_files.lock_write()

    def _already_packed(self):
        """Is the collection already packed?"""
        return not (self.repo._format.pack_compresses or (len(self._names) > 1))

    def pack(self, hint=None):
        """Pack the pack collection totally."""
        self.ensure_loaded()
        total_packs = len(self._names)
        # This is arguably wrong because we might not be optimal, but for
        # now lets leave it in. (e.g. reconcile -> one pack. But not
        if self._already_packed():
        total_revisions = self.revision_index.combined_index.key_count()
        # XXX: the following may want to be a class, to pack with a given
        mutter('Packing repository %s, which has %d pack files, '
            'containing %d revisions into 1 packs.', self, total_packs,
            'containing %d revisions with hint %r.', self, total_packs,
            total_revisions, hint)
        # determine which packs need changing
        pack_distribution = [1]
        pack_operations = [[0, []]]
        for pack in self.all_packs():
            pack_operations[-1][0] += pack.get_revision_count()
            pack_operations[-1][1].append(pack)
            if hint is None or pack.name in hint:
                # Either no hint was provided (so we are packing everything),
                # or this pack was included in the hint.
                pack_operations[-1][0] += pack.get_revision_count()
                pack_operations[-1][1].append(pack)
        self._execute_pack_operations(pack_operations, OptimisingPacker)
    def plan_autopack_combinations(self, existing_packs, pack_distribution):

        inv_index = self._make_index(name, '.iix')
        txt_index = self._make_index(name, '.tix')
        sig_index = self._make_index(name, '.six')
        if self.chk_index is not None:
            chk_index = self._make_index(name, '.cix', unlimited_cache=True)
        result = ExistingPack(self._pack_transport, name, rev_index,
            inv_index, txt_index, sig_index)
            inv_index, txt_index, sig_index, chk_index)
        self.add_pack_to_memory(result)

    def _resume_pack(self, name):
        """Get a suspended Pack object by name.

        :param name: The name of the pack - e.g. '123456'
        :return: A Pack object.
        if not re.match('[a-f0-9]{32}', name):
            # Tokens should be md5sums of the suspended pack file, i.e. 32 hex
            raise errors.UnresumableWriteGroup(
                self.repo, [name], 'Malformed write group token')
        rev_index = self._make_index(name, '.rix', resume=True)
        inv_index = self._make_index(name, '.iix', resume=True)
        txt_index = self._make_index(name, '.tix', resume=True)
        sig_index = self._make_index(name, '.six', resume=True)
        if self.chk_index is not None:
            chk_index = self._make_index(name, '.cix', resume=True,
                                         unlimited_cache=True)
            result = self.resumed_pack_factory(name, rev_index, inv_index,
                txt_index, sig_index, self._upload_transport,
                self._pack_transport, self._index_transport, self,
                chk_index=chk_index)
        except errors.NoSuchFile, e:
            raise errors.UnresumableWriteGroup(self.repo, [name], str(e))
        self.add_pack_to_memory(result)
        self._resumed_packs.append(result)
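The token check in _resume_pack is a 32-character lowercase-hex match (an MD5 digest, matching the pack naming shown earlier). A hedged, stand-alone version of that validation:

import re

def is_valid_resume_token(name):
    # Suspended pack tokens are md5 hex digests: 32 chars of [a-f0-9].
    return re.match('[a-f0-9]{32}$', name) is not None

assert is_valid_resume_token('d41d8cd98f00b204e9800998ecf8427e')
assert not is_valid_resume_token('123456')    # malformed token, rejected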
    def allocate(self, a_new_pack):
        """Allocate name in the list of packs.

    def _iter_disk_pack_index(self):
        """Iterate over the contents of the pack-names index.

        This is used when loading the list from disk, and before writing to
        detect updates from others during our write operation.
        :return: An iterator of the index contents.
        return GraphIndex(self.transport, 'pack-names', None
        return self._index_class(self.transport, 'pack-names', None
            ).iter_all_entries()

    def _make_index(self, name, suffix):
    def _make_index(self, name, suffix, resume=False, unlimited_cache=False):
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
            self._index_transport, index_name, index_size)
            transport = self._upload_transport
            index_size = transport.stat(index_name).st_size
            transport = self._index_transport
            index_size = self._names[name][size_offset]
        return self._index_class(transport, index_name, index_size,
                                 unlimited_cache=unlimited_cache)
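_make_index resolves where an index lives and how big it is: resumed indices are stat()ed in the upload directory, while finished ones get their size from the in-memory _names map via _suffix_offsets. A simplified sketch of that size lookup, using a plain dict in place of the transports:

SUFFIX_OFFSETS = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}

def index_size_for(names, name, suffix):
    """Look up a finished pack's index size from the pack-names data."""
    return names[name][SUFFIX_OFFSETS[suffix]]

# pack-names stores one size per index, in suffix-offset order.
names = {'deadbeef': (1200, 800, 5400, 60)}
assert index_size_for(names, 'deadbeef', '.tix') == 5400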
    def _max_pack_count(self, total_revisions):
        """Return the maximum number of packs to use for total revisions.

        :param total_revisions: The total number of revisions in the

        :param return: None.
        for pack in packs:
            pack.pack_transport.rename(pack.file_name(),
                '../obsolete_packs/' + pack.file_name())
                pack.pack_transport.rename(pack.file_name(),
                    '../obsolete_packs/' + pack.file_name())
            except (errors.PathError, errors.TransportError), e:
                # TODO: Should these be warnings or mutters?
                mutter("couldn't rename obsolete pack, skipping it:\n%s"
            # TODO: Probably needs to know all possible indices for this pack
            # - or maybe list the directory and move all indices matching this
            # name whether we recognize it or not?
            for suffix in ('.iix', '.six', '.tix', '.rix'):
                self._index_transport.rename(pack.name + suffix,
                    '../obsolete_packs/' + pack.name + suffix)
            suffixes = ['.iix', '.six', '.tix', '.rix']
            if self.chk_index is not None:
                suffixes.append('.cix')
            for suffix in suffixes:
                self._index_transport.rename(pack.name + suffix,
                    '../obsolete_packs/' + pack.name + suffix)
            except (errors.PathError, errors.TransportError), e:
                mutter("couldn't rename obsolete index, skipping it:\n%s"

    def pack_distribution(self, total_revisions):
        """Generate a list of the number of revisions to put in each pack.
    def _remove_pack_from_memory(self, pack):
        """Remove pack from the packs accessed by this repository.

        Only affects memory state, until self._save_pack_names() is invoked.
        self._names.pop(pack.name)
        self._packs_by_name.pop(pack.name)
        self._remove_pack_indices(pack)
        self.packs.remove(pack)

    def _remove_pack_indices(self, pack):
        """Remove the indices for pack from the aggregated indices."""
        self.revision_index.remove_index(pack.revision_index, pack)
        self.inventory_index.remove_index(pack.inventory_index, pack)
        self.text_index.remove_index(pack.text_index, pack)
        self.signature_index.remove_index(pack.signature_index, pack)
    def _remove_pack_indices(self, pack, ignore_missing=False):
        """Remove the indices for pack from the aggregated indices.

        :param ignore_missing: Suppress KeyErrors from calling remove_index.
        for index_type in Pack.index_definitions.keys():
            attr_name = index_type + '_index'
            aggregate_index = getattr(self, attr_name)
            if aggregate_index is not None:
                pack_index = getattr(pack, attr_name)
                aggregate_index.remove_index(pack_index)
    def reset(self):
        """Clear all cached data."""
        # cached revision data
        self.repo._revision_knit = None
        self.revision_index.clear()
        # cached signature data
        self.repo._signature_knit = None
        self.signature_index.clear()
        # cached file text data
        self.text_index.clear()
        self.repo._text_knit = None
        # cached inventory data
        self.inventory_index.clear()
        if self.chk_index is not None:
            self.chk_index.clear()
        # remove the open pack
        self._new_pack = None
        # information about packs.
        self._packs_by_name = {}
        self._packs_at_load = None
    def _make_index_map(self, index_suffix):
        """Return information on existing indices.

        :param suffix: Index suffix added to pack name.

        :returns: (pack_map, indices) where indices is a list of GraphIndex
            objects, and pack_map is a mapping from those objects to the
            pack tuple they describe.
        # TODO: stop using this; it creates new indices unnecessarily.
        self.ensure_loaded()
        suffix_map = {'.rix': 'revision_index',
            '.six': 'signature_index',
            '.iix': 'inventory_index',
            '.tix': 'text_index',
        return self._packs_list_to_pack_map_and_index_list(self.all_packs(),
            suffix_map[index_suffix])

    def _packs_list_to_pack_map_and_index_list(self, packs, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param packs: The packs list to process.
        :param index_attribute: The attribute that the desired index is found
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the same order
            index = getattr(pack, index_attribute)
            indices.append(index)
            pack_map[index] = (pack.pack_transport, pack.file_name())
        return pack_map, indices

    def _index_contents(self, pack_map, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param pack_map: A map from indices to pack details.
        :param key_filter: An optional filter to limit the
        indices = [index for index in pack_map.iterkeys()]
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        return all_index.iter_entries(key_filter)
    def _unlock_names(self):
        """Release the mutex around the pack-names index."""
        self.repo.control_files.unlock()

    def _save_pack_names(self, clear_obsolete_packs=False):
        """Save the list of packs.

        This will take out the mutex around the pack names list for the
        duration of the method call. If concurrent updates have been made, a
        three-way merge between the current list and the current in memory list

        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        builder = GraphIndexBuilder()
        # load the disk nodes across
        for index, key, value in self._iter_disk_pack_index():
            disk_nodes.add((key, value))
        # do a two-way diff against our original content
        current_nodes = set()
        for name, sizes in self._names.iteritems():
                ((name, ), ' '.join(str(size) for size in sizes)))
        deleted_nodes = self._packs_at_load - current_nodes
        new_nodes = current_nodes - self._packs_at_load
        disk_nodes.difference_update(deleted_nodes)
        disk_nodes.update(new_nodes)
        # TODO: handle same-name, index-size-changes here -
        # e.g. use the value from disk, not ours, *unless* we're the one
        for key, value in disk_nodes:
            builder.add_node(key, value)
        self.transport.put_file('pack-names', builder.finish(),
            mode=self.repo.bzrdir._get_file_mode())
        # move the baseline forward
        self._packs_at_load = disk_nodes
        if clear_obsolete_packs:
            self._clear_obsolete_packs()
        self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
    def _diff_pack_names(self):
        """Read the pack names from disk, and compare it to the one in memory.

        :return: (disk_nodes, deleted_nodes, new_nodes)
            disk_nodes    The final set of nodes that should be referenced
            deleted_nodes Nodes which have been removed from when we started
            new_nodes     Nodes that are newly introduced
        # load the disk nodes across
        for index, key, value in self._iter_disk_pack_index():
            disk_nodes.add((key, value))
        orig_disk_nodes = set(disk_nodes)

        # do a two-way diff against our original content
        current_nodes = set()
        for name, sizes in self._names.iteritems():
                ((name, ), ' '.join(str(size) for size in sizes)))

        # Packs no longer present in the repository, which were present when we
        # locked the repository
        deleted_nodes = self._packs_at_load - current_nodes
        # Packs which this process is adding
        new_nodes = current_nodes - self._packs_at_load

        # Update the disk_nodes set to include the ones we are adding, and
        # remove the ones which were removed by someone else
        disk_nodes.difference_update(deleted_nodes)
        disk_nodes.update(new_nodes)

        return disk_nodes, deleted_nodes, new_nodes, orig_disk_nodes
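_diff_pack_names above boils down to set arithmetic over (name, sizes) pairs. A self-contained sketch of the same two-way diff, using plain sets for the on-disk list, the list seen at load time, and the current in-memory list:

def diff_pack_names(disk_nodes, packs_at_load, current_nodes):
    disk_nodes = set(disk_nodes)
    orig_disk_nodes = set(disk_nodes)
    deleted_nodes = packs_at_load - current_nodes   # gone since we loaded
    new_nodes = current_nodes - packs_at_load       # added by this process
    disk_nodes -= deleted_nodes
    disk_nodes |= new_nodes
    return disk_nodes, deleted_nodes, new_nodes, orig_disk_nodes

at_load = {('pack-a',), ('pack-b',)}
current = {('pack-a',), ('pack-c',)}                # we dropped b, created c
on_disk = {('pack-a',), ('pack-b',), ('pack-d',)}   # someone else added d
disk, deleted, new, orig = diff_pack_names(on_disk, at_load, current)
assert disk == {('pack-a',), ('pack-c',), ('pack-d',)}
assert deleted == {('pack-b',)} and new == {('pack-c',)}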
    def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
        """Given the correct set of pack files, update our saved info.

        :return: (removed, added, modified)
            removed     pack names removed from self._names
            added       pack names added to self._names
            modified    pack names that had changed value
        ## self._packs_at_load = disk_nodes
        new_names = dict(disk_nodes)
        # drop no longer present nodes
        for pack in self.all_packs():
            if (pack.name,) not in new_names:
                removed.append(pack.name)
                self._remove_pack_from_memory(pack)
        # add new nodes/refresh existing ones
        for key, value in disk_nodes:
                    self._remove_pack_from_memory(self.get_pack_by_name(name))
                    self._names[name] = sizes
                    self.get_pack_by_name(name)
                    modified.append(name)
                self._names[name] = sizes
                self.get_pack_by_name(name)
    def _clear_obsolete_packs(self):
        return removed, added, modified
    def _save_pack_names(self, clear_obsolete_packs=False, obsolete_packs=None):
        """Save the list of packs.

        This will take out the mutex around the pack names list for the
        duration of the method call. If concurrent updates have been made, a
        three-way merge between the current list and the current in memory list

        :param clear_obsolete_packs: If True, clear out the contents of the
            obsolete_packs directory.
        :param obsolete_packs: Packs that are obsolete once the new pack-names
            file has been written.
        :return: A list of the names saved that were not previously on disk.
        already_obsolete = []
        builder = self._index_builder_class()
        (disk_nodes, deleted_nodes, new_nodes,
         orig_disk_nodes) = self._diff_pack_names()
        # TODO: handle same-name, index-size-changes here -
        # e.g. use the value from disk, not ours, *unless* we're the one
        for key, value in disk_nodes:
            builder.add_node(key, value)
        self.transport.put_file('pack-names', builder.finish(),
            mode=self.repo.bzrdir._get_file_mode())
        self._packs_at_load = disk_nodes
        if clear_obsolete_packs:
            to_preserve = set([o.name for o in obsolete_packs])
            already_obsolete = self._clear_obsolete_packs(to_preserve)
        self._unlock_names()
        # synchronise the memory packs list with what we just wrote:
        self._syncronize_pack_names_from_disk_nodes(disk_nodes)
            # TODO: We could add one more condition here. "if o.name not in
            #       orig_disk_nodes and o != the new_pack we haven't written to
            #       disk yet. However, the new pack object is not easily
            #       accessible here (it would have to be passed through the
            #       autopacking code, etc.)
            obsolete_packs = [o for o in obsolete_packs
                              if o.name not in already_obsolete]
            self._obsolete_packs(obsolete_packs)
        return [new_node[0][0] for new_node in new_nodes]
    def reload_pack_names(self):
        """Sync our pack listing with what is present in the repository.

        This should be called when we find out that something we thought was
        present is now missing. This happens when another process re-packs the
        :return: True if the in-memory list of packs has been altered at all.
        # The ensure_loaded call is to handle the case where the first call
        # made involving the collection was to reload_pack_names, where we
        # don't have a view of disk contents. It's a bit of a bandaid, and
        # causes two reads of pack-names, but it's a rare corner case not struck
        # with regular push/pull etc.
        first_read = self.ensure_loaded()
        # out the new value.
        (disk_nodes, deleted_nodes, new_nodes,
         orig_disk_nodes) = self._diff_pack_names()
        # _packs_at_load is meant to be the explicit list of names in
        # 'pack-names' at the start. As such, it should not contain any
        # pending names that haven't been written out yet.
        self._packs_at_load = orig_disk_nodes
        modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
        if removed or added or modified:

    def _restart_autopack(self):
        """Reload the pack names list, and restart the autopack code."""
        if not self.reload_pack_names():
            # Re-raise the original exception, because something went missing
            # and a restart didn't find it
            raise errors.RetryAutopack(self.repo, False, sys.exc_info())
    def _clear_obsolete_packs(self, preserve=None):
        """Delete everything from the obsolete-packs directory.

        :return: A list of pack identifiers (the filename without '.pack') that
            were found in obsolete_packs.
        obsolete_pack_transport = self.transport.clone('obsolete_packs')
        if preserve is None:
        for filename in obsolete_pack_transport.list_dir('.'):
            name, ext = osutils.splitext(filename)
            if name in preserve:
                obsolete_pack_transport.delete(filename)
            except (errors.PathError, errors.TransportError), e:
                warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
                warning("couldn't delete obsolete pack, skipping it:\n%s"
    def _start_write_group(self):
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = NewPack(self._upload_transport, self._index_transport,
            self._pack_transport, upload_suffix='.pack',
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,

        # FIXME: just drop the transient index.
        # forget what names there are
        if self._new_pack is not None:
            self._new_pack.abort()
            self._remove_pack_indices(self._new_pack)
            self._new_pack = None
        self.repo._text_knit = None
            operation = cleanup.OperationWithCleanups(self._new_pack.abort)
            operation.add_cleanup(setattr, self, '_new_pack', None)
            # If we aborted while in the middle of finishing the write
            # group, _remove_pack_indices could fail because the indexes are
            # already gone.  But if they're not there we shouldn't fail in this
            # case, so we pass ignore_missing=True.
            operation.add_cleanup(self._remove_pack_indices, self._new_pack,
                ignore_missing=True)
            operation.run_simple()
        for resumed_pack in self._resumed_packs:
            operation = cleanup.OperationWithCleanups(resumed_pack.abort)
            # See comment in previous finally block.
            operation.add_cleanup(self._remove_pack_indices, resumed_pack,
                ignore_missing=True)
            operation.run_simple()
        del self._resumed_packs[:]

    def _remove_resumed_pack_indices(self):
        for resumed_pack in self._resumed_packs:
            self._remove_pack_indices(resumed_pack)
        del self._resumed_packs[:]
    def _check_new_inventories(self):
        """Detect missing inventories in this write group.

        :returns: list of strs, summarising any problems found. If the list is
            empty no problems were found.
        # The base implementation does no checks. GCRepositoryPackCollection

    def _commit_write_group(self):
        for prefix, versioned_file in (
                ('revisions', self.repo.revisions),
                ('inventories', self.repo.inventories),
                ('texts', self.repo.texts),
                ('signatures', self.repo.signatures),
            missing = versioned_file.get_missing_compression_parent_keys()
            all_missing.update([(prefix,) + key for key in missing])
            raise errors.BzrCheckError(
                "Repository %s has missing compression parent(s) %r "
                % (self.repo, sorted(all_missing)))
        problems = self._check_new_inventories()
            problems_summary = '\n'.join(problems)
            raise errors.BzrCheckError(
                "Cannot add revision(s) to repository: " + problems_summary)
        self._remove_pack_indices(self._new_pack)
        any_new_content = False
        if self._new_pack.data_inserted():
            # get all the data to disk and read to use
            self._new_pack.finish()
            self.allocate(self._new_pack)
            self._new_pack = None
            if not self.autopack():
            any_new_content = True
            self._new_pack.abort()
            self._new_pack = None
        for resumed_pack in self._resumed_packs:
            # XXX: this is a pretty ugly way to turn the resumed pack into a
            # properly committed pack.
            self._names[resumed_pack.name] = None
            self._remove_pack_from_memory(resumed_pack)
            resumed_pack.finish()
            self.allocate(resumed_pack)
            any_new_content = True
        del self._resumed_packs[:]
        result = self.autopack()
        # when autopack takes no steps, the names list is still
        self._save_pack_names()
        return self._save_pack_names()
    def _suspend_write_group(self):
        tokens = [pack.name for pack in self._resumed_packs]
        self._remove_pack_indices(self._new_pack)
        if self._new_pack.data_inserted():
            # get all the data to disk and read to use
            self._new_pack.finish(suspend=True)
            tokens.append(self._new_pack.name)
            self._new_pack = None
            self._new_pack.abort()
            self._new_pack = None
        self.repo._text_knit = None
        self._remove_resumed_pack_indices()

    def _resume_write_group(self, tokens):
        for token in tokens:
            self._resume_pack(token)
class KnitPackRepository(KnitRepository):
    """Repository with knit objects stored inside pack containers.

    The layering for a KnitPackRepository is:

    Graph | HPSS | Repository public layer |
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        self._fetch_order = 'unordered'

    def _warn_if_deprecated(self):
    def _warn_if_deprecated(self, branch=None):
        # This class isn't deprecated, but one sub-format is
        if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
        from bzrlib import repository
        if repository._deprecation_warning_done:
        repository._deprecation_warning_done = True
        warning("Format %s for %s is deprecated - please use"
            " 'bzr upgrade --1.6.1-rich-root'"
            % (self._format, self.bzrdir.transport.base))
            super(KnitPackRepository, self)._warn_if_deprecated(branch)
    def _abort_write_group(self):
        self.revisions._index._key_dependencies.clear()
        self._pack_collection._abort_write_group()

    def _find_inconsistent_revision_parents(self):
        """Find revisions with incorrectly cached parents.

        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
            parents-in-revision).
        if not self.is_locked():
            raise errors.ObjectNotLocked(self)
        pb = ui.ui_factory.nested_progress_bar()
            revision_nodes = self._pack_collection.revision_index \
                .combined_index.iter_all_entries()
            index_positions = []
            # Get the cached index values for all revisions, and also the location
            # in each index of the revision text so we can perform linear IO.
            for index, key, value, refs in revision_nodes:
                pos, length = value[1:].split(' ')
                index_positions.append((index, int(pos), key[0],
                    tuple(parent[0] for parent in refs[0])))
            pb.update("Reading revision index.", 0, 0)
            index_positions.sort()
            batch_count = len(index_positions) / 1000 + 1
            pb.update("Checking cached revision graph.", 0, batch_count)
            for offset in xrange(batch_count):
                pb.update("Checking cached revision graph.", offset)
                to_query = index_positions[offset * 1000:(offset + 1) * 1000]
                rev_ids = [item[2] for item in to_query]
                revs = self.get_revisions(rev_ids)
                for revision, item in zip(revs, to_query):
                    index_parents = item[3]
                    rev_parents = tuple(revision.parent_ids)
                    if index_parents != rev_parents:
                        result.append((revision.revision_id, index_parents, rev_parents))

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents."""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]
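The deprecated get_parents shown above is a thin adapter from the get_parent_map dict API to the older list-of-parents API, preserving input order and returning None for revisions the map does not know about. Its behaviour is easy to pin down in isolation:

def parents_list(parent_map, revision_ids):
    # One entry per requested id, None when the revision is not in the map.
    return [parent_map.get(r, None) for r in revision_ids]

pmap = {'rev-2': ('rev-1',), 'rev-3': ('rev-2',)}
assert parents_list(pmap, ['rev-3', 'rev-unknown']) == [('rev-2',), None]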
    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return super(KnitPackRepository, self)._get_source(to_format)

    def _make_parents_provider(self):
        return graph.CachingParentsProvider(self)

    def _refresh_data(self):
        if self._write_lock_count == 1 or (
            self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'r'):
            # forget what names there are
            self._pack_collection.reset()
            # XXX: Better to do an in-memory merge when acquiring a new lock -
            # factor out code from _save_pack_names.
            self._pack_collection.ensure_loaded()
        if not self.is_locked():
        self._pack_collection.reload_pack_names()

    def _start_write_group(self):
        self._pack_collection._start_write_group()

    def _commit_write_group(self):
        return self._pack_collection._commit_write_group()
        hint = self._pack_collection._commit_write_group()
        self.revisions._index._key_dependencies.clear()

    def suspend_write_group(self):
        # XXX check self._write_group is self.get_transaction()?
        tokens = self._pack_collection._suspend_write_group()
        self.revisions._index._key_dependencies.clear()
        self._write_group = None

    def _resume_write_group(self, tokens):
        self._start_write_group()
            self._pack_collection._resume_write_group(tokens)
        except errors.UnresumableWriteGroup:
            self._abort_write_group()
        for pack in self._pack_collection._resumed_packs:
            self.revisions._index.scan_unvalidated_index(pack.revision_index)

    def get_transaction(self):
        if self._write_lock_count:
            transaction = self._transaction
            self._transaction = None
            transaction.finish()
            for repo in self._fallback_repositories:
        self.control_files.unlock()
        if not self.is_locked():
            for repo in self._fallback_repositories:
class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._find_text_key_references_from_xml_inventory_lines
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
class RepositoryFormatPack(MetaDirRepositoryFormat):
    """Format logic for pack structured repositories.

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar development format 1 (needs bzr.dev from before 1.6)\n"
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "pack-0.92 with external reference support.\n")

    def check_conversion_target(self, target_format):
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(

    def _ignore_setting_bzrdir(self, format):
class RepositoryFormatPackDevelopment1Subtree(RepositoryFormatPack):
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained until the second release after bzr 1.5.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format should be retained until the second release after bzr 1.7.

    1.6.1-subtree[as it might have been] with B+Tree indices.

    This is [now] retained until we have a CHK based subtree format in

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = True
    _serializer = xml7.serializer_v7
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex

    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir(
            'development1-subtree')
            'development-subtree')

    def _ignore_setting_bzrdir(self, format):

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def check_conversion_target(self, target_format):
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        if not getattr(target_format, 'supports_tree_reference', False):
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 1 with subtree support "
            "(needs bzr.dev from before 1.6)\n")
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "pack-0.92-subtree with external reference support.\n")
            "1.6.1-subtree with B+Tree indices.\n")