~bzr-pqm/bzr/bzr.dev : revision 3735.2.143

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

16

17

"""Repository formats using B+Tree indices and groupcompress compression."""

18

19

import time

20

21

from bzrlib import (

22

bzrdir,

23

chk_map,

24

chk_serializer,

25

debug,

26

errors,

27

index as _mod_index,

28

inventory,

29

knit,

30

osutils,

31

pack,

32

repository,

33

revision as _mod_revision,

34

trace,

35

ui,

36

)

37

from bzrlib.index import GraphIndex, GraphIndexBuilder

38

from bzrlib.groupcompress import (

39

_GCGraphIndex,

40

GroupCompressVersionedFiles,

41

)

42

from bzrlib.repofmt.pack_repo import (

43

Pack,

44

NewPack,

45

KnitPackRepository,

46

PackRootCommitBuilder,

47

RepositoryPackCollection,

48

RepositoryFormatKnitPack6,

49

Packer,

50

CHKInventoryRepository,

51

RepositoryFormatPackDevelopment5Hash16,

52

RepositoryFormatPackDevelopment5Hash255,

53

)

54

55

56

57

class GCPack(NewPack):
    """A new pack whose indices use the groupcompress reference layout."""

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Open a new, writable pack inside pack_collection.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # This replaces NewPack.__init__ rather than extending it, in order
        # to:
        #  - change the inventory reference list length to 1
        #  - change the texts reference lists to 1
        # TODO: patch this to be parameterised

        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        builder_cls = pack_collection._index_builder_class
        # from brisbane-core: only build a chk index when the collection
        # itself has one.
        if pack_collection.chk_index is None:
            chk_builder = None
        else:
            # CHK based storage - just blobs, no compression or parents.
            chk_builder = builder_cls(reference_lists=0)
        # Revisions: parents list, no text compression.
        revision_builder = builder_cls(reference_lists=1)
        # Inventory: a single reference list.
        inventory_builder = builder_cls(reference_lists=1)
        # Texts: a single reference list, and two elements in the key tuple.
        text_builder = builder_cls(reference_lists=1, key_elements=2)
        # Signatures: Just blobs to store, no compression, no parents
        # listing.
        signature_builder = builder_cls(reference_lists=0)
        Pack.__init__(self, revision_builder, inventory_builder,
                      text_builder, signature_builder,
                      chk_index=chk_builder)
        self._pack_collection = pack_collection
        # Needed when we make readonly indices.
        self.index_class = pack_collection._index_class
        # Transport the new pack is opened (uploaded) on.
        self.upload_transport = pack_collection._upload_transport
        # Transport the indices are written out to.
        self.index_transport = pack_collection._index_transport
        # Transport the pack is renamed to when it is finished.
        self.pack_transport = pack_collection._pack_transport
        # File mode to upload the pack and indices with.
        self._file_mode = file_mode
        # Tracks the content written to the .pack file.
        self._hash = osutils.md5()
        # A four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # The temporary pack file name.
        self.random_name = osutils.rand_chars(20) + upload_suffix
        # When this pack was started.
        self.start_time = time.time()
        # Output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                         time.ctime(), self.upload_transport.base,
                         self.random_name, time.time() - self.start_time)
        # [list of pending byte strings, their aggregate size]. Kept in one
        # mutable list so the _write_data closure below can update both.
        self._buffer = [[], 0]

        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
                        _write=self.write_stream.write,
                        _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # Below the buffer cap and no flush requested: keep buffering.
            if not (_buffer[1] > self._cache_limit or flush):
                return
            pending = ''.join(_buffer[0])
            _write(pending)
            _update(pending)
            _buffer[:] = [[], 0]

        # Exposed on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # A pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # What state is the pack in? (open, finished, aborted)
        self._state = 'open'

    def _check_references(self):
        """Make sure our external references are present.

        Packs are allowed to have deltas whose base is not in the pack, but it
        must be present somewhere in this collection.  It is not allowed to
        have deltas based on a fallback repository.
        (See <https://bugs.launchpad.net/bzr/+bug/288751>)
        """
        # Groupcompress packs don't have any external references, so there
        # is nothing to check here.

166

167

168

class GCCHKPacker(Packer):
    """This class understands what it takes to collect a GCCHK repo."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        """Create a packer combining packs into one new groupcompress pack.

        :param pack_collection: The pack collection being repacked; it must
            have a chk index.
        :param packs: The source packs to combine.
        :param suffix: Passed through to Packer.__init__.
        :param revision_ids: Passed through to Packer.__init__.
        :param reload_func: Passed through to Packer.__init__.
        :raises AssertionError: If pack_collection has no chk index.
        """
        super(GCCHKPacker, self).__init__(pack_collection, packs, suffix,
                                          revision_ids=revision_ids,
                                          reload_func=reload_func)
        self._pack_collection = pack_collection
        # ATM, We only support this for GCCHK repositories
        if pack_collection.chk_index is None:
            raise AssertionError(
                'GCCHKPacker requires a pack collection with a chk index')
        # When True, leaf chk nodes are parsed to collect text references.
        self._gather_text_refs = False
        # Root keys of the chk maps, filled in while inventories are copied.
        self._chk_id_roots = []
        self._chk_p_id_roots = []
        self._text_refs = None
        # set by .pack() if self.revision_ids is not None
        self.revision_keys = None

    def _get_progress_stream(self, source_vf, keys, message, pb):
        """Return a record stream for keys that reports progress on pb."""
        def pb_stream():
            substream = source_vf.get_record_stream(keys, 'groupcompress',
                                                    True)
            for idx, record in enumerate(substream):
                if pb is not None:
                    pb.update(message, idx + 1, len(keys))
                yield record
        return pb_stream()

    def _get_filtered_inv_stream(self, source_vf, keys, message, pb=None):
        """Filter the texts of inventories, to find the chk pages.

        Each inventory record is deserialised so that the root keys of its
        id_to_entry and parent_id_basename_to_file_id maps can be recorded
        (once each) in self._chk_id_roots and self._chk_p_id_roots. The
        records themselves are yielded unchanged.

        :raises AssertionError: If an inventory has no
            parent_id_basename_to_file_id map.
        """
        total_keys = len(keys)

        def _filtered_inv_stream():
            id_roots_set = set()
            p_id_roots_set = set()
            stream = source_vf.get_record_stream(keys, 'groupcompress', True)
            for idx, record in enumerate(stream):
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                                                             record.key)
                if pb is not None:
                    pb.update('inv', idx, total_keys)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    self._chk_id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is None:
                    raise AssertionError(
                        'inventory %r has no parent_id_basename_to_file_id'
                        ' map' % (record.key,))
                key = p_id_map.key()
                if key not in p_id_roots_set:
                    p_id_roots_set.add(key)
                    self._chk_p_id_roots.append(key)
                yield record
            # We have finished processing all of the inventory records, we
            # don't need these sets anymore
            id_roots_set.clear()
            p_id_roots_set.clear()
        return _filtered_inv_stream()

    def _get_chk_streams(self, source_vf, keys, pb=None):
        """Yield record streams of chk pages, grouped for good compression.

        :param source_vf: The VersionedFiles to read chk pages from.
        :param keys: All chk keys present in source_vf.
        :param pb: Optional progress bar, updated once per chk node.
        :return: A generator of record streams.
        """
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'
        # This keeps 'similar' nodes together.

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other.
        #       Test the difference between using one Group per level, and
        #       using 1 Group per prefix. (so '' (root) would get a group, then
        #       all the references to search-key 'a' would get a group, etc.)
        total_keys = len(keys)
        remaining_keys = set(keys)
        # A one-element list so the nested generators can update the count.
        counter = [0]
        if self._gather_text_refs:
            # Just to get _bytes_to_entry, so we don't care about the
            # search_key_name
            inv = inventory.CHKInventory(None)
            self._text_refs = set()

        def _get_referenced_stream(root_keys, parse_leaf_nodes=False):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)

                def handle_internal_node(node):
                    for prefix, value in node._items.iteritems():
                        if not isinstance(value, tuple):
                            raise AssertionError("value is %s when a tuple"
                                " is expected" % (value.__class__))
                        if value not in next_keys:
                            keys_by_search_prefix.setdefault(prefix,
                                []).append(value)
                            next_keys.add(value)

                def handle_leaf_node(node):
                    # Store is None, because we know we have a LeafNode, and we
                    # just want its entries
                    for file_id, bytes in node.iteritems(None):
                        # A ValueError from an unparseable entry is allowed
                        # to propagate: there is no entry to record a text
                        # reference from in that case.
                        entry = inv._bytes_to_entry(bytes)
                        self._text_refs.add((entry.file_id, entry.revision))

                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        if isinstance(node, chk_map.InternalNode):
                            handle_internal_node(node)
                        elif parse_leaf_nodes:
                            handle_leaf_node(node)
                        # XXX: We don't walk the chk map to determine
                        #      referenced (file_id, revision_id) keys.
                        #      We don't do it yet because you really need to
                        #      filter out the ones that are present in the
                        #      parents of the rev just before the ones you are
                        #      copying, otherwise the filter is grabbing too
                        #      many keys...
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0], total_keys)
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                # If we get rid of the pre-calculation of all keys, we could
                # turn this around and do
                # next_keys.difference_update(seen_keys)
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])

        for stream in _get_referenced_stream(self._chk_id_roots,
                                             self._gather_text_refs):
            yield stream
        del self._chk_id_roots
        for stream in _get_referenced_stream(self._chk_p_id_roots, False):
            yield stream
        del self._chk_p_id_roots
        if remaining_keys:
            trace.mutter('There were %d keys in the chk index, %d of which'
                         ' were not referenced', total_keys,
                         len(remaining_keys))
            if self.revision_ids is None:
                stream = source_vf.get_record_stream(remaining_keys,
                                                     'unordered', True)
                yield stream

    def _build_vf(self, index_name, parents, delta, for_write=False):
        """Build a VersionedFiles instance on top of this group of packs.

        :param index_name: Index name without the '_index' suffix, e.g.
            'revision'.
        :param parents: Passed to _GCGraphIndex as its parents flag.
        :param delta: Passed to GroupCompressVersionedFiles as its delta flag.
        :param for_write: If True, build on self.new_pack with a writer
            attached; otherwise build a read-only view over self.packs.
        :raises AssertionError: If for_write is True but no new pack is open.
        """
        index_name = index_name + '_index'
        index_to_pack = {}
        access = knit._DirectPackAccess(index_to_pack)
        if for_write:
            # Use new_pack
            if self.new_pack is None:
                raise AssertionError(
                    'a new pack must be open before building a writable vf')
            index = getattr(self.new_pack, index_name)
            index_to_pack[index] = self.new_pack.access_tuple()
            index.set_optimize(for_size=True)
            access.set_writer(self.new_pack._writer, index,
                              self.new_pack.access_tuple())
            add_callback = index.add_nodes
        else:
            indices = []
            for source_pack in self.packs:
                sub_index = getattr(source_pack, index_name)
                index_to_pack[sub_index] = source_pack.access_tuple()
                indices.append(sub_index)
            index = _mod_index.CombinedGraphIndex(indices)
            add_callback = None
        vf = GroupCompressVersionedFiles(
            _GCGraphIndex(index,
                          add_callback=add_callback,
                          parents=parents,
                          is_locked=self._pack_collection.repo.is_locked),
            access=access,
            delta=delta)
        return vf

    def _build_vfs(self, index_name, parents, delta):
        """Build the source and target VersionedFiles."""
        source_vf = self._build_vf(index_name, parents,
                                   delta, for_write=False)
        target_vf = self._build_vf(index_name, parents,
                                   delta, for_write=True)
        return source_vf, target_vf

    def _copy_stream(self, source_vf, target_vf, keys, message, vf_to_stream,
                     pb_offset):
        """Copy the records for keys from source_vf into target_vf.

        :param message: What is being copied; used in the trace output and
            the progress message.
        :param vf_to_stream: Callable taking (source_vf, keys, message, pb)
            and returning the record stream to insert.
        :param pb_offset: Step number to report on self.pb.
        """
        trace.mutter('repacking %d %s', len(keys), message)
        # Interpolate here: the progress bar is handed a finished message.
        self.pb.update('repacking %s' % (message,), pb_offset)
        child_pb = ui.ui_factory.nested_progress_bar()
        try:
            stream = vf_to_stream(source_vf, keys, message, child_pb)
            target_vf.insert_record_stream(stream)
        finally:
            child_pb.finished()

    def _copy_revision_texts(self):
        """Copy revision texts, defaulting to every revision in the source."""
        source_vf, target_vf = self._build_vfs('revision', True, False)
        if not self.revision_keys:
            # We are doing a full fetch, aka 'pack'
            self.revision_keys = source_vf.keys()
        self._copy_stream(source_vf, target_vf, self.revision_keys,
                          'revisions', self._get_progress_stream, 1)

    def _copy_inventory_texts(self):
        """Copy inventories, recording their chk root keys as a side effect."""
        source_vf, target_vf = self._build_vfs('inventory', True, True)
        self._copy_stream(source_vf, target_vf, self.revision_keys,
                          'inventories', self._get_filtered_inv_stream, 2)

    def _copy_chk_texts(self):
        """Copy the chk pages, one record stream at a time."""
        source_vf, target_vf = self._build_vfs('chk', False, False)
        # TODO: This is technically spurious... if it is a performance issue,
        #       remove it
        total_keys = source_vf.keys()
        trace.mutter('repacking chk: %d id_to_entry roots,'
                     ' %d p_id_map roots, %d total keys',
                     len(self._chk_id_roots), len(self._chk_p_id_roots),
                     len(total_keys))
        self.pb.update('repacking chk', 3)
        child_pb = ui.ui_factory.nested_progress_bar()
        try:
            for stream in self._get_chk_streams(source_vf, total_keys,
                                                pb=child_pb):
                target_vf.insert_record_stream(stream)
        finally:
            child_pb.finished()

    def _copy_text_texts(self):
        """Copy every text found in the source packs."""
        source_vf, target_vf = self._build_vfs('text', True, True)
        # XXX: We don't walk the chk map to determine referenced (file_id,
        #      revision_id) keys.  We don't do it yet because you really need
        #      to filter out the ones that are present in the parents of the
        #      rev just before the ones you are copying, otherwise the filter
        #      is grabbing too many keys...
        text_keys = source_vf.keys()
        self._copy_stream(source_vf, target_vf, text_keys,
                          'text', self._get_progress_stream, 4)

    def _copy_signature_texts(self):
        """Copy every signature found in the source packs."""
        source_vf, target_vf = self._build_vfs('signature', False, False)
        signature_keys = source_vf.keys()
        # NOTE(review): this method used to call
        # signature_keys.intersection(self.revision_keys) and discard the
        # result, so no filtering ever took place. All signatures are still
        # copied here; confirm whether restricting them to
        # self.revision_keys is actually wanted before changing that, since
        # filtering could drop signatures from the new pack.
        self._copy_stream(source_vf, target_vf, signature_keys,
                          'signatures', self._get_progress_stream, 5)

    def _create_pack_from_packs(self):
        """Copy all content into a new pack and register it.

        :return: The new pack, or None if self._use_pack() rejected it (in
            which case the new pack is aborted).
        """
        self.pb.update('repacking', 0, 7)
        self.new_pack = self.open_pack()
        # Is this necessary for GC ?
        self.new_pack.set_write_cache_size(1024*1024)
        # Order matters: inventories must be copied before chk pages, because
        # copying inventories is what populates the chk root key lists.
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_chk_texts()
        self._copy_text_texts()
        self._copy_signature_texts()
        self.new_pack._check_references()
        if not self._use_pack(self.new_pack):
            self.new_pack.abort()
            return None
        self.pb.update('finishing repack', 6, 7)
        self.new_pack.finish()
        self._pack_collection.allocate(self.new_pack)
        return self.new_pack

440

441

442

class GCCHKReconcilePacker(GCCHKPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``bzr reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        """Create the packer; text references are always gathered."""
        super(GCCHKReconcilePacker, self).__init__(*args, **kwargs)
        # Set to True as soon as the copy differs from the source.
        self._data_changed = False
        self._gather_text_refs = True

    def _copy_inventory_texts(self):
        """Copy inventories, noting whether the set of keys changed."""
        source_vf, target_vf = self._build_vfs('inventory', True, True)
        self._copy_stream(source_vf, target_vf, self.revision_keys,
                          'inventories', self._get_filtered_inv_stream, 2)
        keys_differ = source_vf.keys() != self.revision_keys
        if keys_differ:
            self._data_changed = True

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        source_vf, target_vf = self._build_vfs('text', True, True)
        trace.mutter('repacking %d texts', len(self._text_refs))
        self.pb.update("repacking texts", 4)
        # There are three major tasks here.
        # Task 1: generate the ideal index.
        repo = self._pack_collection.repo
        # We want the revisions we just wrote, so base it on self.new_pack
        revision_vf = self._build_vf('revision', True, False, for_write=True)
        ancestor_keys = revision_vf.get_parent_map(revision_vf.keys())
        # Strip keys back into revision_ids.
        ancestors = {}
        for rev_key, parent_keys in ancestor_keys.iteritems():
            ancestors[rev_key[0]] = tuple([p[0] for p in parent_keys])
        del ancestor_keys
        # TODO: _generate_text_key_index should be much cheaper to generate
        #       from a chk repository, rather than the current implementation
        ideal_index = repo._generate_text_key_index(None, ancestors)
        file_id_parent_map = source_vf.get_parent_map(self._text_refs)
        # Task 2: sort the referenced keys into those usable as-is and those
        # needing corrected parents.
        # ok_keys and discarded_keys are bookkeeping only; they are not read
        # again in this method.
        ok_keys = []
        new_parent_keys = {}  # key -> corrected parent keys
        discarded_keys = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        for key in self._text_refs:
            try:
                ideal_parents = tuple(ideal_index[key])
            except KeyError:
                # Not in the ideal index at all.
                discarded_keys.append(key)
                self._data_changed = True
                continue
            if ideal_parents == (NULL_REVISION,):
                ideal_parents = ()
            if ideal_parents == file_id_parent_map[key]:
                # no change needed.
                ok_keys.append(key)
                continue
            # We need to change the parent graph, but we don't need to
            # re-insert the text (since we don't pun the compression
            # parent with the parents list)
            self._data_changed = True
            new_parent_keys[key] = ideal_parents
        # we're finished with some data.
        del ideal_index
        del file_id_parent_map

        # Task 3: bulk copy the data, updating records that need it.
        def _update_parents_for_texts():
            records = source_vf.get_record_stream(self._text_refs,
                                                  'groupcompress', False)
            for record in records:
                record_key = record.key
                if record_key in new_parent_keys:
                    record.parents = new_parent_keys[record_key]
                yield record
        target_vf.insert_record_stream(_update_parents_for_texts())

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # Only worth keeping if data was inserted and something changed.
        return new_pack.data_inserted() and self._data_changed

525

526

527

class GCRepositoryPackCollection(RepositoryPackCollection):
    """A pack collection that creates GCPack packs and repacks with
    GCCHKPacker by default.
    """

    pack_factory = GCPack

    def _execute_pack_operations(self, pack_operations,
                                 _packer_class=GCCHKPacker,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
            It is iterated twice, so it must not be a one-shot iterator.
        :param _packer_class: The class of packer to use (default:
            GCCHKPacker).
        :param reload_func: Passed through to the packer.
        :return: None.
        """
        # XXX: Copied across from RepositoryPackCollection simply because we
        #      want to override the _packer_class ... :(
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Honour the caller's choice of packer rather than hard-coding
            # GCCHKPacker.
            packer = _packer_class(self, packs, '.autopack',
                                   reload_func=reload_func)
            try:
                packer.pack()
            except errors.RetryWithNewPacks:
                # An exception is propagating out of this context, make sure
                # this packer has cleaned up. Packer() doesn't set its new_pack
                # state into the RepositoryPackCollection object, so we only
                # have access to it directly here.
                if packer.new_pack is not None:
                    packer.new_pack.abort()
                raise
            for old_pack in packs:
                self._remove_pack_from_memory(old_pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)

566

567

568

# XXX: This format is scheduled for termination

569

#

570

# class GCPackRepository(KnitPackRepository):

571

# """GC customisation of KnitPackRepository."""

572

#

573

# def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,

574

# _serializer):

575

# """Overridden to change pack collection class."""

576

# KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,

577

# _commit_builder_class, _serializer)

578

# # and now replace everything it did :)

579

# index_transport = self._transport.clone('indices')

580

# self._pack_collection = GCRepositoryPackCollection(self,

581

# self._transport, index_transport,

582

# self._transport.clone('upload'),

583

# self._transport.clone('packs'),

584

# _format.index_builder_class,

585

# _format.index_class,

586

# use_chk_index=self._format.supports_chks,

587

# )

588

# self.inventories = GroupCompressVersionedFiles(

589

# _GCGraphIndex(self._pack_collection.inventory_index.combined_index,

590

# add_callback=self._pack_collection.inventory_index.add_callback,

591

# parents=True, is_locked=self.is_locked),

592

# access=self._pack_collection.inventory_index.data_access)

593

# self.revisions = GroupCompressVersionedFiles(

594

# _GCGraphIndex(self._pack_collection.revision_index.combined_index,

595

# add_callback=self._pack_collection.revision_index.add_callback,

596

# parents=True, is_locked=self.is_locked),

597

# access=self._pack_collection.revision_index.data_access,

598

# delta=False)

599

# self.signatures = GroupCompressVersionedFiles(

600

# _GCGraphIndex(self._pack_collection.signature_index.combined_index,

601

# add_callback=self._pack_collection.signature_index.add_callback,

602

# parents=False, is_locked=self.is_locked),

603

# access=self._pack_collection.signature_index.data_access,

604

# delta=False)

605

# self.texts = GroupCompressVersionedFiles(

606

# _GCGraphIndex(self._pack_collection.text_index.combined_index,

607

# add_callback=self._pack_collection.text_index.add_callback,

608

# parents=True, is_locked=self.is_locked),

609

# access=self._pack_collection.text_index.data_access)

610

# if _format.supports_chks:

611

# # No graph, no compression:- references from chks are between

612

# # different objects not temporal versions of the same; and without

613

# # some sort of temporal structure knit compression will just fail.

614

# self.chk_bytes = GroupCompressVersionedFiles(

615

# _GCGraphIndex(self._pack_collection.chk_index.combined_index,

616

# add_callback=self._pack_collection.chk_index.add_callback,

617

# parents=False, is_locked=self.is_locked),

618

# access=self._pack_collection.chk_index.data_access)

619

# else:

620

# self.chk_bytes = None

621

# # True when the repository object is 'write locked' (as opposed to the

622

# # physical lock only taken out around changes to the pack-names list.)

623

# # Another way to represent this would be a decorator around the control

624

# # files object that presents logical locks as physical ones - if this

625

# # gets ugly consider that alternative design. RBC 20071011

626

# self._write_lock_count = 0

627

# self._transaction = None

628

# # for tests

629

# self._reconcile_does_inventory_gc = True

630

# self._reconcile_fixes_text_parents = True

631

# self._reconcile_backsup_inventory = False

632

#

633

# def suspend_write_group(self):

634

# raise errors.UnsuspendableWriteGroup(self)

635

#

636

# def _resume_write_group(self, tokens):

637

# raise errors.UnsuspendableWriteGroup(self)

638

#

639

# def _reconcile_pack(self, collection, packs, extension, revs, pb):

640

# bork

641

# return packer.pack(pb)

642

643

644

class GCCHKPackRepository(CHKInventoryRepository):
    """GC customisation of CHKInventoryRepository."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
                 _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                                    _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        self._pack_collection = GCRepositoryPackCollection(
            self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=self._format.supports_chks,
            )
        collection = self._pack_collection

        def graph_index(pack_index, parents):
            # Wrap one of the collection's aggregate indices for groupcompress.
            return _GCGraphIndex(pack_index.combined_index,
                                 add_callback=pack_index.add_callback,
                                 parents=parents,
                                 is_locked=self.is_locked)

        inventory_index = collection.inventory_index
        self.inventories = GroupCompressVersionedFiles(
            graph_index(inventory_index, True),
            access=inventory_index.data_access)
        revision_index = collection.revision_index
        self.revisions = GroupCompressVersionedFiles(
            graph_index(revision_index, True),
            access=revision_index.data_access,
            delta=False)
        signature_index = collection.signature_index
        self.signatures = GroupCompressVersionedFiles(
            graph_index(signature_index, False),
            access=signature_index.data_access,
            delta=False)
        text_index = collection.text_index
        self.texts = GroupCompressVersionedFiles(
            graph_index(text_index, True),
            access=text_index.data_access)
        # No parents, individual CHK pages don't have specific ancestry
        chk_index = collection.chk_index
        self.chk_bytes = GroupCompressVersionedFiles(
            graph_index(chk_index, False),
            access=chk_index.data_access)
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def suspend_write_group(self):
        """Always refuse: raises UnsuspendableWriteGroup."""
        raise errors.UnsuspendableWriteGroup(self)

    def _resume_write_group(self, tokens):
        """Always refuse: raises UnsuspendableWriteGroup."""
        raise errors.UnsuspendableWriteGroup(self)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        """Repack packs with a GCCHKReconcilePacker; revs is not used."""
        # assert revs is None
        reconcile_packer = GCCHKReconcilePacker(collection, packs, extension)
        return reconcile_packer.pack(pb)

712

713

714

# This format has been disabled for now. It is not expected that this will be a

715

# useful next-generation format.

716

#

717

# class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):

718

# """A B+Tree index using pack repository."""

719

#

720

# repository_class = GCPackRepository

721

# rich_root_data = False

722

# # Note: We cannot unpack a delta that references a text we haven't

723

# # seen yet. There are 2 options, work in fulltexts, or require

724

# # topological sorting. Using fulltexts is more optimal for local

725

# # operations, because the source can be smart about extracting

726

# # multiple in-a-row (and sharing strings). Topological is better

727

# # for remote, because we access less data.

728

# _fetch_order = 'unordered'

729

# _fetch_uses_deltas = False

730

#

731

# def _get_matching_bzrdir(self):

732

# return bzrdir.format_registry.make_bzrdir('gc-no-rich-root')

733

#

734

# def _ignore_setting_bzrdir(self, format):

735

# pass

736

#

737

# _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

738

#

739

# def get_format_string(self):

740

# """See RepositoryFormat.get_format_string()."""

741

# return ("Bazaar development format - btree+gc "

742

# "(needs bzr.dev from 1.13)\n")

743

#

744

# def get_format_description(self):

745

# """See RepositoryFormat.get_format_description()."""

746

# return ("Development repository format - btree+groupcompress "

747

# ", interoperates with pack-0.92\n")

748

#

749

750

class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
    """A hashed CHK+group compress pack repository."""

    repository_class = GCCHKPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_external_lookups = True
    supports_tree_reference = True
    supports_chks = True
    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_uses_deltas = False

    def _get_matching_bzrdir(self):
        """Return the 'gc-chk16' bzrdir format from the format registry."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('gc-chk16')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard attempts to set the matching bzrdir."""
        pass

    # Read-only in effect: the getter builds the value, the setter is a no-op.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = ('Bazaar development format - hash16chk+gc rich-root'
                         ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Development repository format - hash16chk+groupcompress"

    def check_conversion_target(self, target_format):
        """Raise BadConversionTarget unless target_format can hold our data.

        The target must support rich root data and nested trees.
        """
        if not target_format.rich_root_data:
            raise errors.BadConversionTarget(
                'Does not support rich root data.', target_format)
        nested_ok = getattr(target_format, 'supports_tree_reference', False)
        if not nested_ok:
            raise errors.BadConversionTarget(
                'Does not support nested trees', target_format)

792

793

794

class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
    """Pack repository format pairing hash255 CHK maps with groupcompress."""

    # Capability flags advertised by this format.
    rich_root_data = True
    supports_chks = True

    # Classes used for repositories of this format and for their commits.
    repository_class = GCCHKPackRepository
    # NOTE(review): the original source carried a comment at this point
    # saying that setting "this" to True causes InterModel1And2 to be used,
    # that False selects InterDifferingSerializer, and that it can go back
    # to True once InterModel1And2 is removed (as it is in bzr.dev). No
    # matching flag is assigned in this class body; presumably it referred
    # to supports_tree_reference -- confirm before relying on it.
    _commit_builder_class = PackRootCommitBuilder

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry to make the 'gc-chk255' bzrdir."""
        registry = bzrdir.format_registry
        return registry.make_bzrdir('gc-chk255')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""
        return None

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    def get_format_string(self):
        """Return the string identifying this format.

        See RepositoryFormat.get_format_string().
        """
        format_name = 'Bazaar development format - hash255chk+gc rich-root'
        requirement = ' (needs bzr.dev from 1.13)\n'
        return format_name + requirement

    def get_format_description(self):
        """Return a short description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = (
            "Development repository format - hash255chk+groupcompress")
        return description

    def check_conversion_target(self, target_format):
        """Raise BadConversionTarget if target_format cannot hold our data.

        :param target_format: The format being converted to. It must have a
            true rich_root_data attribute and a true supports_tree_reference
            attribute (a missing supports_tree_reference counts as False).
        :raises errors.BadConversionTarget: If either requirement is unmet;
            rich root support is checked first.
        """
        problem = None
        if not target_format.rich_root_data:
            problem = 'Does not support rich root data.'
        elif not getattr(target_format, 'supports_tree_reference', False):
            problem = 'Does not support nested trees'
        if problem is not None:
            raise errors.BadConversionTarget(problem, target_format)

829

830

831

class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
    """hash255 CHK + groupcompress pack format using the bigpage serializer."""

    # Capability flags advertised by this format.
    rich_root_data = True
    supports_chks = True

    # Classes and serializer used for repositories of this format.
    repository_class = GCCHKPackRepository
    # NOTE(review): the original source carried a comment at this point
    # saying that, for right now, setting "this" to True gives
    # InterModel1And2 rather than InterDifferingSerializer. No matching
    # flag is assigned in this class body; presumably it referred to
    # supports_tree_reference -- confirm before relying on it.
    _commit_builder_class = PackRootCommitBuilder
    _serializer = chk_serializer.chk_serializer_255_bigpage

    # A delta that references a text we have not seen yet cannot be
    # unpacked, so fetch must either work in fulltexts or require
    # topological ordering. Fulltexts suit local operations better (the
    # source can be smart about extracting several texts in a row and
    # sharing strings); topological ordering suits remote access better
    # because less data is read.
    _fetch_uses_deltas = False
    _fetch_order = 'unordered'

    def _get_matching_bzrdir(self):
        """Ask the bzrdir format registry to make the 'gc-chk255-big' bzrdir.
        """
        registry = bzrdir.format_registry
        return registry.make_bzrdir('gc-chk255-big')

    def _ignore_setting_bzrdir(self, format):
        """Accept and discard an assignment to _matchingbzrdir."""
        return None

    _matchingbzrdir = property(fget=_get_matching_bzrdir,
                               fset=_ignore_setting_bzrdir)

    def get_format_string(self):
        """Return the string identifying this format.

        See RepositoryFormat.get_format_string().
        """
        format_name = (
            'Bazaar development format - hash255chk+gc rich-root bigpage')
        requirement = ' (needs bzr.dev from 1.13)\n'
        return format_name + requirement

    def get_format_description(self):
        """Return a short description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = (
            "Development repository format - hash255chk+groupcompress"
            " + bigpage")
        return description

    def check_conversion_target(self, target_format):
        """Raise BadConversionTarget if target_format cannot hold our data.

        :param target_format: The format being converted to. It must have a
            true rich_root_data attribute and a true supports_tree_reference
            attribute (a missing supports_tree_reference counts as False).
        :raises errors.BadConversionTarget: If either requirement is unmet;
            rich root support is checked first.
        """
        problem = None
        if not target_format.rich_root_data:
            problem = 'Does not support rich root data.'
        elif not getattr(target_format, 'supports_tree_reference', False):
            problem = 'Does not support nested trees'
        if problem is not None:
            raise errors.BadConversionTarget(problem, target_format)