~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

Committer: John Arbash Meinel
Date: 2009-03-11 07:42:37 UTC
mto: (3735.2.143 brisbane-core)
mto: This revision was merged to the branch mainline in revision 4280.
Revision ID: john@arbash-meinel.com-20090311074237-6lzsbd3r0tmbofjz

Continue trying to reduce the friction for the groupcompress repo formats.

files modified:
bzrlib/chk_serializer.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repository.py

Show diffs side-by-side

added added

removed removed

bzrlib/repofmt/groupcompress_repo.py

from bzrlib import (

chk_map,

chk_serializer,

debug,

errors,

inventory,

ui,

)

from bzrlib.index import GraphIndex, GraphIndexBuilder

from bzrlib.repository import InterPackRepo

from bzrlib.groupcompress import (

_GCGraphIndex,

GroupCompressVersionedFiles,

RepositoryPackCollection,

RepositoryFormatKnitPack6,

Packer,

)

try:

from bzrlib.repofmt.pack_repo import (

CHKInventoryRepository,

RepositoryFormatPackDevelopment5,

RepositoryFormatPackDevelopment5Hash16,

## RepositoryFormatPackDevelopment5Hash16b,

## RepositoryFormatPackDevelopment5Hash63,

## RepositoryFormatPackDevelopment5Hash127a,

## RepositoryFormatPackDevelopment5Hash127b,

RepositoryFormatPackDevelopment5Hash255,

)

from bzrlib import

chk_support = True

except ImportError:

chk_support = False

def open_pack(self):

# The relative locations of the packs are constrained, but all are

# passed in because the caller has them, so as to avoid object churn.

index_builder_class = pack_collection._index_builder_class

if chk_support:

# from brisbane-core

if pack_collection.chk_index is not None:

chk_index = index_builder_class(reference_lists=0)

else:

chk_index = None

Pack.__init__(self,

# Revisions: parents list, no text compression.

100

index_builder_class(reference_lists=1),

101

# Inventory: We want to map compression only, but currently the

102

# knit code hasn't been updated enough to understand that, so we

103

# have a regular 2-list index giving parents and compression

104

# source.

105

index_builder_class(reference_lists=1),

106

# Texts: compression and per file graph, for all fileids - so two

107

# reference lists and two elements in the key tuple.

108

index_builder_class(reference_lists=1, key_elements=2),

109

# Signatures: Just blobs to store, no compression, no parents

110

# listing.

111

index_builder_class(reference_lists=0),

112

# CHK based storage - just blobs, no compression or parents.

113

chk_index=chk_index

114

)

# from brisbane-core

if pack_collection.chk_index is not None:

chk_index = index_builder_class(reference_lists=0)

115

else:

116

# from bzr.dev

117

Pack.__init__(self,

118

# Revisions: parents list, no text compression.

119

index_builder_class(reference_lists=1),

120

# Inventory: compressed, with graph for compatibility with other

121

# existing bzrlib code.

122

index_builder_class(reference_lists=1),

123

# Texts: per file graph:

124

index_builder_class(reference_lists=1, key_elements=2),

125

# Signatures: Just blobs to store, no compression, no parents

126

# listing.

127

index_builder_class(reference_lists=0),

128

)

chk_index = None

Pack.__init__(self,

# Revisions: parents list, no text compression.

index_builder_class(reference_lists=1),

# Inventory: We want to map compression only, but currently the

# knit code hasn't been updated enough to understand that, so we

# have a regular 2-list index giving parents and compression

# source.

index_builder_class(reference_lists=1),

# Texts: per file graph, for all fileids - so one reference list

# and two elements in the key tuple.

index_builder_class(reference_lists=1, key_elements=2),

# Signatures: Just blobs to store, no compression, no parents

# listing.

index_builder_class(reference_lists=0),

# CHK based storage - just blobs, no compression or parents.

100

chk_index=chk_index

101

)

129

102

self._pack_collection = pack_collection

130

103

# When we make readonly indices, we need this.

131

104

self.index_class = pack_collection._index_class

208

181

self._new_pack)

209

182

self.signature_index.add_writable_index(self._new_pack.signature_index,

210

183

self._new_pack)

211

if chk_support and self.chk_index is not None:

184

if self.chk_index is not None:

212

185

self.chk_index.add_writable_index(self._new_pack.chk_index,

213

186

self._new_pack)

214

187

self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

426

399

class GCPackRepository(KnitPackRepository):

427

400

"""GC customisation of KnitPackRepository."""

428

401

429

# Note: I think the CHK support can be dropped from this class as it's

430

# implemented via the GCCHKPackRepository class defined next. IGC 20090301

431

432

402

def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,

433

403

_serializer):

434

404

"""Overridden to change pack collection class."""

436

406

_commit_builder_class, _serializer)

437

407

# and now replace everything it did :)

438

408

index_transport = self._transport.clone('indices')

439

if chk_support:

440

self._pack_collection = GCRepositoryPackCollection(self,

441

self._transport, index_transport,

442

self._transport.clone('upload'),

443

self._transport.clone('packs'),

444

_format.index_builder_class,

445

_format.index_class,

446

use_chk_index=self._format.supports_chks,

447

)

448

else:

449

self._pack_collection = GCRepositoryPackCollection(self,

450

self._transport, index_transport,

451

self._transport.clone('upload'),

452

self._transport.clone('packs'),

453

_format.index_builder_class,

454

_format.index_class)

409

self._pack_collection = GCRepositoryPackCollection(self,

410

self._transport, index_transport,

411

self._transport.clone('upload'),

412

self._transport.clone('packs'),

413

_format.index_builder_class,

414

_format.index_class,

415

use_chk_index=self._format.supports_chks,

416

)

455

417

self.inventories = GroupCompressVersionedFiles(

456

418

_GCGraphIndex(self._pack_collection.inventory_index.combined_index,

457

419

add_callback=self._pack_collection.inventory_index.add_callback,

474

436

add_callback=self._pack_collection.text_index.add_callback,

475

437

parents=True, is_locked=self.is_locked),

476

438

access=self._pack_collection.text_index.data_access)

477

if chk_support and _format.supports_chks:

439

if _format.supports_chks:

478

440

# No graph, no compression:- references from chks are between

479

441

# different objects not temporal versions of the same; and without

480

442

# some sort of temporal structure knit compression will just fail.

498

460

self._reconcile_backsup_inventory = False

499

461

500

462

501

if chk_support:

502

class GCCHKPackRepository(CHKInventoryRepository):

503

"""GC customisation of CHKInventoryRepository."""

463

class GCCHKPackRepository(CHKInventoryRepository):

464

"""GC customisation of CHKInventoryRepository."""

504

465

505

def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
             _serializer):
    """Build the repository, then swap in group-compress storage.

    KnitPackRepository.__init__ runs first; the pack collection and the
    inventories, revisions, signatures, texts and chk_bytes stores are then
    replaced with group-compress based objects.
    """
    KnitPackRepository.__init__(
        self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer)

    def make_store(aggregate_index, has_parents, **vf_kwargs):
        # One GroupCompressVersionedFiles over a single aggregate index of
        # the pack collection; vf_kwargs carries the optional delta flag.
        graph_index = _GCGraphIndex(
            aggregate_index.combined_index,
            add_callback=aggregate_index.add_callback,
            parents=has_parents,
            is_locked=self.is_locked)
        return GroupCompressVersionedFiles(
            graph_index, access=aggregate_index.data_access, **vf_kwargs)

    # Everything the base initialiser set up for storage is replaced below.
    base_transport = self._transport
    indices_transport = base_transport.clone('indices')
    upload_transport = base_transport.clone('upload')
    packs_transport = base_transport.clone('packs')
    packs = GCRepositoryPackCollection(
        self, base_transport, indices_transport, upload_transport,
        packs_transport, _format.index_builder_class, _format.index_class,
        use_chk_index=self._format.supports_chks)
    self._pack_collection = packs

    self.inventories = make_store(packs.inventory_index, True)
    self.revisions = make_store(packs.revision_index, True, delta=False)
    self.signatures = make_store(packs.signature_index, False, delta=False)
    self.texts = make_store(packs.text_index, True)
    # CHK pages carry no ancestry of their own, hence no parents.
    self.chk_bytes = make_store(packs.chk_index, False)

    # Tracks whether the repository object is 'write locked', which is
    # distinct from the physical lock taken out only around changes to the
    # pack-names list.  An alternative design would be a decorator around
    # the control files object that presents logical locks as physical
    # ones - consider it if this gets ugly. RBC 20071011
    self._write_lock_count = 0
    self._transaction = None
    # for tests
    self._reconcile_does_inventory_gc = True
    self._reconcile_fixes_text_parents = True
    self._reconcile_backsup_inventory = False

466

def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
             _serializer):
    """Build the repository, then swap in group-compress storage.

    KnitPackRepository.__init__ runs first; the pack collection and the
    inventories, revisions, signatures, texts and chk_bytes stores are then
    replaced with group-compress based objects.
    """
    KnitPackRepository.__init__(
        self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer)

    def make_store(aggregate_index, has_parents, **vf_kwargs):
        # One GroupCompressVersionedFiles over a single aggregate index of
        # the pack collection; vf_kwargs carries the optional delta flag.
        graph_index = _GCGraphIndex(
            aggregate_index.combined_index,
            add_callback=aggregate_index.add_callback,
            parents=has_parents,
            is_locked=self.is_locked)
        return GroupCompressVersionedFiles(
            graph_index, access=aggregate_index.data_access, **vf_kwargs)

    # Everything the base initialiser set up for storage is replaced below.
    base_transport = self._transport
    indices_transport = base_transport.clone('indices')
    upload_transport = base_transport.clone('upload')
    packs_transport = base_transport.clone('packs')
    packs = GCRepositoryPackCollection(
        self, base_transport, indices_transport, upload_transport,
        packs_transport, _format.index_builder_class, _format.index_class,
        use_chk_index=self._format.supports_chks)
    self._pack_collection = packs

    self.inventories = make_store(packs.inventory_index, True)
    self.revisions = make_store(packs.revision_index, True, delta=False)
    self.signatures = make_store(packs.signature_index, False, delta=False)
    self.texts = make_store(packs.text_index, True)
    # CHK pages carry no ancestry of their own, hence no parents.
    self.chk_bytes = make_store(packs.chk_index, False)

    # Tracks whether the repository object is 'write locked', which is
    # distinct from the physical lock taken out only around changes to the
    # pack-names list.  An alternative design would be a decorator around
    # the control files object that presents logical locks as physical
    # ones - consider it if this gets ugly. RBC 20071011
    self._write_lock_count = 0
    self._transaction = None
    # for tests
    self._reconcile_does_inventory_gc = True
    self._reconcile_fixes_text_parents = True
    self._reconcile_backsup_inventory = False

559

520

560

521

561

522

class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):

570

531

# multiple in-a-row (and sharing strings). Topological is better

571

532

# for remote, because we access less data.

572

533

_fetch_order = 'unordered'

573

_fetch_gc_optimal = True

574

534

_fetch_uses_deltas = False

575

535

576

536

def get_format_string(self):

584

544

", interoperates with pack-0.92\n")

585

545

586

546

587

if chk_support:

588

from bzrlib import chk_serializer

589

class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
    """Hashed CHK + group-compress pack repository format (hash16 variant)."""

    repository_class = GCCHKPackRepository
    rich_root_data = True

    # Fetch policy.  A delta cannot be unpacked when it refers to a text that
    # has not been seen yet, which leaves two choices: transfer fulltexts or
    # insist on topological ordering.  Fulltexts suit local operations better
    # (the source can be smart about extracting several in a row and sharing
    # strings); topological ordering suits remote access better because less
    # data is read.
    _fetch_uses_deltas = False
    _fetch_gc_optimal = True
    _fetch_order = 'unordered'

    def get_format_string(self):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = ('Bazaar development format - hash16chk+gc rich-root'
                         ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """Return the human-readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        return "Development repository format - hash16chk+groupcompress"

612

613

614

class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
    """Hashed CHK + group-compress pack repository format (hash255 variant)."""

    repository_class = GCCHKPackRepository
    # NOTE(review): the earlier comment here said True selects
    # InterModel1And2 and that the flag should stay False (selecting
    # InterDifferingSerializer) until InterModel1And2 is removed.  The value
    # is True, so that advice appears to have been superseded - confirm.
    rich_root_data = True

    def get_format_string(self):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = ('Bazaar development format - hash255chk+gc rich-root'
                         ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """Return the human-readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        return "Development repository format - hash255chk+groupcompress"

631

632

633

# Serializer shared by the bigpage format below.  The 65536 argument is
# presumably the maximum page size - confirm against CHKSerializer.
chk_serializer_255_bigpage = chk_serializer.CHKSerializer(
    65536, 'hash-255-way')


class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
    """Hashed CHK + group-compress pack repository format (bigpage variant)."""

    repository_class = GCCHKPackRepository
    _serializer = chk_serializer_255_bigpage
    # For now, True here selects InterModel1And2 rather than
    # InterDifferingSerializer.
    rich_root_data = True

    # Fetch policy.  A delta cannot be unpacked when it refers to a text that
    # has not been seen yet, which leaves two choices: transfer fulltexts or
    # insist on topological ordering.  Fulltexts suit local operations better
    # (the source can be smart about extracting several in a row and sharing
    # strings); topological ordering suits remote access better because less
    # data is read.
    _fetch_uses_deltas = False
    _fetch_gc_optimal = True
    _fetch_order = 'unordered'

    def get_format_string(self):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = (
            'Bazaar development format - hash255chk+gc rich-root bigpage'
            ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """Return the human-readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = (
            "Development repository format - hash255chk+groupcompress + bigpage")
        return description

660

661

662

def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    Returns False as soon as either repository uses one of the group-compress
    formats; otherwise the decision is delegated to orig_method, which is
    bound to InterPackRepo.is_compatible when this function is defined.
    """
    gc_formats = [RepositoryFormatPackGCPlain]
    if chk_support:
        # The CHK-based formats only exist when CHK support was importable.
        gc_formats.extend([
            RepositoryFormatPackGCCHK16,
            RepositoryFormatPackGCCHK255,
            RepositoryFormatPackGCCHK255Big,
        ])
    gc_formats = tuple(gc_formats)

    # Source is checked before target, and target is not inspected at all
    # when the source already matches.
    for repo in (source, target):
        if isinstance(repo._format, gc_formats):
            return False
    return orig_method(source, target)

673

674

675

InterPackRepo.is_compatible = staticmethod(pack_incompatible)

547

class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
    """Hashed CHK + group-compress pack repository format (hash16 variant)."""

    repository_class = GCCHKPackRepository

    # Capability flags.
    supports_chks = True
    supports_tree_reference = True
    supports_external_lookups = True
    rich_root_data = True

    # Fetch policy.  A delta cannot be unpacked when it refers to a text that
    # has not been seen yet, which leaves two choices: transfer fulltexts or
    # insist on topological ordering.  Fulltexts suit local operations better
    # (the source can be smart about extracting several in a row and sharing
    # strings); topological ordering suits remote access better because less
    # data is read.
    _fetch_uses_deltas = False
    _fetch_order = 'unordered'

    def get_format_string(self):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = ('Bazaar development format - hash16chk+gc rich-root'
                         ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """Return the human-readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        return "Development repository format - hash16chk+groupcompress"

572

573

574

class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
    """Hashed CHK + group-compress pack repository format (hash255 variant)."""

    repository_class = GCCHKPackRepository
    supports_chks = True
    # NOTE(review): the earlier comment here said True selects
    # InterModel1And2 and that the flag should stay False (selecting
    # InterDifferingSerializer) until InterModel1And2 is removed.  The value
    # is True, so that advice appears to have been superseded - confirm.
    rich_root_data = True

    def get_format_string(self):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = ('Bazaar development format - hash255chk+gc rich-root'
                         ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """Return the human-readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        return "Development repository format - hash255chk+groupcompress"

592

593

594

class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
    """Hashed CHK + group-compress pack repository format (bigpage variant)."""

    repository_class = GCCHKPackRepository
    supports_chks = True
    _serializer = chk_serializer.chk_serializer_255_bigpage
    # For now, True here selects InterModel1And2 rather than
    # InterDifferingSerializer.
    rich_root_data = True

    # Fetch policy.  A delta cannot be unpacked when it refers to a text that
    # has not been seen yet, which leaves two choices: transfer fulltexts or
    # insist on topological ordering.  Fulltexts suit local operations better
    # (the source can be smart about extracting several in a row and sharing
    # strings); topological ordering suits remote access better because less
    # data is read.
    _fetch_uses_deltas = False
    _fetch_order = 'unordered'

    def get_format_string(self):
        """Return this format's identifying string.

        See RepositoryFormat.get_format_string().
        """
        format_string = (
            'Bazaar development format - hash255chk+gc rich-root bigpage'
            ' (needs bzr.dev from 1.13)\n')
        return format_string

    def get_format_description(self):
        """Return the human-readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        description = (
            "Development repository format - hash255chk+groupcompress + bigpage")
        return description

Older »