~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/pack_repo.py

Committer: Robert Collins
Date: 2007-10-31 20:53:23 UTC
mto: (3035.2.3 fetch_check_parents_165290)
mto: This revision was merged to the branch mainline in revision 2963.
Revision ID: robertc@robertcollins.net-20071031205323-pbtan7ltoci9slpq

Partial refactoring of pack_repo to create a Packer object for packing.

files modified:
bzrlib/reconcile.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/tests/test_repository.py

Show diffs side-by-side

added added

removed removed

bzrlib/repofmt/pack_repo.py

475

self.knit_access.set_writer(None, None, (None, None))

476

477

478

class RepositoryPackCollection(object):

479

"""Management of packs within a repository."""

480

481

def __init__(self, repo, transport, index_transport, upload_transport,

482

pack_transport):

483

"""Create a new RepositoryPackCollection.

484

485

:param transport: Addresses the repository base directory

486

(typically .bzr/repository/).

487

:param index_transport: Addresses the directory containing indices.

488

:param upload_transport: Addresses the directory into which packs are written

489

while they're being created.

490

:param pack_transport: Addresses the directory of existing complete packs.

491

"""

492

self.repo = repo

493

self.transport = transport

494

self._index_transport = index_transport

495

self._upload_transport = upload_transport

496

self._pack_transport = pack_transport

497

self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}

498

self.packs = []

499

# name:Pack mapping

500

self._packs_by_name = {}

501

# the previous pack-names content

502

self._packs_at_load = None

503

# when a pack is being created by this object, the state of that pack.

504

self._new_pack = None

505

# aggregated revision index data

506

self.revision_index = AggregateIndex()

507

self.inventory_index = AggregateIndex()

508

self.text_index = AggregateIndex()

509

self.signature_index = AggregateIndex()

510

511

def add_pack_to_memory(self, pack):

512

"""Make a Pack object available to the repository to satisfy queries.

513

514

:param pack: A Pack object.

515

"""

516

assert pack.name not in self._packs_by_name

517

self.packs.append(pack)

518

self._packs_by_name[pack.name] = pack

519

self.revision_index.add_index(pack.revision_index, pack)

520

self.inventory_index.add_index(pack.inventory_index, pack)

521

self.text_index.add_index(pack.text_index, pack)

522

self.signature_index.add_index(pack.signature_index, pack)

523

524

def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,

525

nostore_sha, random_revid):

526

file_id_index = GraphIndexPrefixAdapter(

527

self.text_index.combined_index,

528

(file_id, ), 1,

529

add_nodes_callback=self.text_index.add_callback)

530

self.repo._text_knit._index._graph_index = file_id_index

531

self.repo._text_knit._index._add_callback = file_id_index.add_nodes

532

return self.repo._text_knit.add_lines_with_ghosts(

533

revision_id, parents, new_lines, nostore_sha=nostore_sha,

534

random_id=random_revid, check_content=False)[0:2]

535

536

def all_packs(self):

537

"""Return a list of all the Pack objects this repository has.

538

539

Note that an in-progress pack being created is not returned.

540

541

:return: A list of Pack objects for all the packs in the repository.

542

"""

543

result = []

544

for name in self.names():

545

result.append(self.get_pack_by_name(name))

546

return result

547

548

def autopack(self):

549

"""Pack the pack collection incrementally.

550

551

This will not attempt global reorganisation or recompression,

552

rather it will just ensure that the total number of packs does

553

not grow without bound. It uses the _max_pack_count method to

554

determine if autopacking is needed, and the pack_distribution

555

method to determine the number of revisions in each pack.

556

557

If autopacking takes place then the packs name collection will have

558

been flushed to disk - packing requires updating the name collection

559

in synchronisation with certain steps. Otherwise the names collection

560

is not flushed.

561

562

:return: True if packing took place.

563

"""

564

# XXX: Should not be needed when the management of indices is sane.

565

total_revisions = self.revision_index.combined_index.key_count()

566

total_packs = len(self._names)

567

if self._max_pack_count(total_revisions) >= total_packs:

568

return False

569

# XXX: the following may want to be a class, to pack with a given

570

# policy.

571

mutter('Auto-packing repository %s, which has %d pack files, '

572

'containing %d revisions into %d packs.', self, total_packs,

573

total_revisions, self._max_pack_count(total_revisions))

574

# determine which packs need changing

575

pack_distribution = self.pack_distribution(total_revisions)

576

existing_packs = []

577

for pack in self.all_packs():

578

revision_count = pack.get_revision_count()

579

if revision_count == 0:

580

# revision less packs are not generated by normal operation,

581

# only by operations like sign-my-commits, and thus will not

582

# tend to grow rapidly or without bound like commit containing

583

# packs do - leave them alone as packing them really should

584

# group their data with the relevant commit, and that may

585

# involve rewriting ancient history - which autopack tries to

586

# avoid. Alternatively we could not group the data but treat

587

# each of these as having a single revision, and thus add

588

# one revision for each to the total revision count, to get

589

# a matching distribution.

590

continue

591

existing_packs.append((revision_count, pack))

592

pack_operations = self.plan_autopack_combinations(

593

existing_packs, pack_distribution)

594

self._execute_pack_operations(pack_operations)

595

return True

596

597

def create_pack_from_packs(self, packs, suffix, revision_ids=None):

478

class Packer(object):

479

"""Create a pack from packs."""

480

481

def __init__(self, pack_collection, packs, suffix, revision_ids=None):

482

self.packs = packs

483

self.suffix = suffix

484

self.revision_ids = revision_ids

485

self._pack_collection = pack_collection

486

487

def pack(self):

598

488

"""Create a new pack by reading data from other packs.

599

489

600

490

This does little more than a bulk copy of data. One key difference

614

504

# - which has already been flushed, so it's safe.

615

505

# XXX: - duplicate code warning with start_write_group; fix before

616

506

# considering 'done'.

617

if self._new_pack is not None:

507

if self._pack_collection._new_pack is not None:

618

508

raise errors.BzrError('call to create_pack_from_packs while '

619

509

'another pack is being written.')

620

if revision_ids is not None:

621

if len(revision_ids) == 0:

510

if self.revision_ids is not None:

511

if len(self.revision_ids) == 0:

622

512

# silly fetch request.

623

513

return None

624

514

else:

625

revision_ids = frozenset(revision_ids)

626

pb = ui.ui_factory.nested_progress_bar()

515

self.revision_ids = frozenset(self.revision_ids)

516

self.pb = ui.ui_factory.nested_progress_bar()

627

517

try:

628

return self._create_pack_from_packs(packs, suffix, revision_ids,

629

pb)

518

return self._create_pack_from_packs()

630

519

finally:

631

pb.finished()

632

633

def _create_pack_from_packs(self, packs, suffix, revision_ids, pb):

634

pb.update("Opening pack", 0, 5)

635

new_pack = NewPack(self._upload_transport, self._index_transport,

636

self._pack_transport, upload_suffix=suffix)

520

self.pb.finished()

521

522

def open_pack(self):

523

"""Open a pack for the pack we are creating."""

524

return NewPack(self._pack_collection._upload_transport,

525

self._pack_collection._index_transport,

526

self._pack_collection._pack_transport, upload_suffix=self.suffix)

527

528

def _create_pack_from_packs(self):

529

self.pb.update("Opening pack", 0, 5)

530

new_pack = self.open_pack()

637

531

# buffer data - we won't be reading-back during the pack creation and

638

532

# this makes a significant difference on sftp pushes.

639

533

new_pack.set_write_cache_size(1024*1024)

640

534

if 'pack' in debug.debug_flags:

641

535

plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)

642

for a_pack in packs]

643

if revision_ids is not None:

644

rev_count = len(revision_ids)

536

for a_pack in self.packs]

537

if self.revision_ids is not None:

538

rev_count = len(self.revision_ids)

645

539

else:

646

540

rev_count = 'all'

647

541

mutter('%s: create_pack: creating pack from source packs: '

648

542

'%s%s %s revisions wanted %s t=0',

649

time.ctime(), self._upload_transport.base, new_pack.random_name,

543

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

650

544

plain_pack_list, rev_count)

651

545

# select revisions

652

if revision_ids:

653

revision_keys = [(revision_id,) for revision_id in revision_ids]

546

if self.revision_ids:

547

revision_keys = [(revision_id,) for revision_id in self.revision_ids]

654

548

else:

655

549

revision_keys = None

656

550

657

551

# select revision keys

658

revision_index_map = self._packs_list_to_pack_map_and_index_list(

659

packs, 'revision_index')[0]

660

revision_nodes = self._index_contents(revision_index_map, revision_keys)

552

revision_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

553

self.packs, 'revision_index')[0]

554

revision_nodes = self._pack_collection._index_contents(revision_index_map, revision_keys)

661

555

# copy revision keys and adjust values

662

pb.update("Copying revision texts", 1)

556

self.pb.update("Copying revision texts", 1)

663

557

list(self._copy_nodes_graph(revision_nodes, revision_index_map,

664

558

new_pack._writer, new_pack.revision_index))

665

559

if 'pack' in debug.debug_flags:

666

560

mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',

667

time.ctime(), self._upload_transport.base, new_pack.random_name,

561

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

668

562

new_pack.revision_index.key_count(),

669

563

time.time() - new_pack.start_time)

670

564

# select inventory keys

672

566

# querying for keys here could introduce a bug where an inventory item

673

567

# is missed, so do not change it to query separately without cross

674

568

# checking like the text key check below.

675

inventory_index_map = self._packs_list_to_pack_map_and_index_list(

676

packs, 'inventory_index')[0]

677

inv_nodes = self._index_contents(inventory_index_map, inv_keys)

569

inventory_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

570

self.packs, 'inventory_index')[0]

571

inv_nodes = self._pack_collection._index_contents(inventory_index_map, inv_keys)

678

572

# copy inventory keys and adjust values

679

573

# XXX: Should be a helper function to allow different inv representation

680

574

# at this point.

681

pb.update("Copying inventory texts", 2)

575

self.pb.update("Copying inventory texts", 2)

682

576

inv_lines = self._copy_nodes_graph(inv_nodes, inventory_index_map,

683

577

new_pack._writer, new_pack.inventory_index, output_lines=True)

684

if revision_ids:

685

fileid_revisions = self.repo._find_file_ids_from_xml_inventory_lines(

686

inv_lines, revision_ids)

578

if self.revision_ids:

579

fileid_revisions = self._pack_collection.repo._find_file_ids_from_xml_inventory_lines(

580

inv_lines, self.revision_ids)

687

581

text_filter = []

688

582

for fileid, file_revids in fileid_revisions.iteritems():

689

583

text_filter.extend(

694

588

text_filter = None

695

589

if 'pack' in debug.debug_flags:

696

590

mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',

697

time.ctime(), self._upload_transport.base, new_pack.random_name,

591

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

698

592

new_pack.inventory_index.key_count(),

699

593

time.time() - new_pack.start_time)

700

594

# select text keys

701

text_index_map = self._packs_list_to_pack_map_and_index_list(

702

packs, 'text_index')[0]

703

text_nodes = self._index_contents(text_index_map, text_filter)

595

text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

596

self.packs, 'text_index')[0]

597

text_nodes = self._pack_collection._index_contents(text_index_map, text_filter)

704

598

if text_filter is not None:

705

599

# We could return the keys copied as part of the return value from

706

600

# _copy_nodes_graph but this doesn't work all that well with the

718

612

raise errors.RevisionNotPresent(a_missing_key[1],

719

613

a_missing_key[0])

720

614

# copy text keys and adjust values

721

pb.update("Copying content texts", 3)

615

self.pb.update("Copying content texts", 3)

722

616

list(self._copy_nodes_graph(text_nodes, text_index_map,

723

617

new_pack._writer, new_pack.text_index))

724

618

if 'pack' in debug.debug_flags:

725

619

mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',

726

time.ctime(), self._upload_transport.base, new_pack.random_name,

620

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

727

621

new_pack.text_index.key_count(),

728

622

time.time() - new_pack.start_time)

729

623

# select signature keys

730

624

signature_filter = revision_keys # same keyspace

731

signature_index_map = self._packs_list_to_pack_map_and_index_list(

732

packs, 'signature_index')[0]

733

signature_nodes = self._index_contents(signature_index_map,

625

signature_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

626

self.packs, 'signature_index')[0]

627

signature_nodes = self._pack_collection._index_contents(signature_index_map,

734

628

signature_filter)

735

629

# copy signature keys and adjust values

736

pb.update("Copying signature texts", 4)

630

self.pb.update("Copying signature texts", 4)

737

631

self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,

738

632

new_pack.signature_index)

739

633

if 'pack' in debug.debug_flags:

740

634

mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',

741

time.ctime(), self._upload_transport.base, new_pack.random_name,

635

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

742

636

new_pack.signature_index.key_count(),

743

637

time.time() - new_pack.start_time)

744

638

if not new_pack.data_inserted():

745

639

new_pack.abort()

746

640

return None

747

pb.update("Finishing pack", 5)

641

self.pb.update("Finishing pack", 5)

748

642

new_pack.finish()

749

self.allocate(new_pack)

643

self._pack_collection.allocate(new_pack)

750

644

return new_pack

751

645

646

def _copy_nodes(self, nodes, index_map, writer, write_index):

647

"""Copy knit nodes between packs with no graph references."""

648

pb = ui.ui_factory.nested_progress_bar()

649

try:

650

return self._do_copy_nodes(nodes, index_map, writer,

651

write_index, pb)

652

finally:

653

pb.finished()

654

655

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):

656

# for record verification

657

knit_data = _KnitData(None)

658

# plan a readv on each source pack:

659

# group by pack

660

nodes = sorted(nodes)

661

# how to map this into knit.py - or knit.py into this?

662

# we don't want the typical knit logic, we want grouping by pack

663

# at this point - perhaps a helper library for the following code

664

# duplication points?

665

request_groups = {}

666

for index, key, value in nodes:

667

if index not in request_groups:

668

request_groups[index] = []

669

request_groups[index].append((key, value))

670

record_index = 0

671

pb.update("Copied record", record_index, len(nodes))

672

for index, items in request_groups.iteritems():

673

pack_readv_requests = []

674

for key, value in items:

675

# ---- KnitGraphIndex.get_position

676

bits = value[1:].split(' ')

677

offset, length = int(bits[0]), int(bits[1])

678

pack_readv_requests.append((offset, length, (key, value[0])))

679

# linear scan up the pack

680

pack_readv_requests.sort()

681

# copy the data

682

transport, path = index_map[index]

683

reader = pack.make_readv_reader(transport, path,

684

[offset[0:2] for offset in pack_readv_requests])

685

for (names, read_func), (_1, _2, (key, eol_flag)) in \

686

izip(reader.iter_records(), pack_readv_requests):

687

raw_data = read_func(None)

688

# check the header only

689

df, _ = knit_data._parse_record_header(key[-1], raw_data)

690

df.close()

691

pos, size = writer.add_bytes_record(raw_data, names)

692

write_index.add_node(key, eol_flag + "%d %d" % (pos, size))

693

pb.update("Copied record", record_index)

694

record_index += 1

695

696

def _copy_nodes_graph(self, nodes, index_map, writer, write_index,

697

output_lines=False):

698

"""Copy knit nodes between packs.

699

700

:param output_lines: Return lines present in the copied data as

701

an iterator.

702

"""

703

pb = ui.ui_factory.nested_progress_bar()

704

try:

705

return self._do_copy_nodes_graph(nodes, index_map, writer,

706

write_index, output_lines, pb)

707

finally:

708

pb.finished()

709

710

def _do_copy_nodes_graph(self, nodes, index_map, writer, write_index,

711

output_lines, pb):

712

# for record verification

713

knit_data = _KnitData(None)

714

# for line extraction when requested (inventories only)

715

if output_lines:

716

factory = knit.KnitPlainFactory()

717

# plan a readv on each source pack:

718

# group by pack

719

nodes = sorted(nodes)

720

# how to map this into knit.py - or knit.py into this?

721

# we don't want the typical knit logic, we want grouping by pack

722

# at this point - perhaps a helper library for the following code

723

# duplication points?

724

request_groups = {}

725

record_index = 0

726

pb.update("Copied record", record_index, len(nodes))

727

for index, key, value, references in nodes:

728

if index not in request_groups:

729

request_groups[index] = []

730

request_groups[index].append((key, value, references))

731

for index, items in request_groups.iteritems():

732

pack_readv_requests = []

733

for key, value, references in items:

734

# ---- KnitGraphIndex.get_position

735

bits = value[1:].split(' ')

736

offset, length = int(bits[0]), int(bits[1])

737

pack_readv_requests.append((offset, length, (key, value[0], references)))

738

# linear scan up the pack

739

pack_readv_requests.sort()

740

# copy the data

741

transport, path = index_map[index]

742

reader = pack.make_readv_reader(transport, path,

743

[offset[0:2] for offset in pack_readv_requests])

744

for (names, read_func), (_1, _2, (key, eol_flag, references)) in \

745

izip(reader.iter_records(), pack_readv_requests):

746

raw_data = read_func(None)

747

if output_lines:

748

# read the entire thing

749

content, _ = knit_data._parse_record(key[-1], raw_data)

750

if len(references[-1]) == 0:

751

line_iterator = factory.get_fulltext_content(content)

752

else:

753

line_iterator = factory.get_linedelta_content(content)

754

for line in line_iterator:

755

yield line

756

else:

757

# check the header only

758

df, _ = knit_data._parse_record_header(key[-1], raw_data)

759

df.close()

760

pos, size = writer.add_bytes_record(raw_data, names)

761

write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)

762

pb.update("Copied record", record_index)

763

record_index += 1

764

765

766

767

class RepositoryPackCollection(object):

768

"""Management of packs within a repository."""

769

770

def __init__(self, repo, transport, index_transport, upload_transport,

771

pack_transport):

772

"""Create a new RepositoryPackCollection.

773

774

:param transport: Addresses the repository base directory

775

(typically .bzr/repository/).

776

:param index_transport: Addresses the directory containing indices.

777

:param upload_transport: Addresses the directory into which packs are written

778

while they're being created.

779

:param pack_transport: Addresses the directory of existing complete packs.

780

"""

781

self.repo = repo

782

self.transport = transport

783

self._index_transport = index_transport

784

self._upload_transport = upload_transport

785

self._pack_transport = pack_transport

786

self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}

787

self.packs = []

788

# name:Pack mapping

789

self._packs_by_name = {}

790

# the previous pack-names content

791

self._packs_at_load = None

792

# when a pack is being created by this object, the state of that pack.

793

self._new_pack = None

794

# aggregated revision index data

795

self.revision_index = AggregateIndex()

796

self.inventory_index = AggregateIndex()

797

self.text_index = AggregateIndex()

798

self.signature_index = AggregateIndex()

799

800

def add_pack_to_memory(self, pack):

801

"""Make a Pack object available to the repository to satisfy queries.

802

803

:param pack: A Pack object.

804

"""

805

assert pack.name not in self._packs_by_name

806

self.packs.append(pack)

807

self._packs_by_name[pack.name] = pack

808

self.revision_index.add_index(pack.revision_index, pack)

809

self.inventory_index.add_index(pack.inventory_index, pack)

810

self.text_index.add_index(pack.text_index, pack)

811

self.signature_index.add_index(pack.signature_index, pack)

812

813

def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,

814

nostore_sha, random_revid):

815

file_id_index = GraphIndexPrefixAdapter(

816

self.text_index.combined_index,

817

(file_id, ), 1,

818

add_nodes_callback=self.text_index.add_callback)

819

self.repo._text_knit._index._graph_index = file_id_index

820

self.repo._text_knit._index._add_callback = file_id_index.add_nodes

821

return self.repo._text_knit.add_lines_with_ghosts(

822

revision_id, parents, new_lines, nostore_sha=nostore_sha,

823

random_id=random_revid, check_content=False)[0:2]

824

825

def all_packs(self):

826

"""Return a list of all the Pack objects this repository has.

827

828

Note that an in-progress pack being created is not returned.

829

830

:return: A list of Pack objects for all the packs in the repository.

831

"""

832

result = []

833

for name in self.names():

834

result.append(self.get_pack_by_name(name))

835

return result

836

837

def autopack(self):

838

"""Pack the pack collection incrementally.

839

840

This will not attempt global reorganisation or recompression,

841

rather it will just ensure that the total number of packs does

842

not grow without bound. It uses the _max_pack_count method to

843

determine if autopacking is needed, and the pack_distribution

844

method to determine the number of revisions in each pack.

845

846

If autopacking takes place then the packs name collection will have

847

been flushed to disk - packing requires updating the name collection

848

in synchronisation with certain steps. Otherwise the names collection

849

is not flushed.

850

851

:return: True if packing took place.

852

"""

853

# XXX: Should not be needed when the management of indices is sane.

854

total_revisions = self.revision_index.combined_index.key_count()

855

total_packs = len(self._names)

856

if self._max_pack_count(total_revisions) >= total_packs:

857

return False

858

# XXX: the following may want to be a class, to pack with a given

859

# policy.

860

mutter('Auto-packing repository %s, which has %d pack files, '

861

'containing %d revisions into %d packs.', self, total_packs,

862

total_revisions, self._max_pack_count(total_revisions))

863

# determine which packs need changing

864

pack_distribution = self.pack_distribution(total_revisions)

865

existing_packs = []

866

for pack in self.all_packs():

867

revision_count = pack.get_revision_count()

868

if revision_count == 0:

869

# revision less packs are not generated by normal operation,

870

# only by operations like sign-my-commits, and thus will not

871

# tend to grow rapidly or without bound like commit containing

872

# packs do - leave them alone as packing them really should

873

# group their data with the relevant commit, and that may

874

# involve rewriting ancient history - which autopack tries to

875

# avoid. Alternatively we could not group the data but treat

876

# each of these as having a single revision, and thus add

877

# one revision for each to the total revision count, to get

878

# a matching distribution.

879

continue

880

existing_packs.append((revision_count, pack))

881

pack_operations = self.plan_autopack_combinations(

882

existing_packs, pack_distribution)

883

self._execute_pack_operations(pack_operations)

884

return True

885

752

886

def _execute_pack_operations(self, pack_operations):

753

887

"""Execute a series of pack operations.

754

888

759

893

# we may have no-ops from the setup logic

760

894

if len(packs) == 0:

761

895

continue

762

# have a progress bar?

763

self.create_pack_from_packs(packs, '.autopack')

896

Packer(self, packs, '.autopack').pack()

764

897

for pack in packs:

765

898

self._remove_pack_from_memory(pack)

766

899

# record the newly available packs and stop advertising the old

841

974

842

975

return pack_operations

843

976

844

def _copy_nodes(self, nodes, index_map, writer, write_index):

845

"""Copy knit nodes between packs with no graph references."""

846

pb = ui.ui_factory.nested_progress_bar()

847

try:

848

return self._do_copy_nodes(nodes, index_map, writer,

849

write_index, pb)

850

finally:

851

pb.finished()

852

853

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):

854

# for record verification

855

knit_data = _KnitData(None)

856

# plan a readv on each source pack:

857

# group by pack

858

nodes = sorted(nodes)

859

# how to map this into knit.py - or knit.py into this?

860

# we don't want the typical knit logic, we want grouping by pack

861

# at this point - perhaps a helper library for the following code

862

# duplication points?

863

request_groups = {}

864

for index, key, value in nodes:

865

if index not in request_groups:

866

request_groups[index] = []

867

request_groups[index].append((key, value))

868

record_index = 0

869

pb.update("Copied record", record_index, len(nodes))

870

for index, items in request_groups.iteritems():

871

pack_readv_requests = []

872

for key, value in items:

873

# ---- KnitGraphIndex.get_position

874

bits = value[1:].split(' ')

875

offset, length = int(bits[0]), int(bits[1])

876

pack_readv_requests.append((offset, length, (key, value[0])))

877

# linear scan up the pack

878

pack_readv_requests.sort()

879

# copy the data

880

transport, path = index_map[index]

881

reader = pack.make_readv_reader(transport, path,

882

[offset[0:2] for offset in pack_readv_requests])

883

for (names, read_func), (_1, _2, (key, eol_flag)) in \

884

izip(reader.iter_records(), pack_readv_requests):

885

raw_data = read_func(None)

886

# check the header only

887

df, _ = knit_data._parse_record_header(key[-1], raw_data)

888

df.close()

889

pos, size = writer.add_bytes_record(raw_data, names)

890

write_index.add_node(key, eol_flag + "%d %d" % (pos, size))

891

pb.update("Copied record", record_index)

892

record_index += 1

893

894

def _copy_nodes_graph(self, nodes, index_map, writer, write_index,

895

output_lines=False):

896

"""Copy knit nodes between packs.

897

898

:param output_lines: Return lines present in the copied data as

899

an iterator.

900

"""

901

pb = ui.ui_factory.nested_progress_bar()

902

try:

903

return self._do_copy_nodes_graph(nodes, index_map, writer,

904

write_index, output_lines, pb)

905

finally:

906

pb.finished()

907

908

def _do_copy_nodes_graph(self, nodes, index_map, writer, write_index,

909

output_lines, pb):

910

# for record verification

911

knit_data = _KnitData(None)

912

# for line extraction when requested (inventories only)

913

if output_lines:

914

factory = knit.KnitPlainFactory()

915

# plan a readv on each source pack:

916

# group by pack

917

nodes = sorted(nodes)

918

# how to map this into knit.py - or knit.py into this?

919

# we don't want the typical knit logic, we want grouping by pack

920

# at this point - perhaps a helper library for the following code

921

# duplication points?

922

request_groups = {}

923

record_index = 0

924

pb.update("Copied record", record_index, len(nodes))

925

for index, key, value, references in nodes:

926

if index not in request_groups:

927

request_groups[index] = []

928

request_groups[index].append((key, value, references))

929

for index, items in request_groups.iteritems():

930

pack_readv_requests = []

931

for key, value, references in items:

932

# ---- KnitGraphIndex.get_position

933

bits = value[1:].split(' ')

934

offset, length = int(bits[0]), int(bits[1])

935

pack_readv_requests.append((offset, length, (key, value[0], references)))

936

# linear scan up the pack

937

pack_readv_requests.sort()

938

# copy the data

939

transport, path = index_map[index]

940

reader = pack.make_readv_reader(transport, path,

941

[offset[0:2] for offset in pack_readv_requests])

942

for (names, read_func), (_1, _2, (key, eol_flag, references)) in \

943

izip(reader.iter_records(), pack_readv_requests):

944

raw_data = read_func(None)

945

if output_lines:

946

# read the entire thing

947

content, _ = knit_data._parse_record(key[-1], raw_data)

948

if len(references[-1]) == 0:

949

line_iterator = factory.get_fulltext_content(content)

950

else:

951

line_iterator = factory.get_linedelta_content(content)

952

for line in line_iterator:

953

yield line

954

else:

955

# check the header only

956

df, _ = knit_data._parse_record_header(key[-1], raw_data)

957

df.close()

958

pos, size = writer.add_bytes_record(raw_data, names)

959

write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)

960

pb.update("Copied record", record_index)

961

record_index += 1

962

963

977

def ensure_loaded(self):

964

978

# NB: if you see an assertion error here, it's probably access against

965

979

# an unlocked repo. Naughty.

1483

1497

return 'w'

1484

1498

return 'r'

1485

1499

1500

def _find_inconsistent_revision_parents(self):

1501

"""Find revisions with incorrectly cached parents.

1502

1503

:returns: an iterator yielding tuples of (revision-id, parents-in-index,

1504

parents-in-revision).

1505

"""

1506

assert self.is_locked()

1507

pb = ui.ui_factory.nested_progress_bar()

1508

try:

1509

revision_nodes = self._pack_collection.revision_index \

1510

.combined_index.iter_all_entries()

1511

index_positions = []

1512

# Get the cached index values for all revisions, and also the location

1513

# in each index of the revision text so we can perform linear IO.

1514

for index, key, value, refs in revision_nodes:

1515

pos, length = value[1:].split(' ')

1516

index_positions.append((index, int(pos), key[0],

1517

tuple(parent[0] for parent in refs[0])))

1518

pb.update("Reading revision index.", 0, 0)

1519

index_positions.sort()

1520

total = len(index_positions) / 1000 + 1

1521

for offset in xrange(total):

1522

pb.update("Checking cached revision graph.", offset)

1523

to_query = index_positions[offset * 1000:(offset + 1) * 1000]

1524

if not to_query:

1525

break

1526

rev_ids = [item[2] for item in to_query]

1527

revs = self.get_revisions(rev_ids)

1528

for revision, item in zip(revs, to_query):

1529

index_parents = item[3]

1530

rev_parents = tuple(revision.parent_ids)

1531

if index_parents != rev_parents:

1532

yield (revision.revision_id, index_parents, rev_parents)

1533

finally:

1534

pb.finished()

1535

1486

1536

def get_parents(self, revision_ids):

1487

1537

"""See StackedParentsProvider.get_parents.

1488

1538

Older »