~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/versionedfile.py

Committer: Robert Collins
Date: 2008-06-11 04:20:16 UTC
mto: (3221.12.10 Development1) (3517.4.1 annotate)
mto: This revision was merged to the branch mainline in revision 3510.
Revision ID: robertc@robertcollins.net-20080611042016-661dds0v1rjz542g

First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.

files removed:
bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_escaped_store.py

files modified:
bzrlib/annotate.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/fetch.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/multiparent.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/smart/repository.py

bzrlib/store/__init__.py

bzrlib/store/versioned/__init__.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

Show diffs side-by-side

added added

removed removed

bzrlib/versionedfile.py

"""Versioned text file storage api."""

from cStringIO import StringIO

import os

import urllib

from zlib import adler32

revision,

ui,

)

from bzrlib.graph import Graph

from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider

from bzrlib.transport.memory import MemoryTransport

""")

from bzrlib.inter import InterObject

:ivar parents: A tuple of parent keys for self.key. If the object has

no parent information, None (as opposed to () for an empty list of

parents).

"""

def __init__(self):

"""Create a ContentFactory."""

self.parents = None

class AbsentContentFactory(object):

class FulltextContentFactory(ContentFactory):

"""Static data content factory.

This takes a fulltext when created and just returns that during

get_bytes_as('fulltext').

:ivar sha1: None, or the sha1 of the content fulltext.

:ivar storage_kind: The native storage kind of this factory. Always

'fulltext'.

:ivar key: The key of this content. Each key is a tuple with a single

string in it.

:ivar parents: A tuple of parent keys for self.key. If the object has

no parent information, None (as opposed to () for an empty list of

parents).

"""

100

101

def __init__(self, key, parents, sha1, text):

102

"""Create a ContentFactory."""

103

self.sha1 = sha1

104

self.storage_kind = 'fulltext'

105

self.key = key

106

self.parents = parents

107

self._text = text

108

109

def get_bytes_as(self, storage_kind):

110

if storage_kind == self.storage_kind:

111

return self._text

112

raise errors.UnavailableRepresentation(self.key, storage_kind,

113

self.storage_kind)

114

115

116

class AbsentContentFactory(ContentFactory):

117

"""A placeholder content factory for unavailable texts.

118

119

:ivar sha1: None.

131

self.parents = None

100

132

101

133

134

class AdapterFactory(ContentFactory):

135

"""A content factory to adapt between key prefix's."""

136

137

def __init__(self, key, parents, adapted):

138

"""Create an adapter factory instance."""

139

self.key = key

140

self.parents = parents

141

self._adapted = adapted

142

143

def __getattr__(self, attr):

144

"""Return a member from the adapted object."""

145

if attr in ('key', 'parents'):

146

return self.__dict__[attr]

147

else:

148

return getattr(self._adapted, attr)

149

150

102

151

def filter_absent(record_stream):

103

152

"""Adapt a record stream to remove absent records."""

104

153

for record in record_stream:

415

464

"""

416

465

raise NotImplementedError(self.annotate)

417

466

418

@deprecated_method(one_five)

419

def join(self, other, pb=None, msg=None, version_ids=None,

420

ignore_missing=False):

421

"""Integrate versions from other into this versioned file.

422

423

If version_ids is None all versions from other should be

424

incorporated into this versioned file.

425

426

Must raise RevisionNotPresent if any of the specified versions

427

are not present in the other file's history unless ignore_missing

428

is supplied in which case they are silently skipped.

429

"""

430

self._check_write_ok()

431

return InterVersionedFile.get(other, self).join(

432

pb,

433

msg,

434

version_ids,

435

ignore_missing)

436

437

467

def iter_lines_added_or_present_in_versions(self, version_ids=None,

438

468

pb=None):

439

469

"""Iterate over the lines in the versioned file from version_ids.

483

513

return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]

484

514

485

515

486

class RecordingVersionedFileDecorator(object):

487

"""A minimal versioned file that records calls made on it.

516

class RecordingVersionedFilesDecorator(object):

517

"""A minimal versioned files that records calls made on it.

488

518

489

519

Only enough methods have been added to support tests using it to date.

490

520

493

523

"""

494

524

495

525

def __init__(self, backing_vf):

496

"""Create a RecordingVersionedFileDecorator decorating backing_vf.

526

"""Create a RecordingVersionedFileDsecorator decorating backing_vf.

497

527

498

528

:param backing_vf: The versioned file to answer all methods.

499

529

"""

500

530

self._backing_vf = backing_vf

501

531

self.calls = []

502

532

503

def get_lines(self, version_ids):

504

self.calls.append(("get_lines", version_ids))

505

return self._backing_vf.get_lines(version_ids)

506

507

508

class _PlanMergeVersionedFile(object):

533

def get_record_stream(self, keys, sort_order, include_delta_closure):

534

self.calls.append(("get_record_stream", keys, sort_order,

535

include_delta_closure))

536

return self._backing_vf.get_record_stream(keys, sort_order,

537

include_delta_closure)

538

539

540

class KeyMapper(object):

541

"""KeyMappers map between keys and underlying paritioned storage."""

542

543

def map(self, key):

544

"""Map key to an underlying storage identifier.

545

546

:param key: A key tuple e.g. ('file-id', 'revision-id').

547

:return: An underlying storage identifier, specific to the partitioning

548

mechanism.

549

"""

550

raise NotImplementedError(self.map)

551

552

def unmap(self, partition_id):

553

"""Map a partitioned storage id back to a key prefix.

554

555

:param partition_id: The underlying partition id.

556

:return: As much of a key (or prefix) as is derivable from the parition

557

id.

558

"""

559

raise NotImplementedError(self.unmap)

560

561

562

class ConstantMapper(KeyMapper):

563

"""A key mapper that maps to a constant result."""

564

565

def __init__(self, result):

566

"""Create a ConstantMapper which will return result for all maps."""

567

self._result = result

568

569

def map(self, key):

570

"""See KeyMapper.map()."""

571

return self._result

572

573

574

class URLEscapeMapper(KeyMapper):

575

"""Base class for use with transport backed storage.

576

577

This provides a map and unmap wrapper that respectively url escape and

578

unescape their outputs and inputs.

579

"""

580

581

def map(self, key):

582

"""See KeyMapper.map()."""

583

return urllib.quote(self._map(key))

584

585

def unmap(self, partition_id):

586

"""See KeyMapper.unmap()."""

587

return self._unmap(urllib.unquote(partition_id))

588

589

590

class PrefixMapper(URLEscapeMapper):

591

"""A key mapper that extracts the first component of a key.

592

593

This mapper is for use with a transport based backend.

594

"""

595

596

def _map(self, key):

597

"""See KeyMapper.map()."""

598

return key[0]

599

600

def _unmap(self, partition_id):

601

"""See KeyMapper.unmap()."""

602

return (partition_id,)

603

604

605

class HashPrefixMapper(URLEscapeMapper):

606

"""A key mapper that combines the first component of a key with a hash.

607

608

This mapper is for use with a transport based backend.

609

"""

610

611

def _map(self, key):

612

"""See KeyMapper.map()."""

613

prefix = self._escape(key[0])

614

return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

615

616

def _escape(self, prefix):

617

"""No escaping needed here."""

618

return prefix

619

620

def _unmap(self, partition_id):

621

"""See KeyMapper.unmap()."""

622

return (self._unescape(osutils.basename(partition_id)),)

623

624

def _unescape(self, basename):

625

"""No unescaping needed for HashPrefixMapper."""

626

return basename

627

628

629

class HashEscapedPrefixMapper(HashPrefixMapper):

630

"""Combines the escaped first component of a key with a hash.

631

632

This mapper is for use with a transport based backend.

633

"""

634

635

_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

636

637

def _escape(self, prefix):

638

"""Turn a key element into a filesystem safe string.

639

640

This is similar to a plain urllib.quote, except

641

it uses specific safe characters, so that it doesn't

642

have to translate a lot of valid file ids.

643

"""

644

# @ does not get escaped. This is because it is a valid

645

# filesystem character we use all the time, and it looks

646

# a lot better than seeing %40 all the time.

647

r = [((c in self._safe) and c or ('%%%02x' % ord(c)))

648

for c in prefix]

649

return ''.join(r)

650

651

def _unescape(self, basename):

652

"""Escaped names are easily unescaped by urlutils."""

653

return urllib.unquote(basename)

654

655

656

def make_versioned_files_factory(versioned_file_factory, mapper):

657

"""Create a ThunkedVersionedFiles factory.

658

659

This will create a callable which when called creates a

660

ThunkedVersionedFiles on a transport, using mapper to access individual

661

versioned files, and versioned_file_factory to create each individual file.

662

"""

663

def factory(transport):

664

return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,

665

lambda:True)

666

return factory

667

668

669

class VersionedFiles(object):

670

"""Storage for many versioned files.

671

672

This object allows a single keyspace for accessing the history graph and

673

contents of named bytestrings.

674

675

Currently no implementation allows the graph of different key prefixes to

676

intersect, but the API does allow such implementations in the future.

677

"""

678

679

def add_lines(self, key, parents, lines, parent_texts=None,

680

left_matching_blocks=None, nostore_sha=None, random_id=False,

681

check_content=True):

682

"""Add a text to the store.

683

684

:param key: The key tuple of the text to add.

685

:param parents: The parents key tuples of the text to add.

686

:param lines: A list of lines. Each line must be a bytestring. And all

687

of them except the last must be terminated with \n and contain no

688

other \n's. The last line may either contain no \n's or a single

689

terminating \n. If the lines list does meet this constraint the add

690

routine may error or may succeed - but you will be unable to read

691

the data back accurately. (Checking the lines have been split

692

correctly is expensive and extremely unlikely to catch bugs so it

693

is not done at runtime unless check_content is True.)

694

:param parent_texts: An optional dictionary containing the opaque

695

representations of some or all of the parents of version_id to

696

allow delta optimisations. VERY IMPORTANT: the texts must be those

697

returned by add_lines or data corruption can be caused.

698

:param left_matching_blocks: a hint about which areas are common

699

between the text and its left-hand-parent. The format is

700

the SequenceMatcher.get_matching_blocks format.

701

:param nostore_sha: Raise ExistingContent and do not add the lines to

702

the versioned file if the digest of the lines matches this.

703

:param random_id: If True a random id has been selected rather than

704

an id determined by some deterministic process such as a converter

705

from a foreign VCS. When True the backend may choose not to check

706

for uniqueness of the resulting key within the versioned file, so

707

this should only be done when the result is expected to be unique

708

anyway.

709

:param check_content: If True, the lines supplied are verified to be

710

bytestrings that are correctly formed lines.

711

:return: The text sha1, the number of bytes in the text, and an opaque

712

representation of the inserted version which can be provided

713

back to future add_lines calls in the parent_texts dictionary.

714

"""

715

raise NotImplementedError(self.add_lines)

716

717

def add_mpdiffs(self, records):

718

"""Add mpdiffs to this VersionedFile.

719

720

Records should be iterables of version, parents, expected_sha1,

721

mpdiff. mpdiff should be a MultiParent instance.

722

"""

723

vf_parents = {}

724

mpvf = multiparent.MultiMemoryVersionedFile()

725

versions = []

726

for version, parent_ids, expected_sha1, mpdiff in records:

727

versions.append(version)

728

mpvf.add_diff(mpdiff, version, parent_ids)

729

needed_parents = set()

730

for version, parent_ids, expected_sha1, mpdiff in records:

731

needed_parents.update(p for p in parent_ids

732

if not mpvf.has_version(p))

733

# It seems likely that adding all the present parents as fulltexts can

734

# easily exhaust memory.

735

present_parents = set(self.get_parent_map(needed_parents).keys())

736

split_lines = osutils.split_lines

737

for record in self.get_record_stream(present_parents, 'unordered',

738

True):

739

mpvf.add_version(split_lines(record.get_bytes_as('fulltext')),

740

record.key, [])

741

for (key, parent_keys, expected_sha1, mpdiff), lines in\

742

zip(records, mpvf.get_line_list(versions)):

743

if len(parent_keys) == 1:

744

left_matching_blocks = list(mpdiff.get_matching_blocks(0,

745

mpvf.get_diff(parent_keys[0]).num_lines()))

746

else:

747

left_matching_blocks = None

748

version_sha1, _, version_text = self.add_lines(key,

749

parent_keys, lines, vf_parents,

750

left_matching_blocks=left_matching_blocks)

751

if version_sha1 != expected_sha1:

752

raise errors.VersionedFileInvalidChecksum(version)

753

vf_parents[key] = version_text

754

755

def annotate(self, key):

756

"""Return a list of (version-key, line) tuples for the text of key.

757

758

:raise RevisionNotPresent: If the key is not present.

759

"""

760

raise NotImplementedError(self.annotate)

761

762

def check(self, progress_bar=None):

763

"""Check this object for integrity."""

764

raise NotImplementedError(self.check)

765

766

@staticmethod

767

def check_not_reserved_id(version_id):

768

revision.check_not_reserved_id(version_id)

769

770

def _check_lines_not_unicode(self, lines):

771

"""Check that lines being added to a versioned file are not unicode."""

772

for line in lines:

773

if line.__class__ is not str:

774

raise errors.BzrBadParameterUnicode("lines")

775

776

def _check_lines_are_lines(self, lines):

777

"""Check that the lines really are full lines without inline EOL."""

778

for line in lines:

779

if '\n' in line[:-1]:

780

raise errors.BzrBadParameterContainsNewline("lines")

781

782

def get_parent_map(self, keys):

783

"""Get a map of the parents of keys.

784

785

:param keys: The keys to look up parents for.

786

:return: A mapping from keys to parents. Absent keys are absent from

787

the mapping.

788

"""

789

raise NotImplementedError(self.get_parent_map)

790

791

def get_record_stream(self, keys, ordering, include_delta_closure):

792

"""Get a stream of records for keys.

793

794

:param keys: The keys to include.

795

:param ordering: Either 'unordered' or 'topological'. A topologically

796

sorted stream has compression parents strictly before their

797

children.

798

:param include_delta_closure: If True then the closure across any

799

compression parents will be included (in the opaque data).

800

:return: An iterator of ContentFactory objects, each of which is only

801

valid until the iterator is advanced.

802

"""

803

raise NotImplementedError(self.get_record_stream)

804

805

def get_sha1s(self, keys):

806

"""Get the sha1's of the texts for the given keys.

807

808

:param keys: The names of the keys to lookup

809

:return: a list of sha1s matching keys.

810

"""

811

raise NotImplementedError(self.get_sha1s)

812

813

def insert_record_stream(self, stream):

814

"""Insert a record stream into this container.

815

816

:param stream: A stream of records to insert.

817

:return: None

818

:seealso VersionedFile.get_record_stream:

819

"""

820

raise NotImplementedError

821

822

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

823

"""Iterate over the lines in the versioned files from keys.

824

825

This may return lines from other keys. Each item the returned

826

iterator yields is a tuple of a line and a text version that that line

827

is present in (not introduced in).

828

829

Ordering of results is in whatever order is most suitable for the

830

underlying storage format.

831

832

If a progress bar is supplied, it may be used to indicate progress.

833

The caller is responsible for cleaning up progress bars (because this

834

is an iterator).

835

836

NOTES:

837

* Lines are normalised by the underlying store: they will all have \n

838

terminators.

839

* Lines are returned in arbitrary order.

840

841

:return: An iterator over (line, key).

842

"""

843

raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

844

845

def keys(self):

846

"""Return a iterable of the keys for all the contained texts."""

847

raise NotImplementedError(self.keys)

848

849

def make_mpdiffs(self, keys):

850

"""Create multiparent diffs for specified keys."""

851

keys_order = tuple(keys)

852

keys = frozenset(keys)

853

knit_keys = set(keys)

854

parent_map = self.get_parent_map(keys)

855

for parent_keys in parent_map.itervalues():

856

if parent_keys:

857

knit_keys.update(parent_keys)

858

missing_keys = keys - set(parent_map)

859

if missing_keys:

860

raise errors.RevisionNotPresent(missing_keys.pop(), self)

861

# We need to filter out ghosts, because we can't diff against them.

862

maybe_ghosts = knit_keys - keys

863

ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))

864

knit_keys.difference_update(ghosts)

865

lines = {}

866

split_lines = osutils.split_lines

867

for record in self.get_record_stream(knit_keys, 'topological', True):

868

lines[record.key] = split_lines(record.get_bytes_as('fulltext'))

869

# line_block_dict = {}

870

# for parent, blocks in record.extract_line_blocks():

871

# line_blocks[parent] = blocks

872

# line_blocks[record.key] = line_block_dict

873

diffs = []

874

for key in keys_order:

875

target = lines[key]

876

parents = parent_map[key] or []

877

# Note that filtering knit_keys can lead to a parent difference

878

# between the creation and the application of the mpdiff.

879

parent_lines = [lines[p] for p in parents if p in knit_keys]

880

if len(parent_lines) > 0:

881

left_parent_blocks = self._extract_blocks(key, parent_lines[0],

882

target)

883

else:

884

left_parent_blocks = None

885

diffs.append(multiparent.MultiParent.from_lines(target,

886

parent_lines, left_parent_blocks))

887

return diffs

888

889

def _extract_blocks(self, version_id, source, target):

890

return None

891

892

893

class ThunkedVersionedFiles(VersionedFiles):

894

"""Storage for many versioned files thunked onto a 'VersionedFile' class.

895

896

This object allows a single keyspace for accessing the history graph and

897

contents of named bytestrings.

898

899

Currently no implementation allows the graph of different key prefixes to

900

intersect, but the API does allow such implementations in the future.

901

"""

902

903

def __init__(self, transport, file_factory, mapper, is_locked):

904

"""Create a ThunkedVersionedFiles."""

905

self._transport = transport

906

self._file_factory = file_factory

907

self._mapper = mapper

908

self._is_locked = is_locked

909

910

def add_lines(self, key, parents, lines, parent_texts=None,

911

left_matching_blocks=None, nostore_sha=None, random_id=False,

912

check_content=True):

913

"""See VersionedFiles.add_lines()."""

914

path = self._mapper.map(key)

915

version_id = key[-1]

916

parents = [parent[-1] for parent in parents]

917

vf = self._get_vf(path)

918

try:

919

try:

920

return vf.add_lines_with_ghosts(version_id, parents, lines,

921

parent_texts=parent_texts,

922

left_matching_blocks=left_matching_blocks,

923

nostore_sha=nostore_sha, random_id=random_id,

924

check_content=check_content)

925

except NotImplementedError:

926

return vf.add_lines(version_id, parents, lines,

927

parent_texts=parent_texts,

928

left_matching_blocks=left_matching_blocks,

929

nostore_sha=nostore_sha, random_id=random_id,

930

check_content=check_content)

931

except errors.NoSuchFile:

932

# parent directory may be missing, try again.

933

self._transport.mkdir(osutils.dirname(path))

934

try:

935

return vf.add_lines_with_ghosts(version_id, parents, lines,

936

parent_texts=parent_texts,

937

left_matching_blocks=left_matching_blocks,

938

nostore_sha=nostore_sha, random_id=random_id,

939

check_content=check_content)

940

except NotImplementedError:

941

return vf.add_lines(version_id, parents, lines,

942

parent_texts=parent_texts,

943

left_matching_blocks=left_matching_blocks,

944

nostore_sha=nostore_sha, random_id=random_id,

945

check_content=check_content)

946

947

def annotate(self, key):

948

"""Return a list of (version-key, line) tuples for the text of key.

949

950

:raise RevisionNotPresent: If the key is not present.

951

"""

952

prefix = key[:-1]

953

path = self._mapper.map(prefix)

954

vf = self._get_vf(path)

955

origins = vf.annotate(key[-1])

956

result = []

957

for origin, line in origins:

958

result.append((prefix + (origin,), line))

959

return result

960

961

def check(self, progress_bar=None):

962

"""See VersionedFiles.check()."""

963

for prefix, vf in self._iter_all_components():

964

vf.check()

965

966

def get_parent_map(self, keys):

967

"""Get a map of the parents of keys.

968

969

:param keys: The keys to look up parents for.

970

:return: A mapping from keys to parents. Absent keys are absent from

971

the mapping.

972

"""

973

prefixes = self._partition_keys(keys)

974

result = {}

975

for prefix, suffixes in prefixes.items():

976

path = self._mapper.map(prefix)

977

vf = self._get_vf(path)

978

parent_map = vf.get_parent_map(suffixes)

979

for key, parents in parent_map.items():

980

result[prefix + (key,)] = tuple(

981

prefix + (parent,) for parent in parents)

982

return result

983

984

def _get_vf(self, path):

985

if not self._is_locked():

986

raise errors.ObjectNotLocked(self)

987

return self._file_factory(path, self._transport, create=True,

988

get_scope=lambda:None)

989

990

def _partition_keys(self, keys):

991

"""Turn keys into a dict of prefix:suffix_list."""

992

result = {}

993

for key in keys:

994

prefix_keys = result.setdefault(key[:-1], [])

995

prefix_keys.append(key[-1])

996

return result

997

998

def _get_all_prefixes(self):

999

# Identify all key prefixes.

1000

# XXX: A bit hacky, needs polish.

1001

if type(self._mapper) == ConstantMapper:

1002

paths = [self._mapper.map(())]

1003

prefixes = [()]

1004

else:

1005

relpaths = set()

1006

for quoted_relpath in self._transport.iter_files_recursive():

1007

path, ext = os.path.splitext(quoted_relpath)

1008

relpaths.add(path)

1009

paths = list(relpaths)

1010

prefixes = [self._mapper.unmap(path) for path in paths]

1011

return zip(paths, prefixes)

1012

1013

def get_record_stream(self, keys, ordering, include_delta_closure):

1014

"""See VersionedFiles.get_record_stream()."""

1015

# Ordering will be taken care of by each partitioned store; group keys

1016

# by partition.

1017

keys = sorted(keys)

1018

for prefix, suffixes, vf in self._iter_keys_vf(keys):

1019

suffixes = [(suffix,) for suffix in suffixes]

1020

for record in vf.get_record_stream(suffixes, ordering,

1021

include_delta_closure):

1022

if record.parents is not None:

1023

record.parents = tuple(

1024

prefix + parent for parent in record.parents)

1025

record.key = prefix + record.key

1026

yield record

1027

1028

def _iter_keys_vf(self, keys):

1029

prefixes = self._partition_keys(keys)

1030

sha1s = {}

1031

for prefix, suffixes in prefixes.items():

1032

path = self._mapper.map(prefix)

1033

vf = self._get_vf(path)

1034

yield prefix, suffixes, vf

1035

1036

def get_sha1s(self, keys):

1037

"""See VersionedFiles.get_sha1s()."""

1038

sha1s = {}

1039

for prefix,suffixes, vf in self._iter_keys_vf(keys):

1040

vf_sha1s = vf.get_sha1s(suffixes)

1041

for suffix, sha1 in zip(suffixes, vf.get_sha1s(suffixes)):

1042

sha1s[prefix + (suffix,)] = sha1

1043

return [sha1s[key] for key in keys]

1044

1045

def insert_record_stream(self, stream):

1046

"""Insert a record stream into this container.

1047

1048

:param stream: A stream of records to insert.

1049

:return: None

1050

:seealso VersionedFile.get_record_stream:

1051

"""

1052

for record in stream:

1053

prefix = record.key[:-1]

1054

key = record.key[-1:]

1055

if record.parents is not None:

1056

parents = [parent[-1:] for parent in record.parents]

1057

else:

1058

parents = None

1059

thunk_record = AdapterFactory(key, parents, record)

1060

path = self._mapper.map(prefix)

1061

# Note that this parses the file many times; we can do better but

1062

# as this only impacts weaves in terms of performance, it is

1063

# tolerable.

1064

vf = self._get_vf(path)

1065

vf.insert_record_stream([thunk_record])

1066

1067

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

1068

"""Iterate over the lines in the versioned files from keys.

1069

1070

This may return lines from other keys. Each item the returned

1071

iterator yields is a tuple of a line and a text version that that line

1072

is present in (not introduced in).

1073

1074

Ordering of results is in whatever order is most suitable for the

1075

underlying storage format.

1076

1077

If a progress bar is supplied, it may be used to indicate progress.

1078

The caller is responsible for cleaning up progress bars (because this

1079

is an iterator).

1080

1081

NOTES:

1082

* Lines are normalised by the underlying store: they will all have \n

1083

terminators.

1084

* Lines are returned in arbitrary order.

1085

1086

:return: An iterator over (line, key).

1087

"""

1088

for prefix, suffixes, vf in self._iter_keys_vf(keys):

1089

for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):

1090

yield line, prefix + (version,)

1091

1092

def _iter_all_components(self):

1093

for path, prefix in self._get_all_prefixes():

1094

yield prefix, self._get_vf(path)

1095

1096

def keys(self):

1097

"""See VersionedFiles.keys()."""

1098

result = set()

1099

for prefix, vf in self._iter_all_components():

1100

for suffix in vf.versions():

1101

result.add(prefix + (suffix,))

1102

return result

1103

1104

1105

class _PlanMergeVersionedFile(VersionedFiles):

509

1106

"""A VersionedFile for uncommitted and committed texts.

510

1107

511

1108

It is intended to allow merges to be planned with working tree texts.

512

It implements only the small part of the VersionedFile interface used by

1109

It implements only the small part of the VersionedFiles interface used by

513

1110

PlanMerge. It falls back to multiple versionedfiles for data not stored in

514

1111

_PlanMergeVersionedFile itself.

1112

1113

:ivar: fallback_versionedfiles a list of VersionedFiles objects that can be

1114

queried for missing texts.

515

1115

"""

516

1116

517

def __init__(self, file_id, fallback_versionedfiles=None):

518

"""Constuctor

1117

def __init__(self, file_id):

1118

"""Create a _PlanMergeVersionedFile.

519

1119

520

:param file_id: Used when raising exceptions.

521

:param fallback_versionedfiles: If supplied, the set of fallbacks to

522

use. Otherwise, _PlanMergeVersionedFile.fallback_versionedfiles

523

can be appended to later.

1120

:param file_id: Used with _PlanMerge code which is not yet fully

1121

tuple-keyspace aware.

524

1122

"""

525

1123

self._file_id = file_id

526

if fallback_versionedfiles is None:

527

self.fallback_versionedfiles = []

528

else:

529

self.fallback_versionedfiles = fallback_versionedfiles

1124

# fallback locations

1125

self.fallback_versionedfiles = []

1126

# Parents for locally held keys.

530

1127

self._parents = {}

1128

# line data for locally held keys.

531

1129

self._lines = {}

1130

# key lookup providers

1131

self._providers = [DictParentsProvider(self._parents)]

532

1132

533

1133

def plan_merge(self, ver_a, ver_b, base=None):

534

1134

"""See VersionedFile.plan_merge"""

535

1135

from bzrlib.merge import _PlanMerge

536

1136

if base is None:

537

return _PlanMerge(ver_a, ver_b, self).plan_merge()

538

old_plan = list(_PlanMerge(ver_a, base, self).plan_merge())

539

new_plan = list(_PlanMerge(ver_a, ver_b, self).plan_merge())

1137

return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()

1138

old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())

1139

new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())

540

1140

return _PlanMerge._subtract_plans(old_plan, new_plan)

541

1141

542

1142

def plan_lca_merge(self, ver_a, ver_b, base=None):

543

1143

from bzrlib.merge import _PlanLCAMerge

544

graph = self._get_graph()

545

new_plan = _PlanLCAMerge(ver_a, ver_b, self, graph).plan_merge()

1144

graph = Graph(self)

1145

new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()

546

1146

if base is None:

547

1147

return new_plan

548

old_plan = _PlanLCAMerge(ver_a, base, self, graph).plan_merge()

1148

old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()

549

1149

return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

550

1150

551

def add_lines(self, version_id, parents, lines):

552

"""See VersionedFile.add_lines

1151

def add_lines(self, key, parents, lines):

1152

"""See VersionedFiles.add_lines

553

1153

554

Lines are added locally, not fallback versionedfiles. Also, ghosts are

555

permitted. Only reserved ids are permitted.

1154

Lines are added locally, not to fallback versionedfiles. Also, ghosts

1155

are permitted. Only reserved ids are permitted.

556

1156

"""

557

if not revision.is_reserved_id(version_id):

1157

if type(key) != tuple:

1158

import pdb;pdb.set_trace()

1159

if not revision.is_reserved_id(key[-1]):

558

1160

raise ValueError('Only reserved ids may be used')

559

1161

if parents is None:

560

1162

raise ValueError('Parents may not be None')

561

1163

if lines is None:

562

1164

raise ValueError('Lines may not be None')

563

self._parents[version_id] = tuple(parents)

564

self._lines[version_id] = lines

1165

self._parents[key] = tuple(parents)

1166

self._lines[key] = lines

565

1167

566

def get_lines(self, version_id):

567

"""See VersionedFile.get_ancestry"""

568

lines = self._lines.get(version_id)

569

if lines is not None:

570

return lines

1168

def get_record_stream(self, keys, ordering, include_delta_closure):

1169

pending = set(keys)

1170

for key in keys:

1171

if key in self._lines:

1172

lines = self._lines[key]

1173

parents = self._parents[key]

1174

pending.remove(key)

1175

yield FulltextContentFactory(key, parents, None,

1176

''.join(lines))

571

1177

for versionedfile in self.fallback_versionedfiles:

572

try:

573

return versionedfile.get_lines(version_id)

574

except errors.RevisionNotPresent:

575

continue

576

else:

577

raise errors.RevisionNotPresent(version_id, self._file_id)

578

579

def get_ancestry(self, version_id, topo_sorted=False):

580

"""See VersionedFile.get_ancestry.

581

582

Note that this implementation assumes that if a VersionedFile can

583

answer get_ancestry at all, it can give an authoritative answer. In

584

fact, ghosts can invalidate this assumption. But it's good enough

585

99% of the time, and far cheaper/simpler.

586

587

Also note that the results of this version are never topologically

588

sorted, and are a set.

589

"""

590

if topo_sorted:

591

raise ValueError('This implementation does not provide sorting')

592

parents = self._parents.get(version_id)

593

if parents is None:

594

for vf in self.fallback_versionedfiles:

595

try:

596

return vf.get_ancestry(version_id, topo_sorted=False)

597

except errors.RevisionNotPresent:

1178

for record in versionedfile.get_record_stream(

1179

pending, 'unordered', True):

1180

if record.storage_kind == 'absent':

598

1181

continue

599

else:

600

raise errors.RevisionNotPresent(version_id, self._file_id)

601

ancestry = set([version_id])

602

for parent in parents:

603

ancestry.update(self.get_ancestry(parent, topo_sorted=False))

604

return ancestry

605

606

def get_parent_map(self, version_ids):

607

"""See VersionedFile.get_parent_map"""

608

result = {}

609

pending = set(version_ids)

610

for key in version_ids:

611

try:

612

result[key] = self._parents[key]

613

except KeyError:

614

pass

615

pending = pending - set(result.keys())

616

for versionedfile in self.fallback_versionedfiles:

617

parents = versionedfile.get_parent_map(pending)

618

result.update(parents)

619

pending = pending - set(parents.keys())

1182

else:

1183

pending.remove(record.key)

1184

yield record

620

1185

if not pending:

621

return result

622

return result

1186

return

1187

# report absent entries

1188

for key in pending:

1189

yield AbsentContentFactory(key)

623

1190

624

def _get_graph(self):

625

from bzrlib.graph import (

626

DictParentsProvider,

627

Graph,

628

_StackedParentsProvider,

629

)

630

from bzrlib.repofmt.knitrepo import _KnitParentsProvider

631

parent_providers = [DictParentsProvider(self._parents)]

632

for vf in self.fallback_versionedfiles:

633

parent_providers.append(_KnitParentsProvider(vf))

634

return Graph(_StackedParentsProvider(parent_providers))

1191

def get_parent_map(self, keys):

1192

"""See VersionedFiles.get_parent_map"""

1193

# We create a new provider because a fallback may have been added.

1194

# If we make fallbacks private we can update a stack list and avoid

1195

# object creation thrashing.

1196

self._providers = self._providers[:1] + self.fallback_versionedfiles

1197

return _StackedParentsProvider(self._providers).get_parent_map(keys)

635

1198

636

1199

637

1200

class PlanWeaveMerge(TextMerge):

714

1277

PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)

715

1278

716

1279

717

class InterVersionedFile(InterObject):

718

"""This class represents operations taking place between two VersionedFiles.

719

720

Its instances have methods like join, and contain

721

references to the source and target versionedfiles these operations can be

722

carried out on.

723

724

Often we will provide convenience methods on 'versionedfile' which carry out

725

operations with another versionedfile - they will always forward to

726

InterVersionedFile.get(other).method_name(parameters).

727

"""

728

729

_optimisers = []

730

"""The available optimised InterVersionedFile types."""

731

732

def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):

733

"""Integrate versions from self.source into self.target.

734

735

If version_ids is None all versions from source should be

736

incorporated into this versioned file.

737

738

Must raise RevisionNotPresent if any of the specified versions

739

are not present in the other file's history unless ignore_missing is

740

supplied in which case they are silently skipped.

741

"""

742

target = self.target

743

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

744

graph = Graph(self.source)

745

search = graph._make_breadth_first_searcher(version_ids)

746

transitive_ids = set()

747

map(transitive_ids.update, list(search))

748

parent_map = self.source.get_parent_map(transitive_ids)

749

order = tsort.topo_sort(parent_map.items())

750

pb = ui.ui_factory.nested_progress_bar()

751

parent_texts = {}

752

try:

753

# TODO for incremental cross-format work:

754

# make a versioned file with the following content:

755

# all revisions we have been asked to join

756

# all their ancestors that are *not* in target already.

757

# the immediate parents of the above two sets, with

758

# empty parent lists - these versions are in target already

759

# and the incorrect version data will be ignored.

760

# TODO: for all ancestors that are present in target already,

761

# check them for consistent data, this requires moving sha1 from

762

763

# TODO: remove parent texts when they are not relevant any more for

764

# memory pressure reduction. RBC 20060313

765

# pb.update('Converting versioned data', 0, len(order))

766

total = len(order)

767

for index, version in enumerate(order):

768

pb.update('Converting versioned data', index, total)

769

if version in target:

770

continue

771

_, _, parent_text = target.add_lines(version,

772

parent_map[version],

773

self.source.get_lines(version),

774

parent_texts=parent_texts)

775

parent_texts[version] = parent_text

776

return total

777

finally:

778

pb.finished()

779

780

def _get_source_version_ids(self, version_ids, ignore_missing):

781

"""Determine the version ids to be used from self.source.

782

783

:param version_ids: The caller-supplied version ids to check. (None

784

for all). If None is in version_ids, it is stripped.

785

:param ignore_missing: if True, remove missing ids from the version

786

list. If False, raise RevisionNotPresent on

787

a missing version id.

788

:return: A set of version ids.

789

"""

790

if version_ids is None:

791

# None cannot be in source.versions

792

return set(self.source.versions())

793

else:

794

if ignore_missing:

795

return set(self.source.versions()).intersection(set(version_ids))

796

else:

797

new_version_ids = set()

798

for version in version_ids:

799

if version is None:

800

continue

801

if not self.source.has_version(version):

802

raise errors.RevisionNotPresent(version, str(self.source))

803

else:

804

new_version_ids.add(version)

805

return new_version_ids

806

807

808

class KeyMapper(object):

809

"""KeyMappers map between keys and underlying paritioned storage."""

810

811

def map(self, key):

812

"""Map key to an underlying storage identifier.

813

814

:param key: A key tuple e.g. ('file-id', 'revision-id').

815

:return: An underlying storage identifier, specific to the partitioning

816

mechanism.

817

"""

818

819

def unmap(self, partition_id):

820

"""Map a partitioned storage id back to a key prefix.

821

822

:param partition_id: The underlying partition id.

823

:return: As much of a key (or prefix) as is derivable from the parition

824

id.

825

"""

826

827

828

class ConstantMapper(KeyMapper):

829

"""A key mapper that maps to a constant result."""

830

831

def __init__(self, result):

832

"""Create a ConstantMapper which will return result for all maps."""

833

self._result = result

834

835

def map(self, key):

836

"""See KeyMapper.map()."""

837

return self._result

838

839

840

class PrefixMapper(KeyMapper):

841

"""A key mapper that extracts the first component of a key."""

842

843

def map(self, key):

844

"""See KeyMapper.map()."""

845

return key[0]

846

847

def unmap(self, partition_id):

848

"""See KeyMapper.unmap()."""

849

return (partition_id,)

850

851

852

class HashPrefixMapper(KeyMapper):

853

"""A key mapper that combines the first component of a key with a hash."""

854

855

def map(self, key):

856

"""See KeyMapper.map()."""

857

prefix = self._escape(key[0])

858

return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

859

860

def _escape(self, prefix):

861

"""No escaping needed here."""

862

return prefix

863

864

def unmap(self, partition_id):

865

"""See KeyMapper.unmap()."""

866

return (self._unescape(osutils.basename(partition_id)),)

867

868

def _unescape(self, basename):

869

"""No unescaping needed for HashPrefixMapper."""

870

return basename

871

872

873

class HashEscapedPrefixMapper(HashPrefixMapper):

874

"""Combines the escaped first component of a key with a hash."""

875

876

_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

877

878

def _escape(self, prefix):

879

"""Turn a key element into a filesystem safe string.

880

881

This is similar to a plain urllib.quote, except

882

it uses specific safe characters, so that it doesn't

883

have to translate a lot of valid file ids.

884

"""

885

# @ does not get escaped. This is because it is a valid

886

# filesystem character we use all the time, and it looks

887

# a lot better than seeing %40 all the time.

888

r = [((c in self._safe) and c or ('%%%02x' % ord(c)))

889

for c in prefix]

890

return ''.join(r)

891

892

def _unescape(self, basename):

893

"""Escaped names are unescaped by urlutils."""

894

return urllib.unquote(basename)

895

896

897

def make_pack_factory(graph, delta, keylength):

898

"""Create a factory for creating a pack based VersionedFiles.

899

900

:param graph: Store a graph.

901

:param delta: Delta compress contents.

902

:param keylength: How long should keys be.

903

"""

904

return lambda x:None

905

906

907

def make_versioned_files_factory(versioned_file_factory, mapper):

908

"""Create a ThunkedVersionedFiles factory.

909

910

This will create a callable which when called creates a

911

ThunkedVersionedFiles on a transport, using mapper to access individual

912

versioned files, and versioned_file_factory to create each individual file.

913

"""

914

return lambda x:None

915

916

917

class VersionedFiles(object):

918

"""Storage for many versioned files.

919

920

This object allows a single keyspace for accessing the history graph and

921

contents of named bytestrings.

922

923

Currently no implementation allows the graph of different key prefixes to

924

intersect, but the API does allow such implementations in the future.

925

"""

Older »