~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Andrew Bennetts
Date: 2008-04-14 08:07:08 UTC
mfrom: (3245.3.4 Robust _get_line and push_back)
mto: (3245.5.6 _build_client_protocol refactoring)
mto: This revision was merged to the branch mainline in revision 3368.
Revision ID: andrew.bennetts@canonical.com-20080414080708-uxg4d8ebx204q11e

Merge from bzr.dev

files added:
bzrlib/tests/test_mutabletree.py

bzrlib/transport/nosmart.py

doc/developers/integration.txt

doc/developers/plugin-api.txt

files renamed:
bzrlib/tests/test_revisionnamespaces.py => bzrlib/tests/test_revisionspec.py

files modified:
NEWS

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/commands.py

bzrlib/config.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/hooks.txt

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/revision/knit.py

bzrlib/store/versioned/__init__.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_config.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_log.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/ftp.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/HACKING.txt

doc/developers/index.txt

doc/developers/repository.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/undoing_mistakes.txt

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

101

RevisionNotPresent,

102

RevisionAlreadyPresent,

103

)

104

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.graph import Graph

105

from bzrlib.osutils import (

106

contains_whitespace,

107

contains_linebreaks,

108

sha_string,

109

sha_strings,

110

)

111

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

111

from bzrlib.symbol_versioning import (

112

DEPRECATED_PARAMETER,

113

deprecated_method,

114

deprecated_passed,

115

one_four,

116

)

112

117

from bzrlib.tsort import topo_sort

118

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

113

119

import bzrlib.ui

120

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

114

121

import bzrlib.weave

115

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

116

122

117

123

118

124

# TODO: Split out code specific to this format into an associated object.

138

144

def __init__(self):

139

145

self._should_strip_eol = False

140

146

141

def annotate(self):

142

"""Return a list of (origin, text) tuples."""

143

return list(self.annotate_iter())

144

145

147

def apply_delta(self, delta, new_version_id):

146

148

"""Apply delta to this object to become new_version_id."""

147

149

raise NotImplementedError(self.apply_delta)

200

202

KnitContent.__init__(self)

201

203

self._lines = lines

202

204

203

def annotate_iter(self):

204

"""Yield tuples of (origin, text) for each content line."""

205

return iter(self._lines)

205

def annotate(self):

206

"""Return a list of (origin, text) for each content line."""

207

return list(self._lines)

206

208

207

209

def apply_delta(self, delta, new_version_id):

208

210

"""Apply delta to this object to become new_version_id."""

250

252

self._lines = lines

251

253

self._version_id = version_id

252

254

253

def annotate_iter(self):

254

"""Yield tuples of (origin, text) for each content line."""

255

for line in self._lines:

256

yield self._version_id, line

255

def annotate(self):

256

"""Return a list of (origin, text) for each content line."""

257

return [(self._version_id, line) for line in self._lines]

257

258

259

def apply_delta(self, delta, new_version_id):

259

260

"""Apply delta to this object to become new_version_id."""

421

422

for origin, text in lines)

422

423

return out

423

424

def annotate_iter(self, knit, version_id):

425

def annotate(self, knit, version_id):

425

426

content = knit._get_content(version_id)

426

return content.annotate_iter()

427

return content.annotate()

427

428

429

430

class KnitPlainFactory(_KnitFactory):

483

484

out.extend(lines)

484

485

return out

485

486

def annotate_iter(self, knit, version_id):

487

def annotate(self, knit, version_id):

487

488

annotator = _KnitAnnotator(knit)

488

return iter(annotator.annotate(version_id))

489

return annotator.annotate(version_id)

489

490

491

492

def make_empty_knit(transport, relpath):

492

493

"""Construct an empty knit at the specified location."""

493

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

494

k = make_file_knit(transport, relpath, 'w', KnitPlainFactory)

495

496

497

def make_file_knit(name, transport, file_mode=None, access_mode='w',

498

factory=None, delta=True, create=False, create_parent_dir=False,

499

delay_create=False, dir_mode=None, get_scope=None):

500

"""Factory to create a KnitVersionedFile for a .knit/.kndx file pair."""

501

if factory is None:

502

factory = KnitAnnotateFactory()

503

else:

504

factory = KnitPlainFactory()

505

if get_scope is None:

506

get_scope = lambda:None

507

index = _KnitIndex(transport, name + INDEX_SUFFIX,

508

access_mode, create=create, file_mode=file_mode,

509

create_parent_dir=create_parent_dir, delay_create=delay_create,

510

dir_mode=dir_mode, get_scope=get_scope)

511

access = _KnitAccess(transport, name + DATA_SUFFIX, file_mode,

512

dir_mode, ((create and not len(index)) and delay_create),

513

create_parent_dir)

514

return KnitVersionedFile(name, transport, factory=factory,

515

create=create, delay_create=delay_create, index=index,

516

access_method=access)

517

518

519

def get_suffixes():

520

"""Return the suffixes used by file based knits."""

521

return [DATA_SUFFIX, INDEX_SUFFIX]

522

make_file_knit.get_suffixes = get_suffixes

494

523

495

524

496

525

class KnitVersionedFile(VersionedFile):

508

537

stored and retrieved.

509

538

"""

510

539

511

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

540

def __init__(self, relpath, transport, file_mode=None,

512

541

factory=None, delta=True, create=False, create_parent_dir=False,

513

542

delay_create=False, dir_mode=None, index=None, access_method=None):

514

543

"""Construct a knit at location specified by relpath.

521

550

actually be created until the first data is stored.

522

551

:param index: An index to use for the knit.

523

552

"""

524

if access_mode is None:

525

access_mode = 'w'

526

super(KnitVersionedFile, self).__init__(access_mode)

527

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

553

super(KnitVersionedFile, self).__init__()

528

554

self.transport = transport

529

555

self.filename = relpath

530

556

self.factory = factory or KnitAnnotateFactory()

531

self.writable = (access_mode == 'w')

532

557

self.delta = delta

533

558

534

559

self._max_delta_chain = 200

535

560

536

if index is None:

537

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

538

access_mode, create=create, file_mode=file_mode,

539

create_parent_dir=create_parent_dir, delay_create=delay_create,

540

dir_mode=dir_mode)

541

else:

542

self._index = index

543

if access_method is None:

544

_access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,

545

((create and not len(self)) and delay_create), create_parent_dir)

546

else:

547

_access = access_method

561

if None in (access_method, index):

562

raise ValueError("No default access_method or index any more")

563

self._index = index

564

_access = access_method

548

565

if create and not len(self) and not delay_create:

549

566

_access.create()

550

567

self._data = _KnitData(_access)

582

599

583

600

return fulltext_size > delta_size

584

601

602

def _check_write_ok(self):

603

return self._index._check_write_ok()

604

585

605

def _add_raw_records(self, records, data):

586

606

"""Add all the records 'records' with data pre-joined in 'data'.

587

607

598

618

for (version_id, options, parents, size), access_memo in zip(

599

619

records, positions):

600

620

index_entries.append((version_id, options, access_memo, parents))

601

if self._data._do_cache:

602

self._data._cache[version_id] = data[offset:offset+size]

603

621

offset += size

604

622

self._index.add_versions(index_entries)

605

623

606

def enable_cache(self):

607

"""Start caching data for this knit"""

608

self._data.enable_cache()

609

610

def clear_cache(self):

611

"""Clear the data cache only."""

612

self._data.clear_cache()

613

614

624

def copy_to(self, name, transport):

615

625

"""See VersionedFile.copy_to()."""

616

626

# copy the current index to a temp index to avoid racing with local

626

636

# move the copied index into place

627

637

transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

628

638

629

def create_empty(self, name, transport, mode=None):

630

return KnitVersionedFile(name, transport, factory=self.factory,

631

delta=self.delta, create=True)

632

633

639

def get_data_stream(self, required_versions):

634

640

"""Get a data stream for the specified versions.

635

641

751

757

annotated_part = "plain"

752

758

return "knit-%s" % (annotated_part,)

753

759

760

@deprecated_method(one_four)

754

761

def get_graph_with_ghosts(self):

755

762

"""See VersionedFile.get_graph_with_ghosts()."""

756

graph_items = self._index.get_graph()

757

return dict(graph_items)

758

759

def get_sha1(self, version_id):

760

return self.get_sha1s([version_id])[0]

763

return self.get_parent_map(self.versions())

761

764

762

765

def get_sha1s(self, version_ids):

763

"""See VersionedFile.get_sha1()."""

766

"""See VersionedFile.get_sha1s()."""

764

767

record_map = self._get_record_map(version_ids)

765

768

# record entry 2 is the 'digest'.

766

769

return [record_map[v][2] for v in version_ids]

767

770

768

@staticmethod

769

def get_suffixes():

770

"""See VersionedFile.get_suffixes()."""

771

return [DATA_SUFFIX, INDEX_SUFFIX]

772

771

@deprecated_method(one_four)

773

772

def has_ghost(self, version_id):

774

773

"""True if there is a ghost reference in the file to version_id."""

775

774

# maybe we have it

776

775

if self.has_version(version_id):

777

776

return False

778

777

# optimisable if needed by memoising the _ghosts set.

779

items = self._index.get_graph()

780

for node, parents in items:

778

items = self.get_parent_map(self.versions())

779

for parents in items.itervalues():

781

780

for parent in parents:

782

if parent not in self._index._cache:

783

if parent == version_id:

784

return True

781

if parent == version_id and parent not in items:

782

return True

785

783

return False

786

784

787

785

def insert_data_stream(self, (format, data_list, reader_callable)):

818

816

# Also check the SHA-1 of the fulltext this content will

819

817

# produce.

820

818

raw_data = reader_callable(length)

821

my_fulltext_sha1 = self.get_sha1(version_id)

819

my_fulltext_sha1 = self.get_sha1s([version_id])[0]

822

820

df, rec = self._data._parse_record_header(version_id, raw_data)

823

821

stream_fulltext_sha1 = rec[3]

824

822

if my_fulltext_sha1 != stream_fulltext_sha1:

1088

1086

def check(self, progress_bar=None):

1089

1087

"""See VersionedFile.check()."""

1090

1088

1091

def _clone_text(self, new_version_id, old_version_id, parents):

1092

"""See VersionedFile.clone_text()."""

1093

# FIXME RBC 20060228 make fast by only inserting an index with null

1094

# delta.

1095

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

1096

1097

1089

def get_lines(self, version_id):

1098

1090

"""See VersionedFile.get_lines()."""

1099

1091

return self.get_line_list([version_id])[0]

1237

1229

1238

1230

pb.update('Walking content.', total, total)

1239

1231

1240

def iter_parents(self, version_ids):

1241

"""Iterate through the parents for many version ids.

1242

1243

:param version_ids: An iterable yielding version_ids.

1244

:return: An iterator that yields (version_id, parents). Requested

1245

version_ids not present in the versioned file are simply skipped.

1246

The order is undefined, allowing for different optimisations in

1247

the underlying implementation.

1248

"""

1249

return self._index.iter_parents(version_ids)

1250

1251

1232

def num_versions(self):

1252

1233

"""See VersionedFile.num_versions()."""

1253

1234

return self._index.num_versions()

1254

1235

1255

1236

__len__ = num_versions

1256

1237

1257

def annotate_iter(self, version_id):

1258

"""See VersionedFile.annotate_iter."""

1259

return self.factory.annotate_iter(self, version_id)

1238

def annotate(self, version_id):

1239

"""See VersionedFile.annotate."""

1240

return self.factory.annotate(self, version_id)

1260

1241

1261

1242

def get_parent_map(self, version_ids):

1262

1243

"""See VersionedFile.get_parent_map."""

1398

1379

parents,

1399

1380

index)

1400

1381

1382

def _check_write_ok(self):

1383

if self._get_scope() != self._scope:

1384

raise errors.OutSideTransaction()

1385

if self._mode != 'w':

1386

raise errors.ReadOnlyObjectDirtiedError(self)

1387

1401

1388

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1402

create_parent_dir=False, delay_create=False, dir_mode=None):

1389

create_parent_dir=False, delay_create=False, dir_mode=None,

1390

get_scope=None):

1403

1391

_KnitComponentFile.__init__(self, transport, filename, mode,

1404

1392

file_mode=file_mode,

1405

1393

create_parent_dir=create_parent_dir,

1426

1414

else:

1427

1415

self._transport.put_bytes_non_atomic(

1428

1416

self._filename, self.HEADER, mode=self._file_mode)

1429

1430

def get_graph(self):

1431

"""Return a list of the node:parents lists from this knit index."""

1432

return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

1417

self._scope = get_scope()

1418

self._get_scope = get_scope

1433

1419

1434

1420

def get_ancestry(self, versions, topo_sorted=True):

1435

1421

"""See VersionedFile.get_ancestry."""

1507

1493

parents, (method, noeol))

1508

1494

return result

1509

1495

1510

def iter_parents(self, version_ids):

1511

"""Iterate through the parents for many version ids.

1512

1513

:param version_ids: An iterable yielding version_ids.

1514

:return: An iterator that yields (version_id, parents). Requested

1515

version_ids not present in the versioned file are simply skipped.

1516

The order is undefined, allowing for different optimisations in

1517

the underlying implementation.

1518

"""

1519

parent_map = self.get_parent_map(version_ids)

1520

parent_map_set = set(parent_map)

1521

unknown_existence = set()

1522

for parents in parent_map.itervalues():

1523

unknown_existence.update(parents)

1524

unknown_existence.difference_update(parent_map_set)

1525

present_parents = set(self.get_parent_map(unknown_existence))

1526

present_parents.update(parent_map_set)

1527

for version_id, parents in parent_map.iteritems():

1528

parents = tuple(parent for parent in parents

1529

if parent in present_parents)

1530

yield version_id, parents

1531

1532

1496

def num_versions(self):

1533

1497

return len(self._history)

1534

1498

1678

1642

raise KnitCorrupt(self, "Cannot do delta compression without "

1679

1643

"parent tracking.")

1680

1644

1645

def _check_write_ok(self):

1646

pass

1647

1681

1648

def _get_entries(self, keys, check_present=False):

1682

1649

"""Get the entries for keys.

1683

1650

1848

1815

else:

1849

1816

return 'fulltext'

1850

1817

1851

def get_graph(self):

1852

"""Return a list of the node:parents lists from this knit index."""

1853

if not self._parents:

1854

return [(key, ()) for key in self.get_versions()]

1855

result = []

1856

for index, key, value, refs in self._graph_index.iter_all_entries():

1857

result.append((key[0], tuple([ref[0] for ref in refs[0]])))

1858

return result

1859

1860

def iter_parents(self, version_ids):

1861

"""Iterate through the parents for many version ids.

1862

1863

:param version_ids: An iterable yielding version_ids.

1864

:return: An iterator that yields (version_id, parents). Requested

1865

version_ids not present in the versioned file are simply skipped.

1866

The order is undefined, allowing for different optimisations in

1867

the underlying implementation.

1868

"""

1869

if self._parents:

1870

all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))

1871

all_parents = set()

1872

present_parents = set()

1873

for node in all_nodes:

1874

all_parents.update(node[3][0])

1875

# any node we are querying must be present

1876

present_parents.add(node[1])

1877

unknown_parents = all_parents.difference(present_parents)

1878

present_parents.update(self._present_keys(unknown_parents))

1879

for node in all_nodes:

1880

parents = []

1881

for parent in node[3][0]:

1882

if parent in present_parents:

1883

parents.append(parent[0])

1884

yield node[1][0], tuple(parents)

1885

else:

1886

for node in self._get_entries(self._version_ids_to_keys(version_ids)):

1887

yield node[1][0], ()

1888

1889

1818

def num_versions(self):

1890

1819

return len(list(self._graph_index.iter_all_entries()))

1891

1820

2236

2165

def get_raw_records(self, memos_for_retrieval):

2237

2166

"""Get the raw bytes for records.

2238

2167

2239

:param memos_for_retrieval: An iterable containing the (thunk_flag,

2240

index, start, end) memo for retrieving the bytes.

2241

:return: An iterator over the bytes of the records.

2168

:param memos_for_retrieval: An iterable of memos from the

2169

_StreamIndex object identifying bytes to read; for these classes

2170

they are (from_backing_knit, index, start, end) and can point to

2171

either the backing knit or streamed data.

2172

:return: An iterator yielding a byte string for each record in

2173

memos_for_retrieval.

2242

2174

"""

2243

2175

# use a generator for memory friendliness

2244

for thunk_flag, version_id, start, end in memos_for_retrieval:

2245

if version_id is self.stream_index:

2176

for from_backing_knit, version_id, start, end in memos_for_retrieval:

2177

if not from_backing_knit:

2178

assert version_id is self.stream_index

2246

2179

yield self.data[start:end]

2247

2180

continue

2248

2181

# we have been asked to thunk. This thunking only occurs when

2253

2186

# as desired. However, for now, this is sufficient.

2254

2187

if self.orig_factory.__class__ != KnitPlainFactory:

2255

2188

raise errors.KnitCorrupt(

2256

self, 'Bad thunk request %r' % version_id)

2189

self, 'Bad thunk request %r cannot be backed by %r' %

2190

(version_id, self.orig_factory))

2257

2191

lines = self.backing_knit.get_lines(version_id)

2258

2192

line_bytes = ''.join(lines)

2259

2193

digest = sha_string(line_bytes)

2194

# the packed form of the fulltext always has a trailing newline,

2195

# even if the actual text does not, unless the file is empty. the

2196

# record options including the noeol flag are passed through by

2197

# _StreamIndex, so this is safe.

2260

2198

if lines:

2261

2199

if lines[-1][-1] != '\n':

2262

2200

lines[-1] = lines[-1] + '\n'

2263

2201

line_bytes += '\n'

2264

orig_options = list(self.backing_knit._index.get_options(version_id))

2265

if 'fulltext' not in orig_options:

2266

if 'line-delta' not in orig_options:

2267

raise errors.KnitCorrupt(self,

2268

'Unknown compression method %r' % orig_options)

2269

orig_options.remove('line-delta')

2270

orig_options.append('fulltext')

2271

2202

# We want plain data, because we expect to thunk only to allow text

2272

2203

# extraction.

2273

2204

size, bytes = self.backing_knit._data._record_to_data(version_id,

2325

2256

:return: A dict of version_id:(index_memo, compression_parent,

2326

2257

parents, record_details).

2327

2258

index_memo

2328

opaque structure to pass to read_records to extract the raw

2329

data

2259

opaque memo that can be passed to _StreamAccess.read_records

2260

to extract the raw data; for these classes it is

2261

(from_backing_knit, index, start, end)

2330

2262

compression_parent

2331

2263

Content that this record is built upon, may be None

2332

2264

parents

2344

2276

continue

2345

2277

parent_ids = self.get_parents_with_ghosts(version_id)

2346

2278

noeol = ('no-eol' in self.get_options(version_id))

2279

index_memo = self.get_position(version_id)

2280

from_backing_knit = index_memo[0]

2281

if from_backing_knit:

2282

# texts retrieved from the backing knit are always full texts

2283

method = 'fulltext'

2347

2284

if method == 'fulltext':

2348

2285

compression_parent = None

2349

2286

else:

2350

2287

compression_parent = parent_ids[0]

2351

index_memo = self.get_position(version_id)

2352

2288

result[version_id] = (index_memo, compression_parent,

2353

2289

parent_ids, (method, noeol))

2354

2290

return result

2355

2291

2356

2292

def get_method(self, version_id):

2357

2293

"""Return compression method of specified version."""

2358

try:

2359

options = self._by_version[version_id][0]

2360

except KeyError:

2361

# Strictly speaking this should check in the backing knit, but

2362

# until we have a test to discriminate, this will do.

2363

return self.backing_index.get_method(version_id)

2294

options = self.get_options(version_id)

2364

2295

if 'fulltext' in options:

2365

2296

return 'fulltext'

2366

2297

elif 'line-delta' in options:

2376

2307

try:

2377

2308

return self._by_version[version_id][0]

2378

2309

except KeyError:

2379

return self.backing_index.get_options(version_id)

2310

options = list(self.backing_index.get_options(version_id))

2311

if 'fulltext' in options:

2312

pass

2313

elif 'line-delta' in options:

2314

# Texts from the backing knit are always returned from the stream

2315

# as full texts

2316

options.remove('line-delta')

2317

options.append('fulltext')

2318

else:

2319

raise errors.KnitIndexUnknownMethod(self, options)

2320

return tuple(options)

2380

2321

2381

2322

def get_parent_map(self, version_ids):

2382

2323

"""Passed through to by KnitVersionedFile.get_parent_map."""

2404

2345

coordinates into that (as index_memo's are opaque outside the

2405

2346

index and matching access class).

2406

2347

2407

:return: a tuple (thunk_flag, index, start, end). If thunk_flag is

2408

False, index will be self, otherwise it will be a version id.

2348

:return: a tuple (from_backing_knit, index, start, end) that can

2349

be passed e.g. to get_raw_records.

2350

If from_backing_knit is False, index will be self, otherwise it

2351

will be a version id.

2409

2352

"""

2410

2353

try:

2411

2354

start, end = self._by_version[version_id][1]

2418

2361

"""Get all the versions in the stream."""

2419

2362

return self._by_version.keys()

2420

2363

2421

def iter_parents(self, version_ids):

2422

"""Iterate through the parents for many version ids.

2423

2424

:param version_ids: An iterable yielding version_ids.

2425

:return: An iterator that yields (version_id, parents). Requested

2426

version_ids not present in the versioned file are simply skipped.

2427

The order is undefined, allowing for different optimisations in

2428

the underlying implementation.

2429

"""

2430

result = []

2431

for version in version_ids:

2432

try:

2433

result.append((version, self._by_version[version][2]))

2434

except KeyError:

2435

pass

2436

return result

2437

2438

2364

2439

2365

class _KnitData(object):

2440

2366

"""Manage extraction of data from a KnitAccess, caching and decompressing.

2452

2378

"""

2453

2379

self._access = access

2454

2380

self._checked = False

2455

# TODO: jam 20060713 conceptually, this could spill to disk

2456

# if the cached size gets larger than a certain amount

2457

# but it complicates the model a bit, so for now just use

2458

# a simple dictionary

2459

self._cache = {}

2460

self._do_cache = False

2461

2462

def enable_cache(self):

2463

"""Enable caching of reads."""

2464

self._do_cache = True

2465

2466

def clear_cache(self):

2467

"""Clear the record cache."""

2468

self._do_cache = False

2469

self._cache = {}

2470

2381

2471

2382

def _open_file(self):

2472

2383

return self._access.open_file()

2572

2483

# uses readv so nice and fast we hope.

2573

2484

if len(records):

2574

2485

# grab the disk data needed.

2575

if self._cache:

2576

# Don't check _cache if it is empty

2577

needed_offsets = [index_memo for version_id, index_memo

2578

in records

2579

if version_id not in self._cache]

2580

else:

2581

needed_offsets = [index_memo for version_id, index_memo

2582

in records]

2583

2486

needed_offsets = [index_memo for version_id, index_memo

2487

in records]

2584

2488

raw_records = self._access.get_raw_records(needed_offsets)

2585

2489

2586

2490

for version_id, index_memo in records:

2587

if version_id in self._cache:

2588

# This data has already been validated

2589

data = self._cache[version_id]

2590

else:

2591

data = raw_records.next()

2592

if self._do_cache:

2593

self._cache[version_id] = data

2594

2595

# validate the header

2596

df, rec = self._parse_record_header(version_id, data)

2597

df.close()

2491

data = raw_records.next()

2492

# validate the header

2493

df, rec = self._parse_record_header(version_id, data)

2494

df.close()

2598

2495

yield version_id, data

2599

2496

2600

2497

def read_records_iter(self, records):

2610

2507

if not records:

2611

2508

return

2612

2509

2613

if self._cache:

2614

# Skip records we have already seen

2615

yielded_records = set()

2616

needed_records = set()

2617

for record in records:

2618

if record[0] in self._cache:

2619

if record[0] in yielded_records:

2620

continue

2621

yielded_records.add(record[0])

2622

data = self._cache[record[0]]

2623

content, digest = self._parse_record(record[0], data)

2624

yield (record[0], content, digest)

2625

else:

2626

needed_records.add(record)

2627

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2628

else:

2629

needed_records = sorted(set(records), key=operator.itemgetter(1))

2630

2510

needed_records = sorted(set(records), key=operator.itemgetter(1))

2631

2511

if not needed_records:

2632

2512

return

2633

2513

2639

2519

for (version_id, index_memo), data in \

2640

2520

izip(iter(needed_records), raw_data):

2641

2521

content, digest = self._parse_record(version_id, data)

2642

if self._do_cache:

2643

self._cache[version_id] = data

2644

2522

yield version_id, content, digest

2645

2523

2646

2524

def read_records(self, records):

2655

2533

class InterKnit(InterVersionedFile):

2656

2534

"""Optimised code paths for knit to knit operations."""

2657

2535

2658

_matching_file_from_factory = KnitVersionedFile

2659

_matching_file_to_factory = KnitVersionedFile

2536

_matching_file_from_factory = staticmethod(make_file_knit)

2537

_matching_file_to_factory = staticmethod(make_file_knit)

2660

2538

2661

2539

@staticmethod

2662

2540

def is_compatible(source, target):

2673

2551

see join() for the parameter definitions.

2674

2552

"""

2675

2553

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

2676

graph = self.source.get_graph(version_ids)

2677

order = topo_sort(graph.items())

2554

# --- the below is factorable out with VersionedFile.join, but wait for

2555

# VersionedFiles, it may all be simpler then.

2556

graph = Graph(self.source)

2557

search = graph._make_breadth_first_searcher(version_ids)

2558

transitive_ids = set()

2559

map(transitive_ids.update, list(search))

2560

parent_map = self.source.get_parent_map(transitive_ids)

2561

order = topo_sort(parent_map.items())

2678

2562

2679

2563

def size_of_content(content):

2680

2564

return sum(len(line) for line in content.text())

2741

2625

2742

2626

if not needed_versions:

2743

2627

return 0

2744

full_list = topo_sort(self.source.get_graph())

2628

full_list = topo_sort(

2629

self.source.get_parent_map(self.source.versions()))

2745

2630

2746

2631

version_list = [i for i in full_list if (not self.target.has_version(i)

2747

2632

and i in needed_versions)]

2812

2697

"""Optimised code paths for weave to knit operations."""

2813

2698

2814

2699

_matching_file_from_factory = bzrlib.weave.WeaveFile

2815

_matching_file_to_factory = KnitVersionedFile

2700

_matching_file_to_factory = staticmethod(make_file_knit)

2816

2701

2817

2702

@staticmethod

2818

2703

def is_compatible(source, target):

2843

2728

2844

2729

if not needed_versions:

2845

2730

return 0

2846

full_list = topo_sort(self.source.get_graph())

2731

full_list = topo_sort(

2732

self.source.get_parent_map(self.source.versions()))

2847

2733

2848

2734

version_list = [i for i in full_list if (not self.target.has_version(i)

2849

2735

and i in needed_versions)]

Older »