~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/pack_repo.py

Committer: Canonical.com Patch Queue Manager
Date: 2009-04-07 13:56:43 UTC
mfrom: (4241.6.9 integration)
Revision ID: pqm@pqm.ubuntu.com-20090407135643-r15qstzbwg87d2nq

(robertc) Add --development6-rich-root,
        disabling the legacy and unneeded development2 format,
        and activating the tests for CHK features disabled pending this format.
        (Robert Collins, John Arbash Meinel, Ian Clatworthy, Vincent Ladeuil)

files added:
bzrlib/repofmt/groupcompress_repo.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

files modified:
NEWS

bzrlib/bzrdir.py

bzrlib/chk_serializer.py

bzrlib/inventory.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

doc/developers/development-repo.txt

Show diffs side-by-side

added added

removed removed

bzrlib/repofmt/pack_repo.py

import time

from bzrlib import (

chk_map,

debug,

graph,

osutils,

errors,

lockable_files,

lockdir,

revision as _mod_revision,

symbol_versioning,

)

131

133

# A map of index 'type' to the file extension and position in the

132

134

# index_sizes array.

133

135

index_definitions = {

136

'chk': ('.cix', 4),

134

137

'revision': ('.rix', 0),

135

138

'inventory': ('.iix', 1),

136

139

'text': ('.tix', 2),

138

141

}

139

142

140

143

def __init__(self, revision_index, inventory_index, text_index,

141

signature_index):

144

signature_index, chk_index=None):

142

145

"""Create a pack instance.

143

146

144

147

:param revision_index: A GraphIndex for determining what revisions are

151

154

texts/deltas (via (fileid, revisionid) tuples).

152

155

:param signature_index: A GraphIndex for determining what signatures are

153

156

present in the Pack and accessing the locations of their texts.

157

:param chk_index: A GraphIndex for accessing content by CHK, if the

158

pack has one.

154

159

"""

155

160

self.revision_index = revision_index

156

161

self.inventory_index = inventory_index

157

162

self.text_index = text_index

158

163

self.signature_index = signature_index

164

self.chk_index = chk_index

159

165

160

166

def access_tuple(self):

161

167

"""Return a tuple (transport, name) for the pack content."""

232

238

"""An in memory proxy for an existing .pack and its disk indices."""

233

239

234

240

def __init__(self, pack_transport, name, revision_index, inventory_index,

235

text_index, signature_index):

241

text_index, signature_index, chk_index=None):

236

242

"""Create an ExistingPack object.

237

243

238

244

:param pack_transport: The transport where the pack file resides.

239

245

:param name: The name of the pack on disk in the pack_transport.

240

246

"""

241

247

Pack.__init__(self, revision_index, inventory_index, text_index,

242

signature_index)

248

signature_index, chk_index)

243

249

self.name = name

244

250

self.pack_transport = pack_transport

245

251

if None in (revision_index, inventory_index, text_index,

327

333

# The relative locations of the packs are constrained, but all are

328

334

# passed in because the caller has them, so as to avoid object churn.

329

335

index_builder_class = pack_collection._index_builder_class

336

if pack_collection.chk_index is not None:

337

chk_index = index_builder_class(reference_lists=0)

338

else:

339

chk_index = None

330

340

Pack.__init__(self,

331

341

# Revisions: parents list, no text compression.

332

342

index_builder_class(reference_lists=1),

341

351

# Signatures: Just blobs to store, no compression, no parents

342

352

# listing.

343

353

index_builder_class(reference_lists=0),

354

# CHK based storage - just blobs, no compression or parents.

355

chk_index=chk_index

344

356

)

345

357

self._pack_collection = pack_collection

346

358

# When we make readonly indices, we need this.

355

367

self._file_mode = file_mode

356

368

# tracks the content written to the .pack file.

357

369

self._hash = osutils.md5()

358

# a four-tuple with the length in bytes of the indices, once the pack

359

# is finalised. (rev, inv, text, sigs)

370

# a tuple with the length in bytes of the indices, once the pack

371

# is finalised. (rev, inv, text, sigs, chk_if_in_use)

360

372

self.index_sizes = None

361

373

# How much data to cache when writing packs. Note that this is not

362

374

# synchronised with reads, because it's not in the transport layer, so

423

435

return bool(self.get_revision_count() or

424

436

self.inventory_index.key_count() or

425

437

self.text_index.key_count() or

426

self.signature_index.key_count())

438

self.signature_index.key_count() or

439

(self.chk_index is not None and self.chk_index.key_count()))

427

440

428

441

def finish(self, suspend=False):

429

442

"""Finish the new pack.

454

467

self._write_index('text', self.text_index, 'file texts', suspend)

455

468

self._write_index('signature', self.signature_index,

456

469

'revision signatures', suspend)

470

if self.chk_index is not None:

471

self.index_sizes.append(None)

472

self._write_index('chk', self.chk_index,

473

'content hash bytes', suspend)

457

474

self.write_stream.close()

458

475

# Note that this will clobber an existing pack with the same name,

459

476

# without checking for hash collisions. While this is undesirable this

726

743

727

744

def open_pack(self):

728

745

"""Open a pack for the pack we are creating."""

729

new_pack = NewPack(self._pack_collection, upload_suffix=self.suffix,

746

new_pack = self._pack_collection.pack_factory(self._pack_collection,

747

upload_suffix=self.suffix,

730

748

file_mode=self._pack_collection.repo.bzrdir._get_file_mode())

731

749

# We know that we will process all nodes in order, and don't need to

732

750

# query, so don't combine any indices spilled to disk until we are done

897

915

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

898

916

new_pack.signature_index.key_count(),

899

917

time.time() - new_pack.start_time)

918

# copy chk contents

919

# NB XXX: how to check CHK references are present? perhaps by yielding

920

# the items? How should that interact with stacked repos?

921

if new_pack.chk_index is not None:

922

self._copy_chks()

923

if 'pack' in debug.debug_flags:

924

mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',

925

time.ctime(), self._pack_collection._upload_transport.base,

926

new_pack.random_name,

927

new_pack.chk_index.key_count(),

928

time.time() - new_pack.start_time)

900

929

new_pack._check_references()

901

930

if not self._use_pack(new_pack):

902

931

new_pack.abort()

906

935

self._pack_collection.allocate(new_pack)

907

936

return new_pack

908

937

909

def _copy_nodes(self, nodes, index_map, writer, write_index):

910

"""Copy knit nodes between packs with no graph references."""

938

def _copy_chks(self, refs=None):

939

# XXX: Todo, recursive follow-pointers facility when fetching some

940

# revisions only.

941

chk_index_map, chk_indices = self._pack_map_and_index_list(

942

'chk_index')

943

chk_nodes = self._index_contents(chk_indices, refs)

944

new_refs = set()

945

# TODO: This isn't strictly tasteful as we are accessing some private

946

# variables (_serializer). Perhaps a better way would be to have

947

# Repository._deserialise_chk_node()

948

search_key_func = chk_map.search_key_registry.get(

949

self._pack_collection.repo._serializer.search_key_name)

950

def accumlate_refs(lines):

951

# XXX: move to a generic location

952

# Yay mismatch:

953

bytes = ''.join(lines)

954

node = chk_map._deserialise(bytes, ("unknown",), search_key_func)

955

new_refs.update(node.refs())

956

self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,

957

self.new_pack.chk_index, output_lines=accumlate_refs)

958

return new_refs

959

960

def _copy_nodes(self, nodes, index_map, writer, write_index,

961

output_lines=None):

962

"""Copy knit nodes between packs with no graph references.

963

964

:param output_lines: Output full texts of copied items.

965

"""

911

966

pb = ui.ui_factory.nested_progress_bar()

912

967

try:

913

968

return self._do_copy_nodes(nodes, index_map, writer,

914

write_index, pb)

969

write_index, pb, output_lines=output_lines)

915

970

finally:

916

971

pb.finished()

917

972

918

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):

973

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,

974

output_lines=None):

919

975

# for record verification

920

976

knit = KnitVersionedFiles(None, None)

921

977

# plan a readv on each source pack:

955

1011

izip(reader.iter_records(), pack_readv_requests):

956

1012

raw_data = read_func(None)

957

1013

# check the header only

958

df, _ = knit._parse_record_header(key, raw_data)

959

df.close()

1014

if output_lines is not None:

1015

output_lines(knit._parse_record(key[-1], raw_data)[0])

1016

else:

1017

df, _ = knit._parse_record_header(key, raw_data)

1018

df.close()

960

1019

pos, size = writer.add_bytes_record(raw_data, names)

961

1020

write_index.add_node(key, eol_flag + "%d %d" % (pos, size))

962

1021

pb.update("Copied record", record_index)

1292

1351

:ivar _names: map of {pack_name: (index_size,)}

1293

1352

"""

1294

1353

1354

pack_factory = NewPack

1355

1295

1356

def __init__(self, repo, transport, index_transport, upload_transport,

1296

pack_transport, index_builder_class, index_class):

1357

pack_transport, index_builder_class, index_class,

1358

use_chk_index):

1297

1359

"""Create a new RepositoryPackCollection.

1298

1360

1299

1361

:param transport: Addresses the repository base directory

1304

1366

:param pack_transport: Addresses the directory of existing complete packs.

1305

1367

:param index_builder_class: The index builder class to use.

1306

1368

:param index_class: The index class to use.

1369

:param use_chk_index: Whether to setup and manage a CHK index.

1307

1370

"""

1308

1371

# XXX: This should call self.reset()

1309

1372

self.repo = repo

1313

1376

self._pack_transport = pack_transport

1314

1377

self._index_builder_class = index_builder_class

1315

1378

self._index_class = index_class

1316

self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}

1379

self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3,

1380

'.cix': 4}

1317

1381

self.packs = []

1318

1382

# name:Pack mapping

1319

1383

self._names = None

1328

1392

self.inventory_index = AggregateIndex(self.reload_pack_names, flush)

1329

1393

self.text_index = AggregateIndex(self.reload_pack_names, flush)

1330

1394

self.signature_index = AggregateIndex(self.reload_pack_names, flush)

1395

if use_chk_index:

1396

self.chk_index = AggregateIndex(self.reload_pack_names, flush)

1397

else:

1398

# used to determine if we're using a chk_index elsewhere.

1399

self.chk_index = None

1331

1400

# resumed packs

1332

1401

self._resumed_packs = []

1333

1402

1345

1414

self.inventory_index.add_index(pack.inventory_index, pack)

1346

1415

self.text_index.add_index(pack.text_index, pack)

1347

1416

self.signature_index.add_index(pack.signature_index, pack)

1417

if self.chk_index is not None:

1418

self.chk_index.add_index(pack.chk_index, pack)

1348

1419

1349

1420

def all_packs(self):

1350

1421

"""Return a list of all the Pack objects this repository has.

1388

1459

total_packs = len(self._names)

1389

1460

if self._max_pack_count(total_revisions) >= total_packs:

1390

1461

return False

1391

# XXX: the following may want to be a class, to pack with a given

1392

# policy.

1393

1462

# determine which packs need changing

1394

1463

pack_distribution = self.pack_distribution(total_revisions)

1395

1464

existing_packs = []

1419

1488

num_new_packs, num_revs_affected)

1420

1489

self._execute_pack_operations(pack_operations,

1421

1490

reload_func=self._restart_autopack)

1491

mutter('Auto-packing repository %s completed', self)

1422

1492

return True

1423

1493

1424

1494

def _execute_pack_operations(self, pack_operations, _packer_class=Packer,

1466

1536

"""

1467

1537

self.repo.control_files.lock_write()

1468

1538

1539

def _already_packed(self):

1540

"""Is the collection already packed?"""

1541

return len(self._names) < 2

1542

1469

1543

def pack(self):

1470

1544

"""Pack the pack collection totally."""

1471

1545

self.ensure_loaded()

1472

1546

total_packs = len(self._names)

1473

if total_packs < 2:

1547

if self._already_packed():

1474

1548

# This is arguably wrong because we might not be optimal, but for

1475

1549

# now lets leave it in. (e.g. reconcile -> one pack. But not

1476

1550

# optimal.

1581

1655

inv_index = self._make_index(name, '.iix')

1582

1656

txt_index = self._make_index(name, '.tix')

1583

1657

sig_index = self._make_index(name, '.six')

1658

if self.chk_index is not None:

1659

chk_index = self._make_index(name, '.cix')

1660

else:

1661

chk_index = None

1584

1662

result = ExistingPack(self._pack_transport, name, rev_index,

1585

inv_index, txt_index, sig_index)

1663

inv_index, txt_index, sig_index, chk_index)

1586

1664

self.add_pack_to_memory(result)

1587

1665

return result

1588

1666

1682

1760

# TODO: Probably needs to know all possible indices for this pack

1683

1761

# - or maybe list the directory and move all indices matching this

1684

1762

# name whether we recognize it or not?

1685

for suffix in ('.iix', '.six', '.tix', '.rix'):

1763

suffixes = ['.iix', '.six', '.tix', '.rix']

1764

if self.chk_index is not None:

1765

suffixes.append('.cix')

1766

for suffix in suffixes:

1686

1767

self._index_transport.rename(pack.name + suffix,

1687

1768

'../obsolete_packs/' + pack.name + suffix)

1688

1769

1722

1803

self.inventory_index.remove_index(pack.inventory_index, pack)

1723

1804

self.text_index.remove_index(pack.text_index, pack)

1724

1805

self.signature_index.remove_index(pack.signature_index, pack)

1806

if self.chk_index is not None:

1807

self.chk_index.remove_index(pack.chk_index, pack)

1725

1808

1726

1809

def reset(self):

1727

1810

"""Clear all cached data."""

1736

1819

self.repo._text_knit = None

1737

1820

# cached inventory data

1738

1821

self.inventory_index.clear()

1822

# cached chk data

1823

if self.chk_index is not None:

1824

self.chk_index.clear()

1739

1825

# remove the open pack

1740

1826

self._new_pack = None

1741

1827

# information about packs.

1903

1989

# Do not permit preparation for writing if we're not in a 'write lock'.

1904

1990

if not self.repo.is_write_locked():

1905

1991

raise errors.NotWriteLocked(self)

1906

self._new_pack = NewPack(self, upload_suffix='.pack',

1992

self._new_pack = self.pack_factory(self, upload_suffix='.pack',

1907

1993

file_mode=self.repo.bzrdir._get_file_mode())

1908

1994

# allow writing: queue writes to a new index

1909

1995

self.revision_index.add_writable_index(self._new_pack.revision_index,

1912

1998

self._new_pack)

1913

1999

self.text_index.add_writable_index(self._new_pack.text_index,

1914

2000

self._new_pack)

2001

self._new_pack.text_index.set_optimize(combine_backing_indices=False)

1915

2002

self.signature_index.add_writable_index(self._new_pack.signature_index,

1916

2003

self._new_pack)

2004

if self.chk_index is not None:

2005

self.chk_index.add_writable_index(self._new_pack.chk_index,

2006

self._new_pack)

2007

self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

2008

self._new_pack.chk_index.set_optimize(combine_backing_indices=False)

1917

2009

1918

2010

self.repo.inventories._index._add_callback = self.inventory_index.add_callback

1919

2011

self.repo.revisions._index._add_callback = self.revision_index.add_callback

2046

2138

self._transport.clone('upload'),

2047

2139

self._transport.clone('packs'),

2048

2140

_format.index_builder_class,

2049

_format.index_class)

2141

_format.index_class,

2142

use_chk_index=self._format.supports_chks,

2143

)

2050

2144

self.inventories = KnitVersionedFiles(

2051

2145

_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,

2052

2146

add_callback=self._pack_collection.inventory_index.add_callback,

2071

2165

deltas=True, parents=True, is_locked=self.is_locked),

2072

2166

data_access=self._pack_collection.text_index.data_access,

2073

2167

max_delta_chain=200)

2074

self.chk_bytes = None

2168

if _format.supports_chks:

2169

# No graph, no compression:- references from chks are between

2170

# different objects not temporal versions of the same; and without

2171

# some sort of temporal structure knit compression will just fail.

2172

self.chk_bytes = KnitVersionedFiles(

2173

_KnitGraphIndex(self._pack_collection.chk_index.combined_index,

2174

add_callback=self._pack_collection.chk_index.add_callback,

2175

deltas=False, parents=False, is_locked=self.is_locked),

2176

data_access=self._pack_collection.chk_index.data_access,

2177

max_delta_chain=0)

2178

else:

2179

self.chk_bytes = None

2075

2180

# True when the repository object is 'write locked' (as opposed to the

2076

2181

# physical lock only taken out around changes to the pack-names list.)

2077

2182

# Another way to represent this would be a decorator around the control

2112

2217

revision_nodes = self._pack_collection.revision_index \

2113

2218

.combined_index.iter_all_entries()

2114

2219

index_positions = []

2115

# Get the cached index values for all revisions, and also the location

2116

# in each index of the revision text so we can perform linear IO.

2220

# Get the cached index values for all revisions, and also the

2221

# location in each index of the revision text so we can perform

2222

# linear IO.

2117

2223

for index, key, value, refs in revision_nodes:

2118

pos, length = value[1:].split(' ')

2119

index_positions.append((index, int(pos), key[0],

2120

tuple(parent[0] for parent in refs[0])))

2224

node = (index, key, value, refs)

2225

index_memo = self.revisions._index._node_to_position(node)

2226

if index_memo[0] != index:

2227

raise AssertionError('%r != %r' % (index_memo[0], index))

2228

index_positions.append((index_memo, key[0],

2229

tuple(parent[0] for parent in refs[0])))

2121

2230

pb.update("Reading revision index", 0, 0)

2122

2231

index_positions.sort()

2123

batch_count = len(index_positions) / 1000 + 1

2124

pb.update("Checking cached revision graph", 0, batch_count)

2125

for offset in xrange(batch_count):

2232

batch_size = 1000

2233

pb.update("Checking cached revision graph", 0,

2234

len(index_positions))

2235

for offset in xrange(0, len(index_positions), 1000):

2126

2236

pb.update("Checking cached revision graph", offset)

2127

to_query = index_positions[offset * 1000:(offset + 1) * 1000]

2237

to_query = index_positions[offset:offset + batch_size]

2128

2238

if not to_query:

2129

2239

break

2130

rev_ids = [item[2] for item in to_query]

2240

rev_ids = [item[1] for item in to_query]

2131

2241

revs = self.get_revisions(rev_ids)

2132

2242

for revision, item in zip(revs, to_query):

2133

index_parents = item[3]

2243

index_parents = item[2]

2134

2244

rev_parents = tuple(revision.parent_ids)

2135

2245

if index_parents != rev_parents:

2136

result.append((revision.revision_id, index_parents, rev_parents))

2246

result.append((revision.revision_id, index_parents,

2247

rev_parents))

2137

2248

finally:

2138

2249

pb.finished()

2139

2250

return result

2667

2778

return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"

2668

2779

2669

2780

2670

class RepositoryFormatPackDevelopment2(RepositoryFormatPack):

2671

"""A no-subtrees development repository.

2672

2673

This format should be retained until the second release after bzr 1.7.

2674

2675

This is pack-1.6.1 with B+Tree indices.

2676

"""

2677

2678

repository_class = KnitPackRepository

2679

_commit_builder_class = PackCommitBuilder

2680

supports_external_lookups = True

2681

# What index classes to use

2682

index_builder_class = BTreeBuilder

2683

index_class = BTreeGraphIndex

2684

# Set to true to get the fast-commit code path tested until a really fast

2685

# format lands in trunk. Not actually fast in this format.

2686

fast_deltas = True

2687

2688

@property

2689

def _serializer(self):

2690

return xml5.serializer_v5

2691

2692

def _get_matching_bzrdir(self):

2693

return bzrdir.format_registry.make_bzrdir('development2')

2694

2695

def _ignore_setting_bzrdir(self, format):

2696

pass

2697

2698

_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

2699

2700

def get_format_string(self):

2701

"""See RepositoryFormat.get_format_string()."""

2702

return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"

2703

2704

def get_format_description(self):

2705

"""See RepositoryFormat.get_format_description()."""

2706

return ("Development repository format, currently the same as "

2707

"1.6.1 with B+Trees.\n")

2708

2709

def check_conversion_target(self, target_format):

2710

pass

2711

2712

2713

2781

class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):

2714

2782

"""A subtrees development repository.

2715

2783

2716

2784

This format should be retained until the second release after bzr 1.7.

2717

2785

2718

2786

1.6.1-subtree[as it might have been] with B+Tree indices.

2787

2788

This is [now] retained until we have a CHK based subtree format in

2789

development.

2719

2790

"""

2720

2791

2721

2792

repository_class = KnitPackRepository

2733

2804

2734

2805

def _get_matching_bzrdir(self):

2735

2806

return bzrdir.format_registry.make_bzrdir(

2736

'development2-subtree')

2807

'development-subtree')

2737

2808

2738

2809

def _ignore_setting_bzrdir(self, format):

2739

2810

pass

2757

2828

"""See RepositoryFormat.get_format_description()."""

2758

2829

return ("Development repository format, currently the same as "

2759

2830

"1.6.1-subtree with B+Tree indices.\n")

2831

Older »