~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Guillermo Gonzalez
Date: 2008-03-29 22:46:38 UTC
mfrom: (3315 +trunk)
mto: (3542.1.1 logdisplayers)
mto: This revision was merged to the branch mainline in revision 3556.
Revision ID: guillo.gonzo@gmail.com-20080329224638-l3ip0jcaskndln3z

* merge with bzr.dev

files added:
bzrlib/directory_service.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

doc/en/admin-guide

doc/en/admin-guide/index.txt

files renamed:
bzrlib/plugins/launchpad/lp_indirect.py => bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_lp_indirect.py => bzrlib/plugins/launchpad/test_lp_directory.py

files modified:
NEWS

README

bzrlib/__init__.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/help_topics/__init__.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/mail_client.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/multiparent.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/repository.py

bzrlib/status.py

bzrlib/symbol_versioning.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/lca-merge.txt

doc/en/mini-tutorial/index.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/version_info.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

lazy_import(globals(), """

from bzrlib import (

annotate,

graph as _mod_graph,

lru_cache,

pack,

trace,

100

101

RevisionNotPresent,

101

102

RevisionAlreadyPresent,

102

103

)

103

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.graph import Graph

104

105

from bzrlib.osutils import (

105

106

contains_whitespace,

106

107

contains_linebreaks,

107

108

sha_string,

108

109

sha_strings,

109

110

)

110

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

111

from bzrlib.symbol_versioning import (

112

DEPRECATED_PARAMETER,

113

deprecated_method,

114

deprecated_passed,

115

one_four,

116

)

111

117

from bzrlib.tsort import topo_sort

118

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

112

119

import bzrlib.ui

120

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

113

121

import bzrlib.weave

114

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

115

122

116

123

117

124

# TODO: Split out code specific to this format into an associated object.

134

141

class KnitContent(object):

135

142

"""Content of a knit version to which deltas can be applied."""

136

143

144

def __init__(self):

145

self._should_strip_eol = False

146

137

147

def annotate(self):

138

148

"""Return a list of (origin, text) tuples."""

139

149

return list(self.annotate_iter())

142

152

"""Apply delta to this object to become new_version_id."""

143

153

raise NotImplementedError(self.apply_delta)

144

154

155

def cleanup_eol(self, copy_on_mutate=True):

156

if self._should_strip_eol:

157

if copy_on_mutate:

158

self._lines = self._lines[:]

159

self.strip_last_line_newline()

160

145

161

def line_delta_iter(self, new_lines):

146

162

"""Generate line-based delta from this content to new_lines."""

147

163

new_texts = new_lines.text()

187

203

"""Annotated content."""

188

204

189

205

def __init__(self, lines):

206

KnitContent.__init__(self)

190

207

self._lines = lines

191

208

192

209

def annotate_iter(self):

204

221

def strip_last_line_newline(self):

205

222

line = self._lines[-1][1].rstrip('\n')

206

223

self._lines[-1] = (self._lines[-1][0], line)

224

self._should_strip_eol = False

207

225

208

226

def text(self):

209

227

try:

210

return [text for origin, text in self._lines]

228

lines = [text for origin, text in self._lines]

211

229

except ValueError, e:

212

230

# most commonly (only?) caused by the internal form of the knit

213

231

# missing annotation information because of a bug - see thread

216

234

"line in annotated knit missing annotation information: %s"

217

235

% (e,))

218

236

237

if self._should_strip_eol:

238

anno, line = lines[-1]

239

lines[-1] = (anno, line.rstrip('\n'))

240

return lines

241

219

242

def copy(self):

220

243

return AnnotatedKnitContent(self._lines[:])

221

244

229

252

"""

230

253

231

254

def __init__(self, lines, version_id):

255

KnitContent.__init__(self)

232

256

self._lines = lines

233

257

self._version_id = version_id

234

258

251

275

252

276

def strip_last_line_newline(self):

253

277

self._lines[-1] = self._lines[-1].rstrip('\n')

278

self._should_strip_eol = False

254

279

255

280

def text(self):

256

return self._lines

257

258

259

class KnitAnnotateFactory(object):

281

lines = self._lines

282

if self._should_strip_eol:

283

lines = lines[:]

284

lines[-1] = lines[-1].rstrip('\n')

285

return lines

286

287

288

class _KnitFactory(object):

289

"""Base class for common Factory functions."""

290

291

def parse_record(self, version_id, record, record_details,

292

base_content, copy_base_content=True):

293

"""Parse a record into a full content object.

294

295

:param version_id: The official version id for this content

296

:param record: The data returned by read_records_iter()

297

:param record_details: Details about the record returned by

298

get_build_details

299

:param base_content: If get_build_details returns a compression_parent,

300

you must return a base_content here, else use None

301

:param copy_base_content: When building from the base_content, decide

302

you can either copy it and return a new object, or modify it in

303

place.

304

:return: (content, delta) A Content object and possibly a line-delta,

305

delta may be None

306

"""

307

method, noeol = record_details

308

if method == 'line-delta':

309

assert base_content is not None

310

if copy_base_content:

311

content = base_content.copy()

312

else:

313

content = base_content

314

delta = self.parse_line_delta(record, version_id)

315

content.apply_delta(delta, version_id)

316

else:

317

content = self.parse_fulltext(record, version_id)

318

delta = None

319

content._should_strip_eol = noeol

320

return (content, delta)

321

322

323

class KnitAnnotateFactory(_KnitFactory):

260

324

"""Factory for creating annotated Content objects."""

261

325

262

326

annotated = True

368

432

return content.annotate_iter()

369

433

370

434

371

class KnitPlainFactory(object):

435

class KnitPlainFactory(_KnitFactory):

372

436

"""Factory for creating plain Content objects."""

373

437

374

438

annotated = False

426

490

return out

427

491

428

492

def annotate_iter(self, knit, version_id):

429

return annotate_knit(knit, version_id)

493

annotator = _KnitAnnotator(knit)

494

return iter(annotator.annotate(version_id))

430

495

431

496

432

497

def make_empty_knit(transport, relpath):

516

581

fulltext_size = size

517

582

break

518

583

delta_size += size

519

delta_parents = self._index.get_parents(parent)

584

delta_parents = self._index.get_parent_map([parent])[parent]

520

585

else:

521

586

# We couldn't find a fulltext, so we must create a new one

522

587

return False

661

726

def get_delta(self, version_id):

662

727

"""Get a delta for constructing version from some other version."""

663

728

self.check_not_reserved_id(version_id)

664

parents = self.get_parents(version_id)

729

parents = self.get_parent_map([version_id])[version_id]

665

730

if len(parents):

666

731

parent = parents[0]

667

732

else:

692

757

annotated_part = "plain"

693

758

return "knit-%s" % (annotated_part,)

694

759

760

@deprecated_method(one_four)

695

761

def get_graph_with_ghosts(self):

696

762

"""See VersionedFile.get_graph_with_ghosts()."""

697

graph_items = self._index.get_graph()

698

return dict(graph_items)

763

return self.get_parent_map(self.versions())

699

764

700

765

def get_sha1(self, version_id):

701

766

return self.get_sha1s([version_id])[0]

711

776

"""See VersionedFile.get_suffixes()."""

712

777

return [DATA_SUFFIX, INDEX_SUFFIX]

713

778

779

@deprecated_method(one_four)

714

780

def has_ghost(self, version_id):

715

781

"""True if there is a ghost reference in the file to version_id."""

716

782

# maybe we have it

717

783

if self.has_version(version_id):

718

784

return False

719

785

# optimisable if needed by memoising the _ghosts set.

720

items = self._index.get_graph()

721

for node, parents in items:

786

items = self.get_parent_map(self.versions())

787

for parents in items.itervalues():

722

788

for parent in parents:

723

if parent not in self._index._cache:

724

if parent == version_id:

725

return True

789

if parent == version_id and parent not in items:

790

return True

726

791

return False

727

792

728

793

def insert_data_stream(self, (format, data_list, reader_callable)):

807

872

factory = KnitAnnotateFactory()

808

873

else:

809

874

raise errors.KnitDataStreamUnknown(format)

810

index = _StreamIndex(data_list)

875

index = _StreamIndex(data_list, self._index)

811

876

access = _StreamAccess(reader_callable, index, self, factory)

812

877

return KnitVersionedFile(self.filename, self.transport,

813

878

factory=factory, index=index, access_method=access)

874

939

875

940

This data is intended to be used for retrieving the knit records.

876

941

877

A dict of version_id to (method, index_memo, next) is

942

A dict of version_id to (record_details, index_memo, next, parents) is

878

943

returned.

879

944

method is the way referenced data should be applied.

880

data_pos is the position of the data in the knit.

881

data_size is the size of the data in the knit.

945

index_memo is the handle to pass to the data access to actually get the

946

data

882

947

next is the build-parent of the version, or None for fulltexts.

948

parents is the version_ids of the parents of this version

883

949

"""

884

950

component_data = {}

885

951

pending_components = version_ids

886

952

while pending_components:

887

953

build_details = self._index.get_build_details(pending_components)

954

current_components = set(pending_components)

888

955

pending_components = set()

889

for version_id, details in build_details.items():

890

method, index_memo, compression_parent = details

956

for version_id, details in build_details.iteritems():

957

(index_memo, compression_parent, parents,

958

record_details) = details

959

method = record_details[0]

891

960

if compression_parent is not None:

892

961

pending_components.add(compression_parent)

893

component_data[version_id] = details

962

component_data[version_id] = (record_details, index_memo,

963

compression_parent)

964

missing = current_components.difference(build_details)

965

if missing:

966

raise errors.RevisionNotPresent(missing.pop(), self.filename)

894

967

return component_data

895

968

896

969

def _get_content(self, version_id, parent_texts={}):

910

983

self._index.check_versions_present(version_ids)

911

984

912

985

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

913

nostore_sha, random_id, check_content):

986

nostore_sha, random_id, check_content, left_matching_blocks):

914

987

"""See VersionedFile.add_lines_with_ghosts()."""

915

988

self._check_add(version_id, lines, random_id, check_content)

916

989

return self._add(version_id, lines, parents, self.delta,

917

parent_texts, None, nostore_sha, random_id)

990

parent_texts, left_matching_blocks, nostore_sha, random_id)

918

991

919

992

def _add_lines(self, version_id, parents, lines, parent_texts,

920

993

left_matching_blocks, nostore_sha, random_id, check_content):

1034

1107

def _get_record_map(self, version_ids):

1035

1108

"""Produce a dictionary of knit records.

1036

1109

1037

The keys are version_ids, the values are tuples of (method, content,

1038

digest, next).

1039

method is the way the content should be applied.

1040

content is a KnitContent object.

1041

digest is the SHA1 digest of this version id after all steps are done

1042

next is the build-parent of the version, i.e. the leftmost ancestor.

1043

If the method is fulltext, next will be None.

1110

:return: {version_id:(record, record_details, digest, next)}

1111

record

1112

data returned from read_records

1113

record_details

1114

opaque information to pass to parse_record

1115

digest

1116

SHA1 digest of the full text after all steps are done

1117

1118

build-parent of the version, i.e. the leftmost ancestor.

1119

Will be None if the record is not a delta.

1044

1120

"""

1045

1121

position_map = self._get_components_positions(version_ids)

1046

# c = component_id, m = method, i_m = index_memo, n = next

1047

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

1122

# c = component_id, r = record_details, i_m = index_memo, n = next

1123

records = [(c, i_m) for c, (r, i_m, n)

1124

in position_map.iteritems()]

1048

1125

record_map = {}

1049

for component_id, content, digest in \

1126

for component_id, record, digest in \

1050

1127

self._data.read_records_iter(records):

1051

method, index_memo, next = position_map[component_id]

1052

record_map[component_id] = method, content, digest, next

1053

1128

(record_details, index_memo, next) = position_map[component_id]

1129

record_map[component_id] = record, record_details, digest, next

1130

1054

1131

return record_map

1055

1132

1056

1133

def get_text(self, version_id):

1091

1168

components = []

1092

1169

cursor = version_id

1093

1170

while cursor is not None:

1094

method, data, digest, next = record_map[cursor]

1095

components.append((cursor, method, data, digest))

1171

record, record_details, digest, next = record_map[cursor]

1172

components.append((cursor, record, record_details, digest))

1096

1173

if cursor in content_map:

1097

1174

break

1098

1175

cursor = next

1099

1176

1100

1177

content = None

1101

for component_id, method, data, digest in reversed(components):

1178

for (component_id, record, record_details,

1179

digest) in reversed(components):

1102

1180

if component_id in content_map:

1103

1181

content = content_map[component_id]

1104

1182

else:

1105

if method == 'fulltext':

1106

assert content is None

1107

content = self.factory.parse_fulltext(data, version_id)

1108

elif method == 'line-delta':

1109

delta = self.factory.parse_line_delta(data, version_id)

1110

if multiple_versions:

1111

# only doing this when we want multiple versions

1112

# output avoids list copies - which reference and

1113

# dereference many strings.

1114

content = content.copy()

1115

content.apply_delta(delta, version_id)

1183

content, delta = self.factory.parse_record(version_id,

1184

record, record_details, content,

1185

copy_base_content=multiple_versions)

1116

1186

if multiple_versions:

1117

1187

content_map[component_id] = content

1118

1188

1119

if 'no-eol' in self._index.get_options(version_id):

1120

if multiple_versions:

1121

content = content.copy()

1122

content.strip_last_line_newline()

1189

content.cleanup_eol(copy_on_mutate=multiple_versions)

1123

1190

final_content[version_id] = content

1124

1191

1125

1192

# digest here is the digest from the last applied component.

1197

1264

"""See VersionedFile.annotate_iter."""

1198

1265

return self.factory.annotate_iter(self, version_id)

1199

1266

1200

def get_parents(self, version_id):

1201

"""See VersionedFile.get_parents."""

1202

# perf notes:

1203

# optimism counts!

1204

# 52554 calls in 1264 872 internal down from 3674

1205

try:

1206

return self._index.get_parents(version_id)

1207

except KeyError:

1208

raise RevisionNotPresent(version_id, self.filename)

1209

1210

def get_parents_with_ghosts(self, version_id):

1211

"""See VersionedFile.get_parents."""

1212

try:

1213

return self._index.get_parents_with_ghosts(version_id)

1214

except KeyError:

1215

raise RevisionNotPresent(version_id, self.filename)

1267

def get_parent_map(self, version_ids):

1268

"""See VersionedFile.get_parent_map."""

1269

return self._index.get_parent_map(version_ids)

1216

1270

1217

1271

def get_ancestry(self, versions, topo_sorted=True):

1218

1272

"""See VersionedFile.get_ancestry."""

1379

1433

self._transport.put_bytes_non_atomic(

1380

1434

self._filename, self.HEADER, mode=self._file_mode)

1381

1435

1382

def get_graph(self):

1383

"""Return a list of the node:parents lists from this knit index."""

1384

return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

1385

1386

1436

def get_ancestry(self, versions, topo_sorted=True):

1387

1437

"""See VersionedFile.get_ancestry."""

1388

1438

# get a graph of all the mentioned versions:

1426

1476

def get_build_details(self, version_ids):

1427

1477

"""Get the method, index_memo and compression parent for version_ids.

1428

1478

1479

Ghosts are omitted from the result.

1480

1429

1481

:param version_ids: An iterable of version_ids.

1430

:return: A dict of version_id:(method, index_memo, compression_parent).

1482

:return: A dict of version_id:(index_memo, compression_parent,

1483

parents, record_details).

1484

index_memo

1485

opaque structure to pass to read_records to extract the raw

1486

data

1487

compression_parent

1488

Content that this record is built upon, may be None

1489

parents

1490

Logical parents of this node

1491

record_details

1492

extra information about the content which needs to be passed to

1493

Factory.parse_record

1431

1494

"""

1432

1495

result = {}

1433

1496

for version_id in version_ids:

1497

if version_id not in self._cache:

1498

# ghosts are omitted

1499

continue

1434

1500

method = self.get_method(version_id)

1501

parents = self.get_parents_with_ghosts(version_id)

1435

1502

if method == 'fulltext':

1436

1503

compression_parent = None

1437

1504

else:

1438

compression_parent = self.get_parents_with_ghosts(version_id)[0]

1505

compression_parent = parents[0]

1506

noeol = 'no-eol' in self.get_options(version_id)

1439

1507

index_memo = self.get_position(version_id)

1440

result[version_id] = (method, index_memo, compression_parent)

1508

result[version_id] = (index_memo, compression_parent,

1509

parents, (method, noeol))

1441

1510

return result

1442

1511

1443

1512

def iter_parents(self, version_ids):

1449

1518

The order is undefined, allowing for different optimisations in

1450

1519

the underlying implementation.

1451

1520

"""

1452

for version_id in version_ids:

1453

try:

1454

yield version_id, tuple(self.get_parents(version_id))

1455

except KeyError:

1456

pass

1521

parent_map = self.get_parent_map(version_ids)

1522

parent_map_set = set(parent_map)

1523

unknown_existence = set()

1524

for parents in parent_map.itervalues():

1525

unknown_existence.update(parents)

1526

unknown_existence.difference_update(parent_map_set)

1527

present_parents = set(self.get_parent_map(unknown_existence))

1528

present_parents.update(parent_map_set)

1529

for version_id, parents in parent_map.iteritems():

1530

parents = tuple(parent for parent in parents

1531

if parent in present_parents)

1532

yield version_id, parents

1457

1533

1458

1534

def num_versions(self):

1459

1535

return len(self._history)

1502

1578

assert isinstance(line, str), \

1503

1579

'content must be utf-8 encoded: %r' % (line,)

1504

1580

lines.append(line)

1505

self._cache_version(version_id, options, pos, size, parents)

1581

self._cache_version(version_id, options, pos, size, tuple(parents))

1506

1582

if not self._need_to_create:

1507

1583

self._transport.append_bytes(self._filename, ''.join(lines))

1508

1584

else:

1557

1633

"""

1558

1634

return self._cache[version_id][1]

1559

1635

1560

def get_parents(self, version_id):

1561

"""Return parents of specified version ignoring ghosts."""

1562

return [parent for parent in self._cache[version_id][4]

1563

if parent in self._cache]

1636

def get_parent_map(self, version_ids):

1637

"""Passed through to by KnitVersionedFile.get_parent_map."""

1638

result = {}

1639

for version_id in version_ids:

1640

try:

1641

result[version_id] = tuple(self._cache[version_id][4])

1642

except KeyError:

1643

pass

1644

return result

1564

1645

1565

1646

def get_parents_with_ghosts(self, version_id):

1566

1647

"""Return parents of specified version with ghosts."""

1567

return self._cache[version_id][4]

1648

try:

1649

return self.get_parent_map([version_id])[version_id]

1650

except KeyError:

1651

raise RevisionNotPresent(version_id, self)

1568

1652

1569

1653

def check_versions_present(self, version_ids):

1570

1654

"""Check that all specified versions are present."""

1706

1790

def get_build_details(self, version_ids):

1707

1791

"""Get the method, index_memo and compression parent for version_ids.

1708

1792

1793

Ghosts are omitted from the result.

1794

1709

1795

:param version_ids: An iterable of version_ids.

1710

:return: A dict of version_id:(method, index_memo, compression_parent).

1796

:return: A dict of version_id:(index_memo, compression_parent,

1797

parents, record_details).

1798

index_memo

1799

opaque structure to pass to read_records to extract the raw

1800

data

1801

compression_parent

1802

Content that this record is built upon, may be None

1803

parents

1804

Logical parents of this node

1805

record_details

1806

extra information about the content which needs to be passed to

1807

Factory.parse_record

1711

1808

"""

1712

1809

result = {}

1713

1810

entries = self._get_entries(self._version_ids_to_keys(version_ids), True)

1714

1811

for entry in entries:

1715

1812

version_id = self._keys_to_version_ids((entry[1],))[0]

1813

if not self._parents:

1814

parents = ()

1815

else:

1816

parents = self._keys_to_version_ids(entry[3][0])

1716

1817

if not self._deltas:

1717

1818

compression_parent = None

1718

1819

else:

1722

1823

(compression_parent_key,))[0]

1723

1824

else:

1724

1825

compression_parent = None

1826

noeol = (entry[2][0] == 'N')

1725

1827

if compression_parent:

1726

1828

method = 'line-delta'

1727

1829

else:

1728

1830

method = 'fulltext'

1729

result[version_id] = (method, self._node_to_position(entry),

1730

compression_parent)

1831

result[version_id] = (self._node_to_position(entry),

1832

compression_parent, parents,

1833

(method, noeol))

1731

1834

return result

1732

1835

1733

1836

def _compression_parent(self, an_entry):

1747

1850

else:

1748

1851

return 'fulltext'

1749

1852

1750

def get_graph(self):

1751

"""Return a list of the node:parents lists from this knit index."""

1752

if not self._parents:

1753

return [(key, ()) for key in self.get_versions()]

1754

result = []

1755

for index, key, value, refs in self._graph_index.iter_all_entries():

1756

result.append((key[0], tuple([ref[0] for ref in refs[0]])))

1757

return result

1758

1759

1853

def iter_parents(self, version_ids):

1760

1854

"""Iterate through the parents for many version ids.

1761

1855

1836

1930

options.append('no-eol')

1837

1931

return options

1838

1932

1839

def get_parents(self, version_id):

1840

"""Return parents of specified version ignoring ghosts."""

1841

parents = list(self.iter_parents([version_id]))

1842

if not parents:

1843

# missing key

1844

raise errors.RevisionNotPresent(version_id, self)

1845

return parents[0][1]

1933

def get_parent_map(self, version_ids):

1934

"""Passed through to by KnitVersionedFile.get_parent_map."""

1935

nodes = self._get_entries(self._version_ids_to_keys(version_ids))

1936

result = {}

1937

if self._parents:

1938

for node in nodes:

1939

result[node[1][0]] = self._keys_to_version_ids(node[3][0])

1940

else:

1941

for node in nodes:

1942

result[node[1][0]] = ()

1943

return result

1846

1944

1847

1945

def get_parents_with_ghosts(self, version_id):

1848

1946

"""Return parents of specified version with ghosts."""

1849

nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),

1850

check_present=True))

1851

if not self._parents:

1852

return ()

1853

return self._keys_to_version_ids(nodes[0][3][0])

1947

try:

1948

return self.get_parent_map([version_id])[version_id]

1949

except KeyError:

1950

raise RevisionNotPresent(version_id, self)

1854

1951

1855

1952

def check_versions_present(self, version_ids):

1856

1953

"""Check that all specified versions are present."""

2174

2271

class _StreamIndex(object):

2175

2272

"""A Knit Index object that uses the data map from a datastream."""

2176

2273

2177

def __init__(self, data_list):

2274

def __init__(self, data_list, backing_index):

2178

2275

"""Create a _StreamIndex object.

2179

2276

2180

2277

:param data_list: The data_list from the datastream.

2278

:param backing_index: The index which will supply values for nodes

2279

referenced outside of this stream.

2181

2280

"""

2182

2281

self.data_list = data_list

2282

self.backing_index = backing_index

2183

2283

self._by_version = {}

2184

2284

pos = 0

2185

2285

for key, options, length, parents in data_list:

2212

2312

def get_build_details(self, version_ids):

2213

2313

"""Get the method, index_memo and compression parent for version_ids.

2214

2314

2315

Ghosts are omitted from the result.

2316

2215

2317

:param version_ids: An iterable of version_ids.

2216

:return: A dict of version_id:(method, index_memo, compression_parent).

2318

:return: A dict of version_id:(index_memo, compression_parent,

2319

parents, record_details).

2320

index_memo

2321

opaque structure to pass to read_records to extract the raw

2322

data

2323

compression_parent

2324

Content that this record is built upon, may be None

2325

parents

2326

Logical parents of this node

2327

record_details

2328

extra information about the content which needs to be passed to

2329

Factory.parse_record

2217

2330

"""

2218

2331

result = {}

2219

2332

for version_id in version_ids:

2220

method = self.get_method(version_id)

2333

try:

2334

method = self.get_method(version_id)

2335

except errors.RevisionNotPresent:

2336

# ghosts are omitted

2337

continue

2338

parent_ids = self.get_parents_with_ghosts(version_id)

2339

noeol = ('no-eol' in self.get_options(version_id))

2221

2340

if method == 'fulltext':

2222

2341

compression_parent = None

2223

2342

else:

2224

compression_parent = self.get_parents_with_ghosts(version_id)[0]

2343

compression_parent = parent_ids[0]

2225

2344

index_memo = self.get_position(version_id)

2226

result[version_id] = (method, index_memo, compression_parent)

2345

result[version_id] = (index_memo, compression_parent,

2346

parent_ids, (method, noeol))

2227

2347

return result

2228

2348

2229

2349

def get_method(self, version_id):

2233

2353

except KeyError:

2234

2354

# Strictly speaking this should check in the backing knit, but

2235

2355

# until we have a test to discriminate, this will do.

2236

return 'fulltext'

2356

return self.backing_index.get_method(version_id)

2237

2357

if 'fulltext' in options:

2238

2358

return 'fulltext'

2239

2359

elif 'line-delta' in options:

2246

2366

2247

2367

e.g. ['foo', 'bar']

2248

2368

"""

2249

return self._by_version[version_id][0]

2369

try:

2370

return self._by_version[version_id][0]

2371

except KeyError:

2372

return self.backing_index.get_options(version_id)

2373

2374

def get_parent_map(self, version_ids):

2375

"""Passed through to by KnitVersionedFile.get_parent_map."""

2376

result = {}

2377

pending_ids = set()

2378

for version_id in version_ids:

2379

try:

2380

result[version_id] = self._by_version[version_id][2]

2381

except KeyError:

2382

pending_ids.add(version_id)

2383

result.update(self.backing_index.get_parent_map(pending_ids))

2384

return result

2250

2385

2251

2386

def get_parents_with_ghosts(self, version_id):

2252

2387

"""Return parents of specified version with ghosts."""

2253

return self._by_version[version_id][2]

2388

try:

2389

return self.get_parent_map([version_id])[version_id]

2390

except KeyError:

2391

raise RevisionNotPresent(version_id, self)

2254

2392

2255

2393

def get_position(self, version_id):

2256

2394

"""Return details needed to access the version.

2528

2666

see join() for the parameter definitions.

2529

2667

"""

2530

2668

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

2531

graph = self.source.get_graph(version_ids)

2532

order = topo_sort(graph.items())

2669

# --- the below is factorable out with VersionedFile.join, but wait for

2670

# VersionedFiles, it may all be simpler then.

2671

graph = Graph(self.source)

2672

search = graph._make_breadth_first_searcher(version_ids)

2673

transitive_ids = set()

2674

map(transitive_ids.update, list(search))

2675

parent_map = self.source.get_parent_map(transitive_ids)

2676

order = topo_sort(parent_map.items())

2533

2677

2534

2678

def size_of_content(content):

2535

2679

return sum(len(line) for line in content.text())

2596

2740

2597

2741

if not needed_versions:

2598

2742

return 0

2599

full_list = topo_sort(self.source.get_graph())

2743

full_list = topo_sort(

2744

self.source.get_parent_map(self.source.versions()))

2600

2745

2601

2746

version_list = [i for i in full_list if (not self.target.has_version(i)

2602

2747

and i in needed_versions)]

2698

2843

2699

2844

if not needed_versions:

2700

2845

return 0

2701

full_list = topo_sort(self.source.get_graph())

2846

full_list = topo_sort(

2847

self.source.get_parent_map(self.source.versions()))

2702

2848

2703

2849

version_list = [i for i in full_list if (not self.target.has_version(i)

2704

2850

and i in needed_versions)]

2706

2852

# do the join:

2707

2853

count = 0

2708

2854

total = len(version_list)

2855

parent_map = self.source.get_parent_map(version_list)

2709

2856

for version_id in version_list:

2710

2857

pb.update("Converting to knit", count, total)

2711

parents = self.source.get_parents(version_id)

2858

parents = parent_map[version_id]

2712

2859

# check that its will be a consistent copy:

2713

2860

for parent in parents:

2714

2861

# if source has the parent, we must already have it

2735

2882

It will work for knits with cached annotations, but this is not

2736

2883

recommended.

2737

2884

"""

2738

ancestry = knit.get_ancestry(revision_id)

2739

fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))

2740

annotations = {}

2741

for candidate in ancestry:

2742

if candidate in annotations:

2743

continue

2744

parents = knit.get_parents(candidate)

2745

if len(parents) == 0:

2746

blocks = None

2747

elif knit._index.get_method(candidate) != 'line-delta':

2748

blocks = None

2885

annotator = _KnitAnnotator(knit)

2886

return iter(annotator.annotate(revision_id))

2887

2888

2889

class _KnitAnnotator(object):

2890

"""Build up the annotations for a text."""

2891

2892

def __init__(self, knit):

2893

self._knit = knit

2894

2895

# Content objects, differs from fulltexts because of how final newlines

2896

# are treated by knits. the content objects here will always have a

2897

# final newline

2898

self._fulltext_contents = {}

2899

2900

# Annotated lines of specific revisions

2901

self._annotated_lines = {}

2902

2903

# Track the raw data for nodes that we could not process yet.

2904

# This maps the revision_id of the base to a list of children that will

2905

# annotated from it.

2906

self._pending_children = {}

2907

2908

# Nodes which cannot be extracted

2909

self._ghosts = set()

2910

2911

# Track how many children this node has, so we know if we need to keep

2912

# it

2913

self._annotate_children = {}

2914

self._compression_children = {}

2915

2916

self._all_build_details = {}

2917

# The children => parent revision_id graph

2918

self._revision_id_graph = {}

2919

2920

self._heads_provider = None

2921

2922

self._nodes_to_keep_annotations = set()

2923

self._generations_until_keep = 100

2924

2925

def set_generations_until_keep(self, value):

2926

"""Set the number of generations before caching a node.

2927

2928

Setting this to -1 will cache every merge node, setting this higher

2929

will cache fewer nodes.

2930

"""

2931

self._generations_until_keep = value

2932

2933

def _add_fulltext_content(self, revision_id, content_obj):

2934

self._fulltext_contents[revision_id] = content_obj

2935

# TODO: jam 20080305 It might be good to check the sha1digest here

2936

return content_obj.text()

2937

2938

def _check_parents(self, child, nodes_to_annotate):

2939

"""Check if all parents have been processed.

2940

2941

:param child: A tuple of (rev_id, parents, raw_content)

2942

:param nodes_to_annotate: If child is ready, add it to

2943

nodes_to_annotate, otherwise put it back in self._pending_children

2944

"""

2945

for parent_id in child[1]:

2946

if (parent_id not in self._annotated_lines):

2947

# This parent is present, but another parent is missing

2948

self._pending_children.setdefault(parent_id,

2949

[]).append(child)

2950

break

2749

2951

else:

2750

parent, sha1, noeol, delta = knit.get_delta(candidate)

2751

blocks = KnitContent.get_line_delta_blocks(delta,

2752

fulltext[parents[0]], fulltext[candidate])

2753

annotations[candidate] = list(annotate.reannotate([annotations[p]

2754

for p in parents], fulltext[candidate], candidate, blocks))

2755

return iter(annotations[revision_id])

2952

# This one is ready to be processed

2953

nodes_to_annotate.append(child)

2954

2955

def _add_annotation(self, revision_id, fulltext, parent_ids,

2956

left_matching_blocks=None):

2957

"""Add an annotation entry.

2958

2959

All parents should already have been annotated.

2960

:return: A list of children that now have their parents satisfied.

2961

"""

2962

a = self._annotated_lines

2963

annotated_parent_lines = [a[p] for p in parent_ids]

2964

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

2965

fulltext, revision_id, left_matching_blocks,

2966

heads_provider=self._get_heads_provider()))

2967

self._annotated_lines[revision_id] = annotated_lines

2968

for p in parent_ids:

2969

ann_children = self._annotate_children[p]

2970

ann_children.remove(revision_id)

2971

if (not ann_children

2972

and p not in self._nodes_to_keep_annotations):

2973

del self._annotated_lines[p]

2974

del self._all_build_details[p]

2975

if p in self._fulltext_contents:

2976

del self._fulltext_contents[p]

2977

# Now that we've added this one, see if there are any pending

2978

# deltas to be done, certainly this parent is finished

2979

nodes_to_annotate = []

2980

for child in self._pending_children.pop(revision_id, []):

2981

self._check_parents(child, nodes_to_annotate)

2982

return nodes_to_annotate

2983

2984

def _get_build_graph(self, revision_id):

2985

"""Get the graphs for building texts and annotations.

2986

2987

The data you need for creating a full text may be different than the

2988

data you need to annotate that text. (At a minimum, you need both

2989

parents to create an annotation, but only need 1 parent to generate the

2990

fulltext.)

2991

2992

:return: A list of (revision_id, index_memo) records, suitable for

2993

passing to read_records_iter to start reading in the raw data fro/

2994

the pack file.

2995

"""

2996

if revision_id in self._annotated_lines:

2997

# Nothing to do

2998

return []

2999

pending = set([revision_id])

3000

records = []

3001

generation = 0

3002

kept_generation = 0

3003

while pending:

3004

# get all pending nodes

3005

generation += 1

3006

this_iteration = pending

3007

build_details = self._knit._index.get_build_details(this_iteration)

3008

self._all_build_details.update(build_details)

3009

# new_nodes = self._knit._index._get_entries(this_iteration)

3010

pending = set()

3011

for rev_id, details in build_details.iteritems():

3012

(index_memo, compression_parent, parents,

3013

record_details) = details

3014

self._revision_id_graph[rev_id] = parents

3015

records.append((rev_id, index_memo))

3016

# Do we actually need to check _annotated_lines?

3017

pending.update(p for p in parents

3018

if p not in self._all_build_details)

3019

if compression_parent:

3020

self._compression_children.setdefault(compression_parent,

3021

[]).append(rev_id)

3022

if parents:

3023

for parent in parents:

3024

self._annotate_children.setdefault(parent,

3025

[]).append(rev_id)

3026

num_gens = generation - kept_generation

3027

if ((num_gens >= self._generations_until_keep)

3028

and len(parents) > 1):

3029

kept_generation = generation

3030

self._nodes_to_keep_annotations.add(rev_id)

3031

3032

missing_versions = this_iteration.difference(build_details.keys())

3033

self._ghosts.update(missing_versions)

3034

for missing_version in missing_versions:

3035

# add a key, no parents

3036

self._revision_id_graph[missing_version] = ()

3037

pending.discard(missing_version) # don't look for it

3038

# XXX: This should probably be a real exception, as it is a data

3039

# inconsistency

3040

assert not self._ghosts.intersection(self._compression_children), \

3041

"We cannot have nodes which have a compression parent of a ghost."

3042

# Cleanout anything that depends on a ghost so that we don't wait for

3043

# the ghost to show up

3044

for node in self._ghosts:

3045

if node in self._annotate_children:

3046

# We won't be building this node

3047

del self._annotate_children[node]

3048

# Generally we will want to read the records in reverse order, because

3049

# we find the parent nodes after the children

3050

records.reverse()

3051

return records

3052

3053

def _annotate_records(self, records):

3054

"""Build the annotations for the listed records."""

3055

# We iterate in the order read, rather than a strict order requested

3056

# However, process what we can, and put off to the side things that

3057

# still need parents, cleaning them up when those parents are

3058

# processed.

3059

for (rev_id, record,

3060

digest) in self._knit._data.read_records_iter(records):

3061

if rev_id in self._annotated_lines:

3062

continue

3063

parent_ids = self._revision_id_graph[rev_id]

3064

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3065

details = self._all_build_details[rev_id]

3066

(index_memo, compression_parent, parents,

3067

record_details) = details

3068

nodes_to_annotate = []

3069

# TODO: Remove the punning between compression parents, and

3070

# parent_ids, we should be able to do this without assuming

3071

# the build order

3072

if len(parent_ids) == 0:

3073

# There are no parents for this node, so just add it

3074

# TODO: This probably needs to be decoupled

3075

assert compression_parent is None

3076

fulltext_content, delta = self._knit.factory.parse_record(

3077

rev_id, record, record_details, None)

3078

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3079

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3080

parent_ids, left_matching_blocks=None))

3081

else:

3082

child = (rev_id, parent_ids, record)

3083

# Check if all the parents are present

3084

self._check_parents(child, nodes_to_annotate)

3085

while nodes_to_annotate:

3086

# Should we use a queue here instead of a stack?

3087

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3088

(index_memo, compression_parent, parents,

3089

record_details) = self._all_build_details[rev_id]

3090

if compression_parent is not None:

3091

comp_children = self._compression_children[compression_parent]

3092

assert rev_id in comp_children

3093

# If there is only 1 child, it is safe to reuse this

3094

# content

3095

reuse_content = (len(comp_children) == 1

3096

and compression_parent not in

3097

self._nodes_to_keep_annotations)

3098

if reuse_content:

3099

# Remove it from the cache since it will be changing

3100

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3101

# Make sure to copy the fulltext since it might be

3102

# modified

3103

parent_fulltext = list(parent_fulltext_content.text())

3104

else:

3105

parent_fulltext_content = self._fulltext_contents[compression_parent]

3106

parent_fulltext = parent_fulltext_content.text()

3107

comp_children.remove(rev_id)

3108

fulltext_content, delta = self._knit.factory.parse_record(

3109

rev_id, record, record_details,

3110

parent_fulltext_content,

3111

copy_base_content=(not reuse_content))

3112

fulltext = self._add_fulltext_content(rev_id,

3113

fulltext_content)

3114

blocks = KnitContent.get_line_delta_blocks(delta,

3115

parent_fulltext, fulltext)

3116

else:

3117

fulltext_content = self._knit.factory.parse_fulltext(

3118

record, rev_id)

3119

fulltext = self._add_fulltext_content(rev_id,

3120

fulltext_content)

3121

blocks = None

3122

nodes_to_annotate.extend(

3123

self._add_annotation(rev_id, fulltext, parent_ids,

3124

left_matching_blocks=blocks))

3125

3126

def _get_heads_provider(self):

3127

"""Create a heads provider for resolving ancestry issues."""

3128

if self._heads_provider is not None:

3129

return self._heads_provider

3130

parent_provider = _mod_graph.DictParentsProvider(

3131

self._revision_id_graph)

3132

graph_obj = _mod_graph.Graph(parent_provider)

3133

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

3134

self._heads_provider = head_cache

3135

return head_cache

3136

3137

def annotate(self, revision_id):

3138

"""Return the annotated fulltext at the given revision.

3139

3140

:param revision_id: The revision id for this file

3141

"""

3142

records = self._get_build_graph(revision_id)

3143

if revision_id in self._ghosts:

3144

raise errors.RevisionNotPresent(revision_id, self._knit)

3145

self._annotate_records(records)

3146

return self._annotated_lines[revision_id]

2756

3147

2757

3148

2758

3149

try:

Older »