~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2008-03-15 17:44:41 UTC
mfrom: (3224.1.29 annotate_finish)
Revision ID: pqm@pqm.ubuntu.com-20080315174441-l8xpw6femn0syal1

(jam) Tweak annotate performance for pack repositories,
improving speed and decreasing memory consumption.

files added:
bzrlib/tests/tree_implementations/test_annotate_iter.py

files modified:
NEWS

bzrlib/annotate.py

bzrlib/graph.py

bzrlib/knit.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_knit.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

lazy_import(globals(), """

from bzrlib import (

annotate,

graph as _mod_graph,

lru_cache,

pack,

trace,

134

135

class KnitContent(object):

135

136

"""Content of a knit version to which deltas can be applied."""

136

137

138

def __init__(self):

139

self._should_strip_eol = False

140

137

141

def annotate(self):

138

142

"""Return a list of (origin, text) tuples."""

139

143

return list(self.annotate_iter())

142

146

"""Apply delta to this object to become new_version_id."""

143

147

raise NotImplementedError(self.apply_delta)

144

148

149

def cleanup_eol(self, copy_on_mutate=True):

150

if self._should_strip_eol:

151

if copy_on_mutate:

152

self._lines = self._lines[:]

153

self.strip_last_line_newline()

154

145

155

def line_delta_iter(self, new_lines):

146

156

"""Generate line-based delta from this content to new_lines."""

147

157

new_texts = new_lines.text()

187

197

"""Annotated content."""

188

198

189

199

def __init__(self, lines):

200

KnitContent.__init__(self)

190

201

self._lines = lines

191

202

192

203

def annotate_iter(self):

204

215

def strip_last_line_newline(self):

205

216

line = self._lines[-1][1].rstrip('\n')

206

217

self._lines[-1] = (self._lines[-1][0], line)

218

self._should_strip_eol = False

207

219

208

220

def text(self):

209

221

try:

210

return [text for origin, text in self._lines]

222

lines = [text for origin, text in self._lines]

211

223

except ValueError, e:

212

224

# most commonly (only?) caused by the internal form of the knit

213

225

# missing annotation information because of a bug - see thread

216

228

"line in annotated knit missing annotation information: %s"

217

229

% (e,))

218

230

231

if self._should_strip_eol:

232

anno, line = lines[-1]

233

lines[-1] = (anno, line.rstrip('\n'))

234

return lines

235

219

236

def copy(self):

220

237

return AnnotatedKnitContent(self._lines[:])

221

238

229

246

"""

230

247

231

248

def __init__(self, lines, version_id):

249

KnitContent.__init__(self)

232

250

self._lines = lines

233

251

self._version_id = version_id

234

252

251

269

252

270

def strip_last_line_newline(self):

253

271

self._lines[-1] = self._lines[-1].rstrip('\n')

272

self._should_strip_eol = False

254

273

255

274

def text(self):

256

return self._lines

257

258

259

class KnitAnnotateFactory(object):

275

lines = self._lines

276

if self._should_strip_eol:

277

lines = lines[:]

278

lines[-1] = lines[-1].rstrip('\n')

279

return lines

280

281

282

class _KnitFactory(object):

283

"""Base class for common Factory functions."""

284

285

def parse_record(self, version_id, record, record_details,

286

base_content, copy_base_content=True):

287

"""Parse a record into a full content object.

288

289

:param version_id: The official version id for this content

290

:param record: The data returned by read_records_iter()

291

:param record_details: Details about the record returned by

292

get_build_details

293

:param base_content: If get_build_details returns a compression_parent,

294

you must return a base_content here, else use None

295

:param copy_base_content: When building from the base_content, decide

296

you can either copy it and return a new object, or modify it in

297

place.

298

:return: (content, delta) A Content object and possibly a line-delta,

299

delta may be None

300

"""

301

method, noeol = record_details

302

if method == 'line-delta':

303

assert base_content is not None

304

if copy_base_content:

305

content = base_content.copy()

306

else:

307

content = base_content

308

delta = self.parse_line_delta(record, version_id)

309

content.apply_delta(delta, version_id)

310

else:

311

content = self.parse_fulltext(record, version_id)

312

delta = None

313

content._should_strip_eol = noeol

314

return (content, delta)

315

316

317

class KnitAnnotateFactory(_KnitFactory):

260

318

"""Factory for creating annotated Content objects."""

261

319

262

320

annotated = True

368

426

return content.annotate_iter()

369

427

370

428

371

class KnitPlainFactory(object):

429

class KnitPlainFactory(_KnitFactory):

372

430

"""Factory for creating plain Content objects."""

373

431

374

432

annotated = False

426

484

return out

427

485

428

486

def annotate_iter(self, knit, version_id):

429

return annotate_knit(knit, version_id)

487

annotator = _KnitAnnotator(knit)

488

return iter(annotator.annotate(version_id))

430

489

431

490

432

491

def make_empty_knit(transport, relpath):

807

866

factory = KnitAnnotateFactory()

808

867

else:

809

868

raise errors.KnitDataStreamUnknown(format)

810

index = _StreamIndex(data_list)

869

index = _StreamIndex(data_list, self._index)

811

870

access = _StreamAccess(reader_callable, index, self, factory)

812

871

return KnitVersionedFile(self.filename, self.transport,

813

872

factory=factory, index=index, access_method=access)

874

933

875

934

This data is intended to be used for retrieving the knit records.

876

935

877

A dict of version_id to (method, index_memo, next) is

936

A dict of version_id to (record_details, index_memo, next, parents) is

878

937

returned.

879

938

method is the way referenced data should be applied.

880

data_pos is the position of the data in the knit.

881

data_size is the size of the data in the knit.

939

index_memo is the handle to pass to the data access to actually get the

940

data

882

941

next is the build-parent of the version, or None for fulltexts.

942

parents is the version_ids of the parents of this version

883

943

"""

884

944

component_data = {}

885

945

pending_components = version_ids

886

946

while pending_components:

887

947

build_details = self._index.get_build_details(pending_components)

948

current_components = set(pending_components)

888

949

pending_components = set()

889

for version_id, details in build_details.items():

890

method, index_memo, compression_parent = details

950

for version_id, details in build_details.iteritems():

951

(index_memo, compression_parent, parents,

952

record_details) = details

953

method = record_details[0]

891

954

if compression_parent is not None:

892

955

pending_components.add(compression_parent)

893

component_data[version_id] = details

956

component_data[version_id] = (record_details, index_memo,

957

compression_parent)

958

missing = current_components.difference(build_details)

959

if missing:

960

raise errors.RevisionNotPresent(missing.pop(), self.filename)

894

961

return component_data

895

962

896

963

def _get_content(self, version_id, parent_texts={}):

1034

1101

def _get_record_map(self, version_ids):

1035

1102

"""Produce a dictionary of knit records.

1036

1103

1037

The keys are version_ids, the values are tuples of (method, content,

1038

digest, next).

1039

method is the way the content should be applied.

1040

content is a KnitContent object.

1041

digest is the SHA1 digest of this version id after all steps are done

1042

next is the build-parent of the version, i.e. the leftmost ancestor.

1043

If the method is fulltext, next will be None.

1104

:return: {version_id:(record, record_details, digest, next)}

1105

record

1106

data returned from read_records

1107

record_details

1108

opaque information to pass to parse_record

1109

digest

1110

SHA1 digest of the full text after all steps are done

1111

1112

build-parent of the version, i.e. the leftmost ancestor.

1113

Will be None if the record is not a delta.

1044

1114

"""

1045

1115

position_map = self._get_components_positions(version_ids)

1046

# c = component_id, m = method, i_m = index_memo, n = next

1047

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

1116

# c = component_id, r = record_details, i_m = index_memo, n = next

1117

records = [(c, i_m) for c, (r, i_m, n)

1118

in position_map.iteritems()]

1048

1119

record_map = {}

1049

for component_id, content, digest in \

1120

for component_id, record, digest in \

1050

1121

self._data.read_records_iter(records):

1051

method, index_memo, next = position_map[component_id]

1052

record_map[component_id] = method, content, digest, next

1053

1122

(record_details, index_memo, next) = position_map[component_id]

1123

record_map[component_id] = record, record_details, digest, next

1124

1054

1125

return record_map

1055

1126

1056

1127

def get_text(self, version_id):

1091

1162

components = []

1092

1163

cursor = version_id

1093

1164

while cursor is not None:

1094

method, data, digest, next = record_map[cursor]

1095

components.append((cursor, method, data, digest))

1165

record, record_details, digest, next = record_map[cursor]

1166

components.append((cursor, record, record_details, digest))

1096

1167

if cursor in content_map:

1097

1168

break

1098

1169

cursor = next

1099

1170

1100

1171

content = None

1101

for component_id, method, data, digest in reversed(components):

1172

for (component_id, record, record_details,

1173

digest) in reversed(components):

1102

1174

if component_id in content_map:

1103

1175

content = content_map[component_id]

1104

1176

else:

1105

if method == 'fulltext':

1106

assert content is None

1107

content = self.factory.parse_fulltext(data, version_id)

1108

elif method == 'line-delta':

1109

delta = self.factory.parse_line_delta(data, version_id)

1110

if multiple_versions:

1111

# only doing this when we want multiple versions

1112

# output avoids list copies - which reference and

1113

# dereference many strings.

1114

content = content.copy()

1115

content.apply_delta(delta, version_id)

1177

content, delta = self.factory.parse_record(version_id,

1178

record, record_details, content,

1179

copy_base_content=multiple_versions)

1116

1180

if multiple_versions:

1117

1181

content_map[component_id] = content

1118

1182

1119

if 'no-eol' in self._index.get_options(version_id):

1120

if multiple_versions:

1121

content = content.copy()

1122

content.strip_last_line_newline()

1183

content.cleanup_eol(copy_on_mutate=multiple_versions)

1123

1184

final_content[version_id] = content

1124

1185

1125

1186

# digest here is the digest from the last applied component.

1426

1487

def get_build_details(self, version_ids):

1427

1488

"""Get the method, index_memo and compression parent for version_ids.

1428

1489

1490

Ghosts are omitted from the result.

1491

1429

1492

:param version_ids: An iterable of version_ids.

1430

:return: A dict of version_id:(method, index_memo, compression_parent).

1493

:return: A dict of version_id:(index_memo, compression_parent,

1494

parents, record_details).

1495

index_memo

1496

opaque structure to pass to read_records to extract the raw

1497

data

1498

compression_parent

1499

Content that this record is built upon, may be None

1500

parents

1501

Logical parents of this node

1502

record_details

1503

extra information about the content which needs to be passed to

1504

Factory.parse_record

1431

1505

"""

1432

1506

result = {}

1433

1507

for version_id in version_ids:

1508

if version_id not in self._cache:

1509

# ghosts are omitted

1510

continue

1434

1511

method = self.get_method(version_id)

1512

parents = self.get_parents_with_ghosts(version_id)

1435

1513

if method == 'fulltext':

1436

1514

compression_parent = None

1437

1515

else:

1438

compression_parent = self.get_parents_with_ghosts(version_id)[0]

1516

compression_parent = parents[0]

1517

noeol = 'no-eol' in self.get_options(version_id)

1439

1518

index_memo = self.get_position(version_id)

1440

result[version_id] = (method, index_memo, compression_parent)

1519

result[version_id] = (index_memo, compression_parent,

1520

parents, (method, noeol))

1441

1521

return result

1442

1522

1443

1523

def iter_parents(self, version_ids):

1706

1786

def get_build_details(self, version_ids):

1707

1787

"""Get the method, index_memo and compression parent for version_ids.

1708

1788

1789

Ghosts are omitted from the result.

1790

1709

1791

:param version_ids: An iterable of version_ids.

1710

:return: A dict of version_id:(method, index_memo, compression_parent).

1792

:return: A dict of version_id:(index_memo, compression_parent,

1793

parents, record_details).

1794

index_memo

1795

opaque structure to pass to read_records to extract the raw

1796

data

1797

compression_parent

1798

Content that this record is built upon, may be None

1799

parents

1800

Logical parents of this node

1801

record_details

1802

extra information about the content which needs to be passed to

1803

Factory.parse_record

1711

1804

"""

1712

1805

result = {}

1713

1806

entries = self._get_entries(self._version_ids_to_keys(version_ids), True)

1714

1807

for entry in entries:

1715

1808

version_id = self._keys_to_version_ids((entry[1],))[0]

1809

if not self._parents:

1810

parents = ()

1811

else:

1812

parents = self._keys_to_version_ids(entry[3][0])

1716

1813

if not self._deltas:

1717

1814

compression_parent = None

1718

1815

else:

1722

1819

(compression_parent_key,))[0]

1723

1820

else:

1724

1821

compression_parent = None

1822

noeol = (entry[2][0] == 'N')

1725

1823

if compression_parent:

1726

1824

method = 'line-delta'

1727

1825

else:

1728

1826

method = 'fulltext'

1729

result[version_id] = (method, self._node_to_position(entry),

1730

compression_parent)

1827

result[version_id] = (self._node_to_position(entry),

1828

compression_parent, parents,

1829

(method, noeol))

1731

1830

return result

1732

1831

1733

1832

def _compression_parent(self, an_entry):

2174

2273

class _StreamIndex(object):

2175

2274

"""A Knit Index object that uses the data map from a datastream."""

2176

2275

2177

def __init__(self, data_list):

2276

def __init__(self, data_list, backing_index):

2178

2277

"""Create a _StreamIndex object.

2179

2278

2180

2279

:param data_list: The data_list from the datastream.

2280

:param backing_index: The index which will supply values for nodes

2281

referenced outside of this stream.

2181

2282

"""

2182

2283

self.data_list = data_list

2284

self.backing_index = backing_index

2183

2285

self._by_version = {}

2184

2286

pos = 0

2185

2287

for key, options, length, parents in data_list:

2212

2314

def get_build_details(self, version_ids):

2213

2315

"""Get the method, index_memo and compression parent for version_ids.

2214

2316

2317

Ghosts are omitted from the result.

2318

2215

2319

:param version_ids: An iterable of version_ids.

2216

:return: A dict of version_id:(method, index_memo, compression_parent).

2320

:return: A dict of version_id:(index_memo, compression_parent,

2321

parents, record_details).

2322

index_memo

2323

opaque structure to pass to read_records to extract the raw

2324

data

2325

compression_parent

2326

Content that this record is built upon, may be None

2327

parents

2328

Logical parents of this node

2329

record_details

2330

extra information about the content which needs to be passed to

2331

Factory.parse_record

2217

2332

"""

2218

2333

result = {}

2219

2334

for version_id in version_ids:

2220

method = self.get_method(version_id)

2335

try:

2336

method = self.get_method(version_id)

2337

except errors.RevisionNotPresent:

2338

# ghosts are omitted

2339

continue

2340

parent_ids = self.get_parents_with_ghosts(version_id)

2341

noeol = ('no-eol' in self.get_options(version_id))

2221

2342

if method == 'fulltext':

2222

2343

compression_parent = None

2223

2344

else:

2224

compression_parent = self.get_parents_with_ghosts(version_id)[0]

2345

compression_parent = parent_ids[0]

2225

2346

index_memo = self.get_position(version_id)

2226

result[version_id] = (method, index_memo, compression_parent)

2347

result[version_id] = (index_memo, compression_parent,

2348

parent_ids, (method, noeol))

2227

2349

return result

2228

2350

2229

2351

def get_method(self, version_id):

2233

2355

except KeyError:

2234

2356

# Strictly speaking this should check in the backing knit, but

2235

2357

# until we have a test to discriminate, this will do.

2236

return 'fulltext'

2358

return self.backing_index.get_method(version_id)

2237

2359

if 'fulltext' in options:

2238

2360

return 'fulltext'

2239

2361

elif 'line-delta' in options:

2246

2368

2247

2369

e.g. ['foo', 'bar']

2248

2370

"""

2249

return self._by_version[version_id][0]

2371

try:

2372

return self._by_version[version_id][0]

2373

except KeyError:

2374

return self.backing_index.get_options(version_id)

2250

2375

2251

2376

def get_parents_with_ghosts(self, version_id):

2252

2377

"""Return parents of specified version with ghosts."""

2253

return self._by_version[version_id][2]

2378

try:

2379

return self._by_version[version_id][2]

2380

except KeyError:

2381

return self.backing_index.get_parents_with_ghosts(version_id)

2254

2382

2255

2383

def get_position(self, version_id):

2256

2384

"""Return details needed to access the version.

2735

2863

It will work for knits with cached annotations, but this is not

2736

2864

recommended.

2737

2865

"""

2738

ancestry = knit.get_ancestry(revision_id)

2739

fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))

2740

annotations = {}

2741

for candidate in ancestry:

2742

if candidate in annotations:

2743

continue

2744

parents = knit.get_parents(candidate)

2745

if len(parents) == 0:

2746

blocks = None

2747

elif knit._index.get_method(candidate) != 'line-delta':

2748

blocks = None

2866

annotator = _KnitAnnotator(knit)

2867

return iter(annotator.annotate(revision_id))

2868

2869

2870

class _KnitAnnotator(object):

2871

"""Build up the annotations for a text."""

2872

2873

def __init__(self, knit):

2874

self._knit = knit

2875

2876

# Content objects, differs from fulltexts because of how final newlines

2877

# are treated by knits. the content objects here will always have a

2878

# final newline

2879

self._fulltext_contents = {}

2880

2881

# Annotated lines of specific revisions

2882

self._annotated_lines = {}

2883

2884

# Track the raw data for nodes that we could not process yet.

2885

# This maps the revision_id of the base to a list of children that will

2886

# annotated from it.

2887

self._pending_children = {}

2888

2889

# Nodes which cannot be extracted

2890

self._ghosts = set()

2891

2892

# Track how many children this node has, so we know if we need to keep

2893

# it

2894

self._annotate_children = {}

2895

self._compression_children = {}

2896

2897

self._all_build_details = {}

2898

# The children => parent revision_id graph

2899

self._revision_id_graph = {}

2900

2901

self._heads_provider = None

2902

2903

self._nodes_to_keep_annotations = set()

2904

self._generations_until_keep = 100

2905

2906

def set_generations_until_keep(self, value):

2907

"""Set the number of generations before caching a node.

2908

2909

Setting this to -1 will cache every merge node, setting this higher

2910

will cache fewer nodes.

2911

"""

2912

self._generations_until_keep = value

2913

2914

def _add_fulltext_content(self, revision_id, content_obj):

2915

self._fulltext_contents[revision_id] = content_obj

2916

# TODO: jam 20080305 It might be good to check the sha1digest here

2917

return content_obj.text()

2918

2919

def _check_parents(self, child, nodes_to_annotate):

2920

"""Check if all parents have been processed.

2921

2922

:param child: A tuple of (rev_id, parents, raw_content)

2923

:param nodes_to_annotate: If child is ready, add it to

2924

nodes_to_annotate, otherwise put it back in self._pending_children

2925

"""

2926

for parent_id in child[1]:

2927

if (parent_id not in self._annotated_lines):

2928

# This parent is present, but another parent is missing

2929

self._pending_children.setdefault(parent_id,

2930

[]).append(child)

2931

break

2749

2932

else:

2750

parent, sha1, noeol, delta = knit.get_delta(candidate)

2751

blocks = KnitContent.get_line_delta_blocks(delta,

2752

fulltext[parents[0]], fulltext[candidate])

2753

annotations[candidate] = list(annotate.reannotate([annotations[p]

2754

for p in parents], fulltext[candidate], candidate, blocks))

2755

return iter(annotations[revision_id])

2933

# This one is ready to be processed

2934

nodes_to_annotate.append(child)

2935

2936

def _add_annotation(self, revision_id, fulltext, parent_ids,

2937

left_matching_blocks=None):

2938

"""Add an annotation entry.

2939

2940

All parents should already have been annotated.

2941

:return: A list of children that now have their parents satisfied.

2942

"""

2943

a = self._annotated_lines

2944

annotated_parent_lines = [a[p] for p in parent_ids]

2945

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

2946

fulltext, revision_id, left_matching_blocks,

2947

heads_provider=self._get_heads_provider()))

2948

self._annotated_lines[revision_id] = annotated_lines

2949

for p in parent_ids:

2950

ann_children = self._annotate_children[p]

2951

ann_children.remove(revision_id)

2952

if (not ann_children

2953

and p not in self._nodes_to_keep_annotations):

2954

del self._annotated_lines[p]

2955

del self._all_build_details[p]

2956

if p in self._fulltext_contents:

2957

del self._fulltext_contents[p]

2958

# Now that we've added this one, see if there are any pending

2959

# deltas to be done, certainly this parent is finished

2960

nodes_to_annotate = []

2961

for child in self._pending_children.pop(revision_id, []):

2962

self._check_parents(child, nodes_to_annotate)

2963

return nodes_to_annotate

2964

2965

def _get_build_graph(self, revision_id):

2966

"""Get the graphs for building texts and annotations.

2967

2968

The data you need for creating a full text may be different than the

2969

data you need to annotate that text. (At a minimum, you need both

2970

parents to create an annotation, but only need 1 parent to generate the

2971

fulltext.)

2972

2973

:return: A list of (revision_id, index_memo) records, suitable for

2974

passing to read_records_iter to start reading in the raw data fro/

2975

the pack file.

2976

"""

2977

if revision_id in self._annotated_lines:

2978

# Nothing to do

2979

return []

2980

pending = set([revision_id])

2981

records = []

2982

generation = 0

2983

kept_generation = 0

2984

while pending:

2985

# get all pending nodes

2986

generation += 1

2987

this_iteration = pending

2988

build_details = self._knit._index.get_build_details(this_iteration)

2989

self._all_build_details.update(build_details)

2990

# new_nodes = self._knit._index._get_entries(this_iteration)

2991

pending = set()

2992

for rev_id, details in build_details.iteritems():

2993

(index_memo, compression_parent, parents,

2994

record_details) = details

2995

self._revision_id_graph[rev_id] = parents

2996

records.append((rev_id, index_memo))

2997

# Do we actually need to check _annotated_lines?

2998

pending.update(p for p in parents

2999

if p not in self._all_build_details)

3000

if compression_parent:

3001

self._compression_children.setdefault(compression_parent,

3002

[]).append(rev_id)

3003

if parents:

3004

for parent in parents:

3005

self._annotate_children.setdefault(parent,

3006

[]).append(rev_id)

3007

num_gens = generation - kept_generation

3008

if ((num_gens >= self._generations_until_keep)

3009

and len(parents) > 1):

3010

kept_generation = generation

3011

self._nodes_to_keep_annotations.add(rev_id)

3012

3013

missing_versions = this_iteration.difference(build_details.keys())

3014

self._ghosts.update(missing_versions)

3015

for missing_version in missing_versions:

3016

# add a key, no parents

3017

self._revision_id_graph[missing_version] = ()

3018

pending.discard(missing_version) # don't look for it

3019

# XXX: This should probably be a real exception, as it is a data

3020

# inconsistency

3021

assert not self._ghosts.intersection(self._compression_children), \

3022

"We cannot have nodes which have a compression parent of a ghost."

3023

# Cleanout anything that depends on a ghost so that we don't wait for

3024

# the ghost to show up

3025

for node in self._ghosts:

3026

if node in self._annotate_children:

3027

# We won't be building this node

3028

del self._annotate_children[node]

3029

# Generally we will want to read the records in reverse order, because

3030

# we find the parent nodes after the children

3031

records.reverse()

3032

return records

3033

3034

def _annotate_records(self, records):

3035

"""Build the annotations for the listed records."""

3036

# We iterate in the order read, rather than a strict order requested

3037

# However, process what we can, and put off to the side things that

3038

# still need parents, cleaning them up when those parents are

3039

# processed.

3040

for (rev_id, record,

3041

digest) in self._knit._data.read_records_iter(records):

3042

if rev_id in self._annotated_lines:

3043

continue

3044

parent_ids = self._revision_id_graph[rev_id]

3045

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3046

details = self._all_build_details[rev_id]

3047

(index_memo, compression_parent, parents,

3048

record_details) = details

3049

nodes_to_annotate = []

3050

# TODO: Remove the punning between compression parents, and

3051

# parent_ids, we should be able to do this without assuming

3052

# the build order

3053

if len(parent_ids) == 0:

3054

# There are no parents for this node, so just add it

3055

# TODO: This probably needs to be decoupled

3056

assert compression_parent is None

3057

fulltext_content, delta = self._knit.factory.parse_record(

3058

rev_id, record, record_details, None)

3059

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3060

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3061

parent_ids, left_matching_blocks=None))

3062

else:

3063

child = (rev_id, parent_ids, record)

3064

# Check if all the parents are present

3065

self._check_parents(child, nodes_to_annotate)

3066

while nodes_to_annotate:

3067

# Should we use a queue here instead of a stack?

3068

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3069

(index_memo, compression_parent, parents,

3070

record_details) = self._all_build_details[rev_id]

3071

if compression_parent is not None:

3072

comp_children = self._compression_children[compression_parent]

3073

assert rev_id in comp_children

3074

# If there is only 1 child, it is safe to reuse this

3075

# content

3076

reuse_content = (len(comp_children) == 1

3077

and compression_parent not in

3078

self._nodes_to_keep_annotations)

3079

if reuse_content:

3080

# Remove it from the cache since it will be changing

3081

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3082

# Make sure to copy the fulltext since it might be

3083

# modified

3084

parent_fulltext = list(parent_fulltext_content.text())

3085

else:

3086

parent_fulltext_content = self._fulltext_contents[compression_parent]

3087

parent_fulltext = parent_fulltext_content.text()

3088

comp_children.remove(rev_id)

3089

fulltext_content, delta = self._knit.factory.parse_record(

3090

rev_id, record, record_details,

3091

parent_fulltext_content,

3092

copy_base_content=(not reuse_content))

3093

fulltext = self._add_fulltext_content(rev_id,

3094

fulltext_content)

3095

blocks = KnitContent.get_line_delta_blocks(delta,

3096

parent_fulltext, fulltext)

3097

else:

3098

fulltext_content = self._knit.factory.parse_fulltext(

3099

record, rev_id)

3100

fulltext = self._add_fulltext_content(rev_id,

3101

fulltext_content)

3102

blocks = None

3103

nodes_to_annotate.extend(

3104

self._add_annotation(rev_id, fulltext, parent_ids,

3105

left_matching_blocks=blocks))

3106

3107

def _get_heads_provider(self):

3108

"""Create a heads provider for resolving ancestry issues."""

3109

if self._heads_provider is not None:

3110

return self._heads_provider

3111

parent_provider = _mod_graph.DictParentsProvider(

3112

self._revision_id_graph)

3113

graph_obj = _mod_graph.Graph(parent_provider)

3114

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

3115

self._heads_provider = head_cache

3116

return head_cache

3117

3118

def annotate(self, revision_id):

3119

"""Return the annotated fulltext at the given revision.

3120

3121

:param revision_id: The revision id for this file

3122

"""

3123

records = self._get_build_graph(revision_id)

3124

if revision_id in self._ghosts:

3125

raise errors.RevisionNotPresent(revision_id, self._knit)

3126

self._annotate_records(records)

3127

return self._annotated_lines[revision_id]

2756

3128

2757

3129

2758

3130

try:

Older »