~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Brad Crittenden
Date: 2007-02-26 20:56:10 UTC
mfrom: (2300 +trunk)
mto: (2293.1.5 bzr.dev)
mto: This revision was merged to the branch mainline in revision 2311.
Revision ID: brad.crittenden@canonical.com-20070226205610-44oatbxrjjz3ajwy

merge

files added:
bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

files modified:
NEWS

bzrlib/add.py

bzrlib/branch.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v08.py

bzrlib/cache_utf8.py

bzrlib/generate_ids.py

bzrlib/identitymap.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned/__init__.py

bzrlib/testament.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/tree.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/xml5.py

bzrlib/xml6.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

ui,

156

157

class _KnitFactory(object):

157

158

"""Base factory for creating content objects."""

158

159

def make(self, lines, version):

160

def make(self, lines, version_id):

160

161

num_lines = len(lines)

161

return KnitContent(zip([version] * num_lines, lines))

162

return KnitContent(zip([version_id] * num_lines, lines))

162

163

164

165

class KnitAnnotateFactory(_KnitFactory):

166

167

168

annotated = True

168

169

def parse_fulltext(self, content, version):

170

def parse_fulltext(self, content, version_id):

170

171

"""Convert fulltext to internal representation

171

172

173

fulltext content is of the format

174

175

internal representation is of the format:

175

176

(revid, plaintext)

176

177

"""

177

decode_utf8 = cache_utf8.decode

178

lines = []

179

for line in content:

180

origin, text = line.split(' ', 1)

181

lines.append((decode_utf8(origin), text))

178

# TODO: jam 20070209 The tests expect this to be returned as tuples,

179

# but the code itself doesn't really depend on that.

180

# Figure out a way to not require the overhead of turning the

181

# list back into tuples.

182

lines = [tuple(line.split(' ', 1)) for line in content]

182

183

return KnitContent(lines)

183

184

185

def parse_line_delta_iter(self, lines):

185

186

return iter(self.parse_line_delta(lines))

186

187

def parse_line_delta(self, lines, version):

188

def parse_line_delta(self, lines, version_id):

188

189

"""Convert a line based delta into internal representation.

189

190

191

line delta is in the form of:

194

195

internal representation is

195

196

(start, end, count, [1..count tuples (revid, newline)])

196

197

"""

197

decode_utf8 = cache_utf8.decode

198

result = []

199

lines = iter(lines)

200

next = lines.next

201

202

cache = {}

203

def cache_and_return(line):

204

origin, text = line.split(' ', 1)

205

return cache.setdefault(origin, origin), text

206

201

207

# walk through the lines parsing.

202

208

for header in lines:

203

209

start, end, count = [int(n) for n in header.split(',')]

204

contents = []

205

remaining = count

206

while remaining:

207

origin, text = next().split(' ', 1)

208

remaining -= 1

209

contents.append((decode_utf8(origin), text))

210

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

210

211

result.append((start, end, count, contents))

211

212

return result

212

213

234

235

236

see parse_fulltext which this inverts.

236

237

"""

237

encode_utf8 = cache_utf8.encode

238

return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

238

# TODO: jam 20070209 We only do the caching thing to make sure that

239

# the origin is a valid utf-8 line; eventually we could remove it

240

return ['%s %s' % (o, t) for o, t in content._lines]

239

241

240

242

def lower_line_delta(self, delta):

241

243

"""convert a delta into a serializable form.

242

244

243

245

See parse_line_delta which this inverts.

244

246

"""

245

encode_utf8 = cache_utf8.encode

247

# TODO: jam 20070209 We only do the caching thing to make sure that

248

# the origin is a valid utf-8 line; eventually we could remove it

246

249

out = []

247

250

for start, end, c, lines in delta:

248

251

out.append('%d,%d,%d\n' % (start, end, c))

249

out.extend(encode_utf8(origin) + ' ' + text

252

out.extend(origin + ' ' + text

250

253

for origin, text in lines)

251

254

return out

252

255

256

259

257

260

annotated = False

258

261

259

def parse_fulltext(self, content, version):

262

def parse_fulltext(self, content, version_id):

260

263

"""This parses an unannotated fulltext.

261

264

262

265

Note that this is not a noop - the internal representation

263

266

has (versionid, line) - it's just a constant versionid.

264

267

"""

265

return self.make(content, version)

268

return self.make(content, version_id)

266

269

267

def parse_line_delta_iter(self, lines, version):

270

def parse_line_delta_iter(self, lines, version_id):

268

271

cur = 0

269

272

num_lines = len(lines)

270

273

while cur < num_lines:

271

274

header = lines[cur]

272

275

cur += 1

273

276

start, end, c = [int(n) for n in header.split(',')]

274

yield start, end, c, zip([version] * c, lines[cur:cur+c])

277

yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

275

278

cur += c

276

279

277

def parse_line_delta(self, lines, version):

278

return list(self.parse_line_delta_iter(lines, version))

280

def parse_line_delta(self, lines, version_id):

281

return list(self.parse_line_delta_iter(lines, version_id))

279

282

280

283

def get_fulltext_content(self, lines):

281

284

"""Extract just the content lines from a fulltext."""

500

503

return KnitVersionedFile(name, transport, factory=self.factory,

501

504

delta=self.delta, create=True)

502

505

503

def _fix_parents(self, version, new_parents):

506

def _fix_parents(self, version_id, new_parents):

504

507

"""Fix the parents list for version.

505

508

506

509

This is done by appending a new version to the index

508

511

the parents list must be a superset of the current

509

512

list.

510

513

"""

511

current_values = self._index._cache[version]

514

current_values = self._index._cache[version_id]

512

515

assert set(current_values[4]).difference(set(new_parents)) == set()

513

self._index.add_version(version,

516

self._index.add_version(version_id,

514

517

current_values[1],

515

518

current_values[2],

516

519

current_values[3],

518

521

519

522

def get_delta(self, version_id):

520

523

"""Get a delta for constructing version from some other version."""

524

version_id = osutils.safe_revision_id(version_id)

521

525

self.check_not_reserved_id(version_id)

522

526

if not self.has_version(version_id):

523

527

raise RevisionNotPresent(version_id, self.filename)

529

533

parent = None

530

534

data_pos, data_size = self._index.get_position(version_id)

531

535

data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]

532

version_idx = self._index.lookup(version_id)

533

536

noeol = 'no-eol' in self._index.get_options(version_id)

534

537

if 'fulltext' == self._index.get_method(version_id):

535

new_content = self.factory.parse_fulltext(data, version_idx)

538

new_content = self.factory.parse_fulltext(data, version_id)

536

539

if parent is not None:

537

540

reference_content = self._get_content(parent)

538

541

old_texts = reference_content.text()

542

545

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

543

546

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

544

547

else:

545

delta = self.factory.parse_line_delta(data, version_idx)

548

delta = self.factory.parse_line_delta(data, version_id)

546

549

return parent, sha1, noeol, delta

547

550

548

551

def get_graph_with_ghosts(self):

552

555

553

556

def get_sha1(self, version_id):

554

557

"""See VersionedFile.get_sha1()."""

558

version_id = osutils.safe_revision_id(version_id)

555

559

record_map = self._get_record_map([version_id])

556

560

method, content, digest, next = record_map[version_id]

557

561

return digest

563

567

564

568

def has_ghost(self, version_id):

565

569

"""True if there is a ghost reference in the file to version_id."""

570

version_id = osutils.safe_revision_id(version_id)

566

571

# maybe we have it

567

572

if self.has_version(version_id):

568

573

return False

581

586

582

587

def has_version(self, version_id):

583

588

"""See VersionedFile.has_version."""

589

version_id = osutils.safe_revision_id(version_id)

584

590

return self._index.has_version(version_id)

585

591

586

592

__contains__ = has_version

738

744

# I/O and the time spent applying deltas.

739

745

delta = self._check_should_delta(present_parents)

740

746

747

assert isinstance(version_id, str)

741

748

lines = self.factory.make(lines, version_id)

742

749

if delta or (self.factory.annotated and len(present_parents) > 0):

743

750

# Merge annotations from parent texts if needed.

799

806

800

807

def get_line_list(self, version_ids):

801

808

"""Return the texts of listed versions as a list of strings."""

809

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

802

810

for version_id in version_ids:

803

811

self.check_not_reserved_id(version_id)

804

812

text_map, content_map = self._get_content_maps(version_ids)

834

842

if component_id in content_map:

835

843

content = content_map[component_id]

836

844

else:

837

version_idx = self._index.lookup(component_id)

838

845

if method == 'fulltext':

839

846

assert content is None

840

content = self.factory.parse_fulltext(data, version_idx)

847

content = self.factory.parse_fulltext(data, version_id)

841

848

elif method == 'line-delta':

842

delta = self.factory.parse_line_delta(data, version_idx)

849

delta = self.factory.parse_line_delta(data, version_id)

843

850

content = content.copy()

844

851

content._lines = self._apply_delta(content._lines,

845

852

delta)

865

872

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

866

873

if version_ids is None:

867

874

version_ids = self.versions()

875

else:

876

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

868

877

if pb is None:

869

878

pb = progress.DummyProgress()

870

879

# we don't care about inclusions, the caller cares.

887

896

enumerate(self._data.read_records_iter(version_id_records)):

888

897

pb.update('Walking content.', version_idx, total)

889

898

method = self._index.get_method(version_id)

890

version_idx = self._index.lookup(version_id)

891

899

892

900

assert method in ('fulltext', 'line-delta')

893

901

if method == 'fulltext':

907

915

908

916

def annotate_iter(self, version_id):

909

917

"""See VersionedFile.annotate_iter."""

918

version_id = osutils.safe_revision_id(version_id)

910

919

content = self._get_content(version_id)

911

920

for origin, text in content.annotate_iter():

912

921

yield origin, text

916

925

# perf notes:

917

926

# optimism counts!

918

927

# 52554 calls in 1264 872 internal down from 3674

928

version_id = osutils.safe_revision_id(version_id)

919

929

try:

920

930

return self._index.get_parents(version_id)

921

931

except KeyError:

923

933

924

934

def get_parents_with_ghosts(self, version_id):

925

935

"""See VersionedFile.get_parents."""

936

version_id = osutils.safe_revision_id(version_id)

926

937

try:

927

938

return self._index.get_parents_with_ghosts(version_id)

928

939

except KeyError:

934

945

versions = [versions]

935

946

if not versions:

936

947

return []

948

versions = [osutils.safe_revision_id(v) for v in versions]

937

949

return self._index.get_ancestry(versions)

938

950

939

951

def get_ancestry_with_ghosts(self, versions):

942

954

versions = [versions]

943

955

if not versions:

944

956

return []

957

versions = [osutils.safe_revision_id(v) for v in versions]

945

958

return self._index.get_ancestry_with_ghosts(versions)

946

959

947

960

#@deprecated_method(zero_eight)

967

980

968

981

def plan_merge(self, ver_a, ver_b):

969

982

"""See VersionedFile.plan_merge."""

983

ver_a = osutils.safe_revision_id(ver_a)

984

ver_b = osutils.safe_revision_id(ver_b)

970

985

ancestors_b = set(self.get_ancestry(ver_b))

971

986

def status_a(revision, text):

972

987

if revision in ancestors_b:

1150

1165

def _load_data(self, fp):

1151

1166

cache = self._cache

1152

1167

history = self._history

1153

decode_utf8 = cache_utf8.decode

1154

1168

1155

1169

self.check_header(fp)

1156

1170

# readlines reads the whole file at once:

1175

1189

for value in rec[4:-1]:

1176

1190

if value[0] == '.':

1177

1191

# uncompressed reference

1178

parents.append(decode_utf8(value[1:]))

1192

parent_id = value[1:]

1179

1193

else:

1180

parents.append(history[int(value)])

1194

parent_id = history[int(value)]

1195

parents.append(parent_id)

1181

1196

1182

1197

version_id, options, pos, size = rec[:4]

1183

version_id = decode_utf8(version_id)

1198

version_id = version_id

1184

1199

1185

1200

# See self._cache_version

1186

1201

# only want the _history index to reference the 1st

1256

1271

return self._cache[version_id][5]

1257

1272

1258

1273

def _version_list_to_index(self, versions):

1259

encode_utf8 = cache_utf8.encode

1260

1274

result_list = []

1261

1275

cache = self._cache

1262

1276

for version in versions:

1265

1279

result_list.append(str(cache[version][5]))

1266

1280

# -- end lookup () --

1267

1281

else:

1268

result_list.append('.' + encode_utf8(version))

1282

result_list.append('.' + version)

1269

1283

return ' '.join(result_list)

1270

1284

1271

1285

def add_version(self, version_id, options, pos, size, parents):

1279

1293

(version_id, options, pos, size, parents).

1280

1294

"""

1281

1295

lines = []

1282

encode_utf8 = cache_utf8.encode

1283

1296

orig_history = self._history[:]

1284

1297

orig_cache = self._cache.copy()

1285

1298

1286

1299

try:

1287

1300

for version_id, options, pos, size, parents in versions:

1288

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1301

line = "\n%s %s %s %s %s :" % (version_id,

1289

1302

','.join(options),

1290

1303

pos,

1291

1304

size,

1399

1412

sio = StringIO()

1400

1413

data_file = GzipFile(None, mode='wb', fileobj=sio)

1401

1414

1402

version_id_utf8 = cache_utf8.encode(version_id)

1415

assert isinstance(version_id, str)

1403

1416

data_file.writelines(chain(

1404

["version %s %d %s\n" % (version_id_utf8,

1417

["version %s %d %s\n" % (version_id,

1405

1418

len(lines),

1406

1419

digest)],

1407

1420

lines,

1408

["end %s\n" % version_id_utf8]))

1421

["end %s\n" % version_id]))

1409

1422

data_file.close()

1410

1423

length= sio.tell()

1411

1424

1461

1474

if len(rec) != 4:

1462

1475

raise KnitCorrupt(self._filename,

1463

1476

'unexpected number of elements in record header')

1464

if cache_utf8.decode(rec[1]) != version_id:

1477

if rec[1] != version_id:

1465

1478

raise KnitCorrupt(self._filename,

1466

1479

'unexpected version, wanted %r, got %r'

1467

1480

% (version_id, rec[1]))

Older »