~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2007-02-10 02:48:43 UTC
mto: This revision was merged to the branch mainline in revision 2294.
Revision ID: john@arbash-meinel.com-20070210024843-oz2ed16luwjca48h

Change the APIs for VersionedFile, Store, and some of Repository to use utf-8 encoded ids
Get tests to pass again.

files modified:
bzrlib/cache_utf8.py

bzrlib/knit.py

bzrlib/repository.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_versionedfile.py

bzrlib/versionedfile.py

bzrlib/xml6.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

ui,

156

157

class _KnitFactory(object):

157

158

"""Base factory for creating content objects."""

158

159

def make(self, lines, version):

160

def make(self, lines, version_id):

160

161

num_lines = len(lines)

161

return KnitContent(zip([version] * num_lines, lines))

162

return KnitContent(zip([version_id] * num_lines, lines))

162

163

164

165

class KnitAnnotateFactory(_KnitFactory):

166

167

168

annotated = True

168

169

def parse_fulltext(self, content, version):

170

def parse_fulltext(self, content, version_id):

170

171

"""Convert fulltext to internal representation

171

172

173

fulltext content is of the format

174

175

internal representation is of the format:

175

176

(revid, plaintext)

176

177

"""

177

lines = [line.split(' ', 1) for line in content]

178

# TODO: jam 20070209 The tests expect this to be returned as tuples,

179

# but the code itself doesn't really depend on that.

180

# Figure out a way to not require the overhead of turning the

181

# list back into tuples.

182

lines = [tuple(line.split(' ', 1)) for line in content]

178

183

return KnitContent(lines)

179

184

180

185

def parse_line_delta_iter(self, lines):

181

186

return iter(self.parse_line_delta(lines))

182

187

183

def parse_line_delta(self, lines, version):

188

def parse_line_delta(self, lines, version_id):

184

189

"""Convert a line based delta into internal representation.

185

190

186

191

line delta is in the form of:

197

202

# walk through the lines parsing.

198

203

for header in lines:

199

204

start, end, count = [int(n) for n in header.split(',')]

200

contents = [next().split(' ', 1) for i in xrange(count)]

205

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

201

206

result.append((start, end, count, contents))

202

207

return result

203

208

225

230

226

231

see parse_fulltext which this inverts.

227

232

"""

228

encode_utf8 = cache_utf8.encode

229

return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

233

# TODO: jam 20070209 We only do the caching thing to make sure that

234

# the origin is a valid utf-8 line, eventually we could remove it

235

get_cached_utf8 = cache_utf8.get_cached_utf8

236

return ['%s %s' % (get_cached_utf8(o), t) for o, t in content._lines]

230

237

231

238

def lower_line_delta(self, delta):

232

239

"""convert a delta into a serializable form.

233

240

234

241

See parse_line_delta which this inverts.

235

242

"""

236

encode_utf8 = cache_utf8.encode

243

# TODO: jam 20070209 We only do the caching thing to make sure that

244

# the origin is a valid utf-8 line, eventually we could remove it

245

get_cached_utf8 = cache_utf8.get_cached_utf8

237

246

out = []

238

247

for start, end, c, lines in delta:

239

248

out.append('%d,%d,%d\n' % (start, end, c))

240

out.extend(encode_utf8(origin) + ' ' + text

249

out.extend(get_cached_utf8(origin) + ' ' + text

241

250

for origin, text in lines)

242

251

return out

243

252

247

256

248

257

annotated = False

249

258

250

def parse_fulltext(self, content, version):

259

def parse_fulltext(self, content, version_id):

251

260

"""This parses an unannotated fulltext.

252

261

253

262

Note that this is not a noop - the internal representation

254

263

has (versionid, line) - it's just a constant versionid.

255

264

"""

256

return self.make(content, version)

265

return self.make(content, version_id)

257

266

258

def parse_line_delta_iter(self, lines, version):

267

def parse_line_delta_iter(self, lines, version_id):

259

268

cur = 0

260

269

num_lines = len(lines)

261

270

while cur < num_lines:

262

271

header = lines[cur]

263

272

cur += 1

264

273

start, end, c = [int(n) for n in header.split(',')]

265

yield start, end, c, zip([version] * c, lines[cur:cur+c])

274

yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

266

275

cur += c

267

276

268

def parse_line_delta(self, lines, version):

269

return list(self.parse_line_delta_iter(lines, version))

277

def parse_line_delta(self, lines, version_id):

278

return list(self.parse_line_delta_iter(lines, version_id))

270

279

271

280

def get_fulltext_content(self, lines):

272

281

"""Extract just the content lines from a fulltext."""

491

500

return KnitVersionedFile(name, transport, factory=self.factory,

492

501

delta=self.delta, create=True)

493

502

494

def _fix_parents(self, version, new_parents):

503

def _fix_parents(self, version_id, new_parents):

495

504

"""Fix the parents list for version.

496

505

497

506

This is done by appending a new version to the index

499

508

the parents list must be a superset of the current

500

509

list.

501

510

"""

502

current_values = self._index._cache[version]

511

current_values = self._index._cache[version_id]

503

512

assert set(current_values[4]).difference(set(new_parents)) == set()

504

self._index.add_version(version,

513

self._index.add_version(version_id,

505

514

current_values[1],

506

515

current_values[2],

507

516

current_values[3],

509

518

510

519

def get_delta(self, version_id):

511

520

"""Get a delta for constructing version from some other version."""

521

version_id = osutils.safe_revision_id(version_id)

512

522

self.check_not_reserved_id(version_id)

513

523

if not self.has_version(version_id):

514

524

raise RevisionNotPresent(version_id, self.filename)

520

530

parent = None

521

531

data_pos, data_size = self._index.get_position(version_id)

522

532

data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]

523

version_idx = self._index.lookup(version_id)

524

533

noeol = 'no-eol' in self._index.get_options(version_id)

525

534

if 'fulltext' == self._index.get_method(version_id):

526

new_content = self.factory.parse_fulltext(data, version_idx)

535

new_content = self.factory.parse_fulltext(data, version_id)

527

536

if parent is not None:

528

537

reference_content = self._get_content(parent)

529

538

old_texts = reference_content.text()

533

542

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

534

543

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

535

544

else:

536

delta = self.factory.parse_line_delta(data, version_idx)

545

delta = self.factory.parse_line_delta(data, version_id)

537

546

return parent, sha1, noeol, delta

538

547

539

548

def get_graph_with_ghosts(self):

543

552

544

553

def get_sha1(self, version_id):

545

554

"""See VersionedFile.get_sha1()."""

555

version_id = osutils.safe_revision_id(version_id)

546

556

record_map = self._get_record_map([version_id])

547

557

method, content, digest, next = record_map[version_id]

548

558

return digest

554

564

555

565

def has_ghost(self, version_id):

556

566

"""True if there is a ghost reference in the file to version_id."""

567

version_id = osutils.safe_revision_id(version_id)

557

568

# maybe we have it

558

569

if self.has_version(version_id):

559

570

return False

572

583

573

584

def has_version(self, version_id):

574

585

"""See VersionedFile.has_version."""

586

version_id = osutils.safe_revision_id(version_id)

575

587

return self._index.has_version(version_id)

576

588

577

589

__contains__ = has_version

790

802

791

803

def get_line_list(self, version_ids):

792

804

"""Return the texts of listed versions as a list of strings."""

805

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

793

806

for version_id in version_ids:

794

807

self.check_not_reserved_id(version_id)

795

808

text_map, content_map = self._get_content_maps(version_ids)

825

838

if component_id in content_map:

826

839

content = content_map[component_id]

827

840

else:

828

version_idx = self._index.lookup(component_id)

829

841

if method == 'fulltext':

830

842

assert content is None

831

content = self.factory.parse_fulltext(data, version_idx)

843

content = self.factory.parse_fulltext(data, version_id)

832

844

elif method == 'line-delta':

833

delta = self.factory.parse_line_delta(data, version_idx)

845

delta = self.factory.parse_line_delta(data, version_id)

834

846

content = content.copy()

835

847

content._lines = self._apply_delta(content._lines,

836

848

delta)

856

868

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

857

869

if version_ids is None:

858

870

version_ids = self.versions()

871

else:

872

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

859

873

if pb is None:

860

874

pb = progress.DummyProgress()

861

875

# we don't care about inclusions, the caller cares.

878

892

enumerate(self._data.read_records_iter(version_id_records)):

879

893

pb.update('Walking content.', version_idx, total)

880

894

method = self._index.get_method(version_id)

881

version_idx = self._index.lookup(version_id)

882

895

883

896

assert method in ('fulltext', 'line-delta')

884

897

if method == 'fulltext':

898

911

899

912

def annotate_iter(self, version_id):

900

913

"""See VersionedFile.annotate_iter."""

914

version_id = osutils.safe_revision_id(version_id)

901

915

content = self._get_content(version_id)

902

916

for origin, text in content.annotate_iter():

903

917

yield origin, text

907

921

# perf notes:

908

922

# optimism counts!

909

923

# 52554 calls in 1264 872 internal down from 3674

924

version_id = osutils.safe_revision_id(version_id)

910

925

try:

911

926

return self._index.get_parents(version_id)

912

927

except KeyError:

914

929

915

930

def get_parents_with_ghosts(self, version_id):

916

931

"""See VersionedFile.get_parents."""

932

version_id = osutils.safe_revision_id(version_id)

917

933

try:

918

934

return self._index.get_parents_with_ghosts(version_id)

919

935

except KeyError:

925

941

versions = [versions]

926

942

if not versions:

927

943

return []

944

versions = [osutils.safe_revision_id(v) for v in versions]

928

945

return self._index.get_ancestry(versions)

929

946

930

947

def get_ancestry_with_ghosts(self, versions):

933

950

versions = [versions]

934

951

if not versions:

935

952

return []

953

versions = [osutils.safe_revision_id(v) for v in versions]

936

954

return self._index.get_ancestry_with_ghosts(versions)

937

955

938

956

#@deprecated_method(zero_eight)

958

976

959

977

def plan_merge(self, ver_a, ver_b):

960

978

"""See VersionedFile.plan_merge."""

979

ver_a = osutils.safe_revision_id(ver_a)

980

ver_b = osutils.safe_revision_id(ver_b)

961

981

ancestors_b = set(self.get_ancestry(ver_b))

962

982

def status_a(revision, text):

963

983

if revision in ancestors_b:

1121

1141

# so - wc -l of a knit index is != the number of unique names

1122

1142

# in the knit.

1123

1143

self._history = []

1124

decode_utf8 = cache_utf8.decode

1125

1144

pb = ui.ui_factory.nested_progress_bar()

1126

1145

try:

1127

1146

pb.update('read knit index', 0, 1)

1148

1167

def _load_data(self, fp):

1149

1168

cache = self._cache

1150

1169

history = self._history

1170

get_cached_utf8 = cache_utf8.get_cached_utf8

1151

1171

1152

1172

self.check_header(fp)

1153

1173

# readlines reads the whole file at once:

1172

1192

for value in rec[4:-1]:

1173

1193

if value[0] == '.':

1174

1194

# uncompressed reference

1175

parents.append(value[1:])

1195

parent_id = get_cached_utf8(value[1:])

1176

1196

else:

1177

parents.append(history[int(value)])

1197

parent_id = history[int(value)]

1198

parents.append(parent_id)

1178

1199

1179

1200

version_id, options, pos, size = rec[:4]

1201

version_id = get_cached_utf8(version_id)

1180

1202

1181

1203

# See self._cache_version

1182

1204

# only want the _history index to reference the 1st

1252

1274

return self._cache[version_id][5]

1253

1275

1254

1276

def _version_list_to_index(self, versions):

1255

encode_utf8 = cache_utf8.encode

1277

# TODO: jam 20070209 We only do the caching thing to make sure that

1278

# what we have in memory is already a proper utf-8 string

1279

# Eventually we should be able to write out the index without

1280

# doing any sort of encode step

1281

get_cached_utf8 = cache_utf8.get_cached_utf8

1256

1282

result_list = []

1257

1283

cache = self._cache

1258

1284

for version in versions:

1261

1287

result_list.append(str(cache[version][5]))

1262

1288

# -- end lookup () --

1263

1289

else:

1264

result_list.append('.' + encode_utf8(version))

1290

result_list.append('.' + get_cached_utf8(version))

1265

1291

return ' '.join(result_list)

1266

1292

1267

1293

def add_version(self, version_id, options, pos, size, parents):

1275

1301

(version_id, options, pos, size, parents).

1276

1302

"""

1277

1303

lines = []

1278

encode_utf8 = cache_utf8.encode

1304

# TODO: jam 20070209 get_cached_utf8 is just used to verify the

1305

# version_ids are indeed utf-8; eventually these calls can be

1306

# removed

1307

get_cached_utf8 = cache_utf8.get_cached_utf8

1279

1308

orig_history = self._history[:]

1280

1309

orig_cache = self._cache.copy()

1281

1310

1282

1311

try:

1283

1312

for version_id, options, pos, size, parents in versions:

1284

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1313

line = "\n%s %s %s %s %s :" % (get_cached_utf8(version_id),

1285

1314

','.join(options),

1286

1315

pos,

1287

1316

size,

1395

1424

sio = StringIO()

1396

1425

data_file = GzipFile(None, mode='wb', fileobj=sio)

1397

1426

1398

version_id_utf8 = cache_utf8.encode(version_id)

1427

version_id_utf8 = cache_utf8.get_cached_utf8(version_id)

1399

1428

data_file.writelines(chain(

1400

1429

["version %s %d %s\n" % (version_id_utf8,

1401

1430

len(lines),

1457

1486

if len(rec) != 4:

1458

1487

raise KnitCorrupt(self._filename,

1459

1488

'unexpected number of elements in record header')

1460

if cache_utf8.decode(rec[1]) != version_id:

1489

if rec[1] != version_id:

1461

1490

raise KnitCorrupt(self._filename,

1462

1491

'unexpected version, wanted %r, got %r'

1463

1492

% (version_id, rec[1]))

Older »