~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2006-12-01 19:41:16 UTC
mfrom: (2158 +trunk)
mto: This revision was merged to the branch mainline in revision 2159.
Revision ID: john@arbash-meinel.com-20061201194116-nvn5qhfxux5284jc

[merge] bzr.dev 2158

files added:
bzrlib/debug.py

bzrlib/generate_ids.py

bzrlib/help_topics.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_wsgi.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

doc/bazaar-vcs.org.kid

doc/http_smart_server.txt

tools/rst2prettyhtml.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patiencediff.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_http.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/memory.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/ui/__init__.py

bzrlib/workingtree.py

doc/centralized_workflow.txt

doc/configuration.txt

doc/index.txt

doc/specifying_revisions.txt

doc/tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

from bzrlib import (

cache_utf8,

errors,

patiencediff,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

118

119

120

def annotate_iter(self):

120

121

"""Yield tuples of (origin, text) for each content line."""

121

for origin, text in self._lines:

122

yield origin, text

122

return iter(self._lines)

123

124

def annotate(self):

125

"""Return a list of (origin, text) tuples."""

127

128

def line_delta_iter(self, new_lines):

129

"""Generate line-based delta from this content to new_lines."""

130

new_texts = [text for origin, text in new_lines._lines]

131

old_texts = [text for origin, text in self._lines]

130

new_texts = new_lines.text()

131

old_texts = self.text()

132

s = KnitSequenceMatcher(None, old_texts, new_texts)

133

for op in s.get_opcodes():

134

if op[0] == 'equal':

133

for tag, i1, i2, j1, j2 in s.get_opcodes():

134

if tag == 'equal':

135

continue

136

# ofrom oto length data

137

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

136

# ofrom, oto, length, data

137

yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

138

139

def line_delta(self, new_lines):

140

return list(self.line_delta_iter(new_lines))

308

self.writable = (access_mode == 'w')

309

self.delta = delta

310

311

self._max_delta_chain = 200

312

311

313

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

312

314

access_mode, create=create, file_mode=file_mode,

313

315

create_parent_dir=create_parent_dir, delay_create=delay_create,

321

323

return '%s(%s)' % (self.__class__.__name__,

322

324

self.transport.abspath(self.filename))

323

325

326

def _check_should_delta(self, first_parents):

327

"""Iterate back through the parent listing, looking for a fulltext.

328

329

This is used when we want to decide whether to add a delta or a new

330

fulltext. It searches for _max_delta_chain parents. When it finds a

331

fulltext parent, it sees if the total size of the deltas leading up to

332

it is large enough to indicate that we want a new full text anyway.

333

334

Return True if we should create a new delta, False if we should use a

335

full text.

336

"""

337

delta_size = 0

338

fulltext_size = None

339

delta_parents = first_parents

340

for count in xrange(self._max_delta_chain):

341

parent = delta_parents[0]

342

method = self._index.get_method(parent)

343

pos, size = self._index.get_position(parent)

344

if method == 'fulltext':

345

fulltext_size = size

346

break

347

delta_size += size

348

delta_parents = self._index.get_parents(parent)

349

else:

350

# We couldn't find a fulltext, so we must create a new one

351

return False

352

353

return fulltext_size > delta_size

354

324

355

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

325

356

"""See VersionedFile._add_delta()."""

326

357

self._check_add(version_id, []) # should we check the lines ?

358

389

# To speed the extract of texts the delta chain is limited

359

390

# to a fixed number of deltas. This should minimize both

360

391

# I/O and the time spent applying deltas.

361

count = 0

362

delta_parents = [delta_parent]

363

while count < 25:

364

parent = delta_parents[0]

365

method = self._index.get_method(parent)

366

if method == 'fulltext':

367

break

368

delta_parents = self._index.get_parents(parent)

369

count = count + 1

370

if method == 'line-delta':

371

# did not find a fulltext in the delta limit.

372

# just do a normal insertion.

392

# The window was changed to a maximum of 200 deltas, and a check was

393

# also added that the total compressed size of the deltas is

394

# smaller than the compressed size of the fulltext.

395

if not self._check_should_delta([delta_parent]):

396

# We don't want a delta here, just do a normal insertion.

373

397

return super(KnitVersionedFile, self)._add_delta(version_id,

374

398

parents,

375

399

delta_parent,

523

547

delta_seq = None

524

548

for parent_id in parents:

525

549

merge_content = self._get_content(parent_id, parent_texts)

526

seq = KnitSequenceMatcher(None, merge_content.text(), content.text())

550

seq = patiencediff.PatienceSequenceMatcher(

551

None, merge_content.text(), content.text())

527

552

if delta_seq is None:

528

553

# setup a delta seq to reuse.

529

554

delta_seq = seq

540

565

reference_content = self._get_content(parents[0], parent_texts)

541

566

new_texts = content.text()

542

567

old_texts = reference_content.text()

543

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

568

delta_seq = patiencediff.PatienceSequenceMatcher(

569

None, old_texts, new_texts)

544

570

return self._make_line_delta(delta_seq, content)

545

571

546

572

def _make_line_delta(self, delta_seq, new_content):

667

693

# To speed the extract of texts the delta chain is limited

668

694

# to a fixed number of deltas. This should minimize both

669

695

# I/O and the time spent applying deltas.

670

count = 0

671

delta_parents = present_parents

672

while count < 25:

673

parent = delta_parents[0]

674

method = self._index.get_method(parent)

675

if method == 'fulltext':

676

break

677

delta_parents = self._index.get_parents(parent)

678

count = count + 1

679

if method == 'line-delta':

680

delta = False

696

delta = self._check_should_delta(present_parents)

681

697

682

698

lines = self.factory.make(lines, version_id)

683

699

if delta or (self.factory.annotated and len(present_parents) > 0):

824

840

data_pos, length = self._index.get_position(version_id)

825

841

version_id_records.append((version_id, data_pos, length))

826

842

827

count = 0

828

843

total = len(version_id_records)

829

pb.update('Walking content.', count, total)

830

for version_id, data, sha_value in \

831

self._data.read_records_iter(version_id_records):

832

pb.update('Walking content.', count, total)

844

for version_idx, (version_id, data, sha_value) in \

845

enumerate(self._data.read_records_iter(version_id_records)):

846

pb.update('Walking content.', version_idx, total)

833

847

method = self._index.get_method(version_id)

834

848

version_idx = self._index.lookup(version_id)

835

849

assert method in ('fulltext', 'line-delta')

842

856

for start, end, count, lines in delta:

843

857

for origin, line in lines:

844

858

yield line

845

count +=1

846

859

pb.update('Walking content.', total, total)

847

860

848

861

def num_versions(self):

1254

1267

"""

1255

1268

lines = []

1256

1269

encode_utf8 = cache_utf8.encode

1257

for version_id, options, pos, size, parents in versions:

1258

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1259

','.join(options),

1260

pos,

1261

size,

1262

self._version_list_to_index(parents))

1263

assert isinstance(line, str), \

1264

'content must be utf-8 encoded: %r' % (line,)

1265

lines.append(line)

1266

if not self._need_to_create:

1267

self._transport.append_bytes(self._filename, ''.join(lines))

1268

else:

1269

sio = StringIO()

1270

sio.write(self.HEADER)

1271

sio.writelines(lines)

1272

sio.seek(0)

1273

self._transport.put_file_non_atomic(self._filename, sio,

1274

create_parent_dir=self._create_parent_dir,

1275

mode=self._file_mode,

1276

dir_mode=self._dir_mode)

1277

self._need_to_create = False

1278

1279

# cache after writing, so that a failed write leads to missing cache

1280

# entries not extra ones. XXX TODO: RBC 20060502 in the event of a

1281

# failure, reload the index or flush it or some such, to prevent

1282

# writing records that did complete twice.

1283

for version_id, options, pos, size, parents in versions:

1284

self._cache_version(version_id, options, pos, size, parents)

1285

1270

orig_history = self._history[:]

1271

orig_cache = self._cache.copy()

1272

1273

try:

1274

for version_id, options, pos, size, parents in versions:

1275

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1276

','.join(options),

1277

pos,

1278

size,

1279

self._version_list_to_index(parents))

1280

assert isinstance(line, str), \

1281

'content must be utf-8 encoded: %r' % (line,)

1282

lines.append(line)

1283

self._cache_version(version_id, options, pos, size, parents)

1284

if not self._need_to_create:

1285

self._transport.append_bytes(self._filename, ''.join(lines))

1286

else:

1287

sio = StringIO()

1288

sio.write(self.HEADER)

1289

sio.writelines(lines)

1290

sio.seek(0)

1291

self._transport.put_file_non_atomic(self._filename, sio,

1292

create_parent_dir=self._create_parent_dir,

1293

mode=self._file_mode,

1294

dir_mode=self._dir_mode)

1295

self._need_to_create = False

1296

except:

1297

# If any problems happen, restore the original values and re-raise

1298

self._history = orig_history

1299

self._cache = orig_cache

1300

raise

1301

1286

1302

def has_version(self, version_id):

1287

1303

"""True if the version is in the index."""

1288

1304

return (version_id in self._cache)

Older »