~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/__init__.py

Committer: Robert Collins
Date: 2006-07-29 08:43:38 UTC
mfrom: (1852.10.4 update compare_tree callers to use Tree.changes_from())
mto: (1852.12.4 status treats conflict as normal tree interface.)
mto: This revision was merged to the branch mainline in revision 1895.
Revision ID: robertc@robertcollins.net-20060729084338-b9ff039ed706ff32

Merge bzr.dev.

files modified:
HACKING

NEWS

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commit.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/intset.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/merge3.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugin.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/sign_my_commits.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_store.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/textinv.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/newinventory.py

generate_docs.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/__init__.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

164

return (scheme, username, password, host, port, path)

165

166

167

class _CoalescedOffset(object):

168

"""A data container for keeping track of coalesced offsets."""

169

170

__slots__ = ['start', 'length', 'ranges']

171

172

def __init__(self, start, length, ranges):

173

self.start = start

174

self.length = length

175

self.ranges = ranges

176

177

def __cmp__(self, other):

178

return cmp((self.start, self.length, self.ranges),

179

(other.start, other.length, other.ranges))

180

181

167

182

class Transport(object):

168

183

"""This class encapsulates methods for retrieving or putting a file

169

184

from/to a storage location.

176

191

as an argument (ie always iterate, never index)

177

192

"""

178

193

194

# implementations can override this if it is more efficient

195

# for them to combine larger read chunks together

196

_max_readv_combine = 50

197

# It is better to read this much more data in order, rather

198

# than doing another seek. Even for the local filesystem,

199

# there is a benefit in just reading.

200

# TODO: jam 20060714 Do some real benchmarking to figure out

201

# where the biggest benefit between combining reads and

202

# seeking is. Consider a runtime auto-tune.

203

_bytes_to_read_before_seek = 0

204

179

205

def __init__(self, base):

180

206

super(Transport, self).__init__()

181

207

self.base = base

353

379

:offsets: A list of (offset, size) tuples.

354

380

:return: A list or generator of (offset, data) tuples

355

381

"""

356

def do_combined_read(combined_offsets):

357

total_size = 0

358

for offset, size in combined_offsets:

359

total_size += size

360

mutter('readv coalesced %d reads.', len(combined_offsets))

361

offset = combined_offsets[0][0]

362

fp.seek(offset)

363

data = fp.read(total_size)

364

pos = 0

365

for offset, size in combined_offsets:

366

yield offset, data[pos:pos + size]

367

pos += size

382

if not offsets:

383

return

368

384

369

if not len(offsets):

370

return

371

385

fp = self.get(relpath)

372

pending_offsets = deque(offsets)

373

combined_offsets = []

374

while len(pending_offsets):

375

offset, size = pending_offsets.popleft()

376

if not combined_offsets:

377

combined_offsets = [[offset, size]]

386

return self._seek_and_read(fp, offsets)

387

388

def _seek_and_read(self, fp, offsets):

389

"""An implementation of readv that uses fp.seek and fp.read.

390

391

This uses _coalesce_offsets to issue larger reads and fewer seeks.

392

393

:param fp: A file-like object that supports seek() and read(size)

394

:param offsets: A list of offsets to be read from the given file.

395

:return: yield (pos, data) tuples for each request

396

"""

397

# We are going to iterate multiple times, we need a list

398

offsets = list(offsets)

399

sorted_offsets = sorted(offsets)

400

401

# turn the list of offsets into a stack

402

offset_stack = iter(offsets)

403

cur_offset_and_size = offset_stack.next()

404

coalesced = self._coalesce_offsets(sorted_offsets,

405

limit=self._max_readv_combine,

406

fudge_factor=self._bytes_to_read_before_seek)

407

408

# Cache the results, but only until they have been fulfilled

409

data_map = {}

410

for c_offset in coalesced:

411

# TODO: jam 20060724 it might be faster to not issue seek if

412

# we are already at the right location. This should be

413

# benchmarked.

414

fp.seek(c_offset.start)

415

data = fp.read(c_offset.length)

416

for suboffset, subsize in c_offset.ranges:

417

key = (c_offset.start+suboffset, subsize)

418

data_map[key] = data[suboffset:suboffset+subsize]

419

420

# Now that we've read some data, see if we can yield anything back

421

while cur_offset_and_size in data_map:

422

this_data = data_map.pop(cur_offset_and_size)

423

yield cur_offset_and_size[0], this_data

424

cur_offset_and_size = offset_stack.next()

425

426

@staticmethod

427

def _coalesce_offsets(offsets, limit, fudge_factor):

428

"""Yield coalesced offsets.

429

430

With a long list of neighboring requests, combine them

431

into a single large request, while retaining the original

432

offsets.

433

Turns [(15, 10), (25, 10)] => [(15, 20, [(0, 10), (10, 10)])]

434

435

:param offsets: A list of (start, length) pairs

436

:param limit: Only combine a maximum of this many pairs

437

Some transports penalize multiple reads more than

438

others, and sometimes it is better to return early.

439

0 means no limit

440

:param fudge_factor: All transports have some level of 'it is

441

better to read some more data and throw it away rather

442

than seek', so collapse if we are 'close enough'

443

:return: yield _CoalescedOffset objects, which have members for where

444

to start, how much to read, and how to split those

445

chunks back up

446

"""

447

last_end = None

448

cur = _CoalescedOffset(None, None, [])

449

450

for start, size in offsets:

451

end = start + size

452

if (last_end is not None

453

and start <= last_end + fudge_factor

454

and start >= cur.start

455

and (limit <= 0 or len(cur.ranges) < limit)):

456

cur.length = end - cur.start

457

cur.ranges.append((start-cur.start, size))

378

458

else:

379

if (len(combined_offsets) < 50 and

380

combined_offsets[-1][0] + combined_offsets[-1][1] == offset):

381

# compatible offset:

382

combined_offsets.append([offset, size])

383

else:

384

# incompatible, or over the threshold issue a read and yield

385

pending_offsets.appendleft((offset, size))

386

for result in do_combined_read(combined_offsets):

387

yield result

388

combined_offsets = []

389

# whatever is left is a single coalesced request

390

if len(combined_offsets):

391

for result in do_combined_read(combined_offsets):

392

yield result

459

if cur.start is not None:

460

yield cur

461

cur = _CoalescedOffset(start, size, [(0, size)])

462

last_end = end

463

464

if cur.start is not None:

465

yield cur

466

467

return

393

468

394

469

def get_multi(self, relpaths, pb=None):

395

470

"""Get a list of file-like objects, one for each entry in relpaths.

Older »