~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/sftp.py

Committer: Alexander Belchenko
Date: 2006-07-30 07:02:22 UTC
mfrom: (1897 +trunk)
mto: (1711.2.111 jam-integration)
mto: This revision was merged to the branch mainline in revision 1906.
Revision ID: bialix@ukr.net-20060730070222-43d32b9b36d9316b

merge bzr.dev

files added:
bzrlib/ignores.py

bzrlib/revisiontree.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_tree.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/response.py

files removed:
bzrlib/tests/test_emptytree.py

files modified:
HACKING

NEWS

bzrlib/__init__.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugin.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/newinventory.py

doc/tutorial.txt

generate_docs.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/sftp.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import errno

import getpass

import itertools

import os

import random

import re

110

111

# to ssh-agent. That attribute doesn't exist on win32 (it does in cygwin)

111

112

# so we get an AttributeError exception. So we will not try to

112

113

# connect to an agent if we are on win32 and using Paramiko older than 1.6

113

_use_ssh_agent = (sys.platform != 'win32' or _paramiko_version >= (1, 6, 0))

114

_use_ssh_agent = (sys.platform != 'win32' or _paramiko_version >= (1, 6, 0))

114

115

116

117

_ssh_vendor = None

311

312

313

314

class SFTPTransport (Transport):

314

"""

315

Transport implementation for SFTP access.

316

"""

315

"""Transport implementation for SFTP access"""

316

317

_do_prefetch = _default_do_prefetch

318

# TODO: jam 20060717 Conceivably these could be configurable, either

319

# by auto-tuning at run-time, or by a configuration (per host??)

320

# but the performance curve is pretty flat, so just going with

321

# reasonable defaults.

322

_max_readv_combine = 200

323

# Having to round trip to the server means waiting for a response,

324

# so it is better to download extra bytes.

325

# 8KiB had good performance for both local and remote network operations

326

_bytes_to_read_before_seek = 8192

327

328

# The sftp spec says that implementations SHOULD allow reads

329

# to be at least 32K. paramiko.readv() does an async request

330

# for the chunks. So we need to keep it within a single request

331

# size for paramiko <= 1.6.1. paramiko 1.6.2 will probably chop

332

# up the request itself, rather than us having to worry about it

333

_max_request_size = 32768

318

334

319

335

def __init__(self, base, clone_from=None):

320

336

assert base.startswith('sftp://')

426

442

except (IOError, paramiko.SSHException), e:

427

443

self._translate_io_exception(e, path, ': error retrieving')

428

444

429

def get_partial(self, relpath, start, length=None):

430

"""

431

Get just part of a file.

432

433

:param relpath: Path to the file, relative to base

434

:param start: The starting position to read from

435

:param length: The length to read. A length of None indicates

436

read to the end of the file.

437

:return: A file-like object containing at least the specified bytes.

438

Some implementations may return objects which can be read

439

past this length, but this is not guaranteed.

440

"""

441

# TODO: implement get_partial_multi to help with knit support

442

f = self.get(relpath)

443

f.seek(start)

444

if self._do_prefetch and hasattr(f, 'prefetch'):

445

f.prefetch()

446

return f

445

def readv(self, relpath, offsets):

446

"""See Transport.readv()"""

447

# We overload the default readv() because we want to use a file

448

# that does not have prefetch enabled.

449

# Also, if we have a new paramiko, it implements an async readv()

450

if not offsets:

451

return

452

453

try:

454

path = self._remote_path(relpath)

455

fp = self._sftp.file(path, mode='rb')

456

readv = getattr(fp, 'readv', None)

457

if readv:

458

return self._sftp_readv(fp, offsets)

459

mutter('seek and read %s offsets', len(offsets))

460

return self._seek_and_read(fp, offsets)

461

except (IOError, paramiko.SSHException), e:

462

self._translate_io_exception(e, path, ': error retrieving')

463

464

def _sftp_readv(self, fp, offsets):

465

"""Use the readv() member of fp to do async readv.

466

467

And then read them using paramiko.readv(). paramiko.readv()

468

does not support ranges > 64K, so it caps the request size, and

469

just reads until it gets all the stuff it wants

470

"""

471

offsets = list(offsets)

472

sorted_offsets = sorted(offsets)

473

474

# The algorithm works as follows:

475

# 1) Coalesce nearby reads into a single chunk

476

# This generates a list of combined regions, the total size

477

# and the size of the sub regions. This coalescing step is limited

478

# in the number of nearby chunks to combine, and is allowed to

479

# skip small breaks in the requests. Limiting it makes sure that

480

# we can start yielding some data earlier, and skipping means we

481

# make fewer requests. (Beneficial even when using async)

482

# 2) Break up these combined regions into chunks that are smaller

483

# than 64KiB. Technically the limit is 65536, but we are a

484

# little bit conservative. This is because sftp has a maximum

485

# return chunk size of 64KiB (max size of an unsigned short)

486

# 3) Issue a readv() to paramiko to create an async request for

487

# all of this data

488

# 4) Read in the data as it comes back, until we've read one

489

# continuous section as determined in step 1

490

# 5) Break up the full sections into hunks for the original requested

491

# offsets. And put them in a cache

492

# 6) Check if the next request is in the cache, and if it is, remove

493

# it from the cache, and yield its data. Continue until no more

494

# entries are in the cache.

495

# 7) loop back to step 4 until all data has been read

496

497

# TODO: jam 20060725 This could be optimized one step further, by

498

# attempting to yield whatever data we have read, even before

499

# the first coalesced section has been fully processed.

500

501

# When coalescing for use with readv(), we don't really need to

502

# use any fudge factor, because the requests are made asynchronously

503

coalesced = list(self._coalesce_offsets(sorted_offsets,

504

limit=self._max_readv_combine,

505

fudge_factor=0,

506

))

507

requests = []

508

for c_offset in coalesced:

509

start = c_offset.start

510

size = c_offset.length

511

512

# We need to break this up into multiple requests

513

while size > 0:

514

next_size = min(size, self._max_request_size)

515

requests.append((start, next_size))

516

size -= next_size

517

start += next_size

518

519

mutter('SFTP.readv() %s offsets => %s coalesced => %s requests',

520

len(offsets), len(coalesced), len(requests))

521

522

# Queue the current read until we have read the full coalesced section

523

cur_data = []

524

cur_data_len = 0

525

cur_coalesced_stack = iter(coalesced)

526

cur_coalesced = cur_coalesced_stack.next()

527

528

# Cache the results, but only until they have been fulfilled

529

data_map = {}

530

# turn the list of offsets into a stack

531

offset_stack = iter(offsets)

532

cur_offset_and_size = offset_stack.next()

533

534

for data in fp.readv(requests):

535

cur_data += data

536

cur_data_len += len(data)

537

538

if cur_data_len < cur_coalesced.length:

539

continue

540

assert cur_data_len == cur_coalesced.length, \

541

"Somehow we read too much: %s != %s" % (cur_data_len,

542

cur_coalesced.length)

543

all_data = ''.join(cur_data)

544

cur_data = []

545

cur_data_len = 0

546

547

for suboffset, subsize in cur_coalesced.ranges:

548

key = (cur_coalesced.start+suboffset, subsize)

549

data_map[key] = all_data[suboffset:suboffset+subsize]

550

551

# Now that we've read some data, see if we can yield anything back

552

while cur_offset_and_size in data_map:

553

this_data = data_map.pop(cur_offset_and_size)

554

yield cur_offset_and_size[0], this_data

555

cur_offset_and_size = offset_stack.next()

556

557

# Now that we've read all of the data for this coalesced section

558

# on to the next

559

cur_coalesced = cur_coalesced_stack.next()

447

560

448

561

def put(self, relpath, f, mode=None):

449

562

"""

Older »