~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/transport/sftp.py

Committer: John Arbash Meinel
Date: 2008-07-11 21:41:24 UTC
mto: This revision was merged to the branch mainline in revision 3543.
Revision ID: john@arbash-meinel.com-20080711214124-qi09irlj7pd5cuzg

Shortcut the case when one revision is in the ancestry of the other.

At the cost of a heads() check, when one parent supersedes, we don't have to extract
the text for the other. Changes merge time from 3m37s => 3m21s. Using a
CachingParentsProvider would drop the time down to 3m11s.

files added:
tools/win32/survey.txt

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/btree_index.py

bzrlib/chunk_writer.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/packrepository.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/fake_command.py

bzrlib/tests/https_server.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers/btree_index_prefetch.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/cycle.txt

doc/developers/lca_tree_merging.txt

doc/developers/overview.txt

doc/developers/ppa.txt

doc/developers/testing.txt

doc/en/user-guide/stacked.txt

doc/news-template.txt

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/win32/build_release.py

tools/win32/run_script.py

files renamed:
bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

files modified:
.bzrignore

Makefile

NEWS

bzr.ico

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fetch.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/smart/branch.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_config.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/development-repo.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/plugin-api.txt

doc/developers/releasing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

profile_imports.py

setup.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/transport/sftp.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# suite. Those formats all date back to 0.7; so we should be able to remove

# these methods when we officially drop support for those formats.

import bisect

import errno

import itertools

import os

import random

import select

import warnings

from bzrlib import (

config,

debug,

errors,

urlutils,

)

134

131

pass

135

132

136

133

137

class _SFTPReadvHelper(object):

138

"""A class to help with managing the state of a readv request."""

139

140

# See _get_requests for an explanation.

141

_max_request_size = 32768

142

143

def __init__(self, original_offsets, relpath, _report_activity):

144

"""Create a new readv helper.

145

146

:param original_offsets: The original requests given by the caller of

147

readv()

148

:param relpath: The name of the file (if known)

149

:param _report_activity: A Transport._report_activity bound method,

150

to be called as data arrives.

151

"""

152

self.original_offsets = list(original_offsets)

153

self.relpath = relpath

154

self._report_activity = _report_activity

155

156

def _get_requests(self):

157

"""Break up the offsets into individual requests over sftp.

158

159

The SFTP spec only requires implementers to support 32kB requests. We

160

could try something larger (openssh supports 64kB), but then we have to

161

handle requests that fail.

162

So instead, we just break up our maximum chunks into 32kB chunks, and

163

asynchronously request them.

164

Newer versions of paramiko would do the chunking for us, but we want to

165

start processing results right away, so we do it ourselves.

166

"""

167

# TODO: Because we issue async requests, we don't 'fudge' any extra

168

# data. I'm not 100% sure that is the best choice.

169

170

# The first thing we do, is to collapse the individual requests as much

171

# as possible, so we don't issue requests <32kB

172

sorted_offsets = sorted(self.original_offsets)

173

coalesced = list(ConnectedTransport._coalesce_offsets(sorted_offsets,

174

limit=0, fudge_factor=0))

175

requests = []

176

for c_offset in coalesced:

177

start = c_offset.start

178

size = c_offset.length

179

180

# Break this up into 32kB requests

181

while size > 0:

182

next_size = min(size, self._max_request_size)

183

requests.append((start, next_size))

184

size -= next_size

185

start += next_size

186

if 'sftp' in debug.debug_flags:

187

mutter('SFTP.readv(%s) %s offsets => %s coalesced => %s requests',

188

self.relpath, len(sorted_offsets), len(coalesced),

189

len(requests))

190

return requests

191

192

def request_and_yield_offsets(self, fp):

193

"""Request the data from the remote machine, yielding the results.

194

195

:param fp: A Paramiko SFTPFile object that supports readv.

196

:return: Yield the data requested by the original readv caller, one by

197

one.

198

"""

199

requests = self._get_requests()

200

offset_iter = iter(self.original_offsets)

201

cur_offset, cur_size = offset_iter.next()

202

# paramiko .readv() yields strings that are in the order of the requests

203

# So we track the current request to know where the next data is

204

# being returned from.

205

input_start = None

206

last_end = None

207

buffered_data = []

208

buffered_len = 0

209

210

# This is used to buffer chunks which we couldn't process yet

211

# It is (start, end, data) tuples.

212

data_chunks = []

213

# Create an 'unlimited' data stream, so we stop based on requests,

214

# rather than just because the data stream ended. This lets us detect

215

# short readv.

216

data_stream = itertools.chain(fp.readv(requests),

217

itertools.repeat(None))

218

for (start, length), data in itertools.izip(requests, data_stream):

219

if data is None:

220

if cur_coalesced is not None:

221

raise errors.ShortReadvError(self.relpath,

222

start, length, len(data))

223

if len(data) != length:

224

raise errors.ShortReadvError(self.relpath,

225

start, length, len(data))

226

self._report_activity(length, 'read')

227

if last_end is None:

228

# This is the first request, just buffer it

229

buffered_data = [data]

230

buffered_len = length

231

input_start = start

232

elif start == last_end:

233

# The data we are reading fits neatly on the previous

234

# buffer, so this is all part of a larger coalesced range.

235

buffered_data.append(data)

236

buffered_len += length

237

else:

238

# We have an 'interrupt' in the data stream. So we know we are

239

# at a request boundary.

240

if buffered_len > 0:

241

# We haven't consumed the buffer so far, so put it into

242

# data_chunks, and continue.

243

buffered = ''.join(buffered_data)

244

data_chunks.append((input_start, buffered))

245

input_start = start

246

buffered_data = [data]

247

buffered_len = length

248

last_end = start + length

249

if input_start == cur_offset and cur_size <= buffered_len:

250

# Simplify the next steps a bit by transforming buffered_data

251

# into a single string. We also have the nice property that

252

# when there is only one string ''.join([x]) == x, so there is

253

# no data copying.

254

buffered = ''.join(buffered_data)

255

# Clean out buffered data so that we keep memory

256

# consumption low

257

del buffered_data[:]

258

buffered_offset = 0

259

# TODO: We *could* also consider the case where cur_offset is in

260

# the buffered range, even though it doesn't *start*

261

# the buffered range. But for packs we pretty much always

262

# read in order, so you won't get any extra data in the

263

# middle.

264

while (input_start == cur_offset

265

and (buffered_offset + cur_size) <= buffered_len):

266

# We've buffered enough data to process this request, spit it

267

# out

268

cur_data = buffered[buffered_offset:buffered_offset + cur_size]

269

# move the direct pointer into our buffered data

270

buffered_offset += cur_size

271

# Move the start-of-buffer pointer

272

input_start += cur_size

273

# Yield the requested data

274

yield cur_offset, cur_data

275

cur_offset, cur_size = offset_iter.next()

276

# at this point, we've consumed as much of buffered as we can,

277

# so break off the portion that we consumed

278

if buffered_offset == len(buffered_data):

279

# No tail to leave behind

280

buffered_data = []

281

buffered_len = 0

282

else:

283

buffered = buffered[buffered_offset:]

284

buffered_data = [buffered]

285

buffered_len = len(buffered)

286

if buffered_len:

287

buffered = ''.join(buffered_data)

288

del buffered_data[:]

289

data_chunks.append((input_start, buffered))

290

if data_chunks:

291

if 'sftp' in debug.debug_flags:

292

mutter('SFTP readv left with %d out-of-order bytes',

293

sum(map(lambda x: len(x[1]), data_chunks)))

294

# We've processed all the readv data, at this point, anything we

295

# couldn't process is in data_chunks. This doesn't happen often, so

296

# this code path isn't optimized

297

# We use an interesting process for data_chunks

298

# Specifically if we have "bisect_left([(start, len, entries)],

299

# (qstart,)])

300

# If start == qstart, then we get the specific node. Otherwise we

301

# get the previous node

302

while True:

303

idx = bisect.bisect_left(data_chunks, (cur_offset,))

304

if idx < len(data_chunks) and data_chunks[idx][0] == cur_offset:

305

# The data starts here

306

data = data_chunks[idx][1][:cur_size]

307

elif idx > 0:

308

# The data is in a portion of a previous page

309

idx -= 1

310

sub_offset = cur_offset - data_chunks[idx][0]

311

data = data_chunks[idx][1]

312

data = data[sub_offset:sub_offset + cur_size]

313

else:

314

# We are missing the page where the data should be found,

315

# something is wrong

316

data = ''

317

if len(data) != cur_size:

318

raise AssertionError('We must have miscalulated.'

319

' We expected %d bytes, but only found %d'

320

% (cur_size, len(data)))

321

yield cur_offset, data

322

cur_offset, cur_size = offset_iter.next()

323

324

325

134

class SFTPTransport(ConnectedTransport):

326

135

"""Transport implementation for SFTP access."""

327

136

383

192

password = credentials

384

193

385

194

vendor = ssh._get_ssh_vendor()

386

user = self._user

387

if user is None:

388

auth = config.AuthenticationConfig()

389

user = auth.get_user('ssh', self._host, self._port)

390

195

connection = vendor.connect_sftp(self._user, password,

391

196

self._host, self._port)

392

return connection, (user, password)

197

return connection, password

393

198

394

199

def _get_sftp(self):

395

200

"""Ensures that a connection is established"""

411

216

return False

412

217

413

218

def get(self, relpath):

414

"""Get the file at the given relative path.

219

"""

220

Get the file at the given relative path.

415

221

416

222

:param relpath: The relative path to the file

417

223

"""

425

231

self._translate_io_exception(e, path, ': error retrieving',

426

232

failure_exc=errors.ReadError)

427

233

428

def get_bytes(self, relpath):

429

# reimplement this here so that we can report how many bytes came back

430

f = self.get(relpath)

431

try:

432

bytes = f.read()

433

self._report_activity(len(bytes), 'read')

434

return bytes

435

finally:

436

f.close()

437

438

234

def _readv(self, relpath, offsets):

439

235

"""See Transport.readv()"""

440

236

# We overload the default readv() because we want to use a file

449

245

readv = getattr(fp, 'readv', None)

450

246

if readv:

451

247

return self._sftp_readv(fp, offsets, relpath)

452

if 'sftp' in debug.debug_flags:

453

mutter('seek and read %s offsets', len(offsets))

248

mutter('seek and read %s offsets', len(offsets))

454

249

return self._seek_and_read(fp, offsets, relpath)

455

250

except (IOError, paramiko.SSHException), e:

456

251

self._translate_io_exception(e, path, ': error retrieving')

463

258

"""

464

259

return 64 * 1024

465

260

466

def _sftp_readv(self, fp, offsets, relpath):

261

def _sftp_readv(self, fp, offsets, relpath='<unknown>'):

467

262

"""Use the readv() member of fp to do async readv.

468

263

469

Then read them using paramiko.readv(). paramiko.readv()

264

And then read them using paramiko.readv(). paramiko.readv()

470

265

does not support ranges > 64K, so it caps the request size, and

471

just reads until it gets all the stuff it wants.

266

just reads until it gets all the stuff it wants

472

267

"""

473

helper = _SFTPReadvHelper(offsets, relpath, self._report_activity)

474

return helper.request_and_yield_offsets(fp)

268

offsets = list(offsets)

269

sorted_offsets = sorted(offsets)

270

271

# The algorithm works as follows:

272

# 1) Coalesce nearby reads into a single chunk

273

# This generates a list of combined regions, the total size

274

# and the size of the sub regions. This coalescing step is limited

275

# in the number of nearby chunks to combine, and is allowed to

276

# skip small breaks in the requests. Limiting it makes sure that

277

# we can start yielding some data earlier, and skipping means we

278

# make fewer requests. (Beneficial even when using async)

279

# 2) Break up this combined regions into chunks that are smaller

280

# than 64KiB. Technically the limit is 65536, but we are a

281

# little bit conservative. This is because sftp has a maximum

282

# return chunk size of 64KiB (max size of an unsigned short)

283

# 3) Issue a readv() to paramiko to create an async request for

284

# all of this data

285

# 4) Read in the data as it comes back, until we've read one

286

# continuous section as determined in step 1

287

# 5) Break up the full sections into hunks for the original requested

288

# offsets. And put them in a cache

289

# 6) Check if the next request is in the cache, and if it is, remove

290

# it from the cache, and yield its data. Continue until no more

291

# entries are in the cache.

292

# 7) loop back to step 4 until all data has been read

293

294

# TODO: jam 20060725 This could be optimized one step further, by

295

# attempting to yield whatever data we have read, even before

296

# the first coallesced section has been fully processed.

297

298

# When coalescing for use with readv(), we don't really need to

299

# use any fudge factor, because the requests are made asynchronously

300

coalesced = list(self._coalesce_offsets(sorted_offsets,

301

limit=self._max_readv_combine,

302

fudge_factor=0,

303

))

304

requests = []

305

for c_offset in coalesced:

306

start = c_offset.start

307

size = c_offset.length

308

309

# We need to break this up into multiple requests

310

while size > 0:

311

next_size = min(size, self._max_request_size)

312

requests.append((start, next_size))

313

size -= next_size

314

start += next_size

315

316

mutter('SFTP.readv() %s offsets => %s coalesced => %s requests',

317

len(offsets), len(coalesced), len(requests))

318

319

# Queue the current read until we have read the full coalesced section

320

cur_data = []

321

cur_data_len = 0

322

cur_coalesced_stack = iter(coalesced)

323

cur_coalesced = cur_coalesced_stack.next()

324

325

# Cache the results, but only until they have been fulfilled

326

data_map = {}

327

# turn the list of offsets into a stack

328

offset_stack = iter(offsets)

329

cur_offset_and_size = offset_stack.next()

330

331

for data in fp.readv(requests):

332

cur_data += data

333

cur_data_len += len(data)

334

335

if cur_data_len < cur_coalesced.length:

336

continue

337

if cur_data_len != cur_coalesced.length:

338

raise AssertionError(

339

"Somehow we read too much: %s != %s"

340

% (cur_data_len, cur_coalesced.length))

341

all_data = ''.join(cur_data)

342

cur_data = []

343

cur_data_len = 0

344

345

for suboffset, subsize in cur_coalesced.ranges:

346

key = (cur_coalesced.start+suboffset, subsize)

347

data_map[key] = all_data[suboffset:suboffset+subsize]

348

349

# Now that we've read some data, see if we can yield anything back

350

while cur_offset_and_size in data_map:

351

this_data = data_map.pop(cur_offset_and_size)

352

yield cur_offset_and_size[0], this_data

353

cur_offset_and_size = offset_stack.next()

354

355

# We read a coalesced entry, so mark it as done

356

cur_coalesced = None

357

# Now that we've read all of the data for this coalesced section

358

# on to the next

359

cur_coalesced = cur_coalesced_stack.next()

360

361

if cur_coalesced is not None:

362

raise errors.ShortReadvError(relpath, cur_coalesced.start,

363

cur_coalesced.length, len(data))

475

364

476

365

def put_file(self, relpath, f, mode=None):

477

366

"""

630

519

try:

631

520

self._get_sftp().mkdir(abspath, local_mode)

632

521

if mode is not None:

633

# chmod a dir through sftp will erase any sgid bit set

634

# on the server side. So, if the mode bits are already

635

# set, avoid the chmod. If the mode is not fine but

636

# the sgid bit is set, report a warning to the user

637

# with the umask fix.

638

stat = self._get_sftp().lstat(abspath)

639

mode = mode & 0777 # can't set special bits anyway

640

if mode != stat.st_mode & 0777:

641

if stat.st_mode & 06000:

642

warning('About to chmod %s over sftp, which will result'

643

' in its suid or sgid bits being cleared. If'

644

' you want to preserve those bits, change your '

645

' environment on the server to use umask 0%03o.'

646

% (abspath, 0777 - mode))

647

self._get_sftp().chmod(abspath, mode=mode)

522

self._get_sftp().chmod(abspath, mode=mode)

648

523

except (paramiko.SSHException, IOError), e:

649

524

self._translate_io_exception(e, abspath, ': unable to mkdir',

650

525

failure_exc=FileExists)

695

570

if (e.args == ('No such file or directory',) or

696

571

e.args == ('No such file',)):

697

572

raise NoSuchFile(path, str(e) + more_info)

698

if (e.args == ('mkdir failed',) or

699

e.args[0].startswith('syserr: File exists')):

573

if (e.args == ('mkdir failed',)):

700

574

raise FileExists(path, str(e) + more_info)

701

575

# strange but true, for the paramiko server.

702

576

if (e.args == ('Failure',)):

Older »