13
14
# You should have received a copy of the GNU General Public License
14
15
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18
"""Implementation of Transport over SFTP, using paramiko."""
19
# TODO: Remove the transport-based lock_read and lock_write methods. They'll
20
# then raise TransportNotPossible, which will break remote access to any
21
# formats which rely on OS-level locks. That should be fine as those formats
22
# are pretty old, but these combinations may have to be removed from the test
23
# suite. Those formats all date back to 0.7; so we should be able to remove
24
# these methods when we officially drop support for those formats.
46
from bzrlib.errors import (FileExists,
47
NoSuchFile, PathNotChild,
36
from bzrlib.config import config_dir, ensure_config_dir_exists
37
from bzrlib.errors import (ConnectionError,
39
TransportNotPossible, NoSuchFile, PathNotChild,
51
43
ParamikoNotPresent,
53
45
from bzrlib.osutils import pathjoin, fancy_rename, getcwd
54
from bzrlib.symbol_versioning import (
57
from bzrlib.trace import mutter, warning
46
from bzrlib.trace import mutter, warning, error
58
47
from bzrlib.transport import (
48
register_urlparse_netloc_protocol,
67
# Disable one particular warning that comes from paramiko in Python2.5; if
68
# this is emitted at the wrong time it tends to cause spurious test failures
69
# or at least noise in the test case::
71
# [1770/7639 in 86s, 1 known failures, 50 skipped, 2 missing features]
72
# test_permissions.TestSftpPermissions.test_new_files
73
# /var/lib/python-support/python2.5/paramiko/message.py:226: DeprecationWarning: integer argument expected, got float
74
# self.packet.write(struct.pack('>I', n))
75
warnings.filterwarnings('ignore',
76
'integer argument expected, got float',
77
category=DeprecationWarning,
78
module='paramiko.message')
54
import bzrlib.urlutils as urlutils
87
63
CMD_HANDLE, CMD_OPEN)
88
64
from paramiko.sftp_attr import SFTPAttributes
89
65
from paramiko.sftp_file import SFTPFile
66
from paramiko.sftp_client import SFTPClient
69
register_urlparse_netloc_protocol('sftp')
73
# TODO: This should possibly ignore SIGHUP as well, but bzr currently
74
# doesn't handle it itself.
75
# <https://launchpad.net/products/bzr/+bug/41433/+index>
77
signal.signal(signal.SIGINT, signal.SIG_IGN)
80
def os_specific_subprocess_params():
81
"""Get O/S specific subprocess parameters."""
82
if sys.platform == 'win32':
83
# setting the process group and closing fds is not supported on
87
# We close fds other than the pipes as the child process does not need
90
# We also set the child process to ignore SIGINT. Normally the signal
91
# would be sent to every process in the foreground process group, but
92
# this causes it to be seen only by bzr and not by ssh. Python will
93
# generate a KeyboardInterrupt in bzr, and we will then have a chance
94
# to release locks or do other cleanup over ssh before the connection
96
# <https://launchpad.net/products/bzr/+bug/5987>
98
# Running it in a separate process group is not good because then it
99
# can't get non-echoed input of a password or passphrase.
100
# <https://launchpad.net/products/bzr/+bug/40508>
101
return {'preexec_fn': _ignore_sigint,
92
106
_paramiko_version = getattr(paramiko, '__version_info__', (0, 0, 0))
93
107
# don't use prefetch unless paramiko version >= 1.5.5 (there were bugs earlier)
94
108
_default_do_prefetch = (_paramiko_version >= (1, 5, 5))
110
# Paramiko 1.5 tries to open a socket.AF_UNIX in order to connect
111
# to ssh-agent. That attribute doesn't exist on win32 (it does in cygwin)
112
# so we get an AttributeError exception. So we will not try to
113
# connect to an agent if we are on win32 and using Paramiko older than 1.6
114
_use_ssh_agent = (sys.platform != 'win32' or _paramiko_version >= (1, 6, 0))
118
def _get_ssh_vendor():
119
"""Find out what version of SSH is on the system."""
121
if _ssh_vendor is not None:
126
if 'BZR_SSH' in os.environ:
127
_ssh_vendor = os.environ['BZR_SSH']
128
if _ssh_vendor == 'paramiko':
133
p = subprocess.Popen(['ssh', '-V'],
134
stdin=subprocess.PIPE,
135
stdout=subprocess.PIPE,
136
stderr=subprocess.PIPE,
137
**os_specific_subprocess_params())
138
returncode = p.returncode
139
stdout, stderr = p.communicate()
143
if 'OpenSSH' in stderr:
144
mutter('ssh implementation is OpenSSH')
145
_ssh_vendor = 'openssh'
146
elif 'SSH Secure Shell' in stderr:
147
mutter('ssh implementation is SSH Corp.')
150
if _ssh_vendor != 'none':
153
# XXX: 20051123 jamesh
154
# A check for putty's plink or lsh would go here.
156
mutter('falling back to paramiko implementation')
160
class SFTPSubprocess:
161
"""A socket-like object that talks to an ssh subprocess via pipes."""
162
def __init__(self, hostname, vendor, port=None, user=None):
163
assert vendor in ['openssh', 'ssh']
164
if vendor == 'openssh':
166
'-oForwardX11=no', '-oForwardAgent=no',
167
'-oClearAllForwardings=yes', '-oProtocol=2',
168
'-oNoHostAuthenticationForLocalhost=yes']
170
args.extend(['-p', str(port)])
172
args.extend(['-l', user])
173
args.extend(['-s', hostname, 'sftp'])
174
elif vendor == 'ssh':
177
args.extend(['-p', str(port)])
179
args.extend(['-l', user])
180
args.extend(['-s', 'sftp', hostname])
182
self.proc = subprocess.Popen(args,
183
stdin=subprocess.PIPE,
184
stdout=subprocess.PIPE,
185
**os_specific_subprocess_params())
187
def send(self, data):
188
return os.write(self.proc.stdin.fileno(), data)
190
def recv_ready(self):
191
# TODO: jam 20051215 this function is necessary to support the
192
# pipelined() function. In reality, it probably should use
193
# poll() or select() to actually return if there is data
194
# available, otherwise we probably don't get any benefit
197
def recv(self, count):
198
return os.read(self.proc.stdout.fileno(), count)
201
self.proc.stdin.close()
202
self.proc.stdout.close()
206
class LoopbackSFTP(object):
207
"""Simple wrapper for a socket that pretends to be a paramiko Channel."""
209
def __init__(self, sock):
212
def send(self, data):
213
return self.__socket.send(data)
216
return self.__socket.recv(n)
218
def recv_ready(self):
222
self.__socket.close()
228
# This is a weakref dictionary, so that we can reuse connections
229
# that are still active. Long term, it might be nice to have some
230
# sort of expiration policy, such as disconnect if inactive for
231
# X seconds. But that requires a lot more fanciness.
232
_connected_hosts = weakref.WeakValueDictionary()
234
def clear_connection_cache():
235
"""Remove all hosts from the SFTP connection cache.
237
Primarily useful for test cases wanting to force garbage collection.
239
_connected_hosts.clear()
242
def load_host_keys():
244
Load system host keys (probably doesn't work on windows) and any
245
"discovered" keys from previous sessions.
247
global SYSTEM_HOSTKEYS, BZR_HOSTKEYS
249
SYSTEM_HOSTKEYS = paramiko.util.load_host_keys(os.path.expanduser('~/.ssh/known_hosts'))
251
mutter('failed to load system host keys: ' + str(e))
252
bzr_hostkey_path = pathjoin(config_dir(), 'ssh_host_keys')
254
BZR_HOSTKEYS = paramiko.util.load_host_keys(bzr_hostkey_path)
256
mutter('failed to load bzr host keys: ' + str(e))
260
def save_host_keys():
262
Save "discovered" host keys in $(config)/ssh_host_keys/.
264
global SYSTEM_HOSTKEYS, BZR_HOSTKEYS
265
bzr_hostkey_path = pathjoin(config_dir(), 'ssh_host_keys')
266
ensure_config_dir_exists()
269
f = open(bzr_hostkey_path, 'w')
270
f.write('# SSH host keys collected by bzr\n')
271
for hostname, keys in BZR_HOSTKEYS.iteritems():
272
for keytype, key in keys.iteritems():
273
f.write('%s %s %s\n' % (hostname, keytype, key.get_base64()))
276
mutter('failed to save bzr host keys: ' + str(e))
97
279
class SFTPLock(object):
98
"""This fakes a lock in a remote location.
100
A present lock is indicated just by the existence of a file. This
101
doesn't work well on all transports and they are only used in
102
deprecated storage formats.
280
"""This fakes a lock in a remote location."""
105
281
__slots__ = ['path', 'lock_path', 'lock_file', 'transport']
107
282
def __init__(self, path, transport):
283
assert isinstance(transport, SFTPTransport)
108
285
self.lock_file = None
110
287
self.lock_path = path + '.write-lock'
137
class _SFTPReadvHelper(object):
138
"""A class to help with managing the state of a readv request."""
140
# See _get_requests for an explanation.
141
_max_request_size = 32768
143
def __init__(self, original_offsets, relpath, _report_activity):
144
"""Create a new readv helper.
146
:param original_offsets: The original requests given by the caller of
148
:param relpath: The name of the file (if known)
149
:param _report_activity: A Transport._report_activity bound method,
150
to be called as data arrives.
152
self.original_offsets = list(original_offsets)
153
self.relpath = relpath
154
self._report_activity = _report_activity
156
def _get_requests(self):
157
"""Break up the offsets into individual requests over sftp.
159
The SFTP spec only requires implementers to support 32kB requests. We
160
could try something larger (openssh supports 64kB), but then we have to
161
handle requests that fail.
162
So instead, we just break up our maximum chunks into 32kB chunks, and
163
asynchronously request them.
164
Newer versions of paramiko would do the chunking for us, but we want to
165
start processing results right away, so we do it ourselves.
167
# TODO: Because we issue async requests, we don't 'fudge' any extra
168
# data. I'm not 100% sure that is the best choice.
170
# The first thing we do, is to collapse the individual requests as much
171
# as possible, so we don't issue requests <32kB
172
sorted_offsets = sorted(self.original_offsets)
173
coalesced = list(ConnectedTransport._coalesce_offsets(sorted_offsets,
174
limit=0, fudge_factor=0))
176
for c_offset in coalesced:
177
start = c_offset.start
178
size = c_offset.length
180
# Break this up into 32kB requests
182
next_size = min(size, self._max_request_size)
183
requests.append((start, next_size))
186
if 'sftp' in debug.debug_flags:
187
mutter('SFTP.readv(%s) %s offsets => %s coalesced => %s requests',
188
self.relpath, len(sorted_offsets), len(coalesced),
192
def request_and_yield_offsets(self, fp):
193
"""Request the data from the remote machine, yielding the results.
195
:param fp: A Paramiko SFTPFile object that supports readv.
196
:return: Yield the data requested by the original readv caller, one by
199
requests = self._get_requests()
200
offset_iter = iter(self.original_offsets)
201
cur_offset, cur_size = offset_iter.next()
202
# paramiko .readv() yields strings that are in the order of the requests
203
# So we track the current request to know where the next data is
204
# being returned from.
210
# This is used to buffer chunks which we couldn't process yet
211
# It is (start, end, data) tuples.
213
# Create an 'unlimited' data stream, so we stop based on requests,
214
# rather than just because the data stream ended. This lets us detect
216
data_stream = itertools.chain(fp.readv(requests),
217
itertools.repeat(None))
218
for (start, length), data in itertools.izip(requests, data_stream):
220
if cur_coalesced is not None:
221
raise errors.ShortReadvError(self.relpath,
222
start, length, len(data))
223
if len(data) != length:
224
raise errors.ShortReadvError(self.relpath,
225
start, length, len(data))
226
self._report_activity(length, 'read')
228
# This is the first request, just buffer it
229
buffered_data = [data]
230
buffered_len = length
232
elif start == last_end:
233
# The data we are reading fits neatly on the previous
234
# buffer, so this is all part of a larger coalesced range.
235
buffered_data.append(data)
236
buffered_len += length
238
# We have an 'interrupt' in the data stream. So we know we are
239
# at a request boundary.
241
# We haven't consumed the buffer so far, so put it into
242
# data_chunks, and continue.
243
buffered = ''.join(buffered_data)
244
data_chunks.append((input_start, buffered))
246
buffered_data = [data]
247
buffered_len = length
248
last_end = start + length
249
if input_start == cur_offset and cur_size <= buffered_len:
250
# Simplify the next steps a bit by transforming buffered_data
251
# into a single string. We also have the nice property that
252
# when there is only one string ''.join([x]) == x, so there is
254
buffered = ''.join(buffered_data)
255
# Clean out buffered data so that we keep memory
259
# TODO: We *could* also consider the case where cur_offset is in
260
# the buffered range, even though it doesn't *start*
261
# the buffered range. But for packs we pretty much always
262
# read in order, so you won't get any extra data in the
264
while (input_start == cur_offset
265
and (buffered_offset + cur_size) <= buffered_len):
266
# We've buffered enough data to process this request, spit it
268
cur_data = buffered[buffered_offset:buffered_offset + cur_size]
269
# move the direct pointer into our buffered data
270
buffered_offset += cur_size
271
# Move the start-of-buffer pointer
272
input_start += cur_size
273
# Yield the requested data
274
yield cur_offset, cur_data
275
cur_offset, cur_size = offset_iter.next()
276
# at this point, we've consumed as much of buffered as we can,
277
# so break off the portion that we consumed
278
if buffered_offset == len(buffered_data):
279
# No tail to leave behind
283
buffered = buffered[buffered_offset:]
284
buffered_data = [buffered]
285
buffered_len = len(buffered)
287
buffered = ''.join(buffered_data)
289
data_chunks.append((input_start, buffered))
291
if 'sftp' in debug.debug_flags:
292
mutter('SFTP readv left with %d out-of-order bytes',
293
sum(map(lambda x: len(x[1]), data_chunks)))
294
# We've processed all the readv data, at this point, anything we
295
# couldn't process is in data_chunks. This doesn't happen often, so
296
# this code path isn't optimized
297
# We use an interesting process for data_chunks
298
# Specifically if we have "bisect_left([(start, len, entries)],
300
# If start == qstart, then we get the specific node. Otherwise we
301
# get the previous node
303
idx = bisect.bisect_left(data_chunks, (cur_offset,))
304
if idx < len(data_chunks) and data_chunks[idx][0] == cur_offset:
305
# The data starts here
306
data = data_chunks[idx][1][:cur_size]
308
# The data is in a portion of a previous page
310
sub_offset = cur_offset - data_chunks[idx][0]
311
data = data_chunks[idx][1]
312
data = data[sub_offset:sub_offset + cur_size]
314
# We are missing the page where the data should be found,
317
if len(data) != cur_size:
318
raise AssertionError('We must have miscalulated.'
319
' We expected %d bytes, but only found %d'
320
% (cur_size, len(data)))
321
yield cur_offset, data
322
cur_offset, cur_size = offset_iter.next()
325
class SFTPTransport(ConnectedTransport):
326
"""Transport implementation for SFTP access."""
314
class SFTPTransport (Transport):
315
"""Transport implementation for SFTP access"""
328
317
_do_prefetch = _default_do_prefetch
329
318
# TODO: jam 20060717 Conceivably these could be configurable, either
343
332
# up the request itself, rather than us having to worry about it
344
333
_max_request_size = 32768
346
def __init__(self, base, _from_transport=None):
347
super(SFTPTransport, self).__init__(base,
348
_from_transport=_from_transport)
335
def __init__(self, base, clone_from=None):
336
assert base.startswith('sftp://')
337
self._parse_url(base)
338
base = self._unparse_url()
341
super(SFTPTransport, self).__init__(base)
342
if clone_from is None:
345
# use the same ssh connection, etc
346
self._sftp = clone_from._sftp
347
# super saves 'self.base'
349
def should_cache(self):
351
Return True if the data pulled across should be cached locally.
355
def clone(self, offset=None):
357
Return a new SFTPTransport with root at self.base + offset.
358
We share the same SFTP session between such transports, because it's
359
fairly expensive to set them up.
362
return SFTPTransport(self.base, self)
364
return SFTPTransport(self.abspath(offset), self)
366
def abspath(self, relpath):
368
Return the full url to the given relative path.
370
@param relpath: the relative path or path components
371
@type relpath: str or list
373
return self._unparse_url(self._remote_path(relpath))
350
375
def _remote_path(self, relpath):
351
376
"""Return the path to be passed along the sftp protocol for relpath.
353
:param relpath: is a urlencoded string.
355
relative = urlutils.unescape(relpath).encode('utf-8')
356
remote_path = self._combine_paths(self._path, relative)
357
# the initial slash should be removed from the path, and treated as a
358
# homedir relative path (the path begins with a double slash if it is
359
# absolute). see draft-ietf-secsh-scp-sftp-ssh-uri-03.txt
360
# RBC 20060118 we are not using this as it's too user hostile. instead
361
# we are following lftp and using /~/foo to mean '~/foo'
362
# vila--20070602 and leave absolute paths begin with a single slash.
363
if remote_path.startswith('/~/'):
364
remote_path = remote_path[3:]
365
elif remote_path == '/~':
369
def _create_connection(self, credentials=None):
370
"""Create a new connection with the provided credentials.
372
:param credentials: The credentials needed to establish the connection.
374
:return: The created connection and its associated credentials.
376
The credentials are only the password as it may have been entered
377
interactively by the user and may be different from the one provided
378
in base url at transport creation time.
380
if credentials is None:
381
password = self._password
383
password = credentials
385
vendor = ssh._get_ssh_vendor()
388
auth = config.AuthenticationConfig()
389
user = auth.get_user('ssh', self._host, self._port)
390
connection = vendor.connect_sftp(self._user, password,
391
self._host, self._port)
392
return connection, (user, password)
395
"""Ensures that a connection is established"""
396
connection = self._get_connection()
397
if connection is None:
398
# First connection ever
399
connection, credentials = self._create_connection()
400
self._set_connection(connection, credentials)
378
relpath is a urlencoded string.
380
# FIXME: share the common code across transports
381
assert isinstance(relpath, basestring)
382
relpath = urlutils.unescape(relpath).split('/')
383
basepath = self._path.split('/')
384
if len(basepath) > 0 and basepath[-1] == '':
385
basepath = basepath[:-1]
389
if len(basepath) == 0:
390
# In most filesystems, a request for the parent
391
# of root, just returns root.
399
path = '/'.join(basepath)
400
# mutter('relpath => remotepath %s => %s', relpath, path)
403
def relpath(self, abspath):
404
username, password, host, port, path = self._split_url(abspath)
406
if (username != self._username):
407
error.append('username mismatch')
408
if (host != self._host):
409
error.append('host mismatch')
410
if (port != self._port):
411
error.append('port mismatch')
412
if (not path.startswith(self._path)):
413
error.append('path mismatch')
415
extra = ': ' + ', '.join(error)
416
raise PathNotChild(abspath, self.base, extra=extra)
418
return path[pl:].strip('/')
403
420
def has(self, relpath):
405
422
Does the target location exist?
408
self._get_sftp().stat(self._remote_path(relpath))
409
# stat result is about 20 bytes, let's say
410
self._report_activity(20, 'read')
425
self._sftp.stat(self._remote_path(relpath))
415
430
def get(self, relpath):
416
"""Get the file at the given relative path.
432
Get the file at the given relative path.
418
434
:param relpath: The relative path to the file
421
# FIXME: by returning the file directly, we don't pass this
422
# through to report_activity. We could try wrapping the object
423
# before it's returned. For readv and get_bytes it's handled in
424
# the higher-level function.
426
437
path = self._remote_path(relpath)
427
f = self._get_sftp().file(path, mode='rb')
438
f = self._sftp.file(path, mode='rb')
428
439
if self._do_prefetch and (getattr(f, 'prefetch', None) is not None):
431
442
except (IOError, paramiko.SSHException), e:
432
self._translate_io_exception(e, path, ': error retrieving',
433
failure_exc=errors.ReadError)
435
def get_bytes(self, relpath):
436
# reimplement this here so that we can report how many bytes came back
437
f = self.get(relpath)
440
self._report_activity(len(bytes), 'read')
445
def _readv(self, relpath, offsets):
443
self._translate_io_exception(e, path, ': error retrieving')
445
def readv(self, relpath, offsets):
446
446
"""See Transport.readv()"""
447
447
# We overload the default readv() because we want to use a file
448
448
# that does not have prefetch enabled.
454
454
path = self._remote_path(relpath)
455
fp = self._get_sftp().file(path, mode='rb')
455
fp = self._sftp.file(path, mode='rb')
456
456
readv = getattr(fp, 'readv', None)
458
return self._sftp_readv(fp, offsets, relpath)
459
if 'sftp' in debug.debug_flags:
460
mutter('seek and read %s offsets', len(offsets))
461
return self._seek_and_read(fp, offsets, relpath)
458
return self._sftp_readv(fp, offsets)
459
mutter('seek and read %s offsets', len(offsets))
460
return self._seek_and_read(fp, offsets)
462
461
except (IOError, paramiko.SSHException), e:
463
462
self._translate_io_exception(e, path, ': error retrieving')
465
def recommended_page_size(self):
466
"""See Transport.recommended_page_size().
468
For SFTP we suggest a large page size to reduce the overhead
469
introduced by latency.
473
def _sftp_readv(self, fp, offsets, relpath):
464
def _sftp_readv(self, fp, offsets):
474
465
"""Use the readv() member of fp to do async readv.
476
Then read them using paramiko.readv(). paramiko.readv()
467
And then read them using paramiko.readv(). paramiko.readv()
477
468
does not support ranges > 64K, so it caps the request size, and
478
just reads until it gets all the stuff it wants.
480
helper = _SFTPReadvHelper(offsets, relpath, self._report_activity)
481
return helper.request_and_yield_offsets(fp)
483
def put_file(self, relpath, f, mode=None):
485
Copy the file-like object into the location.
469
just reads until it gets all the stuff it wants
471
offsets = list(offsets)
472
sorted_offsets = sorted(offsets)
474
# The algorithm works as follows:
475
# 1) Coalesce nearby reads into a single chunk
476
# This generates a list of combined regions, the total size
477
# and the size of the sub regions. This coalescing step is limited
478
# in the number of nearby chunks to combine, and is allowed to
479
# skip small breaks in the requests. Limiting it makes sure that
480
# we can start yielding some data earlier, and skipping means we
481
# make fewer requests. (Beneficial even when using async)
482
# 2) Break up these combined regions into chunks that are smaller
483
# than 64KiB. Technically the limit is 65536, but we are a
484
# little bit conservative. This is because sftp has a maximum
485
# return chunk size of 64KiB (max size of an unsigned short)
486
# 3) Issue a readv() to paramiko to create an async request for
488
# 4) Read in the data as it comes back, until we've read one
489
# continuous section as determined in step 1
490
# 5) Break up the full sections into hunks for the original requested
491
# offsets. And put them in a cache
492
# 6) Check if the next request is in the cache, and if it is, remove
493
# it from the cache, and yield its data. Continue until no more
494
# entries are in the cache.
495
# 7) loop back to step 4 until all data has been read
497
# TODO: jam 20060725 This could be optimized one step further, by
498
# attempting to yield whatever data we have read, even before
499
# the first coalesced section has been fully processed.
501
# When coalescing for use with readv(), we don't really need to
502
# use any fudge factor, because the requests are made asynchronously
503
coalesced = list(self._coalesce_offsets(sorted_offsets,
504
limit=self._max_readv_combine,
508
for c_offset in coalesced:
509
start = c_offset.start
510
size = c_offset.length
512
# We need to break this up into multiple requests
514
next_size = min(size, self._max_request_size)
515
requests.append((start, next_size))
519
mutter('SFTP.readv() %s offsets => %s coalesced => %s requests',
520
len(offsets), len(coalesced), len(requests))
522
# Queue the current read until we have read the full coalesced section
525
cur_coalesced_stack = iter(coalesced)
526
cur_coalesced = cur_coalesced_stack.next()
528
# Cache the results, but only until they have been fulfilled
530
# turn the list of offsets into a stack
531
offset_stack = iter(offsets)
532
cur_offset_and_size = offset_stack.next()
534
for data in fp.readv(requests):
536
cur_data_len += len(data)
538
if cur_data_len < cur_coalesced.length:
540
assert cur_data_len == cur_coalesced.length, \
541
"Somehow we read too much: %s != %s" % (cur_data_len,
542
cur_coalesced.length)
543
all_data = ''.join(cur_data)
547
for suboffset, subsize in cur_coalesced.ranges:
548
key = (cur_coalesced.start+suboffset, subsize)
549
data_map[key] = all_data[suboffset:suboffset+subsize]
551
# Now that we've read some data, see if we can yield anything back
552
while cur_offset_and_size in data_map:
553
this_data = data_map.pop(cur_offset_and_size)
554
yield cur_offset_and_size[0], this_data
555
cur_offset_and_size = offset_stack.next()
557
# Now that we've read all of the data for this coalesced section
559
cur_coalesced = cur_coalesced_stack.next()
561
def put(self, relpath, f, mode=None):
563
Copy the file-like or string object into the location.
487
565
:param relpath: Location to put the contents, relative to base.
488
:param f: File-like object.
566
:param f: File-like or string object.
489
567
:param mode: The final mode for the file
491
569
final_path = self._remote_path(relpath)
492
return self._put(final_path, f, mode=mode)
570
self._put(final_path, f, mode=mode)
494
572
def _put(self, abspath, f, mode=None):
495
573
"""Helper function so both put() and copy_abspaths can reuse the code"""
536
self._get_sftp().remove(tmp_abspath)
600
self._sftp.remove(tmp_abspath)
538
602
# raise the saved except
540
604
# raise the original with its traceback if we can.
543
def _put_non_atomic_helper(self, relpath, writer, mode=None,
544
create_parent_dir=False,
546
abspath = self._remote_path(relpath)
548
# TODO: jam 20060816 paramiko doesn't publicly expose a way to
549
# set the file mode at create time. If it does, use it.
550
# But for now, we just chmod later anyway.
552
def _open_and_write_file():
553
"""Try to open the target file, raise error on failure"""
557
fout = self._get_sftp().file(abspath, mode='wb')
558
fout.set_pipelined(True)
560
except (paramiko.SSHException, IOError), e:
561
self._translate_io_exception(e, abspath,
564
# This is designed to chmod() right before we close.
565
# Because we set_pipelined() earlier, theoretically we might
566
# avoid the round trip for fout.close()
568
self._get_sftp().chmod(abspath, mode)
573
if not create_parent_dir:
574
_open_and_write_file()
577
# Try error handling to create the parent directory if we need to
579
_open_and_write_file()
581
# Try to create the parent directory, and then go back to
583
parent_dir = os.path.dirname(abspath)
584
self._mkdir(parent_dir, dir_mode)
585
_open_and_write_file()
587
def put_file_non_atomic(self, relpath, f, mode=None,
588
create_parent_dir=False,
590
"""Copy the file-like object into the target location.
592
This function is not strictly safe to use. It is only meant to
593
be used when you already know that the target does not exist.
594
It is not safe, because it will open and truncate the remote
595
file. So there may be a time when the file has invalid contents.
597
:param relpath: The remote location to put the contents.
598
:param f: File-like object.
599
:param mode: Possible access permissions for new file.
600
None means do not set remote permissions.
601
:param create_parent_dir: If we cannot create the target file because
602
the parent directory does not exist, go ahead and
603
create it, and then try again.
607
self._put_non_atomic_helper(relpath, writer, mode=mode,
608
create_parent_dir=create_parent_dir,
611
def put_bytes_non_atomic(self, relpath, bytes, mode=None,
612
create_parent_dir=False,
616
self._put_non_atomic_helper(relpath, writer, mode=mode,
617
create_parent_dir=create_parent_dir,
620
607
def iter_files_recursive(self):
621
608
"""Walk the relative paths of all files in this transport."""
622
# progress is handled by list_dir
623
609
queue = list(self.list_dir('.'))
625
relpath = queue.pop(0)
611
relpath = urllib.quote(queue.pop(0))
626
612
st = self.stat(relpath)
627
613
if stat.S_ISDIR(st.st_mode):
628
614
for i, basename in enumerate(self.list_dir(relpath)):
633
def _mkdir(self, abspath, mode=None):
639
self._report_activity(len(abspath), 'write')
640
self._get_sftp().mkdir(abspath, local_mode)
641
self._report_activity(1, 'read')
643
# chmod a dir through sftp will erase any sgid bit set
644
# on the server side. So, if the mode bits are already
645
# set, avoid the chmod. If the mode is not fine but
646
# the sgid bit is set, report a warning to the user
647
# with the umask fix.
648
stat = self._get_sftp().lstat(abspath)
649
mode = mode & 0777 # can't set special bits anyway
650
if mode != stat.st_mode & 0777:
651
if stat.st_mode & 06000:
652
warning('About to chmod %s over sftp, which will result'
653
' in its suid or sgid bits being cleared. If'
654
' you want to preserve those bits, change your '
655
' environment on the server to use umask 0%03o.'
656
% (abspath, 0777 - mode))
657
self._get_sftp().chmod(abspath, mode=mode)
658
except (paramiko.SSHException, IOError), e:
659
self._translate_io_exception(e, abspath, ': unable to mkdir',
660
failure_exc=FileExists)
662
619
def mkdir(self, relpath, mode=None):
663
620
"""Create a directory at the given path."""
664
self._mkdir(self._remote_path(relpath), mode=mode)
666
def open_write_stream(self, relpath, mode=None):
667
"""See Transport.open_write_stream."""
668
# initialise the file to zero-length
669
# this is three round trips, but we don't use this
670
# api more than once per write_group at the moment so
671
# it is a tolerable overhead. Better would be to truncate
672
# the file after opening. RBC 20070805
673
self.put_bytes_non_atomic(relpath, "", mode)
674
abspath = self._remote_path(relpath)
675
# TODO: jam 20060816 paramiko doesn't publicly expose a way to
676
# set the file mode at create time. If it does, use it.
677
# But for now, we just chmod later anyway.
621
path = self._remote_path(relpath)
680
handle = self._get_sftp().file(abspath, mode='wb')
681
handle.set_pipelined(True)
623
# In the paramiko documentation, it says that passing a mode flag
624
# will be filtered against the server umask.
625
# StubSFTPServer does not do this, which would be nice, because it is
626
# what we really want :)
627
# However, real servers do use umask, so we really should do it that way
628
self._sftp.mkdir(path)
630
self._sftp.chmod(path, mode=mode)
682
631
except (paramiko.SSHException, IOError), e:
683
self._translate_io_exception(e, abspath,
685
_file_streams[self.abspath(relpath)] = handle
686
return FileFileStream(self, relpath, handle)
632
self._translate_io_exception(e, path, ': unable to mkdir',
633
failure_exc=FileExists)
688
def _translate_io_exception(self, e, path, more_info='',
635
def _translate_io_exception(self, e, path, more_info='',
689
636
failure_exc=PathError):
690
637
"""Translate a paramiko or IOError into a friendlier exception.
696
643
:param failure_exc: Paramiko has the super fun ability to raise completely
697
644
opaque errors that just set "e.args = ('Failure',)" with
698
645
no more information.
699
If this parameter is set, it defines the exception
646
If this parameter is set, it defines the exception
700
647
to raise in these cases.
702
649
# paramiko seems to generate detailless errors.
703
650
self._translate_error(e, path, raise_generic=False)
704
if getattr(e, 'args', None) is not None:
651
if hasattr(e, 'args'):
705
652
if (e.args == ('No such file or directory',) or
706
653
e.args == ('No such file',)):
707
654
raise NoSuchFile(path, str(e) + more_info)
708
if (e.args == ('mkdir failed',) or
709
e.args[0].startswith('syserr: File exists')):
655
if (e.args == ('mkdir failed',)):
710
656
raise FileExists(path, str(e) + more_info)
711
657
# strange but true, for the paramiko server.
712
658
if (e.args == ('Failure',)):
713
659
raise failure_exc(path, str(e) + more_info)
714
# Can be something like args = ('Directory not empty:
715
# '/srv/bazaar.launchpad.net/blah...: '
716
# [Errno 39] Directory not empty',)
717
if (e.args[0].startswith('Directory not empty: ')
718
or getattr(e, 'errno', None) == errno.ENOTEMPTY):
719
raise errors.DirectoryNotEmpty(path, str(e))
720
660
mutter('Raising exception with args %s', e.args)
721
if getattr(e, 'errno', None) is not None:
661
if hasattr(e, 'errno'):
722
662
mutter('Raising exception with errno %s', e.errno)
725
def append_file(self, relpath, f, mode=None):
665
def append(self, relpath, f, mode=None):
727
667
Append the text in the file-like object into the final
731
671
path = self._remote_path(relpath)
732
fout = self._get_sftp().file(path, 'ab')
672
fout = self._sftp.file(path, 'ab')
733
673
if mode is not None:
734
self._get_sftp().chmod(path, mode)
674
self._sftp.chmod(path, mode)
735
675
result = fout.tell()
736
676
self._pump(f, fout)
842
770
# that we have taken the lock.
843
771
return SFTPLock(relpath, self)
773
def _unparse_url(self, path=None):
776
path = urllib.quote(path)
777
# handle homedir paths
778
if not path.startswith('/'):
780
netloc = urllib.quote(self._host)
781
if self._username is not None:
782
netloc = '%s@%s' % (urllib.quote(self._username), netloc)
783
if self._port is not None:
784
netloc = '%s:%d' % (netloc, self._port)
785
return urlparse.urlunparse(('sftp', netloc, path, '', '', ''))
787
def _split_url(self, url):
788
(scheme, username, password, host, port, path) = split_url(url)
789
assert scheme == 'sftp'
791
# the initial slash should be removed from the path, and treated
792
# as a homedir relative path (the path begins with a double slash
793
# if it is absolute).
794
# see draft-ietf-secsh-scp-sftp-ssh-uri-03.txt
795
# RBC 20060118 we are not using this as its too user hostile. instead
796
# we are following lftp and using /~/foo to mean '~/foo'.
797
# handle homedir paths
798
if path.startswith('/~/'):
802
return (username, password, host, port, path)
804
def _parse_url(self, url):
805
(self._username, self._password,
806
self._host, self._port, self._path) = self._split_url(url)
808
def _sftp_connect(self):
809
"""Connect to the remote sftp server.
810
After this, self._sftp should have a valid connection (or
811
we raise an TransportError 'could not connect').
813
TODO: Raise a more reasonable ConnectionFailed exception
815
global _connected_hosts
817
idx = (self._host, self._port, self._username)
819
self._sftp = _connected_hosts[idx]
824
vendor = _get_ssh_vendor()
825
if vendor == 'loopback':
826
sock = socket.socket()
828
sock.connect((self._host, self._port))
829
except socket.error, e:
830
raise ConnectionError('Unable to connect to SSH host %s:%s: %s'
831
% (self._host, self._port, e))
832
self._sftp = SFTPClient(LoopbackSFTP(sock))
833
elif vendor != 'none':
834
sock = SFTPSubprocess(self._host, vendor, self._port,
836
self._sftp = SFTPClient(sock)
838
self._paramiko_connect()
840
_connected_hosts[idx] = self._sftp
842
def _paramiko_connect(self):
843
global SYSTEM_HOSTKEYS, BZR_HOSTKEYS
848
t = paramiko.Transport((self._host, self._port or 22))
849
t.set_log_channel('bzr.paramiko')
851
except paramiko.SSHException, e:
852
raise ConnectionError('Unable to reach SSH host %s:%s: %s'
853
% (self._host, self._port, e))
855
server_key = t.get_remote_server_key()
856
server_key_hex = paramiko.util.hexify(server_key.get_fingerprint())
857
keytype = server_key.get_name()
858
if SYSTEM_HOSTKEYS.has_key(self._host) and SYSTEM_HOSTKEYS[self._host].has_key(keytype):
859
our_server_key = SYSTEM_HOSTKEYS[self._host][keytype]
860
our_server_key_hex = paramiko.util.hexify(our_server_key.get_fingerprint())
861
elif BZR_HOSTKEYS.has_key(self._host) and BZR_HOSTKEYS[self._host].has_key(keytype):
862
our_server_key = BZR_HOSTKEYS[self._host][keytype]
863
our_server_key_hex = paramiko.util.hexify(our_server_key.get_fingerprint())
865
warning('Adding %s host key for %s: %s' % (keytype, self._host, server_key_hex))
866
if not BZR_HOSTKEYS.has_key(self._host):
867
BZR_HOSTKEYS[self._host] = {}
868
BZR_HOSTKEYS[self._host][keytype] = server_key
869
our_server_key = server_key
870
our_server_key_hex = paramiko.util.hexify(our_server_key.get_fingerprint())
872
if server_key != our_server_key:
873
filename1 = os.path.expanduser('~/.ssh/known_hosts')
874
filename2 = pathjoin(config_dir(), 'ssh_host_keys')
875
raise TransportError('Host keys for %s do not match! %s != %s' % \
876
(self._host, our_server_key_hex, server_key_hex),
877
['Try editing %s or %s' % (filename1, filename2)])
882
self._sftp = t.open_sftp_client()
883
except paramiko.SSHException, e:
884
raise ConnectionError('Unable to start sftp client %s:%d' %
885
(self._host, self._port), e)
887
def _sftp_auth(self, transport):
888
# paramiko requires a username, but it might be none if nothing was supplied
889
# use the local username, just in case.
890
# We don't override self._username, because if we aren't using paramiko,
891
# the username might be specified in ~/.ssh/config and we don't want to
892
# force it to something else
893
# Also, it would mess up the self.relpath() functionality
894
username = self._username or getpass.getuser()
897
agent = paramiko.Agent()
898
for key in agent.get_keys():
899
mutter('Trying SSH agent key %s' % paramiko.util.hexify(key.get_fingerprint()))
901
transport.auth_publickey(username, key)
903
except paramiko.SSHException, e:
906
# okay, try finding id_rsa or id_dss? (posix only)
907
if self._try_pkey_auth(transport, paramiko.RSAKey, username, 'id_rsa'):
909
if self._try_pkey_auth(transport, paramiko.DSSKey, username, 'id_dsa'):
914
transport.auth_password(username, self._password)
916
except paramiko.SSHException, e:
919
# FIXME: Don't keep a password held in memory if you can help it
920
#self._password = None
922
# give up and ask for a password
923
password = bzrlib.ui.ui_factory.get_password(
924
prompt='SSH %(user)s@%(host)s password',
925
user=username, host=self._host)
927
transport.auth_password(username, password)
928
except paramiko.SSHException, e:
929
raise ConnectionError('Unable to authenticate to SSH host as %s@%s' %
930
(username, self._host), e)
932
def _try_pkey_auth(self, transport, pkey_class, username, filename):
933
filename = os.path.expanduser('~/.ssh/' + filename)
935
key = pkey_class.from_private_key_file(filename)
936
transport.auth_publickey(username, key)
938
except paramiko.PasswordRequiredException:
939
password = bzrlib.ui.ui_factory.get_password(
940
prompt='SSH %(filename)s password',
943
key = pkey_class.from_private_key_file(filename, password)
944
transport.auth_publickey(username, key)
946
except paramiko.SSHException:
947
mutter('SSH authentication via %s key failed.' % (os.path.basename(filename),))
948
except paramiko.SSHException:
949
mutter('SSH authentication via %s key failed.' % (os.path.basename(filename),))
845
954
def _sftp_open_exclusive(self, abspath, mode=None):
846
955
"""Open a remote path exclusively.
856
965
:param abspath: The remote absolute path where the file should be opened
857
966
:param mode: The mode permissions bits for the new file
859
# TODO: jam 20060816 Paramiko >= 1.6.2 (probably earlier) supports
860
# using the 'x' flag to indicate SFTP_FLAG_EXCL.
861
# However, there is no way to set the permission mode at open
862
# time using the sftp_client.file() functionality.
863
path = self._get_sftp()._adjust_cwd(abspath)
968
path = self._sftp._adjust_cwd(abspath)
864
969
# mutter('sftp abspath %s => %s', abspath, path)
865
970
attr = SFTPAttributes()
866
971
if mode is not None:
867
972
attr.st_mode = mode
868
omode = (SFTP_FLAG_WRITE | SFTP_FLAG_CREATE
973
omode = (SFTP_FLAG_WRITE | SFTP_FLAG_CREATE
869
974
| SFTP_FLAG_TRUNC | SFTP_FLAG_EXCL)
871
t, msg = self._get_sftp()._request(CMD_OPEN, path, omode, attr)
976
t, msg = self._sftp._request(CMD_OPEN, path, omode, attr)
872
977
if t != CMD_HANDLE:
873
978
raise TransportError('Expected an SFTP handle')
874
979
handle = msg.get_string()
875
return SFTPFile(self._get_sftp(), handle, 'wb', -1)
980
return SFTPFile(self._sftp, handle, 'wb', -1)
876
981
except (paramiko.SSHException, IOError), e:
877
982
self._translate_io_exception(e, abspath, ': unable to open',
878
983
failure_exc=FileExists)
880
def _can_roundtrip_unix_modebits(self):
881
if sys.platform == 'win32':
887
986
# ------------- server test implementation --------------
890
990
from bzrlib.tests.stub_sftp import StubServer, StubSFTPServer
949
1049
# probably a failed test; unit test thread will log the
951
1051
sys.excepthook(*sys.exc_info())
952
warning('Exception from within unit test server thread: %r' %
1052
warning('Exception from within unit test server thread: %r' %
956
class SocketDelay(object):
957
"""A socket decorator to make TCP appear slower.
959
This changes recv, send, and sendall to add a fixed latency to each python
960
call if a new roundtrip is detected. That is, when a recv is called and the
961
flag new_roundtrip is set, latency is charged. Every send and send_all
964
In addition every send, sendall and recv sleeps a bit per character send to
967
Not all methods are implemented, this is deliberate as this class is not a
968
replacement for the builtin sockets layer. fileno is not implemented to
969
prevent the proxy being bypassed.
973
_proxied_arguments = dict.fromkeys([
974
"close", "getpeername", "getsockname", "getsockopt", "gettimeout",
975
"setblocking", "setsockopt", "settimeout", "shutdown"])
977
def __init__(self, sock, latency, bandwidth=1.0,
980
:param bandwith: simulated bandwith (MegaBit)
981
:param really_sleep: If set to false, the SocketDelay will just
982
increase a counter, instead of calling time.sleep. This is useful for
983
unittesting the SocketDelay.
986
self.latency = latency
987
self.really_sleep = really_sleep
988
self.time_per_byte = 1 / (bandwidth / 8.0 * 1024 * 1024)
989
self.new_roundtrip = False
992
if self.really_sleep:
995
SocketDelay.simulated_time += s
997
def __getattr__(self, attr):
998
if attr in SocketDelay._proxied_arguments:
999
return getattr(self.sock, attr)
1000
raise AttributeError("'SocketDelay' object has no attribute %r" %
1004
return SocketDelay(self.sock.dup(), self.latency, self.time_per_byte,
1007
def recv(self, *args):
1008
data = self.sock.recv(*args)
1009
if data and self.new_roundtrip:
1010
self.new_roundtrip = False
1011
self.sleep(self.latency)
1012
self.sleep(len(data) * self.time_per_byte)
1015
def sendall(self, data, flags=0):
1016
if not self.new_roundtrip:
1017
self.new_roundtrip = True
1018
self.sleep(self.latency)
1019
self.sleep(len(data) * self.time_per_byte)
1020
return self.sock.sendall(data, flags)
1022
def send(self, data, flags=0):
1023
if not self.new_roundtrip:
1024
self.new_roundtrip = True
1025
self.sleep(self.latency)
1026
bytes_sent = self.sock.send(data, flags)
1027
self.sleep(bytes_sent * self.time_per_byte)
1031
1056
class SFTPServer(Server):
1032
1057
"""Common code for SFTP server facilities."""
1034
def __init__(self, server_interface=StubServer):
1035
1060
self._original_vendor = None
1036
1061
self._homedir = None
1037
1062
self._server_homedir = None
1038
1063
self._listener = None
1039
1064
self._root = None
1040
self._vendor = ssh.ParamikoVendor()
1041
self._server_interface = server_interface
1065
self._vendor = 'none'
1042
1066
# sftp server logs
1044
self.add_latency = 0
1046
1069
def _get_sftp_url(self, path):
1047
1070
"""Calculate an sftp url to this server for path."""
1048
return 'sftp://foo:bar@%s:%d/%s' % (self._listener.host,
1049
self._listener.port, path)
1071
return 'sftp://foo:bar@localhost:%d/%s' % (self._listener.port, path)
1051
1073
def log(self, message):
1052
1074
"""StubServer uses this to log when a new server is created."""
1053
1075
self.logs.append(message)
1055
def _run_server_entry(self, sock):
1056
"""Entry point for all implementations of _run_server.
1058
If self.add_latency is > 0.000001 then sock is given a latency adding
1061
if self.add_latency > 0.000001:
1062
sock = SocketDelay(sock, self.add_latency)
1063
return self._run_server(sock)
1065
1077
def _run_server(self, s):
1066
1078
ssh_server = paramiko.Transport(s)
1067
1079
key_file = pathjoin(self._homedir, 'test_rsa.key')