122
class _SFTPReadvHelper(object):
123
"""A class to help with managing the state of a readv request."""
125
# See _get_requests for an explanation.
126
_max_request_size = 32768
128
def __init__(self, original_offsets, relpath, _report_activity):
129
"""Create a new readv helper.
131
:param original_offsets: The original requests given by the caller of
133
:param relpath: The name of the file (if known)
134
:param _report_activity: A Transport._report_activity bound method,
135
to be called as data arrives.
137
self.original_offsets = list(original_offsets)
138
self.relpath = relpath
139
self._report_activity = _report_activity
141
def _get_requests(self):
142
"""Break up the offsets into individual requests over sftp.
144
The SFTP spec only requires implementers to support 32kB requests. We
145
could try something larger (openssh supports 64kB), but then we have to
146
handle requests that fail.
147
So instead, we just break up our maximum chunks into 32kB chunks, and
148
asynchronously request them.
149
Newer versions of paramiko would do the chunking for us, but we want to
150
start processing results right away, so we do it ourselves.
152
# TODO: Because we issue async requests, we don't 'fudge' any extra
153
# data. I'm not 100% sure that is the best choice.
155
# The first thing we do, is to collapse the individual requests as much
156
# as possible, so we don't issue requests <32kB
157
sorted_offsets = sorted(self.original_offsets)
158
coalesced = list(ConnectedTransport._coalesce_offsets(sorted_offsets,
159
limit=0, fudge_factor=0))
161
for c_offset in coalesced:
162
start = c_offset.start
163
size = c_offset.length
165
# Break this up into 32kB requests
167
next_size = min(size, self._max_request_size)
168
requests.append((start, next_size))
171
if 'sftp' in debug.debug_flags:
172
mutter('SFTP.readv(%s) %s offsets => %s coalesced => %s requests',
173
self.relpath, len(sorted_offsets), len(coalesced),
177
def request_and_yield_offsets(self, fp):
178
"""Request the data from the remote machine, yielding the results.
180
:param fp: A Paramiko SFTPFile object that supports readv.
181
:return: Yield the data requested by the original readv caller, one by
184
requests = self._get_requests()
185
offset_iter = iter(self.original_offsets)
186
cur_offset, cur_size = offset_iter.next()
187
# paramiko .readv() yields strings that are in the order of the requests
188
# So we track the current request to know where the next data is
189
# being returned from.
195
# This is used to buffer chunks which we couldn't process yet
196
# It is (start, end, data) tuples.
198
# Create an 'unlimited' data stream, so we stop based on requests,
199
# rather than just because the data stream ended. This lets us detect
201
data_stream = itertools.chain(fp.readv(requests),
202
itertools.repeat(None))
203
for (start, length), data in itertools.izip(requests, data_stream):
205
if cur_coalesced is not None:
206
raise errors.ShortReadvError(self.relpath,
207
start, length, len(data))
208
if len(data) != length:
209
raise errors.ShortReadvError(self.relpath,
210
start, length, len(data))
211
self._report_activity(length, 'read')
213
# This is the first request, just buffer it
214
buffered_data = [data]
215
buffered_len = length
217
elif start == last_end:
218
# The data we are reading fits neatly on the previous
219
# buffer, so this is all part of a larger coalesced range.
220
buffered_data.append(data)
221
buffered_len += length
223
# We have an 'interrupt' in the data stream. So we know we are
224
# at a request boundary.
226
# We haven't consumed the buffer so far, so put it into
227
# data_chunks, and continue.
228
buffered = ''.join(buffered_data)
229
data_chunks.append((input_start, buffered))
231
buffered_data = [data]
232
buffered_len = length
233
last_end = start + length
234
if input_start == cur_offset and cur_size <= buffered_len:
235
# Simplify the next steps a bit by transforming buffered_data
236
# into a single string. We also have the nice property that
237
# when there is only one string ''.join([x]) == x, so there is
239
buffered = ''.join(buffered_data)
240
# Clean out buffered data so that we keep memory
244
# TODO: We *could* also consider the case where cur_offset is in
245
# the buffered range, even though it doesn't *start*
246
# the buffered range. But for packs we pretty much always
247
# read in order, so you won't get any extra data in the
249
while (input_start == cur_offset
250
and (buffered_offset + cur_size) <= buffered_len):
251
# We've buffered enough data to process this request, spit it
253
cur_data = buffered[buffered_offset:buffered_offset + cur_size]
254
# move the direct pointer into our buffered data
255
buffered_offset += cur_size
256
# Move the start-of-buffer pointer
257
input_start += cur_size
258
# Yield the requested data
259
yield cur_offset, cur_data
260
cur_offset, cur_size = offset_iter.next()
261
# at this point, we've consumed as much of buffered as we can,
262
# so break off the portion that we consumed
263
if buffered_offset == len(buffered_data):
264
# No tail to leave behind
268
buffered = buffered[buffered_offset:]
269
buffered_data = [buffered]
270
buffered_len = len(buffered)
271
# now that the data stream is done, close the handle
274
buffered = ''.join(buffered_data)
276
data_chunks.append((input_start, buffered))
278
if 'sftp' in debug.debug_flags:
279
mutter('SFTP readv left with %d out-of-order bytes',
280
sum(map(lambda x: len(x[1]), data_chunks)))
281
# We've processed all the readv data, at this point, anything we
282
# couldn't process is in data_chunks. This doesn't happen often, so
283
# this code path isn't optimized
284
# We use an interesting process for data_chunks
285
# Specifically if we have "bisect_left([(start, len, entries)],
287
# If start == qstart, then we get the specific node. Otherwise we
288
# get the previous node
290
idx = bisect.bisect_left(data_chunks, (cur_offset,))
291
if idx < len(data_chunks) and data_chunks[idx][0] == cur_offset:
292
# The data starts here
293
data = data_chunks[idx][1][:cur_size]
295
# The data is in a portion of a previous page
297
sub_offset = cur_offset - data_chunks[idx][0]
298
data = data_chunks[idx][1]
299
data = data[sub_offset:sub_offset + cur_size]
301
# We are missing the page where the data should be found,
304
if len(data) != cur_size:
305
raise AssertionError('We must have miscalulated.'
306
' We expected %d bytes, but only found %d'
307
% (cur_size, len(data)))
308
yield cur_offset, data
309
cur_offset, cur_size = offset_iter.next()
312
133
class SFTPTransport(ConnectedTransport):
313
134
"""Transport implementation for SFTP access."""
439
252
readv = getattr(fp, 'readv', None)
441
254
return self._sftp_readv(fp, offsets, relpath)
442
if 'sftp' in debug.debug_flags:
443
mutter('seek and read %s offsets', len(offsets))
255
mutter('seek and read %s offsets', len(offsets))
444
256
return self._seek_and_read(fp, offsets, relpath)
445
257
except (IOError, paramiko.SSHException), e:
446
258
self._translate_io_exception(e, path, ': error retrieving')
448
def recommended_page_size(self):
449
"""See Transport.recommended_page_size().
451
For SFTP we suggest a large page size to reduce the overhead
452
introduced by latency.
456
def _sftp_readv(self, fp, offsets, relpath):
260
def _sftp_readv(self, fp, offsets, relpath='<unknown>'):
457
261
"""Use the readv() member of fp to do async readv.
459
Then read them using paramiko.readv(). paramiko.readv()
263
And then read them using paramiko.readv(). paramiko.readv()
460
264
does not support ranges > 64K, so it caps the request size, and
461
just reads until it gets all the stuff it wants.
265
just reads until it gets all the stuff it wants
463
helper = _SFTPReadvHelper(offsets, relpath, self._report_activity)
464
return helper.request_and_yield_offsets(fp)
267
offsets = list(offsets)
268
sorted_offsets = sorted(offsets)
270
# The algorithm works as follows:
271
# 1) Coalesce nearby reads into a single chunk
272
# This generates a list of combined regions, the total size
273
# and the size of the sub regions. This coalescing step is limited
274
# in the number of nearby chunks to combine, and is allowed to
275
# skip small breaks in the requests. Limiting it makes sure that
276
# we can start yielding some data earlier, and skipping means we
277
# make fewer requests. (Beneficial even when using async)
278
# 2) Break up these combined regions into chunks that are smaller
279
# than 64KiB. Technically the limit is 65536, but we are a
280
# little bit conservative. This is because sftp has a maximum
281
# return chunk size of 64KiB (max size of an unsigned short)
282
# 3) Issue a readv() to paramiko to create an async request for
284
# 4) Read in the data as it comes back, until we've read one
285
# continuous section as determined in step 1
286
# 5) Break up the full sections into hunks for the original requested
287
# offsets. And put them in a cache
288
# 6) Check if the next request is in the cache, and if it is, remove
289
# it from the cache, and yield its data. Continue until no more
290
# entries are in the cache.
291
# 7) loop back to step 4 until all data has been read
293
# TODO: jam 20060725 This could be optimized one step further, by
294
# attempting to yield whatever data we have read, even before
295
# the first coalesced section has been fully processed.
297
# When coalescing for use with readv(), we don't really need to
298
# use any fudge factor, because the requests are made asynchronously
299
coalesced = list(self._coalesce_offsets(sorted_offsets,
300
limit=self._max_readv_combine,
304
for c_offset in coalesced:
305
start = c_offset.start
306
size = c_offset.length
308
# We need to break this up into multiple requests
310
next_size = min(size, self._max_request_size)
311
requests.append((start, next_size))
315
mutter('SFTP.readv() %s offsets => %s coalesced => %s requests',
316
len(offsets), len(coalesced), len(requests))
318
# Queue the current read until we have read the full coalesced section
321
cur_coalesced_stack = iter(coalesced)
322
cur_coalesced = cur_coalesced_stack.next()
324
# Cache the results, but only until they have been fulfilled
326
# turn the list of offsets into a stack
327
offset_stack = iter(offsets)
328
cur_offset_and_size = offset_stack.next()
330
for data in fp.readv(requests):
332
cur_data_len += len(data)
334
if cur_data_len < cur_coalesced.length:
336
assert cur_data_len == cur_coalesced.length, \
337
"Somehow we read too much: %s != %s" % (cur_data_len,
338
cur_coalesced.length)
339
all_data = ''.join(cur_data)
343
for suboffset, subsize in cur_coalesced.ranges:
344
key = (cur_coalesced.start+suboffset, subsize)
345
data_map[key] = all_data[suboffset:suboffset+subsize]
347
# Now that we've read some data, see if we can yield anything back
348
while cur_offset_and_size in data_map:
349
this_data = data_map.pop(cur_offset_and_size)
350
yield cur_offset_and_size[0], this_data
351
cur_offset_and_size = offset_stack.next()
353
# We read a coalesced entry, so mark it as done
355
# Now that we've read all of the data for this coalesced section
357
cur_coalesced = cur_coalesced_stack.next()
359
if cur_coalesced is not None:
360
raise errors.ShortReadvError(relpath, cur_coalesced.start,
361
cur_coalesced.length, len(data))
466
363
def put_file(self, relpath, f, mode=None):
719
# ------------- server test implementation --------------
722
from bzrlib.tests.stub_sftp import StubServer, StubSFTPServer
724
STUB_SERVER_KEY = """
725
-----BEGIN RSA PRIVATE KEY-----
726
MIICWgIBAAKBgQDTj1bqB4WmayWNPB+8jVSYpZYk80Ujvj680pOTh2bORBjbIAyz
727
oWGW+GUjzKxTiiPvVmxFgx5wdsFvF03v34lEVVhMpouqPAYQ15N37K/ir5XY+9m/
728
d8ufMCkjeXsQkKqFbAlQcnWMCRnOoPHS3I4vi6hmnDDeeYTSRvfLbW0fhwIBIwKB
729
gBIiOqZYaoqbeD9OS9z2K9KR2atlTxGxOJPXiP4ESqP3NVScWNwyZ3NXHpyrJLa0
730
EbVtzsQhLn6rF+TzXnOlcipFvjsem3iYzCpuChfGQ6SovTcOjHV9z+hnpXvQ/fon
731
soVRZY65wKnF7IAoUwTmJS9opqgrN6kRgCd3DASAMd1bAkEA96SBVWFt/fJBNJ9H
732
tYnBKZGw0VeHOYmVYbvMSstssn8un+pQpUm9vlG/bp7Oxd/m+b9KWEh2xPfv6zqU
733
avNwHwJBANqzGZa/EpzF4J8pGti7oIAPUIDGMtfIcmqNXVMckrmzQ2vTfqtkEZsA
734
4rE1IERRyiJQx6EJsz21wJmGV9WJQ5kCQQDwkS0uXqVdFzgHO6S++tjmjYcxwr3g
735
H0CoFYSgbddOT6miqRskOQF3DZVkJT3kyuBgU2zKygz52ukQZMqxCb1fAkASvuTv
736
qfpH87Qq5kQhNKdbbwbmd2NxlNabazPijWuphGTdW0VfJdWfklyS2Kr+iqrs/5wV
737
HhathJt636Eg7oIjAkA8ht3MQ+XSl9yIJIS8gVpbPxSw5OMfw0PjVE7tBdQruiSc
738
nvuQES5C9BMHjF39LZiGH1iLQy7FgdHyoP+eodI7
739
-----END RSA PRIVATE KEY-----
743
class SocketListener(threading.Thread):
745
def __init__(self, callback):
746
threading.Thread.__init__(self)
747
self._callback = callback
748
self._socket = socket.socket()
749
self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
750
self._socket.bind(('localhost', 0))
751
self._socket.listen(1)
752
self.port = self._socket.getsockname()[1]
753
self._stop_event = threading.Event()
756
# called from outside this thread
757
self._stop_event.set()
758
# use a timeout here, because if the test fails, the server thread may
759
# never notice the stop_event.
765
readable, writable_unused, exception_unused = \
766
select.select([self._socket], [], [], 0.1)
767
if self._stop_event.isSet():
769
if len(readable) == 0:
772
s, addr_unused = self._socket.accept()
773
# because the loopback socket is inline, and transports are
774
# never explicitly closed, best to launch a new thread.
775
threading.Thread(target=self._callback, args=(s,)).start()
776
except socket.error, x:
777
sys.excepthook(*sys.exc_info())
778
warning('Socket error during accept() within unit test server'
781
# probably a failed test; unit test thread will log the
783
sys.excepthook(*sys.exc_info())
784
warning('Exception from within unit test server thread: %r' %
788
class SocketDelay(object):
789
"""A socket decorator to make TCP appear slower.
791
This changes recv, send, and sendall to add a fixed latency to each python
792
call if a new roundtrip is detected. That is, when a recv is called and the
793
flag new_roundtrip is set, latency is charged. Every send and sendall
796
In addition every send, sendall and recv sleeps a bit per character sent to
799
Not all methods are implemented, this is deliberate as this class is not a
800
replacement for the builtin sockets layer. fileno is not implemented to
801
prevent the proxy being bypassed.
805
_proxied_arguments = dict.fromkeys([
806
"close", "getpeername", "getsockname", "getsockopt", "gettimeout",
807
"setblocking", "setsockopt", "settimeout", "shutdown"])
809
def __init__(self, sock, latency, bandwidth=1.0,
812
:param bandwidth: simulated bandwidth (MegaBit)
813
:param really_sleep: If set to false, the SocketDelay will just
814
increase a counter, instead of calling time.sleep. This is useful for
815
unittesting the SocketDelay.
818
self.latency = latency
819
self.really_sleep = really_sleep
820
self.time_per_byte = 1 / (bandwidth / 8.0 * 1024 * 1024)
821
self.new_roundtrip = False
824
if self.really_sleep:
827
SocketDelay.simulated_time += s
829
def __getattr__(self, attr):
830
if attr in SocketDelay._proxied_arguments:
831
return getattr(self.sock, attr)
832
raise AttributeError("'SocketDelay' object has no attribute %r" %
836
return SocketDelay(self.sock.dup(), self.latency, self.time_per_byte,
839
def recv(self, *args):
840
data = self.sock.recv(*args)
841
if data and self.new_roundtrip:
842
self.new_roundtrip = False
843
self.sleep(self.latency)
844
self.sleep(len(data) * self.time_per_byte)
847
def sendall(self, data, flags=0):
848
if not self.new_roundtrip:
849
self.new_roundtrip = True
850
self.sleep(self.latency)
851
self.sleep(len(data) * self.time_per_byte)
852
return self.sock.sendall(data, flags)
854
def send(self, data, flags=0):
855
if not self.new_roundtrip:
856
self.new_roundtrip = True
857
self.sleep(self.latency)
858
bytes_sent = self.sock.send(data, flags)
859
self.sleep(bytes_sent * self.time_per_byte)
863
class SFTPServer(Server):
864
"""Common code for SFTP server facilities."""
866
def __init__(self, server_interface=StubServer):
867
self._original_vendor = None
869
self._server_homedir = None
870
self._listener = None
872
self._vendor = ssh.ParamikoVendor()
873
self._server_interface = server_interface
878
def _get_sftp_url(self, path):
879
"""Calculate an sftp url to this server for path."""
880
return 'sftp://foo:bar@localhost:%d/%s' % (self._listener.port, path)
882
def log(self, message):
883
"""StubServer uses this to log when a new server is created."""
884
self.logs.append(message)
886
def _run_server_entry(self, sock):
887
"""Entry point for all implementations of _run_server.
889
If self.add_latency is > 0.000001 then sock is given a latency adding
892
if self.add_latency > 0.000001:
893
sock = SocketDelay(sock, self.add_latency)
894
return self._run_server(sock)
896
def _run_server(self, s):
897
ssh_server = paramiko.Transport(s)
898
key_file = pathjoin(self._homedir, 'test_rsa.key')
899
f = open(key_file, 'w')
900
f.write(STUB_SERVER_KEY)
902
host_key = paramiko.RSAKey.from_private_key_file(key_file)
903
ssh_server.add_server_key(host_key)
904
server = self._server_interface(self)
905
ssh_server.set_subsystem_handler('sftp', paramiko.SFTPServer,
906
StubSFTPServer, root=self._root,
907
home=self._server_homedir)
908
event = threading.Event()
909
ssh_server.start_server(event, server)
912
def setUp(self, backing_server=None):
913
# XXX: TODO: make sftpserver back onto backing_server rather than local
915
assert (backing_server is None or
916
isinstance(backing_server, local.LocalURLServer)), (
917
"backing_server should not be %r, because this can only serve the "
918
"local current working directory." % (backing_server,))
919
self._original_vendor = ssh._ssh_vendor_manager._cached_ssh_vendor
920
ssh._ssh_vendor_manager._cached_ssh_vendor = self._vendor
921
if sys.platform == 'win32':
922
# Win32 needs to use the UNICODE api
923
self._homedir = getcwd()
925
# But Linux SFTP servers should just deal in bytestreams
926
self._homedir = os.getcwd()
927
if self._server_homedir is None:
928
self._server_homedir = self._homedir
930
if sys.platform == 'win32':
932
self._listener = SocketListener(self._run_server_entry)
933
self._listener.setDaemon(True)
934
self._listener.start()
937
"""See bzrlib.transport.Server.tearDown."""
938
self._listener.stop()
939
ssh._ssh_vendor_manager._cached_ssh_vendor = self._original_vendor
941
def get_bogus_url(self):
942
"""See bzrlib.transport.Server.get_bogus_url."""
943
# this is chosen to try to prevent trouble with proxies, weird DNS, etc
944
# we bind a random socket, so that we get a guaranteed unused port
945
# we just never listen on that port
947
s.bind(('localhost', 0))
948
return 'sftp://%s:%s/' % s.getsockname()
951
class SFTPFullAbsoluteServer(SFTPServer):
952
"""A test server for sftp transports, using absolute urls and ssh."""
955
"""See bzrlib.transport.Server.get_url."""
956
homedir = self._homedir
957
if sys.platform != 'win32':
958
# Remove the initial '/' on all platforms but win32
959
homedir = homedir[1:]
960
return self._get_sftp_url(urlutils.escape(homedir))
963
class SFTPServerWithoutSSH(SFTPServer):
964
"""An SFTP server that uses a simple TCP socket pair rather than SSH."""
967
super(SFTPServerWithoutSSH, self).__init__()
968
self._vendor = ssh.LoopbackVendor()
970
def _run_server(self, sock):
971
# Re-import these as locals, so that they're still accessible during
972
# interpreter shutdown (when all module globals get set to None, leading
973
# to confusing errors like "'NoneType' object has no attribute 'error'".
974
class FakeChannel(object):
975
def get_transport(self):
977
def get_log_channel(self):
981
def get_hexdump(self):
986
server = paramiko.SFTPServer(FakeChannel(), 'sftp', StubServer(self), StubSFTPServer,
987
root=self._root, home=self._server_homedir)
989
server.start_subsystem('sftp', None, sock)
990
except socket.error, e:
991
if (len(e.args) > 0) and (e.args[0] == errno.EPIPE):
992
# it's okay for the client to disconnect abruptly
993
# (bug in paramiko 1.6: it should absorb this exception)
998
# This typically seems to happen during interpreter shutdown, so
999
# most of the useful ways to report this error won't work.
1000
# Writing the exception type, and then the text of the exception,
1001
# seems to be the best we can do.
1003
sys.stderr.write('\nEXCEPTION %r: ' % (e.__class__,))
1004
sys.stderr.write('%s\n\n' % (e,))
1005
server.finish_subsystem()
1008
class SFTPAbsoluteServer(SFTPServerWithoutSSH):
1009
"""A test server for sftp transports, using absolute urls."""
1012
"""See bzrlib.transport.Server.get_url."""
1013
homedir = self._homedir
1014
if sys.platform != 'win32':
1015
# Remove the initial '/' on all platforms but win32
1016
homedir = homedir[1:]
1017
return self._get_sftp_url(urlutils.escape(homedir))
1020
class SFTPHomeDirServer(SFTPServerWithoutSSH):
1021
"""A test server for sftp transports, using homedir relative urls."""
1024
"""See bzrlib.transport.Server.get_url."""
1025
return self._get_sftp_url("~/")
1028
class SFTPSiblingAbsoluteServer(SFTPAbsoluteServer):
1029
"""A test server for sftp transports where only absolute paths will work.
1031
It does this by serving from a deeply-nested directory that doesn't exist.
1034
def setUp(self, backing_server=None):
1035
self._server_homedir = '/dev/noone/runs/tests/here'
1036
super(SFTPSiblingAbsoluteServer, self).setUp(backing_server)
895
1039
def get_test_permutations():
896
1040
"""Return the permutations to be used in testing."""
897
from bzrlib.tests import stub_sftp
898
return [(SFTPTransport, stub_sftp.SFTPAbsoluteServer),
899
(SFTPTransport, stub_sftp.SFTPHomeDirServer),
900
(SFTPTransport, stub_sftp.SFTPSiblingAbsoluteServer),
1041
return [(SFTPTransport, SFTPAbsoluteServer),
1042
(SFTPTransport, SFTPHomeDirServer),
1043
(SFTPTransport, SFTPSiblingAbsoluteServer),