124
class _SFTPReadvHelper(object):
125
"""A class to help with managing the state of a readv request."""
127
# See _get_requests for an explanation.
128
_max_request_size = 32768
130
def __init__(self, original_offsets, relpath, _report_activity):
131
"""Create a new readv helper.
133
:param original_offsets: The original requests given by the caller of
135
:param relpath: The name of the file (if known)
136
:param _report_activity: A Transport._report_activity bound method,
137
to be called as data arrives.
139
self.original_offsets = list(original_offsets)
140
self.relpath = relpath
141
self._report_activity = _report_activity
143
def _get_requests(self):
144
"""Break up the offsets into individual requests over sftp.
146
The SFTP spec only requires implementers to support 32kB requests. We
147
could try something larger (openssh supports 64kB), but then we have to
148
handle requests that fail.
149
So instead, we just break up our maximum chunks into 32kB chunks, and
150
asynchronously request them.
151
Newer versions of paramiko would do the chunking for us, but we want to
152
start processing results right away, so we do it ourselves.
154
# TODO: Because we issue async requests, we don't 'fudge' any extra
155
# data. I'm not 100% sure that is the best choice.
157
# The first thing we do, is to collapse the individual requests as much
158
# as possible, so we don't issue requests <32kB
159
sorted_offsets = sorted(self.original_offsets)
160
coalesced = list(ConnectedTransport._coalesce_offsets(sorted_offsets,
161
limit=0, fudge_factor=0))
163
for c_offset in coalesced:
164
start = c_offset.start
165
size = c_offset.length
167
# Break this up into 32kB requests
169
next_size = min(size, self._max_request_size)
170
requests.append((start, next_size))
173
if 'sftp' in debug.debug_flags:
174
mutter('SFTP.readv(%s) %s offsets => %s coalesced => %s requests',
175
self.relpath, len(sorted_offsets), len(coalesced),
179
def request_and_yield_offsets(self, fp):
180
"""Request the data from the remote machine, yielding the results.
182
:param fp: A Paramiko SFTPFile object that supports readv.
183
:return: Yield the data requested by the original readv caller, one by
186
requests = self._get_requests()
187
offset_iter = iter(self.original_offsets)
188
cur_offset, cur_size = offset_iter.next()
189
# paramiko .readv() yields strings that are in the order of the requests
190
# So we track the current request to know where the next data is
191
# being returned from.
197
# This is used to buffer chunks which we couldn't process yet
198
# It is (start, end, data) tuples.
200
# Create an 'unlimited' data stream, so we stop based on requests,
201
# rather than just because the data stream ended. This lets us detect
203
data_stream = itertools.chain(fp.readv(requests),
204
itertools.repeat(None))
205
for (start, length), data in itertools.izip(requests, data_stream):
207
if cur_coalesced is not None:
208
raise errors.ShortReadvError(self.relpath,
209
start, length, len(data))
210
if len(data) != length:
211
raise errors.ShortReadvError(self.relpath,
212
start, length, len(data))
213
self._report_activity(length, 'read')
215
# This is the first request, just buffer it
216
buffered_data = [data]
217
buffered_len = length
219
elif start == last_end:
220
# The data we are reading fits neatly on the previous
221
# buffer, so this is all part of a larger coalesced range.
222
buffered_data.append(data)
223
buffered_len += length
225
# We have an 'interrupt' in the data stream. So we know we are
226
# at a request boundary.
228
# We haven't consumed the buffer so far, so put it into
229
# data_chunks, and continue.
230
buffered = ''.join(buffered_data)
231
data_chunks.append((input_start, buffered))
233
buffered_data = [data]
234
buffered_len = length
235
last_end = start + length
236
if input_start == cur_offset and cur_size <= buffered_len:
237
# Simplify the next steps a bit by transforming buffered_data
238
# into a single string. We also have the nice property that
239
# when there is only one string ''.join([x]) == x, so there is
241
buffered = ''.join(buffered_data)
242
# Clean out buffered data so that we keep memory
246
# TODO: We *could* also consider the case where cur_offset is in
247
# the buffered range, even though it doesn't *start*
248
# the buffered range. But for packs we pretty much always
249
# read in order, so you won't get any extra data in the
251
while (input_start == cur_offset
252
and (buffered_offset + cur_size) <= buffered_len):
253
# We've buffered enough data to process this request, spit it
255
cur_data = buffered[buffered_offset:buffered_offset + cur_size]
256
# move the direct pointer into our buffered data
257
buffered_offset += cur_size
258
# Move the start-of-buffer pointer
259
input_start += cur_size
260
# Yield the requested data
261
yield cur_offset, cur_data
262
cur_offset, cur_size = offset_iter.next()
263
# at this point, we've consumed as much of buffered as we can,
264
# so break off the portion that we consumed
265
if buffered_offset == len(buffered_data):
266
# No tail to leave behind
270
buffered = buffered[buffered_offset:]
271
buffered_data = [buffered]
272
buffered_len = len(buffered)
273
# now that the data stream is done, close the handle
276
buffered = ''.join(buffered_data)
278
data_chunks.append((input_start, buffered))
280
if 'sftp' in debug.debug_flags:
281
mutter('SFTP readv left with %d out-of-order bytes',
282
sum(map(lambda x: len(x[1]), data_chunks)))
283
# We've processed all the readv data, at this point, anything we
284
# couldn't process is in data_chunks. This doesn't happen often, so
285
# this code path isn't optimized
286
# We use an interesting process for data_chunks
287
# Specifically if we have "bisect_left([(start, len, entries)],
289
# If start == qstart, then we get the specific node. Otherwise we
290
# get the previous node
292
idx = bisect.bisect_left(data_chunks, (cur_offset,))
293
if idx < len(data_chunks) and data_chunks[idx][0] == cur_offset:
294
# The data starts here
295
data = data_chunks[idx][1][:cur_size]
297
# The data is in a portion of a previous page
299
sub_offset = cur_offset - data_chunks[idx][0]
300
data = data_chunks[idx][1]
301
data = data[sub_offset:sub_offset + cur_size]
303
# We are missing the page where the data should be found,
306
if len(data) != cur_size:
307
raise AssertionError('We must have miscalulated.'
308
' We expected %d bytes, but only found %d'
309
% (cur_size, len(data)))
310
yield cur_offset, data
311
cur_offset, cur_size = offset_iter.next()
314
134
class SFTPTransport(ConnectedTransport):
315
135
"""Transport implementation for SFTP access."""
458
def _sftp_readv(self, fp, offsets, relpath):
261
def _sftp_readv(self, fp, offsets, relpath='<unknown>'):
459
262
"""Use the readv() member of fp to do async readv.
461
Then read them using paramiko.readv(). paramiko.readv()
264
And then read them using paramiko.readv(). paramiko.readv()
462
265
does not support ranges > 64K, so it caps the request size, and
463
just reads until it gets all the stuff it wants.
266
just reads until it gets all the stuff it wants
465
helper = _SFTPReadvHelper(offsets, relpath, self._report_activity)
466
return helper.request_and_yield_offsets(fp)
268
offsets = list(offsets)
269
sorted_offsets = sorted(offsets)
271
# The algorithm works as follows:
272
# 1) Coalesce nearby reads into a single chunk
273
# This generates a list of combined regions, the total size
274
# and the size of the sub regions. This coalescing step is limited
275
# in the number of nearby chunks to combine, and is allowed to
276
# skip small breaks in the requests. Limiting it makes sure that
277
# we can start yielding some data earlier, and skipping means we
278
# make fewer requests. (Beneficial even when using async)
279
# 2) Break up these combined regions into chunks that are smaller
280
# than 64KiB. Technically the limit is 65536, but we are a
281
# little bit conservative. This is because sftp has a maximum
282
# return chunk size of 64KiB (max size of an unsigned short)
283
# 3) Issue a readv() to paramiko to create an async request for
285
# 4) Read in the data as it comes back, until we've read one
286
# continuous section as determined in step 1
287
# 5) Break up the full sections into hunks for the original requested
288
# offsets. And put them in a cache
289
# 6) Check if the next request is in the cache, and if it is, remove
290
# it from the cache, and yield its data. Continue until no more
291
# entries are in the cache.
292
# 7) loop back to step 4 until all data has been read
294
# TODO: jam 20060725 This could be optimized one step further, by
295
# attempting to yield whatever data we have read, even before
296
# the first coalesced section has been fully processed.
298
# When coalescing for use with readv(), we don't really need to
299
# use any fudge factor, because the requests are made asynchronously
300
coalesced = list(self._coalesce_offsets(sorted_offsets,
301
limit=self._max_readv_combine,
305
for c_offset in coalesced:
306
start = c_offset.start
307
size = c_offset.length
309
# We need to break this up into multiple requests
311
next_size = min(size, self._max_request_size)
312
requests.append((start, next_size))
316
mutter('SFTP.readv() %s offsets => %s coalesced => %s requests',
317
len(offsets), len(coalesced), len(requests))
319
# Queue the current read until we have read the full coalesced section
322
cur_coalesced_stack = iter(coalesced)
323
cur_coalesced = cur_coalesced_stack.next()
325
# Cache the results, but only until they have been fulfilled
327
# turn the list of offsets into a stack
328
offset_stack = iter(offsets)
329
cur_offset_and_size = offset_stack.next()
331
for data in fp.readv(requests):
333
cur_data_len += len(data)
335
if cur_data_len < cur_coalesced.length:
337
if cur_data_len != cur_coalesced.length:
338
raise AssertionError(
339
"Somehow we read too much: %s != %s"
340
% (cur_data_len, cur_coalesced.length))
341
all_data = ''.join(cur_data)
345
for suboffset, subsize in cur_coalesced.ranges:
346
key = (cur_coalesced.start+suboffset, subsize)
347
data_map[key] = all_data[suboffset:suboffset+subsize]
349
# Now that we've read some data, see if we can yield anything back
350
while cur_offset_and_size in data_map:
351
this_data = data_map.pop(cur_offset_and_size)
352
yield cur_offset_and_size[0], this_data
353
cur_offset_and_size = offset_stack.next()
355
# We read a coalesced entry, so mark it as done
357
# Now that we've read all of the data for this coalesced section
359
cur_coalesced = cur_coalesced_stack.next()
361
if cur_coalesced is not None:
362
raise errors.ShortReadvError(relpath, cur_coalesced.start,
363
cur_coalesced.length, len(data))
468
365
def put_file(self, relpath, f, mode=None):
744
# ------------- server test implementation --------------
747
from bzrlib.tests.stub_sftp import StubServer, StubSFTPServer
749
STUB_SERVER_KEY = """
750
-----BEGIN RSA PRIVATE KEY-----
751
MIICWgIBAAKBgQDTj1bqB4WmayWNPB+8jVSYpZYk80Ujvj680pOTh2bORBjbIAyz
752
oWGW+GUjzKxTiiPvVmxFgx5wdsFvF03v34lEVVhMpouqPAYQ15N37K/ir5XY+9m/
753
d8ufMCkjeXsQkKqFbAlQcnWMCRnOoPHS3I4vi6hmnDDeeYTSRvfLbW0fhwIBIwKB
754
gBIiOqZYaoqbeD9OS9z2K9KR2atlTxGxOJPXiP4ESqP3NVScWNwyZ3NXHpyrJLa0
755
EbVtzsQhLn6rF+TzXnOlcipFvjsem3iYzCpuChfGQ6SovTcOjHV9z+hnpXvQ/fon
756
soVRZY65wKnF7IAoUwTmJS9opqgrN6kRgCd3DASAMd1bAkEA96SBVWFt/fJBNJ9H
757
tYnBKZGw0VeHOYmVYbvMSstssn8un+pQpUm9vlG/bp7Oxd/m+b9KWEh2xPfv6zqU
758
avNwHwJBANqzGZa/EpzF4J8pGti7oIAPUIDGMtfIcmqNXVMckrmzQ2vTfqtkEZsA
759
4rE1IERRyiJQx6EJsz21wJmGV9WJQ5kCQQDwkS0uXqVdFzgHO6S++tjmjYcxwr3g
760
H0CoFYSgbddOT6miqRskOQF3DZVkJT3kyuBgU2zKygz52ukQZMqxCb1fAkASvuTv
761
qfpH87Qq5kQhNKdbbwbmd2NxlNabazPijWuphGTdW0VfJdWfklyS2Kr+iqrs/5wV
762
HhathJt636Eg7oIjAkA8ht3MQ+XSl9yIJIS8gVpbPxSw5OMfw0PjVE7tBdQruiSc
763
nvuQES5C9BMHjF39LZiGH1iLQy7FgdHyoP+eodI7
764
-----END RSA PRIVATE KEY-----
768
class SocketListener(threading.Thread):
770
def __init__(self, callback):
    """Set up the listening thread and bind its server socket.

    :param callback: Stored as self._callback; elsewhere in this class it
        is invoked with each accepted connection.
    """
    threading.Thread.__init__(self)
    self._callback = callback
    # Keep a short local alias; self._socket is assigned up front so the
    # attribute exists even if one of the setup calls below raises.
    listener = self._socket = socket.socket()
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    # Port 0 lets the OS pick a free port; it is read back just below.
    listener.bind(('localhost', 0))
    listener.listen(1)
    self.port = listener.getsockname()[1]
    self._stop_event = threading.Event()
781
# called from outside this thread
782
self._stop_event.set()
783
# use a timeout here, because if the test fails, the server thread may
784
# never notice the stop_event.
790
readable, writable_unused, exception_unused = \
791
select.select([self._socket], [], [], 0.1)
792
if self._stop_event.isSet():
794
if len(readable) == 0:
797
s, addr_unused = self._socket.accept()
798
# because the loopback socket is inline, and transports are
799
# never explicitly closed, best to launch a new thread.
800
threading.Thread(target=self._callback, args=(s,)).start()
801
except socket.error, x:
802
sys.excepthook(*sys.exc_info())
803
warning('Socket error during accept() within unit test server'
806
# probably a failed test; unit test thread will log the
808
sys.excepthook(*sys.exc_info())
809
warning('Exception from within unit test server thread: %r' %
813
class SocketDelay(object):
814
"""A socket decorator to make TCP appear slower.
816
This changes recv, send, and sendall to add a fixed latency to each python
817
call if a new roundtrip is detected. That is, when a recv is called and the
818
flag new_roundtrip is set, latency is charged. Every send and send_all
821
In addition every send, sendall and recv sleeps a bit per character send to
824
Not all methods are implemented, this is deliberate as this class is not a
825
replacement for the builtin sockets layer. fileno is not implemented to
826
prevent the proxy being bypassed.
830
_proxied_arguments = dict.fromkeys([
831
"close", "getpeername", "getsockname", "getsockopt", "gettimeout",
832
"setblocking", "setsockopt", "settimeout", "shutdown"])
834
def __init__(self, sock, latency, bandwidth=1.0,
837
:param bandwidth: simulated bandwidth (MegaBit)
838
:param really_sleep: If set to false, the SocketDelay will just
839
increase a counter, instead of calling time.sleep. This is useful for
840
unittesting the SocketDelay.
843
self.latency = latency
844
self.really_sleep = really_sleep
845
self.time_per_byte = 1 / (bandwidth / 8.0 * 1024 * 1024)
846
self.new_roundtrip = False
849
if self.really_sleep:
852
SocketDelay.simulated_time += s
854
def __getattr__(self, attr):
855
if attr in SocketDelay._proxied_arguments:
856
return getattr(self.sock, attr)
857
raise AttributeError("'SocketDelay' object has no attribute %r" %
861
return SocketDelay(self.sock.dup(), self.latency, self.time_per_byte,
864
def recv(self, *args):
    """Receive from the wrapped socket, charging the simulated delays.

    :param args: Passed through unchanged to self.sock.recv().
    :return: The data returned by the wrapped socket's recv().
    """
    data = self.sock.recv(*args)
    if data and self.new_roundtrip:
        # First data to arrive after a send: the roundtrip is complete, so
        # charge the fixed latency once and clear the flag.
        self.new_roundtrip = False
        self.sleep(self.latency)
    # Per-byte cost models the limited bandwidth.
    self.sleep(len(data) * self.time_per_byte)
    # The visible block dropped this line; without it callers receive None
    # instead of the bytes that were read.
    return data
872
def sendall(self, data, flags=0):
    """Send all of data through the wrapped socket after simulated delays.

    :param data: The payload handed to self.sock.sendall().
    :param flags: Passed through unchanged to self.sock.sendall().
    :return: Whatever the wrapped socket's sendall() returns.
    """
    starting_roundtrip = not self.new_roundtrip
    if starting_roundtrip:
        # The first send since the last charged recv opens a new roundtrip.
        self.new_roundtrip = True
        self.sleep(self.latency)
    transfer_delay = len(data) * self.time_per_byte
    self.sleep(transfer_delay)
    return self.sock.sendall(data, flags)
879
def send(self, data, flags=0):
    """Send data through the wrapped socket, charging simulated delays.

    :param data: The payload handed to self.sock.send().
    :param flags: Passed through unchanged to self.sock.send().
    :return: The number of bytes the wrapped socket reports as sent.
    """
    if not self.new_roundtrip:
        # The first send since the last charged recv opens a new roundtrip.
        self.new_roundtrip = True
        self.sleep(self.latency)
    bytes_sent = self.sock.send(data, flags)
    # Charge only for what was actually written, which may be a partial send.
    self.sleep(bytes_sent * self.time_per_byte)
    # The visible block dropped this line; callers of send() need the count
    # to know how much of data is still unsent.
    return bytes_sent
888
class SFTPServer(Server):
889
"""Common code for SFTP server facilities."""
891
def __init__(self, server_interface=StubServer):
892
self._original_vendor = None
894
self._server_homedir = None
895
self._listener = None
897
self._vendor = ssh.ParamikoVendor()
898
self._server_interface = server_interface
903
def _get_sftp_url(self, path):
    """Build the sftp:// URL for path on this server.

    The URL always uses the credentials foo:bar, the host localhost and the
    port of self._listener.

    :param path: Appended verbatim after the port's trailing slash.
    """
    port = self._listener.port
    return 'sftp://foo:bar@localhost:%d/%s' % (port, path)
907
def log(self, message):
    """Record message in self.logs.

    StubServer uses this to log when a new server is created.
    """
    record = self.logs.append
    record(message)
911
def _run_server_entry(self, sock):
    """Entry point for all implementations of _run_server.

    When self.add_latency is > 0.000001, sock is wrapped in a SocketDelay
    with that latency before being passed on; otherwise it is passed through
    untouched.

    :return: Whatever self._run_server() returns.
    """
    if not self.add_latency > 0.000001:
        # No meaningful latency requested: serve on the raw socket.
        return self._run_server(sock)
    delayed = SocketDelay(sock, self.add_latency)
    return self._run_server(delayed)
921
def _run_server(self, s):
922
ssh_server = paramiko.Transport(s)
923
key_file = pathjoin(self._homedir, 'test_rsa.key')
924
f = open(key_file, 'w')
925
f.write(STUB_SERVER_KEY)
927
host_key = paramiko.RSAKey.from_private_key_file(key_file)
928
ssh_server.add_server_key(host_key)
929
server = self._server_interface(self)
930
ssh_server.set_subsystem_handler('sftp', paramiko.SFTPServer,
931
StubSFTPServer, root=self._root,
932
home=self._server_homedir)
933
event = threading.Event()
934
ssh_server.start_server(event, server)
937
def setUp(self, backing_server=None):
938
# XXX: TODO: make sftpserver back onto backing_server rather than local
940
if not (backing_server is None or
941
isinstance(backing_server, local.LocalURLServer)):
942
raise AssertionError(
943
"backing_server should not be %r, because this can only serve the "
944
"local current working directory." % (backing_server,))
945
self._original_vendor = ssh._ssh_vendor_manager._cached_ssh_vendor
946
ssh._ssh_vendor_manager._cached_ssh_vendor = self._vendor
947
if sys.platform == 'win32':
948
# Win32 needs to use the UNICODE api
949
self._homedir = getcwd()
951
# But Linux SFTP servers should just deal in bytestreams
952
self._homedir = os.getcwd()
953
if self._server_homedir is None:
954
self._server_homedir = self._homedir
956
if sys.platform == 'win32':
958
self._listener = SocketListener(self._run_server_entry)
959
self._listener.setDaemon(True)
960
self._listener.start()
963
"""See bzrlib.transport.Server.tearDown."""
964
self._listener.stop()
965
ssh._ssh_vendor_manager._cached_ssh_vendor = self._original_vendor
967
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url."""
    # this is chosen to try to prevent trouble with proxies, weird dns, etc
    # we bind a random socket, so that we get a guaranteed unused port
    # we just never listen on that port
    # The visible block used `s` without ever creating it, which raises
    # NameError; create the socket that the comments above describe.
    s = socket.socket()
    s.bind(('localhost', 0))
    # getsockname() yields a (host, port) pair, which fills both %s slots.
    return 'sftp://%s:%s/' % s.getsockname()
977
class SFTPFullAbsoluteServer(SFTPServer):
978
"""A test server for sftp transports, using absolute urls and ssh."""
981
"""See bzrlib.transport.Server.get_url."""
982
homedir = self._homedir
983
if sys.platform != 'win32':
984
# Remove the initial '/' on all platforms but win32
985
homedir = homedir[1:]
986
return self._get_sftp_url(urlutils.escape(homedir))
989
class SFTPServerWithoutSSH(SFTPServer):
990
"""An SFTP server that uses a simple TCP socket pair rather than SSH."""
993
super(SFTPServerWithoutSSH, self).__init__()
994
self._vendor = ssh.LoopbackVendor()
996
def _run_server(self, sock):
997
# Re-import these as locals, so that they're still accessible during
998
# interpreter shutdown (when all module globals get set to None, leading
999
# to confusing errors like "'NoneType' object has no attribute 'error'".
1000
class FakeChannel(object):
1001
def get_transport(self):
1003
def get_log_channel(self):
1007
def get_hexdump(self):
1012
server = paramiko.SFTPServer(
1013
FakeChannel(), 'sftp', StubServer(self), StubSFTPServer,
1014
root=self._root, home=self._server_homedir)
1016
server.start_subsystem(
1017
'sftp', None, ssh.SocketAsChannelAdapter(sock))
1018
except socket.error, e:
1019
if (len(e.args) > 0) and (e.args[0] == errno.EPIPE):
1020
# it's okay for the client to disconnect abruptly
1021
# (bug in paramiko 1.6: it should absorb this exception)
1025
except Exception, e:
1026
# This typically seems to happen during interpreter shutdown, so
1027
# most of the useful ways to report this error won't work.
1028
# Writing the exception type, and then the text of the exception,
1029
# seems to be the best we can do.
1031
sys.stderr.write('\nEXCEPTION %r: ' % (e.__class__,))
1032
sys.stderr.write('%s\n\n' % (e,))
1033
server.finish_subsystem()
1036
class SFTPAbsoluteServer(SFTPServerWithoutSSH):
1037
"""A test server for sftp transports, using absolute urls."""
1040
"""See bzrlib.transport.Server.get_url."""
1041
homedir = self._homedir
1042
if sys.platform != 'win32':
1043
# Remove the initial '/' on all platforms but win32
1044
homedir = homedir[1:]
1045
return self._get_sftp_url(urlutils.escape(homedir))
1048
class SFTPHomeDirServer(SFTPServerWithoutSSH):
1049
"""A test server for sftp transports, using homedir relative urls."""
1052
"""See bzrlib.transport.Server.get_url."""
1053
return self._get_sftp_url("~/")
1056
class SFTPSiblingAbsoluteServer(SFTPAbsoluteServer):
1057
"""A test server for sftp transports where only absolute paths will work.
1059
It does this by serving from a deeply-nested directory that doesn't exist.
1062
def setUp(self, backing_server=None):
    """Set the server home directory, then run the inherited setUp.

    :param backing_server: Passed through unchanged to the parent setUp.
    """
    # Must be assigned before the parent setUp runs, so the parent sees the
    # home directory already chosen.
    nested_home = '/dev/noone/runs/tests/here'
    self._server_homedir = nested_home
    super(SFTPSiblingAbsoluteServer, self).setUp(backing_server)
901
1067
def get_test_permutations():
902
1068
"""Return the permutations to be used in testing."""
903
from bzrlib.tests import stub_sftp
904
return [(SFTPTransport, stub_sftp.SFTPAbsoluteServer),
905
(SFTPTransport, stub_sftp.SFTPHomeDirServer),
906
(SFTPTransport, stub_sftp.SFTPSiblingAbsoluteServer),
1069
return [(SFTPTransport, SFTPAbsoluteServer),
1070
(SFTPTransport, SFTPHomeDirServer),
1071
(SFTPTransport, SFTPSiblingAbsoluteServer),