53
36
def do(self, path, *args):
    """Execute a repository request.

    All Repository requests take a path to the repository as their first
    argument.  The repository must be at the exact path given by the
    client - no searching is done.

    The actual logic is delegated to self.do_repository_request.

    :param path: The path for the repository as received from the client.
    :param args: Remaining verb arguments, passed through unchanged to
        self.do_repository_request.
    :return: Whatever self.do_repository_request() returns (a smart server
        response, or None when the verb wants a body).
    """
    # Resolve the client-supplied path to a transport, then open the
    # control directory found at exactly that location.
    transport = self.transport_from_client_path(path)
    bzrdir = BzrDir.open_from_transport(transport)
    # Save the repository for use with do_body.
    self._repository = bzrdir.open_repository()
    return self.do_repository_request(self._repository, *args)
72
def do_repository_request(self, repository, *args):
73
"""Override to provide an implementation for a verb."""
74
# No-op for verbs that take bodies (None as a result indicates a body
78
def recreate_search(self, repository, search_bytes, discard_excess=False):
79
"""Recreate a search from its serialised form.
81
:param discard_excess: If True, and the search refers to data we don't
82
have, just silently accept that fact - the verb calling
83
recreate_search trusts that clients will look for missing things
84
they expected and get it from elsewhere.
86
lines = search_bytes.split('\n')
87
if lines[0] == 'ancestry-of':
89
search_result = graph.PendingAncestryResult(heads, repository)
90
return search_result, None
91
elif lines[0] == 'search':
92
return self.recreate_search_from_recipe(repository, lines[1:],
93
discard_excess=discard_excess)
95
return (None, FailedSmartServerResponse(('BadSearch',)))
97
def recreate_search_from_recipe(self, repository, lines,
98
discard_excess=False):
99
"""Recreate a specific revision search (vs a from-tip search).
101
:param discard_excess: If True, and the search refers to data we don't
102
have, just silently accept that fact - the verb calling
103
recreate_search trusts that clients will look for missing things
104
they expected and get it from elsewhere.
106
start_keys = set(lines[0].split(' '))
107
exclude_keys = set(lines[1].split(' '))
108
revision_count = int(lines[2])
109
repository.lock_read()
111
search = repository.get_graph()._make_breadth_first_searcher(
115
next_revs = search.next()
116
except StopIteration:
118
search.stop_searching_any(exclude_keys.intersection(next_revs))
119
search_result = search.get_result()
120
if (not discard_excess and
121
search_result.get_recipe()[3] != revision_count):
122
# we got back a different amount of data than expected, this
123
# gets reported as NoSuchRevision, because fewer revisions
124
# indicates missing revisions, and more should never happen as
125
# the excludes list considers ghosts and ensures that ghost
126
# filling races are not a problem.
127
return (None, FailedSmartServerResponse(('NoSuchRevision',)))
128
return (search_result, None)
133
class SmartServerRepositoryReadLocked(SmartServerRepositoryRequest):
134
"""Calls self.do_readlocked_repository_request."""
136
def do_repository_request(self, repository, *args):
137
"""Read lock a repository for do_readlocked_repository_request."""
138
repository.lock_read()
140
return self.do_readlocked_repository_request(repository, *args)
145
class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):
146
"""Bzr 1.2+ - get parent data for revisions during a graph search."""
148
no_extra_results = False
150
def do_repository_request(self, repository, *revision_ids):
151
"""Get parent details for some revisions.
153
All the parents for revision_ids are returned. Additionally up to 64KB
154
of additional parent data found by performing a breadth first search
155
from revision_ids is returned. The verb takes a body containing the
156
current search state, see do_body for details.
158
If 'include-missing:' is in revision_ids, ghosts encountered in the
159
graph traversal for getting parent data are included in the result with
160
a prefix of 'missing:'.
162
:param repository: The repository to query in.
163
:param revision_ids: The utf8 encoded revision_id to answer for.
165
self._revision_ids = revision_ids
166
return None # Signal that we want a body.
168
def do_body(self, body_bytes):
169
"""Process the current search state and perform the parent lookup.
171
:return: A smart server response where the body contains an utf8
172
encoded flattened list of the parents of the revisions (the same
173
format as Repository.get_revision_graph) which has been bz2
176
repository = self._repository
177
repository.lock_read()
179
return self._do_repository_request(body_bytes)
183
def _do_repository_request(self, body_bytes):
184
repository = self._repository
185
revision_ids = set(self._revision_ids)
186
include_missing = 'include-missing:' in revision_ids
188
revision_ids.remove('include-missing:')
189
body_lines = body_bytes.split('\n')
190
search_result, error = self.recreate_search_from_recipe(
191
repository, body_lines)
192
if error is not None:
194
# TODO might be nice to start up the search again; but that's not
195
# written or tested yet.
196
client_seen_revs = set(search_result.get_keys())
197
# Always include the requested ids.
198
client_seen_revs.difference_update(revision_ids)
200
repo_graph = repository.get_graph()
204
next_revs = revision_ids
205
first_loop_done = False
207
queried_revs.update(next_revs)
208
parent_map = repo_graph.get_parent_map(next_revs)
209
current_revs = next_revs
211
for revision_id in current_revs:
213
parents = parent_map.get(revision_id)
214
if parents is not None:
215
# adjust for the wire
216
if parents == (_mod_revision.NULL_REVISION,):
218
# prepare the next query
219
next_revs.update(parents)
220
encoded_id = revision_id
223
encoded_id = "missing:" + revision_id
225
if (revision_id not in client_seen_revs and
226
(not missing_rev or include_missing)):
227
# Client does not have this revision, give it to it.
228
# add parents to the result
229
result[encoded_id] = parents
230
# Approximate the serialized cost of this revision_id.
231
size_so_far += 2 + len(encoded_id) + sum(map(len, parents))
232
# get all the directly asked for parents, and then flesh out to
233
# 64K (compressed) or so. We do one level of depth at a time to
234
# stay in sync with the client. The 250000 magic number is
235
# estimated compression ratio taken from bzr.dev itself.
236
if self.no_extra_results or (
237
first_loop_done and size_so_far > 250000):
240
# don't query things we've already queried
241
next_revs.difference_update(queried_revs)
242
first_loop_done = True
244
# sorting trivially puts lexicographically similar revision ids together.
246
for revision, parents in sorted(result.items()):
247
lines.append(' '.join((revision, ) + tuple(parents)))
249
return SuccessfulSmartServerResponse(
250
('ok', ), bz2.compress('\n'.join(lines)))
253
class SmartServerRepositoryGetRevisionGraph(SmartServerRepositoryReadLocked):
255
def do_readlocked_repository_request(self, repository, revision_id):
48
repository = bzrdir.open_repository()
49
return self.do_repository_request(repository, *args)
52
class SmartServerRepositoryGetRevisionGraph(SmartServerRepositoryRequest):
54
def do_repository_request(self, repository, revision_id):
256
55
"""Return the result of repository.get_revision_graph(revision_id).
258
Deprecated as of bzr 1.4, but supported for older clients.
260
57
:param repository: The repository to query in.
261
58
:param revision_id: The utf8 encoded revision_id to get a graph from.
262
59
:return: A smart server response where the body contains an utf8
266
63
revision_id = None
269
graph = repository.get_graph()
271
search_ids = [revision_id]
273
search_ids = repository.all_revision_ids()
274
search = graph._make_breadth_first_searcher(search_ids)
275
transitive_ids = set()
276
map(transitive_ids.update, list(search))
277
parent_map = graph.get_parent_map(transitive_ids)
278
revision_graph = _strip_NULL_ghosts(parent_map)
279
if revision_id and revision_id not in revision_graph:
67
revision_graph = repository.get_revision_graph(revision_id)
68
except errors.NoSuchRevision:
280
69
# Note that we return an empty body, rather than omitting the body.
281
70
# This way the client knows that it can always expect to find a body
282
71
# in the response for this method, even in the error case.
283
72
return FailedSmartServerResponse(('nosuchrevision', revision_id), '')
285
74
for revision, parents in revision_graph.items():
286
lines.append(' '.join((revision, ) + tuple(parents)))
75
lines.append(' '.join([revision,] + parents))
288
77
return SuccessfulSmartServerResponse(('ok', ), '\n'.join(lines))
291
class SmartServerRepositoryGetRevIdForRevno(SmartServerRepositoryReadLocked):
293
def do_readlocked_repository_request(self, repository, revno,
295
"""Find the revid for a given revno, given a known revno/revid pair.
300
found_flag, result = repository.get_rev_id_for_revno(revno, known_pair)
301
except errors.RevisionNotPresent, err:
302
if err.revision_id != known_pair[1]:
303
raise AssertionError(
304
'get_rev_id_for_revno raised RevisionNotPresent for '
305
'non-initial revision: ' + err.revision_id)
306
return FailedSmartServerResponse(
307
('nosuchrevision', err.revision_id))
309
return SuccessfulSmartServerResponse(('ok', result))
311
earliest_revno, earliest_revid = result
312
return SuccessfulSmartServerResponse(
313
('history-incomplete', earliest_revno, earliest_revid))
316
80
class SmartServerRequestHasRevision(SmartServerRepositoryRequest):
318
82
def do_repository_request(self, repository, revision_id):
398
163
return FailedSmartServerResponse(('LockContention',))
399
164
except errors.UnlockableTransport:
400
165
return FailedSmartServerResponse(('UnlockableTransport',))
401
except errors.LockFailed, e:
402
return FailedSmartServerResponse(('LockFailed',
403
str(e.lock), str(e.why)))
404
if token is not None:
405
repository.leave_lock_in_place()
166
repository.leave_lock_in_place()
406
167
repository.unlock()
407
168
if token is None:
409
170
return SuccessfulSmartServerResponse(('ok', token))
412
class SmartServerRepositoryGetStream(SmartServerRepositoryRequest):
414
def do_repository_request(self, repository, to_network_name):
    """Get a stream for inserting into a to_format repository.

    :param repository: The repository to stream from.
    :param to_network_name: The network name of the format of the target
        repository; it is looked up in network_format_registry and the
        result is remembered on self._to_format.
    :return: A FailedSmartServerResponse of
        ('UnknownMethod', 'Repository.get_stream') when
        self._should_fake_unknown() is true, otherwise None to signal
        that a request body is expected.
    """
    self._to_format = network_format_registry.get(to_network_name)
    if self._should_fake_unknown():
        return FailedSmartServerResponse(
            ('UnknownMethod', 'Repository.get_stream'))
    return None # Signal that we want a body.
427
def _should_fake_unknown(self):
428
"""Return True if we should return UnknownMethod to the client.
430
This is a workaround for bugs in pre-1.19 clients that claim to
431
support receiving streams of CHK repositories. The pre-1.19 client
432
expects inventory records to be serialized in the format defined by
433
to_network_name, but in pre-1.19 (at least) that format definition
434
tries to use the xml5 serializer, which does not correctly handle
435
rich-roots. After 1.19 the client can also accept inventory-deltas
436
(which avoids this issue), and those clients will use the
437
Repository.get_stream_1.19 verb instead of this one.
438
So: if this repository is CHK, and the to_format doesn't match,
439
we should just fake an UnknownSmartMethod error so that the client
440
will fallback to VFS, rather than sending it a stream we know it
443
from_format = self._repository._format
444
to_format = self._to_format
445
if not from_format.supports_chks:
446
# Source not CHK: that's ok
448
if (to_format.supports_chks and
449
from_format.repository_class is to_format.repository_class and
450
from_format._serializer == to_format._serializer):
451
# Source is CHK, but target matches: that's ok
452
# (e.g. 2a->2a, or CHK2->2a)
454
# Source is CHK, and target is not CHK or incompatible CHK. We can't
455
# generate a compatible stream.
458
def do_body(self, body_bytes):
459
repository = self._repository
460
repository.lock_read()
462
search_result, error = self.recreate_search(repository, body_bytes,
464
if error is not None:
467
source = repository._get_source(self._to_format)
468
stream = source.get_stream(search_result)
470
exc_info = sys.exc_info()
472
# On non-error, unlocking is done by the body stream handler.
475
raise exc_info[0], exc_info[1], exc_info[2]
476
return SuccessfulSmartServerResponse(('ok',),
477
body_stream=self.body_stream(stream, repository))
479
def body_stream(self, stream, repository):
480
byte_stream = _stream_to_byte_stream(stream, repository._format)
482
for bytes in byte_stream:
484
except errors.RevisionNotPresent, e:
485
# This shouldn't be able to happen, but as we don't buffer
486
# everything it can in theory happen.
488
yield FailedSmartServerResponse(('NoSuchRevision', e.revision_id))
493
class SmartServerRepositoryGetStream_1_19(SmartServerRepositoryGetStream):
495
def _should_fake_unknown(self):
496
"""Returns False; we don't need to workaround bugs in 1.19+ clients."""
500
def _stream_to_byte_stream(stream, src_format):
501
"""Convert a record stream to a self delimited byte stream."""
502
pack_writer = pack.ContainerSerialiser()
503
yield pack_writer.begin()
504
yield pack_writer.bytes_record(src_format.network_name(), '')
505
for substream_type, substream in stream:
506
if substream_type == 'inventory-deltas':
507
# This doesn't feel like the ideal place to issue this warning;
508
# however we don't want to do it in the Repository that's
509
# generating the stream, because that might be on the server.
510
# Instead we try to observe it as the stream goes by.
511
ui.ui_factory.warn_cross_format_fetch(src_format,
513
for record in substream:
514
if record.storage_kind in ('chunked', 'fulltext'):
515
serialised = record_to_fulltext_bytes(record)
516
elif record.storage_kind == 'inventory-delta':
517
serialised = record_to_inventory_delta_bytes(record)
518
elif record.storage_kind == 'absent':
519
raise ValueError("Absent factory for %s" % (record.key,))
521
serialised = record.get_bytes_as(record.storage_kind)
523
# Some streams embed the whole stream into the wire
524
# representation of the first record, which means that
525
# later records have no wire representation: we skip them.
526
yield pack_writer.bytes_record(serialised, [(substream_type,)])
527
yield pack_writer.end()
530
class _ByteStreamDecoder(object):
531
"""Helper for _byte_stream_to_stream.
533
The expected usage of this class is via the function _byte_stream_to_stream
534
which creates a _ByteStreamDecoder, pops off the stream format and then
535
yields the output of record_stream(), the main entry point to
538
Broadly this class has to unwrap two layers of iterators:
542
This is complicated by wishing to return type, iterator_for_type, but
543
getting the data for iterator_for_type when we find out type: we can't
544
simply pass a generator down to the NetworkRecordStream parser, instead
545
we have a little local state to seed each NetworkRecordStream instance,
546
and gather the type that we'll be yielding.
548
:ivar byte_stream: The byte stream being decoded.
549
:ivar stream_decoder: A pack parser used to decode the bytestream
550
:ivar current_type: The current type, used to join adjacent records of the
551
same type into a single stream.
552
:ivar first_bytes: The first bytes to give the next NetworkRecordStream.
555
def __init__(self, byte_stream):
    """Create a _ByteStreamDecoder.

    :param byte_stream: The byte stream being decoded (an iterable of
        byte strings).
    """
    # Pack parser that the incoming bytes are pushed into.
    self.stream_decoder = pack.ContainerPushParser()
    # Type of the substream currently being yielded; None until seeded.
    self.current_type = None
    # Bytes used to seed the next substream; None when nothing is pending.
    self.first_bytes = None
    self.byte_stream = byte_stream
562
def iter_stream_decoder(self):
563
"""Iterate the contents of the pack from stream_decoder."""
564
# dequeue pending items
565
for record in self.stream_decoder.read_pending_records():
567
# Pull bytes off the wire, decode them to records, yield those records.
568
for bytes in self.byte_stream:
569
self.stream_decoder.accept_bytes(bytes)
570
for record in self.stream_decoder.read_pending_records():
573
def iter_substream_bytes(self):
574
if self.first_bytes is not None:
575
yield self.first_bytes
576
# If we run out of pack records, signal the outer layer to stop.
577
self.first_bytes = None
578
for record in self.iter_pack_records:
579
record_names, record_bytes = record
580
record_name, = record_names
581
substream_type = record_name[0]
582
if substream_type != self.current_type:
583
# end of a substream, seed the next substream.
584
self.current_type = substream_type
585
self.first_bytes = record_bytes
589
def record_stream(self):
590
"""Yield substream_type, substream from the byte stream."""
592
# Make and consume sub generators, one per substream type:
593
while self.first_bytes is not None:
594
substream = NetworkRecordStream(self.iter_substream_bytes())
595
# after substream is fully consumed, self.current_type is set to
596
# the next type, and self.first_bytes is set to the matching bytes.
597
yield self.current_type, substream.read()
599
def seed_state(self):
    """Prepare the _ByteStreamDecoder to decode from the pack stream.

    Stores one shared pack-record generator on self.iter_pack_records and
    then exhausts a first pass of self.iter_substream_bytes().
    """
    # Set a single generator we can use to get data from the pack stream.
    self.iter_pack_records = self.iter_stream_decoder()
    # Seed the very first subiterator with content: draining
    # iter_substream_bytes() once is done purely for its side effects on
    # this object, so the yielded values are discarded.
    list(self.iter_substream_bytes())
608
def _byte_stream_to_stream(byte_stream):
    """Convert a byte stream into a format and a stream.

    Bytes are fed to the decoder's pack parser until the first record is
    available; that record carries the network name of the source format.

    :param byte_stream: A bytes iterator, as output by _stream_to_byte_stream.
    :return: (RepositoryFormat, stream_generator).  NOTE(review): if
        byte_stream is exhausted before any record is parsed, the function
        falls off the end and returns None - confirm callers never hit that.
    """
    decoder = _ByteStreamDecoder(byte_stream)
    for chunk in byte_stream:
        decoder.stream_decoder.accept_bytes(chunk)
        # Only the first record is wanted here; the remaining records are
        # left for decoder.record_stream() to consume.
        for record in decoder.stream_decoder.read_pending_records(max=1):
            record_names, src_format_name = record
            src_format = network_format_registry.get(src_format_name)
            return src_format, decoder.record_stream()
623
173
class SmartServerRepositoryUnlock(SmartServerRepositoryRequest):
625
175
def do_repository_request(self, repository, token):
694
235
dirname = dirname.encode(sys.getfilesystemencoding())
695
236
# python's tarball module includes the whole path by default so
697
if not dirname.endswith('.bzr'):
698
raise ValueError(dirname)
238
assert dirname.endswith('.bzr')
699
239
tarball.add(dirname, '.bzr') # recursive by default
704
class SmartServerRepositoryInsertStreamLocked(SmartServerRepositoryRequest):
705
"""Insert a record stream from a RemoteSink into a repository.
707
This gets bytes pushed to it by the network infrastructure and turns that
708
into a bytes iterator using a thread. That is then processed by
709
_byte_stream_to_stream.
714
def do_repository_request(self, repository, resume_tokens, lock_token):
    """StreamSink.insert_stream for a remote repository.

    :param repository: The repository to insert into; it is write locked
        here using lock_token.
    :param resume_tokens: Passed through unchanged to
        self.do_insert_stream_request.
    :param lock_token: The token handed to repository.lock_write().
    :return: None (implicitly); the stream itself arrives as body chunks.
    """
    # The write lock is taken before the insert machinery starts; it is not
    # released in this method.
    repository.lock_write(token=lock_token)
    self.do_insert_stream_request(repository, resume_tokens)
719
def do_insert_stream_request(self, repository, resume_tokens):
720
tokens = [token for token in resume_tokens.split(' ') if token]
722
self.repository = repository
723
self.queue = Queue.Queue()
724
self.insert_thread = threading.Thread(target=self._inserter_thread)
725
self.insert_thread.start()
727
def do_chunk(self, body_stream_chunk):
    """Queue one chunk of the request body.

    :param body_stream_chunk: The chunk to hand over; it is put on
        self.queue unchanged.
    """
    self.queue.put(body_stream_chunk)
730
def _inserter_thread(self):
732
src_format, stream = _byte_stream_to_stream(
733
self.blocking_byte_stream())
734
self.insert_result = self.repository._get_sink().insert_stream(
735
stream, src_format, self.tokens)
736
self.insert_ok = True
738
self.insert_exception = sys.exc_info()
739
self.insert_ok = False
741
def blocking_byte_stream(self):
743
bytes = self.queue.get()
744
if bytes is StopIteration:
750
self.queue.put(StopIteration)
751
if self.insert_thread is not None:
752
self.insert_thread.join()
753
if not self.insert_ok:
754
exc_info = self.insert_exception
755
raise exc_info[0], exc_info[1], exc_info[2]
756
write_group_tokens, missing_keys = self.insert_result
757
if write_group_tokens or missing_keys:
758
# bzip needed? missing keys should typically be a small set.
759
# Should this be a streaming body response ?
760
missing_keys = sorted(missing_keys)
761
bytes = bencode.bencode((write_group_tokens, missing_keys))
762
self.repository.unlock()
763
return SuccessfulSmartServerResponse(('missing-basis', bytes))
765
self.repository.unlock()
766
return SuccessfulSmartServerResponse(('ok', ))
769
class SmartServerRepositoryInsertStream_1_19(SmartServerRepositoryInsertStreamLocked):
770
"""Insert a record stream from a RemoteSink into a repository.
772
Same as SmartServerRepositoryInsertStreamLocked, except:
773
- the lock token argument is optional
774
- servers that implement this verb accept 'inventory-delta' records in the
780
def do_repository_request(self, repository, resume_tokens, lock_token=None):
    """StreamSink.insert_stream for a remote repository.

    Identical to SmartServerRepositoryInsertStreamLocked's implementation,
    to which it delegates, except that lock_token may be omitted.

    :param repository: The repository to insert into.
    :param resume_tokens: Passed through unchanged.
    :param lock_token: Optional lock token; defaults to None.
    """
    SmartServerRepositoryInsertStreamLocked.do_repository_request(
        self, repository, resume_tokens, lock_token)
786
class SmartServerRepositoryInsertStream(SmartServerRepositoryInsertStreamLocked):
787
"""Insert a record stream from a RemoteSink into an unlocked repository.
789
This is the same as SmartServerRepositoryInsertStreamLocked, except it
790
takes no lock_tokens; i.e. it works with an unlocked (or lock-free, e.g.
791
like pack format) repository.
796
def do_repository_request(self, repository, resume_tokens):
    """StreamSink.insert_stream for a remote repository.

    :param repository: The repository to insert into; it is write locked
        here without a token.
    :param resume_tokens: Passed through unchanged to
        self.do_insert_stream_request.
    :return: None (implicitly); the stream itself arrives as body chunks.
    """
    # No lock token is supplied by this verb, so take a fresh write lock.
    repository.lock_write()
    self.do_insert_stream_request(repository, resume_tokens)