~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/smart/repository.py

Committer: Robert Collins
Date: 2008-02-03 22:55:08 UTC
mto: This revision was merged to the branch mainline in revision 3216.
Revision ID: robertc@robertcollins.net-20080203225508-0rogbg0ggonuqfhp

Change the smart server get_parents method to take a graph search, which is used to exclude parents the client has already received. This prevents history shortcuts from causing huge numbers of duplicates.

files modified:
NEWS

bzrlib/graph.py

bzrlib/remote.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_smart.py

Show diffs side-by-side

added added

removed removed

bzrlib/smart/repository.py

SuccessfulSmartServerResponse,

)

from bzrlib import revision as _mod_revision

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

class SmartServerRepositoryRequest(SmartServerRequest):

# is expected)

return None

def recreate_search(self, repository, recipe_bytes):
    """Replay a client-supplied search recipe against repository's graph.

    :param repository: The repository to search. It is read-locked while
        the search is replayed and unlocked again before returning.
    :param recipe_bytes: Three newline-separated fields: the
        space-separated start keys, the space-separated exclude keys, and
        the revision count the replayed search is expected to report.
    :return: A (search, error) tuple. On success error is None. When the
        replayed search reports a different revision count than the
        recipe declares, search is None and error is a
        FailedSmartServerResponse carrying ('NoSuchRevision',).
    """
    recipe_fields = recipe_bytes.split('\n')
    heads = set(recipe_fields[0].split(' '))
    excluded = set(recipe_fields[1].split(' '))
    expected_count = int(recipe_fields[2])
    repository.lock_read()
    try:
        graph = repository.get_graph()
        search = graph._make_breadth_first_searcher(heads)
        # Step the searcher until it is exhausted, telling it to stop at
        # any excluded key that shows up in the current step.
        while True:
            try:
                found = search.next()
            except StopIteration:
                break
            search.stop_searching_any(excluded.intersection(found))
        actual_count = search.get_result().get_recipe()[2]
        if actual_count == expected_count:
            return (search, None)
        # The replay produced a different amount of data than the client
        # expected. This is reported as NoSuchRevision: fewer revisions
        # means some are missing, and more should never happen because the
        # exclude list takes ghosts into account, so ghost-filling races
        # are not a problem.
        return (None, FailedSmartServerResponse(('NoSuchRevision',)))
    finally:
        repository.unlock()

class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):

"""Bzr 1.2+ - get parent data for revisions during a graph search."""

def do_repository_request(self, repository, *revision_ids):

repository.lock_read()

try:

return self._do_repository_request(repository, revision_ids)

finally:

repository.unlock()

def _do_repository_request(self, repository, revision_ids):

"""Get parent details for some revisions.

All the parents for revision_ids are returned. Additionally up to 64KB

of additional parent data found by performing a breadth first search

from revision_ids is returned.

from revision_ids is returned. The verb takes a body containing the

current search state, see do_body for details.

100

101

:param repository: The repository to query in.

102

:param revision_ids: The utf8 encoded revision_id to answer for.

103

"""

104

self._revision_ids = revision_ids

105

return None # Signal that we want a body.

106

107

def do_body(self, body_bytes):

108

"""Process the current search state and perform the parent lookup.

109

110

:return: A smart server response where the body contains an utf8

encoded flattened list of the parents of the revisions, (the same

format as Repository.get_revision_graph).

111

encoded flattened list of the parents of the revisions (the same

112

format as Repository.get_revision_graph) which has been gzipped.

113

"""

114

repository = self._repository

115

repository.lock_read()

116

try:

117

return self._do_repository_request(body_bytes)

118

finally:

119

repository.unlock()

120

121

def _do_repository_request(self, body_bytes):

122

repository = self._repository

123

revision_ids = set(self._revision_ids)

124

search, error = self.recreate_search(repository, body_bytes)

125

if error is not None:

126

return error

127

# TODO might be nice to start up the search again; but that's not

128

# written or tested yet.

129

client_seen_revs = set(search.get_result().get_keys())

130

# Always include the requested ids.

131

client_seen_revs.difference_update(revision_ids)

132

lines = []

133

repo_graph = repository.get_graph()

134

result = {}

144

# adjust for the wire

145

if parents == (_mod_revision.NULL_REVISION,):

146

parents = ()

# add parents to the result

100

result[revision_id] = parents

101

147

# prepare the next query

102

148

next_revs.update(parents)

103

# Approximate the serialized cost of this revision_id.

104

size_so_far += 2 + len(revision_id) + sum(map(len, parents))

105

# get all the directly asked for parents, and then flesh out to

106

# 64K or so.

107

if first_loop_done and size_so_far > 65000:

108

next_revs = set()

109

break

149

if revision_id not in client_seen_revs:

150

# Client does not have this revision, give it to it.

151

# add parents to the result

152

result[revision_id] = parents

153

# Approximate the serialized cost of this revision_id.

154

size_so_far += 2 + len(revision_id) + sum(map(len, parents))

155

# get all the directly asked for parents, and then flesh out to

156

# 64K (compressed) or so. We do one level of depth at a time to

157

# stay in sync with the client. The 185000 magic number is

158

# estimated compression ratio taken from bzr.dev itself.

159

if first_loop_done and size_so_far > 185000:

160

next_revs = set()

161

break

110

162

# don't query things we've already queried

111

163

next_revs.difference_update(queried_revs)

112

164

first_loop_done = True

113

165

114

for revision, parents in result.items():

166

# sorting trivially puts lexicographically similar revision ids together.

167

# Compression FTW.

168

for revision, parents in sorted(result.items()):

115

169

lines.append(' '.join((revision, ) + tuple(parents)))

116

170

117

return SuccessfulSmartServerResponse(('ok', ), '\n'.join(lines))

171

return SuccessfulSmartServerResponse(

172

('ok', ), bytes_to_gzip('\n'.join(lines)))

118

173

119

174

120

175

class SmartServerRepositoryGetRevisionGraph(SmartServerRepositoryRequest):

342

397

"""Bzr 1.1+ streaming pull."""

343

398

344

399

def do_body(self, body_bytes):

345

lines = body_bytes.split('\n')

346

start_keys = set(lines[0].split(' '))

347

exclude_keys = set(lines[1].split(' '))

348

revision_count = int(lines[2])

349

400

repository = self._repository

350

401

repository.lock_read()

351

402

try:

352

search = repository.get_graph()._make_breadth_first_searcher(

353

start_keys)

354

while True:

355

try:

356

next_revs = search.next()

357

except StopIteration:

358

break

359

search.stop_searching_any(exclude_keys.intersection(next_revs))

360

search_result = search.get_result()

361

if search_result.get_recipe()[2] != revision_count:

362

# we got back a different amount of data than expected, this

363

# gets reported as NoSuchRevision, because less revisions

364

# indicates missing revisions, and more should never happen as

365

# the excludes list considers ghosts and ensures that ghost

366

# filling races are not a problem.

367

return FailedSmartServerResponse(('NoSuchRevision',))

368

stream = repository.get_data_stream_for_search(search_result)

403

search, error = self.recreate_search(repository, body_bytes)

404

if error is not None:

405

return error

406

stream = repository.get_data_stream_for_search(search.get_result())

369

407

except Exception:

408

# On non-error, unlocking is done by the body stream handler.

370

409

repository.unlock()

371

410

raise

372

411

return SuccessfulSmartServerResponse(('ok',),

Older »