~bzr-pqm/bzr/bzr.dev : revision 6348

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Searching in versioned file repositories."""

18

19

from bzrlib import (

20

debug,

21

revision,

22

trace,

23

)

24

25

from bzrlib.graph import (

26

DictParentsProvider,

27

Graph,

28

invert_parent_map,

29

)

30

31

32

class AbstractSearchResult(object):

33

"""The result of a search, describing a set of keys.

34

35

Search results are typically used as the 'fetch_spec' parameter when

36

fetching revisions.

37

38

:seealso: AbstractSearch

39

"""

40

41

def get_recipe(self):

42

"""Return a recipe that can be used to replay this search.

43

44

The recipe allows reconstruction of the same results at a later date.

45

46

:return: A tuple of `(search_kind_str, *details)`. The details vary by

47

kind of search result.

48

"""

49

raise NotImplementedError(self.get_recipe)

50

51

def get_network_struct(self):

52

"""Return a tuple that can be transmitted via the HPSS protocol."""

53

raise NotImplementedError(self.get_network_struct)

54

55

def get_keys(self):

56

"""Return the keys found in this search.

57

58

:return: A set of keys.

59

"""

60

raise NotImplementedError(self.get_keys)

61

62

def is_empty(self):

63

"""Return false if the search lists 1 or more revisions."""

64

raise NotImplementedError(self.is_empty)

65

66

def refine(self, seen, referenced):

67

"""Create a new search by refining this search.

68

69

:param seen: Revisions that have been satisfied.

70

:param referenced: Revision references observed while satisfying some

71

of this search.

72

:return: A search result.

73

"""

74

raise NotImplementedError(self.refine)

75

76

77

class AbstractSearch(object):

78

"""A search that can be executed, producing a search result.

79

80

:seealso: AbstractSearchResult

81

"""

82

83

def execute(self):

84

"""Construct a network-ready search result from this search description.

85

86

This may take some time to search repositories, etc.

87

88

:return: A search result (an object that implements

89

AbstractSearchResult's API).

90

"""

91

raise NotImplementedError(self.execute)

92

93

94

class SearchResult(AbstractSearchResult):

95

"""The result of a breadth first search.

96

97

A SearchResult provides the ability to reconstruct the search or access a

98

set of the keys the search found.

99

"""

100

101

def __init__(self, start_keys, exclude_keys, key_count, keys):

102

"""Create a SearchResult.

103

104

:param start_keys: The keys the search started at.

105

:param exclude_keys: The keys the search excludes.

106

:param key_count: The total number of keys (from start to but not

107

including exclude).

108

:param keys: The keys the search found. Note that in future we may get

109

a SearchResult from a smart server, in which case the keys list is

110

not necessarily immediately available.

111

"""

112

self._recipe = ('search', start_keys, exclude_keys, key_count)

113

self._keys = frozenset(keys)

114

115

def __repr__(self):

116

kind, start_keys, exclude_keys, key_count = self._recipe

117

if len(start_keys) > 5:

118

start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'

119

else:

120

start_keys_repr = repr(start_keys)

121

if len(exclude_keys) > 5:

122

exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'

123

else:

124

exclude_keys_repr = repr(exclude_keys)

125

return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,

126

kind, start_keys_repr, exclude_keys_repr, key_count)

127

128

def get_recipe(self):

129

"""Return a recipe that can be used to replay this search.

130

131

The recipe allows reconstruction of the same results at a later date

132

without knowing all the found keys. The essential elements are a list

133

of keys to start and to stop at. In order to give reproducible

134

results when ghosts are encountered by a search they are automatically

135

added to the exclude list (or else ghost filling may alter the

136

results).

137

138

:return: A tuple ('search', start_keys_set, exclude_keys_set,

139

revision_count). To recreate the results of this search, create a

140

breadth first searcher on the same graph starting at start_keys.

141

Then call next() (or next_with_ghosts()) repeatedly, and on every

142

result, call stop_searching_any on any keys from the exclude_keys

143

set. The revision_count value acts as a trivial cross-check - the

144

found revisions of the new search should have as many elements as

145

revision_count. If it does not, then additional revisions have been

146

ghosted since the search was executed the first time and the second

147

time.

148

"""

149

return self._recipe

150

151

def get_network_struct(self):

152

start_keys = ' '.join(self._recipe[1])

153

stop_keys = ' '.join(self._recipe[2])

154

count = str(self._recipe[3])

155

return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))

156

157

def get_keys(self):

158

"""Return the keys found in this search.

159

160

:return: A set of keys.

161

"""

162

return self._keys

163

164

def is_empty(self):

165

"""Return false if the search lists 1 or more revisions."""

166

return self._recipe[3] == 0

167

168

def refine(self, seen, referenced):

169

"""Create a new search by refining this search.

170

171

:param seen: Revisions that have been satisfied.

172

:param referenced: Revision references observed while satisfying some

173

of this search.

174

"""

175

start = self._recipe[1]

176

exclude = self._recipe[2]

177

count = self._recipe[3]

178

keys = self.get_keys()

179

# New heads = referenced + old heads - seen things - exclude

180

pending_refs = set(referenced)

181

pending_refs.update(start)

182

pending_refs.difference_update(seen)

183

pending_refs.difference_update(exclude)

184

# New exclude = old exclude + satisfied heads

185

seen_heads = start.intersection(seen)

186

exclude.update(seen_heads)

187

# keys gets seen removed

188

keys = keys - seen

189

# length is reduced by len(seen)

190

count -= len(seen)

191

return SearchResult(pending_refs, exclude, count, keys)

192

193

194

class PendingAncestryResult(AbstractSearchResult):

195

"""A search result that will reconstruct the ancestry for some graph heads.

196

197

Unlike SearchResult, this doesn't hold the complete search result in

198

memory, it just holds a description of how to generate it.

199

"""

200

201

def __init__(self, heads, repo):

202

"""Constructor.

203

204

:param heads: an iterable of graph heads.

205

:param repo: a repository to use to generate the ancestry for the given

206

heads.

207

"""

208

self.heads = frozenset(heads)

209

self.repo = repo

210

211

def __repr__(self):

212

if len(self.heads) > 5:

213

heads_repr = repr(list(self.heads)[:5])[:-1]

214

heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)

215

else:

216

heads_repr = repr(self.heads)

217

return '<%s heads:%s repo:%r>' % (

218

self.__class__.__name__, heads_repr, self.repo)

219

220

def get_recipe(self):

221

"""Return a recipe that can be used to replay this search.

222

223

The recipe allows reconstruction of the same results at a later date.

224

225

:seealso SearchResult.get_recipe:

226

227

:return: A tuple ('proxy-search', start_keys_set, set(), -1)

228

To recreate this result, create a PendingAncestryResult with the

229

start_keys_set.

230

"""

231

return ('proxy-search', self.heads, set(), -1)

232

233

def get_network_struct(self):

234

parts = ['ancestry-of']

235

parts.extend(self.heads)

236

return parts

237

238

def get_keys(self):

239

"""See SearchResult.get_keys.

240

241

Returns all the keys for the ancestry of the heads, excluding

242

NULL_REVISION.

243

"""

244

return self._get_keys(self.repo.get_graph())

245

246

def _get_keys(self, graph):

247

NULL_REVISION = revision.NULL_REVISION

248

keys = [key for (key, parents) in graph.iter_ancestry(self.heads)

249

if key != NULL_REVISION and parents is not None]

250

return keys

251

252

def is_empty(self):

253

"""Return false if the search lists 1 or more revisions."""

254

if revision.NULL_REVISION in self.heads:

255

return len(self.heads) == 1

256

else:

257

return len(self.heads) == 0

258

259

def refine(self, seen, referenced):

260

"""Create a new search by refining this search.

261

262

:param seen: Revisions that have been satisfied.

263

:param referenced: Revision references observed while satisfying some

264

of this search.

265

"""

266

referenced = self.heads.union(referenced)

267

return PendingAncestryResult(referenced - seen, self.repo)

268

269

270

class EmptySearchResult(AbstractSearchResult):

271

"""An empty search result."""

272

273

def is_empty(self):

274

return True

275

276

277

class EverythingResult(AbstractSearchResult):

278

"""A search result that simply requests everything in the repository."""

279

280

def __init__(self, repo):

281

self._repo = repo

282

283

def __repr__(self):

284

return '%s(%r)' % (self.__class__.__name__, self._repo)

285

286

def get_recipe(self):

287

raise NotImplementedError(self.get_recipe)

288

289

def get_network_struct(self):

290

return ('everything',)

291

292

def get_keys(self):

293

if 'evil' in debug.debug_flags:

294

from bzrlib import remote

295

if isinstance(self._repo, remote.RemoteRepository):

296

# warn developers (not users) not to do this

297

trace.mutter_callsite(

298

2, "EverythingResult(RemoteRepository).get_keys() is slow.")

299

return self._repo.all_revision_ids()

300

301

def is_empty(self):

302

# It's ok for this to wrongly return False: the worst that can happen

303

# is that RemoteStreamSource will initiate a get_stream on an empty

304

# repository. And almost all repositories are non-empty.

305

return False

306

307

def refine(self, seen, referenced):

308

heads = set(self._repo.all_revision_ids())

309

heads.difference_update(seen)

310

heads.update(referenced)

311

return PendingAncestryResult(heads, self._repo)

312

313

314

class EverythingNotInOther(AbstractSearch):

315

"""Find all revisions in that are in one repo but not the other."""

316

317

def __init__(self, to_repo, from_repo, find_ghosts=False):

318

self.to_repo = to_repo

319

self.from_repo = from_repo

320

self.find_ghosts = find_ghosts

321

322

def execute(self):

323

return self.to_repo.search_missing_revision_ids(

324

self.from_repo, find_ghosts=self.find_ghosts)

325

326

327

class NotInOtherForRevs(AbstractSearch):

328

"""Find all revisions missing in one repo for a some specific heads."""

329

330

def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,

331

find_ghosts=False, limit=None):

332

"""Constructor.

333

334

:param required_ids: revision IDs of heads that must be found, or else

335

the search will fail with NoSuchRevision. All revisions in their

336

ancestry not already in the other repository will be included in

337

the search result.

338

:param if_present_ids: revision IDs of heads that may be absent in the

339

source repository. If present, then their ancestry not already

340

found in other will be included in the search result.

341

:param limit: maximum number of revisions to fetch

342

"""

343

self.to_repo = to_repo

344

self.from_repo = from_repo

345

self.find_ghosts = find_ghosts

346

self.required_ids = required_ids

347

self.if_present_ids = if_present_ids

348

self.limit = limit

349

350

def __repr__(self):

351

if len(self.required_ids) > 5:

352

reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'

353

else:

354

reqd_revs_repr = repr(self.required_ids)

355

if self.if_present_ids and len(self.if_present_ids) > 5:

356

ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'

357

else:

358

ifp_revs_repr = repr(self.if_present_ids)

359

360

return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"

361

"limit:%r>") % (

362

self.__class__.__name__, self.from_repo, self.to_repo,

363

self.find_ghosts, reqd_revs_repr, ifp_revs_repr,

364

self.limit)

365

366

def execute(self):

367

return self.to_repo.search_missing_revision_ids(

368

self.from_repo, revision_ids=self.required_ids,

369

if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,

370

limit=self.limit)

371

372

373

def search_result_from_parent_map(parent_map, missing_keys):

374

"""Transform a parent_map into SearchResult information."""

375

if not parent_map:

376

# parent_map is empty or None, simple search result

377

return [], [], 0

378

# start_set is all the keys in the cache

379

start_set = set(parent_map)

380

# result set is all the references to keys in the cache

381

result_parents = set()

382

for parents in parent_map.itervalues():

383

result_parents.update(parents)

384

stop_keys = result_parents.difference(start_set)

385

# We don't need to send ghosts back to the server as a position to

386

# stop either.

387

stop_keys.difference_update(missing_keys)

388

key_count = len(parent_map)

389

if (revision.NULL_REVISION in result_parents

390

and revision.NULL_REVISION in missing_keys):

391

# If we pruned NULL_REVISION from the stop_keys because it's also

392

# in our cache of "missing" keys we need to increment our key count

393

# by 1, because the reconsitituted SearchResult on the server will

394

# still consider NULL_REVISION to be an included key.

395

key_count += 1

396

included_keys = start_set.intersection(result_parents)

397

start_set.difference_update(included_keys)

398

return start_set, stop_keys, key_count

399

400

401

def _run_search(parent_map, heads, exclude_keys):

402

"""Given a parent map, run a _BreadthFirstSearcher on it.

403

404

Start at heads, walk until you hit exclude_keys. As a further improvement,

405

watch for any heads that you encounter while walking, which means they were

406

not heads of the search.

407

408

This is mostly used to generate a succinct recipe for how to walk through

409

most of parent_map.

410

411

:return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))

412

"""

413

g = Graph(DictParentsProvider(parent_map))

414

s = g._make_breadth_first_searcher(heads)

415

found_heads = set()

416

while True:

417

try:

418

next_revs = s.next()

419

except StopIteration:

420

break

421

for parents in s._current_parents.itervalues():

422

f_heads = heads.intersection(parents)

423

if f_heads:

424

found_heads.update(f_heads)

425

stop_keys = exclude_keys.intersection(next_revs)

426

if stop_keys:

427

s.stop_searching_any(stop_keys)

428

for parents in s._current_parents.itervalues():

429

f_heads = heads.intersection(parents)

430

if f_heads:

431

found_heads.update(f_heads)

432

return s, found_heads

433

434

435

def _find_possible_heads(parent_map, tip_keys, depth):

436

"""Walk backwards (towards children) through the parent_map.

437

438

This finds 'heads' that will hopefully succinctly describe our search

439

graph.

440

"""

441

child_map = invert_parent_map(parent_map)

442

heads = set()

443

current_roots = tip_keys

444

walked = set(current_roots)

445

while current_roots and depth > 0:

446

depth -= 1

447

children = set()

448

children_update = children.update

449

for p in current_roots:

450

# Is it better to pre- or post- filter the children?

451

try:

452

children_update(child_map[p])

453

except KeyError:

454

heads.add(p)

455

# If we've seen a key before, we don't want to walk it again. Note that

456

# 'children' stays relatively small while 'walked' grows large. So

457

# don't use 'difference_update' here which has to walk all of 'walked'.

458

# '.difference' is smart enough to walk only children and compare it to

459

# walked.

460

children = children.difference(walked)

461

walked.update(children)

462

current_roots = children

463

if current_roots:

464

# We walked to the end of depth, so these are the new tips.

465

heads.update(current_roots)

466

return heads

467

468

469

def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,

470

depth):

471

"""Transform a parent_map that is searching 'tip_keys' into an

472

approximate SearchResult.

473

474

We should be able to generate a SearchResult from a given set of starting

475

keys, that covers a subset of parent_map that has the last step pointing at

476

tip_keys. This is to handle the case that really-long-searches shouldn't be

477

started from scratch on each get_parent_map request, but we *do* want to

478

filter out some of the keys that we've already seen, so we don't get

479

information that we already know about on every request.

480

481

The server will validate the search (that starting at start_keys and

482

stopping at stop_keys yields the exact key_count), so we have to be careful

483

to give an exact recipe.

484

485

Basic algorithm is:

486

1) Invert parent_map to get child_map (todo: have it cached and pass it

487

in)

488

2) Starting at tip_keys, walk towards children for 'depth' steps.

489

3) At that point, we have the 'start' keys.

490

4) Start walking parent_map from 'start' keys, counting how many keys

491

are seen, and generating stop_keys for anything that would walk

492

outside of the parent_map.

493

494

:param parent_map: A map from {child_id: (parent_ids,)}

495

:param missing_keys: parent_ids that we know are unavailable

496

:param tip_keys: the revision_ids that we are searching

497

:param depth: How far back to walk.

498

"""

499

if not parent_map:

500

# No search to send, because we haven't done any searching yet.

501

return [], [], 0

502

heads = _find_possible_heads(parent_map, tip_keys, depth)

503

s, found_heads = _run_search(parent_map, heads, set(tip_keys))

504

start_keys, exclude_keys, keys = s.get_state()

505

if found_heads:

506

# Anything in found_heads are redundant start_keys, we hit them while

507

# walking, so we can exclude them from the start list.

508

start_keys = set(start_keys).difference(found_heads)

509

return start_keys, exclude_keys, len(keys)