~bzr-pqm/bzr/bzr.dev

Committer: Canonical.com Patch Queue Manager
Date: 2009-02-20 07:13:04 UTC
mfrom: (4022.1.3 fetch.sinks)
Revision ID: pqm@pqm.ubuntu.com-20090220071304-mb95xwtanwl2bqa4

(robertc) Refactor fetch into a sender and sink component rather than
just a single object copying data. (Andrew Bennetts, Robert Collins)

files modified:
NEWS

bzrlib/fetch.py

bzrlib/remote.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/tests/per_repository/test_repository.py

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

import bzrlib.errors as errors

from bzrlib.errors import InstallFailed

from bzrlib.progress import ProgressPhase

from bzrlib.revision import is_null, NULL_REVISION

from bzrlib.symbol_versioning import (deprecated_function,

deprecated_method,

)

from bzrlib.revision import NULL_REVISION

from bzrlib.tsort import topo_sort

from bzrlib.trace import mutter

import bzrlib.ui

'%r and %r' % (to_repository, from_repository))

self.to_repository = to_repository

self.from_repository = from_repository

self.sink = to_repository._get_sink()

# must not mutate self._last_revision as it's potentially a shared instance

self._last_revision = last_revision

100

self.find_ghosts = find_ghosts

131

129

This initialises all the needed variables, and then fetches the

132

130

requested revisions, finally clearing the progress bar.

133

131

"""

132

# Roughly this is what we're aiming for fetch to become:

133

134

# missing = self.sink.insert_stream(self.source.get_stream(search))

135

# if missing:

136

# missing = self.sink.insert_stream(self.source.get_items(missing))

137

# assert not missing

134

138

self.count_total = 0

135

139

self.file_ids_names = {}

136

140

pp = ProgressPhase('Transferring', 4, self.pb)

139

143

search = self._revids_to_fetch()

140

144

if search is None:

141

145

return

142

if getattr(self, '_fetch_everything_for_search', None) is not None:

143

self._fetch_everything_for_search(search, pp)

144

else:

145

# backward compatibility

146

self._fetch_everything_for_revisions(search.get_keys, pp)

146

self._fetch_everything_for_search(search, pp)

147

finally:

148

self.pb.clear()

149

157

# item_keys_introduced_by should have a richer API than it does at the

158

# moment, so that it can feed the progress information back to this

159

# function?

160

self.pb = bzrlib.ui.ui_factory.nested_progress_bar()

161

try:

162

from_format = self.from_repository._format

163

stream = self.get_stream(search, pp)

164

self.sink.insert_stream(stream, from_format)

165

self.sink.finished()

166

finally:

167

if self.pb is not None:

168

self.pb.finished()

169

170

def get_stream(self, search, pp):

160

171

phase = 'file'

161

pb = bzrlib.ui.ui_factory.nested_progress_bar()

162

try:

163

revs = search.get_keys()

164

graph = self.from_repository.get_graph()

165

revs = list(graph.iter_topo_order(revs))

166

data_to_fetch = self.from_repository.item_keys_introduced_by(revs,

167

pb)

168

text_keys = []

169

for knit_kind, file_id, revisions in data_to_fetch:

170

if knit_kind != phase:

171

phase = knit_kind

172

# Make a new progress bar for this phase

173

pb.finished()

174

pp.next_phase()

175

pb = bzrlib.ui.ui_factory.nested_progress_bar()

176

if knit_kind == "file":

177

# Accumulate file texts

178

text_keys.extend([(file_id, revision) for revision in

179

revisions])

180

elif knit_kind == "inventory":

181

# Now copy the file texts.

182

to_texts = self.to_repository.texts

183

from_texts = self.from_repository.texts

184

to_texts.insert_record_stream(from_texts.get_record_stream(

185

text_keys, self.to_repository._fetch_order,

186

not self.to_repository._fetch_uses_deltas))

187

# Cause an error if a text occurs after we have done the

188

# copy.

189

text_keys = None

190

# Before we process the inventory we generate the root

191

# texts (if necessary) so that the inventories references

192

# will be valid.

193

self._generate_root_texts(revs)

194

# NB: This currently reopens the inventory weave in source;

195

# using a single stream interface instead would avoid this.

196

self._fetch_inventory_weave(revs, pb)

197

elif knit_kind == "signatures":

198

# Nothing to do here; this will be taken care of when

199

# _fetch_revision_texts happens.

200

pass

201

elif knit_kind == "revisions":

202

self._fetch_revision_texts(revs, pb)

203

else:

204

raise AssertionError("Unknown knit kind %r" % knit_kind)

205

if self.to_repository._fetch_reconcile:

206

self.to_repository.reconcile()

207

finally:

208

if pb is not None:

209

pb.finished()

172

revs = search.get_keys()

173

graph = self.from_repository.get_graph()

174

revs = list(graph.iter_topo_order(revs))

175

data_to_fetch = self.from_repository.item_keys_introduced_by(

176

revs, self.pb)

177

text_keys = []

178

for knit_kind, file_id, revisions in data_to_fetch:

179

if knit_kind != phase:

180

phase = knit_kind

181

# Make a new progress bar for this phase

182

self.pb.finished()

183

pp.next_phase()

184

self.pb = bzrlib.ui.ui_factory.nested_progress_bar()

185

if knit_kind == "file":

186

# Accumulate file texts

187

text_keys.extend([(file_id, revision) for revision in

188

revisions])

189

elif knit_kind == "inventory":

190

# Now copy the file texts.

191

to_texts = self.to_repository.texts

192

from_texts = self.from_repository.texts

193

yield ('texts', from_texts.get_record_stream(

194

text_keys, self.to_repository._fetch_order,

195

not self.to_repository._fetch_uses_deltas))

196

# Cause an error if a text occurs after we have done the

197

# copy.

198

text_keys = None

199

# Before we process the inventory we generate the root

200

# texts (if necessary) so that the inventories references

201

# will be valid.

202

for _ in self._generate_root_texts(revs):

203

yield _

204

# NB: This currently reopens the inventory weave in source;

205

# using a single stream interface instead would avoid this.

206

self.pb.update("fetch inventory", 0, 1)

207

from_weave = self.from_repository.inventories

208

# we fetch only the referenced inventories because we do not

209

# know for unselected inventories whether all their required

210

# texts are present in the other repository - it could be

211

# corrupt.

212

yield ('inventories', from_weave.get_record_stream(

213

[(rev_id,) for rev_id in revs],

214

self.inventory_fetch_order(),

215

not self.delta_on_metadata()))

216

elif knit_kind == "signatures":

217

# Nothing to do here; this will be taken care of when

218

# _fetch_revision_texts happens.

219

pass

220

elif knit_kind == "revisions":

221

for _ in self._fetch_revision_texts(revs, self.pb):

222

yield _

223

else:

224

raise AssertionError("Unknown knit kind %r" % knit_kind)

210

225

self.count_copied += len(revs)

211

226

212

227

def _revids_to_fetch(self):

213

228

"""Determines the exact revisions needed from self.from_repository to

214

229

install self._last_revision in self.to_repository.

229

244

except errors.NoSuchRevision, e:

230

245

raise InstallFailed([self._last_revision])

231

246

232

def _fetch_inventory_weave(self, revs, pb):

233

pb.update("fetch inventory", 0, 2)

234

to_weave = self.to_repository.inventories

235

# just merge, this is optimisable and it means we don't

236

# copy unreferenced data such as not-needed inventories.

237

pb.update("fetch inventory", 1, 3)

238

from_weave = self.from_repository.inventories

239

pb.update("fetch inventory", 2, 3)

240

# we fetch only the referenced inventories because we do not

241

# know for unselected inventories whether all their required

242

# texts are present in the other repository - it could be

243

# corrupt.

244

to_weave.insert_record_stream(from_weave.get_record_stream(

245

[(rev_id,) for rev_id in revs],

246

self.to_repository._fetch_order,

247

not self.to_repository._fetch_uses_deltas))

248

249

247

def _fetch_revision_texts(self, revs, pb):

250

248

# fetch signatures first and then the revision texts

251

249

# may need to be an InterRevisionStore call here.

252

to_sf = self.to_repository.signatures

253

250

from_sf = self.from_repository.signatures

254

251

# A missing signature is just skipped.

255

to_sf.insert_record_stream(filter_absent(from_sf.get_record_stream(

256

[(rev_id,) for rev_id in revs],

252

keys = [(rev_id,) for rev_id in revs]

253

signatures = filter_absent(from_sf.get_record_stream(

254

keys,

257

255

self.to_repository._fetch_order,

258

not self.to_repository._fetch_uses_deltas)))

259

self._fetch_just_revision_texts(revs)

260

261

def _fetch_just_revision_texts(self, version_ids):

262

to_rf = self.to_repository.revisions

263

from_rf = self.from_repository.revisions

256

not self.to_repository._fetch_uses_deltas))

264

257

# If a revision has a delta, this is actually expanded inside the

265

258

# insert_record_stream code now, which is an alternate fix for

266

259

# bug #261339

267

to_rf.insert_record_stream(from_rf.get_record_stream(

268

[(rev_id,) for rev_id in version_ids],

260

from_rf = self.from_repository.revisions

261

revisions = from_rf.get_record_stream(

262

keys,

269

263

self.to_repository._fetch_order,

270

not self.to_repository._fetch_uses_deltas))

264

not self.delta_on_metadata())

265

return [('signatures', signatures), ('revisions', revisions)]

271

266

272

267

def _generate_root_texts(self, revs):

273

268

"""This will be called by __fetch between fetching weave texts and

276

271

Subclasses should override this if they need to generate root texts

277

272

after fetching weave texts.

278

273

"""

279

pass

274

return []

275

276

def inventory_fetch_order(self):

277

return self.to_repository._fetch_order

278

279

def delta_on_metadata(self):

280

src_serializer = self.from_repository._format._serializer

281

target_serializer = self.to_repository._format._serializer

282

return (self.to_repository._fetch_uses_deltas and

283

src_serializer == target_serializer)

280

284

281

285

282

286

class Inter1and2Helper(object):

285

289

This is for use by fetchers and converters.

286

290

"""

287

291

288

def __init__(self, source, target):

292

def __init__(self, source):

289

293

"""Constructor.

290

294

291

295

:param source: The repository data comes from

292

:param target: The repository data goes to

293

296

"""

294

297

self.source = source

295

self.target = target

296

298

297

299

def iter_rev_trees(self, revs):

298

300

"""Iterate through RevisionTrees efficiently.

338

340

339

341

:param revs: the revisions to include

340

342

"""

341

to_texts = self.target.texts

342

343

graph = self.source.get_graph()

343

344

parent_map = graph.get_parent_map(revs)

344

345

rev_order = topo_sort(parent_map)

368

369

if parent != NULL_REVISION and

369

370

rev_id_to_root_id.get(parent, root_id) == root_id)

370

371

yield FulltextContentFactory(key, parent_keys, None, '')

371

to_texts.insert_record_stream(yield_roots())

372

373

def regenerate_inventory(self, revs):

374

"""Generate a new inventory versionedfile in target, converting data.

375

376

The inventory is retrieved from the source (deserializing it), and

377

stored in the target (reserializing it in a different format).

378

:param revs: The revisions to include

379

"""

380

for tree in self.iter_rev_trees(revs):

381

parents = tree.get_parent_ids()

382

self.target.add_inventory(tree.get_revision_id(), tree.inventory,

383

parents)

384

385

def fetch_revisions(self, revision_ids):

386

# TODO: should this batch them up rather than requesting 10,000

387

# revisions at once?

388

for revision in self.source.get_revisions(revision_ids):

389

self.target.add_revision(revision.revision_id, revision)

372

return [('texts', yield_roots())]

390

373

391

374

392

375

class Model1toKnit2Fetcher(RepoFetcher):

394

377

"""

395

378

def __init__(self, to_repository, from_repository, last_revision=None,

396

379

pb=None, find_ghosts=True):

397

self.helper = Inter1and2Helper(from_repository, to_repository)

398

RepoFetcher.__init__(self, to_repository, from_repository,

399

last_revision, pb, find_ghosts)

400

401

def _generate_root_texts(self, revs):

402

self.helper.generate_root_texts(revs)

403

404

def _fetch_inventory_weave(self, revs, pb):

405

self.helper.regenerate_inventory(revs)

406

407

def _fetch_revision_texts(self, revs, pb):

408

"""Fetch revision object texts"""

409

count = 0

410

total = len(revs)

411

for rev in revs:

412

pb.update('copying revisions', count, total)

413

try:

414

sig_text = self.from_repository.get_signature_text(rev)

415

self.to_repository.add_signature_text(rev, sig_text)

416

except errors.NoSuchRevision:

417

# not signed.

418

pass

419

self._copy_revision(rev)

420

count += 1

421

422

def _copy_revision(self, rev):

423

self.helper.fetch_revisions([rev])

424

425

426

class Knit1to2Fetcher(RepoFetcher):

427

"""Fetch from a Knit1 repository into a Knit2 repository"""

428

429

def __init__(self, to_repository, from_repository, last_revision=None,

430

pb=None, find_ghosts=True):

431

self.helper = Inter1and2Helper(from_repository, to_repository)

432

RepoFetcher.__init__(self, to_repository, from_repository,

433

last_revision, pb, find_ghosts)

434

435

def _generate_root_texts(self, revs):

436

self.helper.generate_root_texts(revs)

437

438

def _fetch_inventory_weave(self, revs, pb):

439

self.helper.regenerate_inventory(revs)

440

441

def _fetch_just_revision_texts(self, version_ids):

442

self.helper.fetch_revisions(version_ids)

380

self.helper = Inter1and2Helper(from_repository)

381

RepoFetcher.__init__(self, to_repository, from_repository,

382

last_revision, pb, find_ghosts)

383

384

def _generate_root_texts(self, revs):

385

return self.helper.generate_root_texts(revs)

386

387

def inventory_fetch_order(self):

388

return 'topological'

389

390

Knit1to2Fetcher = Model1toKnit2Fetcher

Older »