~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/commit.py

Committer: Martin Pool
Date: 2005-09-13 01:37:23 UTC
Revision ID: mbp@sourcefrog.net-20050913013723-7e0026b48cbf08ff

- BROKEN: start refactoring fetch code to work well with weaves

files removed:
bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/xml4.py

files renamed:
bzrlib/selftest/test_weave.py => tools/testweave.py

files modified:
.bzrignore

NEWS

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commit.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/merge.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavestore.py

bzrlib/workingtree.py

bzrlib/xml.py

tools/history2weaves.py

Show diffs side-by-side

added added

removed removed

bzrlib/commit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# XXX: Can we do any better about making interrupted commits change

# nothing? Perhaps the best approach is to integrate commit of

# AtomicFiles with releasing the lock on the Branch.

# TODO: Separate 'prepare' phase where we find a list of potentially

# committed files. We can then pause the commit to prompt for a

# commit message, knowing the summary will be the same as what's

# actually used for the commit. (But perhaps simpler to simply get

# the tree status, then use that for a selective commit?)

# The newly committed revision is going to have a shape corresponding

# to that of the working inventory. Files that are not in the

# working tree and that were in the predecessor are reported as

# removed --- this can include files that were either removed from the

# inventory or deleted in the working tree. If they were only

# deleted from disk, they are removed from the working inventory.

# We then consider the remaining entries, which will be in the new

# version. Directory entries are simply copied across. File entries

# must be checked to see if a new version of the file should be

# recorded. For each parent revision inventory, we check to see what

# version of the file was present. If the file was present in at

# least one tree, and if it was the same version in all the trees,

# then we can just refer to that version. Otherwise, a new version

# representing the merger of the file versions must be added.

# TODO: Update hashcache before and after - or does the WorkingTree

# look after that?

# This code requires all merge parents to be present in the branch.

# We could relax this but for the sake of simplicity the constraint is

# here for now. It's not totally clear to me how we'd know which files

# need new text versions if some parents are absent. -- mbp 20050915

## XXX: Can we do any better about making interrupted commits change

## nothing?

## XXX: If we merged two versions of a file then we still need to

## create a new version representing that merge, even if it didn't

## change from the parent.

## TODO: Read back the just-generated changeset, and make sure it

## applies and recreates the right state.

import os

import sys

import time

import pdb

import tempfile

import sha

from binascii import hexlify

from cStringIO import StringIO

from bzrlib.osutils import (local_time_offset, username,

rand_bytes, compact_date, user_email,

kind_marker, is_inside_any, quotefn,

sha_string, sha_strings, sha_file, isdir, isfile,

split_lines)

from bzrlib.branch import gen_file_id, INVENTORY_FILEID, ANCESTRY_FILEID

from bzrlib.errors import (BzrError, PointlessCommit,

HistoryMissing,

)

sha_string, sha_file, isdir, isfile)

from bzrlib.branch import gen_file_id

from bzrlib.errors import BzrError, PointlessCommit

from bzrlib.revision import Revision, RevisionReference

from bzrlib.trace import mutter, note, warning

from bzrlib.trace import mutter, note

from bzrlib.xml5 import serializer_v5

from bzrlib.inventory import Inventory

from bzrlib.delta import compare_trees

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave_v5

from bzrlib.atomicfile import AtomicFile

New code should use the Commit class instead.

"""

## XXX: Remove this in favor of Branch.commit?

Commit().commit(*args, **kwargs)

137

113

committer=None,

138

114

specific_files=None,

139

115

rev_id=None,

140

allow_pointless=True,

141

verbose=False):

116

allow_pointless=True):

142

117

"""Commit working copy as a new revision.

143

118

119

The basic approach is to add all the file texts into the

120

store, then the inventory, then make a new revision pointing

121

to that inventory and store that.

122

123

This is not quite safe if the working copy changes during the

124

commit; for the moment that is simply not allowed. A better

125

approach is to make a temporary copy of the files before

126

computing their hashes, and then add those hashes in turn to

127

the inventory. This should mean at least that there are no

128

broken hash pointers. There is no way we can get a snapshot

129

of the whole directory at an instant. This would also have to

130

be robust against files disappearing, moving, etc. So the

131

whole thing is a bit hard.

132

133

This raises PointlessCommit if there are no changes, no new merges,

134

and allow_pointless is false.

135

144

136

timestamp -- if not None, seconds-since-epoch for a

145

137

postdated/predated commit.

146

138

147

specific_files -- If true, commit only those files.

139

specific_files

140

If true, commit only those files.

148

141

149

rev_id -- If set, use this as the new revision id.

142

rev_id

143

If set, use this as the new revision id.

150

144

Useful for test or import commands that need to tightly

151

145

control what revisions are assigned. If you duplicate

152

146

a revision id that exists elsewhere it is your own fault.

153

147

If null (default), a time/random revision id is generated.

154

155

allow_pointless -- If true (default), commit even if nothing

156

has changed and no merges are recorded.

157

148

"""

158

mutter('preparing to commit')

159

149

160

150

self.branch = branch

161

self.weave_store = branch.weave_store

151

self.branch.lock_write()

162

152

self.rev_id = rev_id

163

153

self.specific_files = specific_files

164

154

self.allow_pointless = allow_pointless

168

158

else:

169

159

self.timestamp = long(timestamp)

170

160

171

if rev_id is None:

172

self.rev_id = _gen_revision_id(self.branch, self.timestamp)

173

else:

174

self.rev_id = rev_id

175

176

161

if committer is None:

177

162

self.committer = username(self.branch)

178

163

else:

187

172

assert isinstance(message, basestring), type(message)

188

173

self.message = message

189

174

190

self.branch.lock_write()

191

175

try:

176

# First walk over the working inventory; and both update that

177

# and also build a new revision inventory. The revision

178

# inventory needs to hold the text-id, sha1 and size of the

179

# actual file versions committed in the revision. (These are

180

# not present in the working inventory.) We also need to

181

# detect missing/deleted files, and remove them from the

182

# working inventory.

183

192

184

self.work_tree = self.branch.working_tree()

193

185

self.work_inv = self.work_tree.inventory

194

186

self.basis_tree = self.branch.basis_tree()

195

187

self.basis_inv = self.basis_tree.inventory

196

188

197

self._gather_parents()

198

self._check_parents_present()

189

self.pending_merges = self.branch.pending_merges()

190

if self.pending_merges:

191

raise NotImplementedError("sorry, can't commit merges to the weave format yet")

199

192

193

if self.rev_id is None:

194

self.rev_id = _gen_revision_id(self.branch, time.time())

195

196

# todo: update hashcache

197

self.delta = compare_trees(self.basis_tree, self.work_tree,

198

specific_files=self.specific_files)

199

200

if not (self.delta.has_changed()

201

or self.allow_pointless

202

or self.pending_merges):

203

raise PointlessCommit()

204

205

self.new_inv = self.basis_inv.copy()

206

207

self.delta.show(sys.stdout)

208

200

209

self._remove_deleted()

201

self.new_inv = Inventory()

202

self._store_entries()

203

self._report_deletes()

204

205

if not (self.allow_pointless

206

or len(self.parents) > 1

207

or self.new_inv != self.basis_inv):

208

raise PointlessCommit()

209

210

self._store_files()

211

212

self.branch._write_inventory(self.work_inv)

210

213

self._record_inventory()

211

self._record_ancestry()

214

212

215

self._make_revision()

213

note('committed r%d {%s}', (self.branch.revno() + 1),

216

note('committted r%d {%s}', (self.branch.revno() + 1),

214

217

self.rev_id)

215

218

self.branch.append_revision(self.rev_id)

216

219

self.branch.set_pending_merges([])

218

221

self.branch.unlock()

219

222

220

223

221

222

224

def _record_inventory(self):
    """Serialize the new revision's inventory and add it to the weave store.

    The SHA-1 of the serialized text is kept in ``self.inv_sha1``.  The
    text itself is stored, split into lines, under ``INVENTORY_FILEID``
    as version ``self.rev_id`` with ``self.parents`` as its parents.
    """
    serialized = serializer_v5.write_inventory_to_string(self.new_inv)
    self.inv_sha1 = sha_string(serialized)
    inventory_lines = split_lines(serialized)
    self.weave_store.add_text(
        INVENTORY_FILEID, self.rev_id, inventory_lines, self.parents)

228

229

230

def _record_ancestry(self):
    """Record the ancestry of the new revision in the ancestry weave.

    The ancestry weave is fetched from the weave store (or created
    empty), the lines returned by ``_merge_ancestry_lines`` are added to
    it as version ``self.rev_id`` with ``self.parents`` as parents, and
    the weave is written back to the store.
    """
    ancestry_weave = self.weave_store.get_weave_or_empty(ANCESTRY_FILEID)
    merged_lines = self._merge_ancestry_lines(ancestry_weave)
    ancestry_weave.add(self.rev_id, self.parents, merged_lines)
    self.weave_store.put_weave(ANCESTRY_FILEID, ancestry_weave)

239

240

241

def _merge_ancestry_lines(self, ancestry_weave):
    """Return the merged ancestry lines for the revision being committed.

    Each line is a revision id followed by a newline.  The ancestry
    stored in `ancestry_weave` for every id in ``self.parents`` is read
    in parent order; only the first occurrence of each line is kept.
    The line for ``self.rev_id`` is appended last.

    Returns a list of lines.
    """
    seen = set()
    ancs = []
    for parent_id in self.parents:
        for line in ancestry_weave.get(parent_id):
            # endswith() also rejects an empty line with an assertion
            # failure instead of an IndexError.
            assert line.endswith('\n')
            if line not in seen:
                ancs.append(line)
                seen.add(line)
    r = self.rev_id + '\n'
    # `seen` always holds exactly the members of `ancs`, so test the set
    # rather than scanning the whole list.
    assert r not in seen
    ancs.append(r)
    mutter('merged ancestry of {%s}:\n%s', self.rev_id, ''.join(ancs))
    return ancs

258

259

260

def _gather_parents(self):

261

pending_merges = self.branch.pending_merges()

262

self.parents = []

263

self.parent_trees = []

264

precursor_id = self.branch.last_revision()

265

if precursor_id:

266

self.parents.append(precursor_id)

267

self.parent_trees.append(self.basis_tree)

268

self.parents += pending_merges

269

self.parent_trees.extend(map(self.branch.revision_tree, pending_merges))

270

271

272

def _check_parents_present(self):
    """Check that the branch holds every revision in ``self.parents``.

    Each parent id is logged.  For the first one the branch reports as
    absent, a warning is issued and HistoryMissing is raised.
    """
    for parent_id in self.parents:
        mutter('commit parent revision {%s}', parent_id)
        if self.branch.has_revision(parent_id):
            continue
        warning("can't commit a merge from an absent parent")
        raise HistoryMissing(self.branch, 'revision', parent_id)

278

279

225

inv_tmp = StringIO()

226

serializer_v5.write_inventory(self.new_inv, inv_tmp)

227

self.inv_sha1 = sha_string(inv_tmp.getvalue())

228

inv_tmp.seek(0)

229

self.branch.inventory_store.add(inv_tmp, self.rev_id)

230

231

280

232

def _make_revision(self):

281

233

"""Record a new revision object for this commit."""

282

234

self.rev = Revision(timestamp=self.timestamp,

285

237

message=self.message,

286

238

inventory_sha1=self.inv_sha1,

287

239

revision_id=self.rev_id)

288

self.rev.parents = map(RevisionReference, self.parents)

289

rev_tmp = StringIO()

240

241

self.rev.parents = []

242

precursor_id = self.branch.last_patch()

243

if precursor_id:

244

self.rev.parents.append(RevisionReference(precursor_id))

245

for merge_rev in self.pending_merges:

246

rev.parents.append(RevisionReference(merge_rev))

247

248

rev_tmp = tempfile.TemporaryFile()

290

249

serializer_v5.write_revision(self.rev, rev_tmp)

291

250

rev_tmp.seek(0)

292

251

self.branch.revision_store.add(rev_tmp, self.rev_id)

294

253

295

254

296

255

def _remove_deleted(self):
    """Remove missing files from the working inventory.

    This is done prior to taking the working inventory as the basis for
    the new committed inventory.

    Every working-inventory entry whose path the working tree does not
    have is reported with a 'missing' note and deleted from the working
    inventory.  When ``self.specific_files`` is set, only paths inside
    one of those files are considered.  If anything was deleted, the
    updated working inventory is written back through the branch.

    Nothing is returned; the visible caller ignores the result.
    """
    specific = self.specific_files
    deleted_ids = []
    for path, ie in self.work_inv.iter_entries():
        if specific and not is_inside_any(specific, path):
            continue
        if not self.work_tree.has_filename(path):
            note('missing %s', path)
            deleted_ids.append(ie.file_id)
    # Entries are deleted only after the walk over the inventory has
    # finished, and the inventory is rewritten only if it changed.
    if deleted_ids:
        for file_id in deleted_ids:
            del self.work_inv[file_id]
        self.branch._write_inventory(self.work_inv)

319

320

321

def _find_file_parents(self, file_id):

322

"""Return the text versions and hashes for all file parents.

323

324

Returned as a map from text version to inventory entry.

325

326

This is a set containing the file versions in all parents

327

revisions containing the file. If the file is new, the set

328

will be empty."""

329

r = {}

330

for tree in self.parent_trees:

331

if file_id in tree.inventory:

332

ie = tree.inventory[file_id]

333

assert ie.kind == 'file'

334

assert ie.file_id == file_id

335

if ie.text_version in r:

336

assert r[ie.text_version] == ie

337

else:

338

r[ie.text_version] = ie

339

return r

340

341

342

def _store_entries(self):

343

"""Build revision inventory and store modified files.

344

345

This is called with new_inv a new empty inventory. Depending on

346

which files are selected for commit, and which ones have

347

been modified or merged, new inventory entries are built

348

based on the working and parent inventories.

349

350

As a side-effect this stores new text versions for committed

351

files with text changes or merges.

352

353

Each entry can have one of several things happen:

354

355

carry_file -- carried from the previous version (if not

356

selected for commit)

357

358

commit_nonfile -- no text to worry about

359

360

commit_old_text -- same text, may have moved

361

362

commit_file -- new text version

363

"""

364

for path, new_ie in self.work_inv.iter_entries():

365

file_id = new_ie.file_id

366

mutter('check %s {%s}', path, new_ie.file_id)

367

if self.specific_files:

368

if not is_inside_any(self.specific_files, path):

369

mutter('%s not selected for commit', path)

370

self._carry_file(file_id)

371

continue

372

if new_ie.kind != 'file':

373

self._commit_nonfile(file_id)

374

continue

375

376

file_parents = self._find_file_parents(file_id)

377

if len(file_parents) == 1:

378

parent_ie = file_parents.values()[0]

379

wc_sha1 = self.work_tree.get_file_sha1(file_id)

380

if parent_ie.text_sha1 == wc_sha1:

381

# text not changed or merged

382

self._commit_old_text(file_id, parent_ie)

383

continue

384

385

mutter('parents of %s are %r', path, file_parents)

386

387

# file is either new, or a file merge; need to record

388

# a new version

389

if len(file_parents) > 1:

390

note('merged %s', path)

391

elif len(file_parents) == 0:

392

note('added %s', path)

256

"""Remove deleted files from the working and stored inventories."""

257

for path, id, kind in self.delta.removed:

258

if self.work_inv.has_id(id):

259

del self.work_inv[id]

260

if self.new_inv.has_id(id):

261

del self.new_inv[id]

262

263

264

265

def _store_files(self):

266

"""Store new texts of modified/added files."""

267

# We must make sure that directories are added before anything

268

# inside them is added. the files within the delta report are

269

# sorted by path so we know the directory will come before its

270

# contents.

271

for path, file_id, kind in self.delta.added:

272

if kind != 'file':

273

ie = self.work_inv[file_id].copy()

274

self.new_inv.add(ie)

393

275

else:

394

note('modified %s', path)

395

self._commit_file(new_ie, file_id, file_parents)

396

397

398

def _commit_nonfile(self, file_id):

399

self.new_inv.add(self.work_inv[file_id].copy())

400

401

402

def _carry_file(self, file_id):

403

"""Carry the file unchanged from the basis revision."""

404

if self.basis_inv.has_id(file_id):

405

self.new_inv.add(self.basis_inv[file_id].copy())

406

407

408

def _commit_old_text(self, file_id, parent_ie):

409

"""Keep the same text as last time, but possibly a different name."""

410

ie = self.work_inv[file_id].copy()

411

ie.text_version = parent_ie.text_version

412

ie.text_size = parent_ie.text_size

413

ie.text_sha1 = parent_ie.text_sha1

414

self.new_inv.add(ie)

415

416

417

def _report_deletes(self):
    """Emit a 'deleted' note for each basis file id absent from new_inv.

    The path reported is the one the file had in the basis inventory.
    """
    for file_id in self.basis_inv:
        if file_id in self.new_inv:
            continue
        note('deleted %s', self.basis_inv.id2path(file_id))

421

422

423

def _commit_file(self, new_ie, file_id, file_parents):

424

mutter('store new text for {%s} in revision {%s}',

425

file_id, self.rev_id)

276

self._store_file_text(file_id)

277

278

for path, file_id, kind in self.delta.modified:

279

if kind != 'file':

280

continue

281

self._store_file_text(file_id)

282

283

for old_path, new_path, file_id, kind, text_modified in self.delta.renamed:

284

if kind != 'file':

285

continue

286

if not text_modified:

287

continue

288

self._store_file_text(file_id)

289

290

291

def _store_file_text(self, file_id):

292

"""Store updated text for one modified or added file."""

293

note('store new text for {%s} in revision {%s}',

294

file_id, self.rev_id)

426

295

new_lines = self.work_tree.get_file(file_id).readlines()

296

if file_id in self.new_inv: # was in basis inventory

297

ie = self.new_inv[file_id]

298

assert ie.file_id == file_id

299

assert file_id in self.basis_inv

300

assert self.basis_inv[file_id].kind == 'file'

301

old_version = self.basis_inv[file_id].text_version

302

file_parents = [old_version]

303

else: # new in this revision

304

ie = self.work_inv[file_id].copy()

305

self.new_inv.add(ie)

306

assert file_id not in self.basis_inv

307

file_parents = []

308

assert ie.kind == 'file'

427

309

self._add_text_to_weave(file_id, new_lines, file_parents)

428

new_ie.text_version = self.rev_id

429

new_ie.text_sha1 = sha_strings(new_lines)

430

new_ie.text_size = sum(map(len, new_lines))

431

self.new_inv.add(new_ie)

310

# make a new inventory entry for this file, using whatever

311

# it had in the working copy, plus details on the new text

312

ie.text_sha1 = _sha_strings(new_lines)

313

ie.text_size = sum(map(len, new_lines))

314

ie.text_version = self.rev_id

315

ie.entry_version = self.rev_id

432

316

433

317

434

318

def _add_text_to_weave(self, file_id, new_lines, parents):

435

if file_id.startswith('__'):

436

raise ValueError('illegal file-id %r for text file' % file_id)

437

self.weave_store.add_text(file_id, self.rev_id, new_lines, parents)

319

weave_fn = self.branch.controlfilename(['weaves', file_id+'.weave'])

320

if os.path.exists(weave_fn):

321

w = read_weave(file(weave_fn, 'rb'))

322

else:

323

w = Weave()

324

# XXX: Should set the appropriate parents by looking for this file_id

325

# in all revision parents

326

parent_idxs = map(w.lookup, parents)

327

w.add(self.rev_id, parent_idxs, new_lines)

328

af = AtomicFile(weave_fn)

329

try:

330

write_weave_v5(w, af)

331

af.commit()

332

finally:

333

af.close()

438

334

439

335

440

336

def _gen_revision_id(branch, when):

444

340

return s

445

341

446

342

447

448

343

def _sha_strings(strings):

344

"""Return the sha-1 of concatenation of strings"""

345

s = sha.new()

346

map(s.update, strings)

347

return s.hexdigest()

Older »