~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/commit.py

Committer: Martin Pool
Date: 2005-05-26 16:09:45 UTC
Revision ID: mbp@sourcefrog.net-20050526160945-7574594a45ba1c9c

- more invariant checks in AtomicFile

files added:
bzrlib/statcache.py

files removed:
HACKING

Makefile

bzr-man.py

bzrlib/builtins.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/merge3.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/selftest

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/weavestore.py

bzrlib/xml5.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
contrib/newinventory.py => bzrlib/newinventory.py

contrib/bash/bzr.simple => contrib/bash/bzr

bzrlib/util/elementtree/ => elementtree/

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

doc/formats.txt

doc/index.txt

doc/todo-from-arch.txt

setup.py

testbzr

Show diffs side-by-side

added added

removed removed

bzrlib/commit.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

## XXX: Can we do any better about making interrupted commits change

## nothing?

## XXX: If we merged two versions of a file then we still need to

## create a new version representing that merge, even if it didn't

## change from the parent.

## TODO: Read back the just-generated changeset, and make sure it

## applies and recreates the right state.

import os

import sys

import time

import tempfile

import sha

from binascii import hexlify

from cStringIO import StringIO

from bzrlib.osutils import (local_time_offset, username,

rand_bytes, compact_date, user_email,

kind_marker, is_inside_any, quotefn,

sha_string, sha_strings, sha_file, isdir, isfile)

from bzrlib.branch import gen_file_id, INVENTORY_FILEID, ANCESTRY_FILEID

from bzrlib.errors import BzrError, PointlessCommit

from bzrlib.revision import Revision, RevisionReference

from bzrlib.trace import mutter, note

from bzrlib.xml5 import serializer_v5

from bzrlib.inventory import Inventory

from bzrlib.delta import compare_trees

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave_v5

from bzrlib.atomicfile import AtomicFile

def commit(*args, **kwargs):
    """Commit a new revision to a branch.

    Thin function-style shim kept for old callers: it builds a
    default-configured Commit task and forwards every positional and
    keyword argument to its commit() method unchanged.

    New code should use the Commit class directly.
    """
    task = Commit()
    task.commit(*args, **kwargs)

class NullCommitReporter(object):
    """I report on progress of a commit.

    This base implementation ignores every event; subclasses override
    the hooks they want to react to.
    """

    def added(self, path):
        """Hook for an added path; this implementation does nothing."""

    def removed(self, path):
        """Hook for a removed path; this implementation does nothing."""

    def renamed(self, old_path, new_path):
        """Hook for a rename; this implementation does nothing."""

class ReportCommitToLog(NullCommitReporter):
    """Commit reporter that forwards each event to note()."""

    def added(self, path):
        """Log that *path* was added."""
        note('added %s', path)

    def removed(self, path):
        """Log that *path* was removed."""
        note('removed %s', path)

    def renamed(self, old_path, new_path):
        """Log that *old_path* was renamed to *new_path*."""
        note('renamed %s => %s', old_path, new_path)

class Commit(object):
    """Task of committing a new revision.

    This is a MethodObject: it accumulates state as the commit is
    prepared, and then it is discarded.  It doesn't represent
    historical revisions, just the act of recording a new one.

    State accumulated on the instance by commit():

    branch, rev_id, specific_files, allow_pointless,
    timestamp, timezone, committer, message
        The arguments of commit(), after defaults are filled in.

    work_tree, work_inv
        The branch's working tree and its inventory.

    basis_tree, basis_inv
        The branch's basis tree and its inventory.

    parents
        List of parent revision ids (see _gather_parents).

    delta
        Result of compare_trees(basis_tree, work_tree).

    new_inv
        Inventory recorded for the new revision; starts as a copy of
        basis_inv and is updated as files are removed and stored.
    """

    def __init__(self,
                 reporter=None):
        """Create a commit task.

        reporter -- object with added/removed/renamed methods; a
            NullCommitReporter is used when none is given.
        """
        if reporter is not None:
            self.reporter = reporter
        else:
            self.reporter = NullCommitReporter()

    def commit(self,
               branch, message,
               timestamp=None,
               timezone=None,
               committer=None,
               specific_files=None,
               rev_id=None,
               allow_pointless=True):
        """Commit working copy as a new revision.

        The basic approach is to add all the file texts into the
        store, then the inventory, then make a new revision pointing
        to that inventory and store that.

        This is not quite safe if the working copy changes during the
        commit; for the moment that is simply not allowed.  A better
        approach is to make a temporary copy of the files before
        computing their hashes, and then add those hashes in turn to
        the inventory.  This should mean at least that there are no
        broken hash pointers.  There is no way we can get a snapshot
        of the whole directory at an instant.  This would also have to
        be robust against files disappearing, moving, etc.  So the
        whole thing is a bit hard.

        This raises PointlessCommit if there are no changes, no new merges,
        and allow_pointless is false.  NotImplementedError is raised
        if the branch has pending merges.

        timestamp -- if not None, seconds-since-epoch for a
             postdated/predated commit.

        timezone -- if not None, converted with int(); otherwise
             local_time_offset() is used.

        committer -- if not None, must be a string; otherwise
             username(branch) is used.

        specific_files
            If true, commit only those files.

        rev_id
            If set, use this as the new revision id.
            Useful for test or import commands that need to tightly
            control what revisions are assigned.  If you duplicate
            a revision id that exists elsewhere it is your own fault.
            If null (default), a time/random revision id is generated.
        """
        self.branch = branch
        self.branch.lock_write()
        # Everything after taking the lock runs inside try/finally so
        # that a failure while normalising the arguments (bad timestamp,
        # non-string message, ...) cannot leave the branch write-locked.
        try:
            self.rev_id = rev_id
            self.specific_files = specific_files
            self.allow_pointless = allow_pointless

            if timestamp is None:
                self.timestamp = time.time()
            else:
                self.timestamp = long(timestamp)

            if committer is None:
                self.committer = username(self.branch)
            else:
                assert isinstance(committer, basestring), type(committer)
                self.committer = committer

            if timezone is None:
                self.timezone = local_time_offset()
            else:
                self.timezone = int(timezone)

            assert isinstance(message, basestring), type(message)
            self.message = message

            # First walk over the working inventory; and both update that
            # and also build a new revision inventory.  The revision
            # inventory needs to hold the text-id, sha1 and size of the
            # actual file versions committed in the revision.  (These are
            # not present in the working inventory.)  We also need to
            # detect missing/deleted files, and remove them from the
            # working inventory.

            self.work_tree = self.branch.working_tree()
            self.work_inv = self.work_tree.inventory
            self.basis_tree = self.branch.basis_tree()
            self.basis_inv = self.basis_tree.inventory

            self._gather_parents()

            if self.rev_id is None:
                self.rev_id = _gen_revision_id(self.branch, time.time())

            # TODO: update hashcache
            self.delta = compare_trees(self.basis_tree, self.work_tree,
                                       specific_files=self.specific_files)

            # An unchanged tree is only acceptable when the caller allows
            # it or when the number of parents is not exactly one.
            if not (self.delta.has_changed()
                    or self.allow_pointless
                    or len(self.parents) != 1):
                raise PointlessCommit()

            self.new_inv = self.basis_inv.copy()

            self.delta.show(sys.stdout)

            self._remove_deleted()
            self._store_files()

            self.branch._write_inventory(self.work_inv)
            self._record_inventory()
            self._record_ancestry()

            self._make_revision()
            note('committed r%d {%s}', (self.branch.revno() + 1),
                 self.rev_id)
            self.branch.append_revision(self.rev_id)
            self.branch.set_pending_merges([])
        finally:
            self.branch.unlock()

    def _record_inventory(self):
        """Store the inventory for the new revision.

        Serialises new_inv, remembers its SHA-1 in self.inv_sha1 and
        adds the serialised lines to the weave store under
        INVENTORY_FILEID.
        """
        inv_tmp = StringIO()
        serializer_v5.write_inventory(self.new_inv, inv_tmp)
        inv_tmp.seek(0)
        self.inv_sha1 = sha_string(inv_tmp.getvalue())
        inv_lines = inv_tmp.readlines()
        self.branch.weave_store.add_text(INVENTORY_FILEID, self.rev_id,
                                         inv_lines, self.parents)

    def _record_ancestry(self):
        """Append merged revision ancestry to the ancestry file."""
        if len(self.parents) > 1:
            raise NotImplementedError("sorry, can't commit merges yet")
        w = self.branch.weave_store.get_weave_or_empty(ANCESTRY_FILEID)
        if self.parents:
            lines = w.get(w.lookup(self.parents[0]))
        else:
            lines = []
        # The new ancestry is the first parent's ancestry plus this
        # revision's own id.
        lines.append(self.rev_id + '\n')
        parent_idxs = [w.lookup(parent_id) for parent_id in self.parents]
        w.add(self.rev_id, parent_idxs, lines)
        self.branch.weave_store.put_weave(ANCESTRY_FILEID, w)

    def _gather_parents(self):
        """Set self.parents from the branch's last revision.

        Raises NotImplementedError if the branch has pending merges.
        """
        pending_merges = self.branch.pending_merges()
        if pending_merges:
            raise NotImplementedError("sorry, can't commit merges to the weave format yet")
        self.parents = []
        precursor_id = self.branch.last_patch()
        if precursor_id:
            self.parents.append(precursor_id)
        self.parents += pending_merges

    def _make_revision(self):
        """Record a new revision object for this commit."""
        self.rev = Revision(timestamp=self.timestamp,
                            timezone=self.timezone,
                            committer=self.committer,
                            message=self.message,
                            inventory_sha1=self.inv_sha1,
                            revision_id=self.rev_id)
        self.rev.parents = [RevisionReference(parent_id)
                            for parent_id in self.parents]
        rev_tmp = tempfile.TemporaryFile()
        # Close the temporary file explicitly once the store has read it
        # instead of relying on garbage collection.
        try:
            serializer_v5.write_revision(self.rev, rev_tmp)
            rev_tmp.seek(0)
            self.branch.revision_store.add(rev_tmp, self.rev_id)
        finally:
            rev_tmp.close()
        mutter('new revision_id is {%s}', self.rev_id)

    def _remove_deleted(self):
        """Remove deleted files from the working and stored inventories."""
        for path, file_id, kind in self.delta.removed:
            # An id may already be gone from either inventory, so test
            # before deleting.
            if self.work_inv.has_id(file_id):
                del self.work_inv[file_id]
            if self.new_inv.has_id(file_id):
                del self.new_inv[file_id]

    def _store_files(self):
        """Store new texts of modified/added files."""
        # We must make sure that directories are added before anything
        # inside them is added.  the files within the delta report are
        # sorted by path so we know the directory will come before its
        # contents.
        for path, file_id, kind in self.delta.added:
            if kind != 'file':
                # Non-file entries have no text; just carry the working
                # inventory entry over into the new inventory.
                ie = self.work_inv[file_id].copy()
                self.new_inv.add(ie)
            else:
                self._store_file_text(file_id)

        for path, file_id, kind in self.delta.modified:
            if kind != 'file':
                continue
            self._store_file_text(file_id)

        for old_path, new_path, file_id, kind, text_modified in self.delta.renamed:
            if kind != 'file':
                continue
            if not text_modified:
                continue
            self._store_file_text(file_id)

    def _store_file_text(self, file_id):
        """Store updated text for one modified or added file."""
        note('store new text for {%s} in revision {%s}',
             file_id, self.rev_id)
        text_file = self.work_tree.get_file(file_id)
        # Close the working-tree file as soon as its lines are read.
        try:
            new_lines = text_file.readlines()
        finally:
            text_file.close()
        if file_id in self.new_inv:     # was in basis inventory
            ie = self.new_inv[file_id]
            assert ie.file_id == file_id
            assert file_id in self.basis_inv
            assert self.basis_inv[file_id].kind == 'file'
            old_version = self.basis_inv[file_id].text_version
            file_parents = [old_version]
        else:                           # new in this revision
            ie = self.work_inv[file_id].copy()
            self.new_inv.add(ie)
            assert file_id not in self.basis_inv
            file_parents = []
        assert ie.kind == 'file'
        self._add_text_to_weave(file_id, new_lines, file_parents)
        # make a new inventory entry for this file, using whatever
        # it had in the working copy, plus details on the new text
        ie.text_sha1 = sha_strings(new_lines)
        ie.text_size = sum(map(len, new_lines))
        ie.text_version = self.rev_id
        ie.entry_version = self.rev_id

    def _add_text_to_weave(self, file_id, new_lines, parents):
        """Add new_lines to the weave store as this revision's text of file_id.

        Raises ValueError if file_id starts with '__'.
        """
        if file_id.startswith('__'):
            raise ValueError('illegal file-id %r for text file' % file_id)
        self.branch.weave_store.add_text(file_id, self.rev_id, new_lines, parents)

343

344

345

def _gen_revision_id(branch, when):
    """Return new revision-id.

    The id is user_email(branch), then compact_date(when), then the
    hex form of eight bytes from rand_bytes, joined with '-' and
    built in that order.
    """
    prefix = '%s-%s-' % (user_email(branch), compact_date(when))
    unique_suffix = hexlify(rand_bytes(8))
    return prefix + unique_suffix

350

238

239

Older »