~bzr-pqm/bzr/bzr.dev

Committer: Martin Pool
Date: 2005-09-15 04:32:41 UTC
Revision ID: mbp@sourcefrog.net-20050915043239-f8f8978f485c1b85

- new weave based cleanup [broken]

  - don't do a simple comparison to the basis version, but rather take
    file merges into account.

  - do more generation of XML in memory rather than to temporary files

  - can now commit basic merges (not totally correctly though)

  - track file parent versions

  - new revision id derived from the supplied timestamp (if given),
    not the current timestamp

  - only write working inventory if there were changes caused by
    deleted files

  - put back WorkingTree.get_file_size

  - methods to serialize to/from strings, as well as files.

Doesn't handle selective commit or proper merges yet.

files modified:
bzrlib/commit.py

bzrlib/hashcache.py

bzrlib/workingtree.py

bzrlib/xml.py

Show diffs side-by-side

added added

removed removed

bzrlib/commit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

## XXX: Can we do any better about making interrupted commits change

## nothing?

## XXX: If we merged two versions of a file then we still need to

## create a new version representing that merge, even if it didn't

## change from the parent.

## TODO: Read back the just-generated changeset, and make sure it

## applies and recreates the right state.

## This is not quite safe if the working copy changes during the

## commit; for the moment that is simply not allowed. A better

## approach is to make a temporary copy of the files before

## computing their hashes, and then add those hashes in turn to

## the inventory. This should mean at least that there are no

## broken hash pointers. There is no way we can get a snapshot

## of the whole directory at an instant. This would also have to

## be robust against files disappearing, moving, etc. So the

## whole thing is a bit hard.

## The newly committed revision is going to have a shape corresponding

## to that of the working inventory. Files that are not in the

## working tree and that were in the predecessor are reported as

## removed -- this can include files that were either removed from the

## inventory or deleted in the working tree. If they were only

## deleted from disk, they are removed from the working inventory.

## We then consider the remaining entries, which will be in the new

## version. Directory entries are simply copied across. File entries

## must be checked to see if a new version of the file should be

## recorded. For each parent revision inventory, we check to see what

## version of the file was present. If the file was present in at

## least one tree, and if it was the same version in all the trees,

## then we can just refer to that version. Otherwise, a new version

## representing the merger of the file versions must be added.

# XXX: Can we do any better about making interrupted commits change

# nothing? Perhaps the best approach is to integrate commit of

# AtomicFiles with releasing the lock on the Branch.

# TODO: Separate 'prepare' phase where we find a list of potentially

# committed files. We can then pause the commit to prompt for a

# commit message, knowing the summary will be the same as what's

# actually used for the commit. (But perhaps simpler to simply get

# the tree status, then use that for a selective commit?)

# The newly committed revision is going to have a shape corresponding

# to that of the working inventory. Files that are not in the

# working tree and that were in the predecessor are reported as

# removed --- this can include files that were either removed from the

# inventory or deleted in the working tree. If they were only

# deleted from disk, they are removed from the working inventory.

# We then consider the remaining entries, which will be in the new

# version. Directory entries are simply copied across. File entries

# must be checked to see if a new version of the file should be

# recorded. For each parent revision inventory, we check to see what

# version of the file was present. If the file was present in at

# least one tree, and if it was the same version in all the trees,

# then we can just refer to that version. Otherwise, a new version

# representing the merger of the file versions must be added.

# TODO: Update hashcache before and after - or does the WorkingTree

# look after that?

import os

import sys

import time

import tempfile

import sha

import pdb

from binascii import hexlify

from cStringIO import StringIO

from bzrlib.osutils import (local_time_offset, username,

rand_bytes, compact_date, user_email,

kind_marker, is_inside_any, quotefn,

sha_string, sha_strings, sha_file, isdir, isfile)

sha_string, sha_strings, sha_file, isdir, isfile,

split_lines)

from bzrlib.branch import gen_file_id, INVENTORY_FILEID, ANCESTRY_FILEID

from bzrlib.errors import BzrError, PointlessCommit

from bzrlib.revision import Revision, RevisionReference

from bzrlib.trace import mutter, note

from bzrlib.xml5 import serializer_v5

from bzrlib.inventory import Inventory

from bzrlib.delta import compare_trees

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave_v5

from bzrlib.atomicfile import AtomicFile

166

155

"""

167

156

168

157

self.branch = branch

158

self.weave_store = branch.weave_store

169

159

self.rev_id = rev_id

170

160

self.specific_files = specific_files

171

161

self.allow_pointless = allow_pointless

175

165

else:

176

166

self.timestamp = long(timestamp)

177

167

168

if rev_id is None:

169

self.rev_id = _gen_revision_id(self.branch, self.timestamp)

170

else:

171

self.rev_id = rev_id

172

178

173

if committer is None:

179

174

self.committer = username(self.branch)

180

175

else:

191

186

192

187

self.branch.lock_write()

193

188

try:

194

# First walk over the working inventory; and both update that

195

# and also build a new revision inventory. The revision

196

# inventory needs to hold the text-id, sha1 and size of the

197

# actual file versions committed in the revision. (These are

198

# not present in the working inventory.) We also need to

199

# detect missing/deleted files, and remove them from the

200

# working inventory.

201

202

189

self.work_tree = self.branch.working_tree()

203

190

self.work_inv = self.work_tree.inventory

204

191

self.basis_tree = self.branch.basis_tree()

206

193

207

194

self._gather_parents()

208

195

209

if self.rev_id is None:

210

self.rev_id = _gen_revision_id(self.branch, time.time())

211

212

self._remove_deletions()

196

any_deletions = self._remove_deleted()

197

self.new_inv = self.work_inv.copy()

198

any_changes = self._store_files()

199

self._report_deletes()

213

200

214

201

# TODO: update hashcache

215

self.delta = compare_trees(self.basis_tree, self.work_tree,

216

specific_files=self.specific_files)

217

218

if not (self.delta.has_changed()

219

or self.allow_pointless

202

if not (self.allow_pointless

203

or any_changes

204

or any_deletions

220

205

or len(self.parents) != 1):

221

206

raise PointlessCommit()

222

207

223

self.new_inv = self.basis_inv.copy()

224

225

## FIXME: Don't write to stdout!

226

self.delta.show(sys.stdout)

227

228

self._remove_deleted()

229

self._store_files()

230

231

self.branch._write_inventory(self.work_inv)

208

if any_deletions:

209

self.branch._write_inventory(self.work_inv)

232

210

self._record_inventory()

233

211

self._record_ancestry()

234

235

212

self._make_revision()

236

213

note('committted r%d {%s}', (self.branch.revno() + 1),

237

214

self.rev_id)

242

219

243

220

244

221

245

def _remove_deletions(self):

246

"""Remove deleted files from the working inventory."""

247

pass

248

249

250

251

222

def _record_inventory(self):

252

223

"""Store the inventory for the new revision."""

253

inv_tmp = StringIO()

254

serializer_v5.write_inventory(self.new_inv, inv_tmp)

255

inv_tmp.seek(0)

256

self.inv_sha1 = sha_string(inv_tmp.getvalue())

257

inv_lines = inv_tmp.readlines()

258

self.branch.weave_store.add_text(INVENTORY_FILEID, self.rev_id,

259

inv_lines, self.parents)

224

inv_text = serializer_v5.write_inventory_to_string(self.new_inv)

225

self.inv_sha1 = sha_string(inv_text)

226

self.weave_store.add_text(INVENTORY_FILEID, self.rev_id,

227

split_lines(inv_text), self.parents)

260

228

261

229

262

230

def _record_ancestry(self):

263

231

"""Append merged revision ancestry to the ancestry file."""

264

if len(self.parents) > 1:

265

raise NotImplementedError("sorry, can't commit merges yet")

266

w = self.branch.weave_store.get_weave_or_empty(ANCESTRY_FILEID)

232

w = self.weave_store.get_weave_or_empty(ANCESTRY_FILEID)

267

233

if self.parents:

268

234

lines = w.get(w.lookup(self.parents[0]))

269

235

else:

271

237

lines.append(self.rev_id + '\n')

272

238

parent_idxs = map(w.lookup, self.parents)

273

239

w.add(self.rev_id, parent_idxs, lines)

274

self.branch.weave_store.put_weave(ANCESTRY_FILEID, w)

240

self.weave_store.put_weave(ANCESTRY_FILEID, w)

275

241

276

242

277

243

def _gather_parents(self):

278

244

pending_merges = self.branch.pending_merges()

279

if pending_merges:

280

raise NotImplementedError("sorry, can't commit merges to the weave format yet")

281

245

self.parents = []

282

246

precursor_id = self.branch.last_revision()

283

247

if precursor_id:

284

248

self.parents.append(precursor_id)

285

249

self.parents += pending_merges

250

self.parent_trees = map(self.branch.revision_tree, self.parents)

286

251

287

252

288

253

def _make_revision(self):

294

259

inventory_sha1=self.inv_sha1,

295

260

revision_id=self.rev_id)

296

261

self.rev.parents = map(RevisionReference, self.parents)

297

rev_tmp = tempfile.TemporaryFile()

262

rev_tmp = StringIO()

298

263

serializer_v5.write_revision(self.rev, rev_tmp)

299

264

rev_tmp.seek(0)

300

265

self.branch.revision_store.add(rev_tmp, self.rev_id)

302

267

303

268

304

269

def _remove_deleted(self):

305

"""Remove deleted files from the working and stored inventories."""

306

for path, id, kind in self.delta.removed:

307

if self.work_inv.has_id(id):

308

del self.work_inv[id]

309

if self.new_inv.has_id(id):

310

del self.new_inv[id]

311

270

"""Remove deleted files from the working inventories.

271

272

This is done prior to taking the working inventory as the

273

basis for the new committed inventory.

274

275

This returns true if any files

276

*that existed in the basis inventory* were deleted.

277

Files that were added and deleted

278

in the working copy don't matter.

279

"""

280

any_deletes = False

281

for file_id in list(iter(self.work_inv)): # snapshot for deletion

282

if not self.work_tree.has_id(file_id):

283

note('missing %s', self.work_inv.id2path(file_id))

284

del self.work_inv[file_id]

285

if self.basis_inv.has_id(file_id):

286

any_deletes = True

287

return any_deletes

288

289

290

def _find_file_parents(self, file_id):

291

"""Return the text versions and hashes for all file parents.

292

293

Returned as a map from text version to text sha1.

294

295

This is a set containing the file versions in all parent

296

revisions containing the file. If the file is new, the set

297

will be empty."""

298

r = {}

299

for tree in self.parent_trees:

300

if file_id in tree.inventory:

301

ie = tree.inventory[file_id]

302

assert ie.kind == 'file'

303

assert ie.file_id == file_id

304

if ie.text_version in r:

305

assert r[ie.text_version] == ie.text_sha1

306

else:

307

r[ie.text_version] = ie.text_sha1

308

return r

312

309

313

310

314

311

def _store_files(self):

315

"""Store new texts of modified/added files."""

316

# We must make sure that directories are added before anything

317

# inside them is added. the files within the delta report are

318

# sorted by path so we know the directory will come before its

319

# contents.

320

for path, file_id, kind in self.delta.added:

321

if kind != 'file':

322

ie = self.work_inv[file_id].copy()

323

self.new_inv.add(ie)

312

"""Store new texts of modified/added files.

313

314

This is called with new_inv set to a copy of the working

315

inventory, with deleted/removed files already cut out. So

316

this code only needs to deal with setting text versions, and

317

possibly recording new file texts."""

318

any_changes = False

319

for path, new_ie in self.new_inv.iter_entries():

320

if self.specific_files:

321

if not is_inside_any(self.specific_files, path):

322

# Not done yet

323

pass

324

mutter('check %s {%s}', path, new_ie.file_id)

325

if new_ie.kind != 'file':

326

# only regular files have texts to update

327

continue

328

file_id = new_ie.file_id

329

file_parents = self._find_file_parents(file_id)

330

wc_sha1 = self.work_tree.get_file_sha1(file_id)

331

wc_len = self.work_tree.get_file_size(file_id)

332

if (len(file_parents) == 1

333

and file_parents.values()[0] == wc_sha1):

334

# same as the single previous version, can reuse that

335

text_version = file_parents.keys()[0]

324

336

else:

325

self._store_file_text(file_id)

326

327

for path, file_id, kind in self.delta.modified:

328

if kind != 'file':

329

continue

330

self._store_file_text(file_id)

331

332

for old_path, new_path, file_id, kind, text_modified in self.delta.renamed:

333

if kind != 'file':

334

continue

335

if not text_modified:

336

continue

337

self._store_file_text(file_id)

338

339

340

def _store_file_text(self, file_id):

341

"""Store updated text for one modified or added file."""

342

note('store new text for {%s} in revision {%s}',

343

file_id, self.rev_id)

337

# file is either new, or a file merge; need to record

338

# a new version

339

if len(file_parents) > 1:

340

note('merged %s', path)

341

elif len(file_parents) == 0:

342

note('added %s', path)

343

else:

344

note('modified %s', path)

345

self._store_text(file_id, file_parents)

346

text_version = self.rev_id

347

any_changes = True

348

new_ie.text_version = text_version

349

new_ie.text_sha1 = wc_sha1

350

new_ie.text_size = wc_len

351

return any_changes

352

353

354

def _report_deletes(self):

355

for file_id in self.basis_inv:

356

if file_id not in self.new_inv:

357

note('deleted %s', self.basis_inv.id2path(file_id))

358

359

360

def _store_text(self, file_id, file_parents):

361

mutter('store new text for {%s} in revision {%s}',

362

file_id, self.rev_id)

344

363

new_lines = self.work_tree.get_file(file_id).readlines()

345

if file_id in self.new_inv: # was in basis inventory

346

ie = self.new_inv[file_id]

347

assert ie.file_id == file_id

348

assert file_id in self.basis_inv

349

assert self.basis_inv[file_id].kind == 'file'

350

old_version = self.basis_inv[file_id].text_version

351

file_parents = [old_version]

352

else: # new in this revision

353

ie = self.work_inv[file_id].copy()

354

self.new_inv.add(ie)

355

assert file_id not in self.basis_inv

356

file_parents = []

357

assert ie.kind == 'file'

358

364

self._add_text_to_weave(file_id, new_lines, file_parents)

359

# make a new inventory entry for this file, using whatever

360

# it had in the working copy, plus details on the new text

361

ie.text_sha1 = sha_strings(new_lines)

362

ie.text_size = sum(map(len, new_lines))

363

ie.text_version = self.rev_id

364

ie.entry_version = self.rev_id

365

366

367

def _add_text_to_weave(self, file_id, new_lines, parents):

368

if file_id.startswith('__'):

369

raise ValueError('illegal file-id %r for text file' % file_id)

370

self.branch.weave_store.add_text(file_id, self.rev_id, new_lines, parents)

370

self.weave_store.add_text(file_id, self.rev_id, new_lines, parents)

371

372

373

def _gen_revision_id(branch, when):

376

s += hexlify(rand_bytes(8))

377

return s

378

379

380

381

Older »