~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tree.py

Committer: mbp at sourcefrog
Date: 2005-03-29 08:03:40 UTC
Revision ID: mbp@sourcefrog.net-20050329080340-6e09ef3afea64159

todo

files added:
bzrlib/tests.py

doc/faq.txt

doc/quickref.txt

doc/roadmap.txt

doc/testing.txt

doc/work-order.txt

files removed:
.rsyncexclude

HACKING

Makefile

NEWS.developers

TODO

bzr-man.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/clone.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testremotebranch.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/weavestore.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/quotes.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/split-join-files.txt

doc/switch-in-branch.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/symlink-support.patch

testbzr

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
bzrlib/util/elementtree/ => elementtree/

files modified:
.bzrignore

NEWS

README

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/store.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

doc/Makefile

doc/bitkeeper.txt

doc/formats.txt

doc/index.txt

doc/interrupted.txt

doc/merge.txt

doc/python.txt

doc/random.txt

doc/svk.txt

doc/tagging.txt

doc/todo-from-arch.txt

elementtree/ElementTree.py

notes/performance.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/tree.py

"""Tree classes, representing directory at point in time.

"""

import os

from cStringIO import StringIO

from sets import Set

import os.path, os, fnmatch

from inventory import Inventory

from trace import mutter, note

from osutils import pumpfile, compare_files, filesize, quotefn, sha_file, \

joinpath, splitpath, appendpath, isdir, isfile, file_kind, fingerprint_file

from errors import bailout

import branch

from stat import S_ISREG, S_ISDIR, ST_MODE, ST_SIZE

import bzrlib

from bzrlib.trace import mutter, note

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.inventory import Inventory

from bzrlib.osutils import pumpfile, appendpath, fingerprint_file

exporters = {}

class Tree(object):

class Tree:

"""Abstract file tree.

There are several subclasses:

def has_id(self, file_id):

return self.inventory.has_id(file_id)

__contains__ = has_id

def __iter__(self):

return iter(self.inventory)

def id_set(self):

"""Return set of all ids in this tree."""

return self.inventory.id_set()

def id2path(self, file_id):

return self.inventory.id2path(file_id)

def _get_inventory(self):

return self._inventory

def get_file_by_path(self, path):

return self.get_file(self._inventory.path2id(path))

inventory = property(_get_inventory,

doc="Inventory of this Tree")

def _check_retrieved(self, ie, f):

if not __debug__:

return

fp = fingerprint_file(f)

f.seek(0)

if ie.text_size != None:

if ie.text_size is not None:

if ie.text_size != fp['size']:

raise BzrError("mismatched size for file %r in %r" % (ie.file_id, self._store),

bailout("mismatched size for file %r in %r" % (ie.file_id, self._store),

["inventory expects %d bytes" % ie.text_size,

"file is actually %d bytes" % fp['size'],

"store is probably damaged/corrupt"])

if ie.text_sha1 != fp['sha1']:

raise BzrError("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),

bailout("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),

["inventory expects %s" % ie.text_sha1,

"file is actually %s" % fp['sha1'],

"store is probably damaged/corrupt"])

def print_file(self, file_id):
    """Write the text of the file identified by `file_id` to stdout."""
    import sys
    text = self.get_file_text(file_id)
    sys.stdout.write(text)

102

103

104

def export(self, dest, format='dir', root=None):
    """Export this tree to `dest` using the exporter registered for `format`.

    The exporter is looked up in the module-level `exporters` mapping and
    called as ``exporter(self, dest, root)``.  An unregistered `format`
    raises BzrCommandError.
    """
    if format not in exporters:
        from bzrlib.errors import BzrCommandError
        raise BzrCommandError("export format %r not supported" % format)
    export_fn = exporters[format]
    export_fn(self, dest, root)

112

113

def export(self, dest):
    """Export this tree to a new directory.

    `dest` should not exist, and will be created holding the
    contents of this tree.

    Entries are written in the order produced by the inventory's
    `iter_entries()`; directories are created with `os.mkdir` and file
    contents are copied with `pumpfile`.  Any other entry kind is reported
    through `bailout`.

    :todo: To handle subdirectories we need to create the
           directories first.

    :note: If the export fails, the destination directory will be
           left in a partially written state.
    """
    os.mkdir(dest)
    mutter('export version %r' % self)
    inv = self.inventory
    for dp, ie in inv.iter_entries():
        kind = ie.kind
        fullpath = appendpath(dest, dp)
        if kind == 'directory':
            os.mkdir(fullpath)
        elif kind == 'file':
            # Close the output explicitly rather than relying on garbage
            # collection to flush and release the handle.
            outf = open(fullpath, 'wb')
            try:
                pumpfile(self.get_file(ie.file_id), outf)
            finally:
                outf.close()
        else:
            # The message must be formatted here: the previous code passed
            # an undefined name (`fid`) as extra positional arguments.
            bailout("don't know how to export {%s} of kind %r"
                    % (ie.file_id, kind))
        mutter("  export {%s} kind %s to %s" % (ie.file_id, kind, fullpath))

118

119

120

121

class WorkingTree(Tree):
    """Working copy tree.

    The inventory is held in the `Branch` working-inventory, and the
    files are in a directory on disk.

    It is possible for a `WorkingTree` to have a filename which is
    not listed in the Inventory and vice versa.
    """

    def __init__(self, basedir, inv):
        """Create a tree rooted at directory `basedir` with inventory `inv`."""
        self._inventory = inv
        self.basedir = basedir
        # Expose the inventory's path lookup directly on the tree.
        self.path2id = inv.path2id

    def __repr__(self):
        return "<%s of %s>" % (self.__class__.__name__,
                               self.basedir)

    def abspath(self, filename):
        """Return `filename` joined onto the tree's base directory."""
        return os.path.join(self.basedir, filename)

    def has_filename(self, filename):
        """Return true if `filename` exists on disk under the base directory."""
        return os.path.exists(self.abspath(filename))

    def get_file(self, file_id):
        """Return the on-disk file for `file_id`, opened for binary reading."""
        return self.get_file_byname(self.id2path(file_id))

    def get_file_byname(self, filename):
        """Open `filename` (relative to the tree) for binary reading.

        The caller is responsible for closing the returned file object.
        """
        return open(self.abspath(filename), 'rb')

    def _get_store_filename(self, file_id):
        """Return the on-disk path of the file with id `file_id`."""
        return self.abspath(self.id2path(file_id))

    def has_id(self, file_id):
        """Return true if `file_id` is in the inventory and present on disk."""
        # files that have been deleted are excluded
        if not self.inventory.has_id(file_id):
            return False
        return os.access(self.abspath(self.inventory.id2path(file_id)),
                         os.F_OK)

    def get_file_size(self, file_id):
        """Return the size reported by `os.stat` for the file `file_id`."""
        return os.stat(self._get_store_filename(file_id))[ST_SIZE]

    def get_file_sha1(self, file_id):
        """Return `sha_file()` of the on-disk contents of `file_id`."""
        f = self.get_file(file_id)
        try:
            return sha_file(f)
        finally:
            # Release the handle promptly instead of leaking it.
            f.close()

    def file_class(self, filename):
        """Classify `filename`: 'V' versioned, 'I' ignored, '?' otherwise."""
        if self.path2id(filename):
            return 'V'
        elif self.is_ignored(filename):
            return 'I'
        else:
            return '?'

    def list_files(self):
        """Recursively list all files as (path, class, kind, id).

        Lists, but does not descend into unversioned directories.

        This does not include files that have been deleted in this
        tree.

        Skips the control directory.
        """
        inv = self.inventory

        def descend(from_dir, from_dir_id, dp):
            # from_dir: path within the tree; dp: matching path on disk.
            ls = os.listdir(dp)
            ls.sort()
            for f in ls:
                ## TODO: If we find a subdirectory with its own .bzr
                ## directory, then that is a separate tree and we
                ## should exclude it.
                if bzrlib.BZRDIR == f:
                    continue

                # path within tree
                fp = appendpath(from_dir, f)

                # absolute path
                fap = appendpath(dp, f)

                f_ie = inv.get_child(from_dir_id, f)
                if f_ie:
                    c = 'V'
                elif self.is_ignored(fp):
                    c = 'I'
                else:
                    c = '?'

                fk = file_kind(fap)

                if f_ie:
                    if f_ie.kind != fk:
                        bailout("file %r entered as kind %r id %r, now of kind %r"
                                % (fap, f_ie.kind, f_ie.file_id, fk))

                yield fp, c, fk, (f_ie and f_ie.file_id)

                if fk != 'directory':
                    continue

                if c != 'V':
                    # don't descend unversioned directories
                    continue

                for ff in descend(fp, f_ie.file_id, fap):
                    yield ff

        for f in descend('', None, self.basedir):
            yield f

    def unknowns(self):
        """Yield the paths from `extras()` that match no ignore pattern."""
        for subp in self.extras():
            if not self.is_ignored(subp):
                yield subp

    def extras(self):
        """Yield all unknown files in this WorkingTree.

        If there are any unknown directories then only the directory is
        returned, not all its children.  But if there are unknown files
        under a versioned subdirectory, they are returned.

        Currently returned depth-first, sorted by name within directories.
        """
        ## TODO: Work from given directory downwards

        for path, dir_entry in self.inventory.directories():
            mutter("search for unknowns in %r" % path)
            dirabs = self.abspath(path)
            if not isdir(dirabs):
                # e.g. directory deleted
                continue

            fl = []
            for subf in os.listdir(dirabs):
                # NOTE(review): list_files() skips bzrlib.BZRDIR while this
                # uses the literal '.bzr' -- presumably the same value;
                # confirm before unifying.
                if (subf != '.bzr'
                    and (subf not in dir_entry.children)):
                    fl.append(subf)

            fl.sort()
            for subf in fl:
                subp = appendpath(path, subf)
                yield subp

    def ignored_files(self):
        """Yield (path, ignore_pattern) for each extra file that is ignored."""
        for subp in self.extras():
            pat = self.is_ignored(subp)
            if pat is not None:
                yield subp, pat

    def get_ignore_list(self):
        """Return list of ignore patterns.

        The list starts from a copy of `bzrlib.DEFAULT_IGNORE` and is
        extended with the lines of the tree's ignore file when that file
        exists.

        Cached in the Tree object after the first call.
        """
        if hasattr(self, '_ignorelist'):
            return self._ignorelist

        l = bzrlib.DEFAULT_IGNORE[:]
        if self.has_filename(bzrlib.IGNORE_FILENAME):
            f = self.get_file_byname(bzrlib.IGNORE_FILENAME)
            try:
                l.extend([line.rstrip("\n\r") for line in f.readlines()])
            finally:
                # Do not keep the ignore file open for the tree's lifetime.
                f.close()
        self._ignorelist = l
        return l

    def is_ignored(self, filename):
        """Check whether the filename matches an ignore pattern.

        Patterns containing '/' need to match the whole path; others
        match against only the last component.

        If the file is ignored, returns the pattern which caused it to
        be ignored, otherwise None.  So this can simply be used as a
        boolean if desired."""

        ## TODO: Use '**' to match directories, and other extended globbing stuff from cvs/rsync.

        for pat in self.get_ignore_list():
            if '/' in pat:
                # as a special case, you can put ./ at the start of a pattern;
                # this is good to match in the top-level only;
                if pat[:2] == './':
                    newpat = pat[2:]
                else:
                    newpat = pat
                if fnmatch.fnmatchcase(filename, newpat):
                    return pat
            else:
                if fnmatch.fnmatchcase(splitpath(filename)[-1], pat):
                    return pat
        return None

323

324

325

326

114

327

115

328

class RevisionTree(Tree):

116

329

"""Tree viewing a previous revision.

117

330

118

331

File text can be retrieved from the text store.

119

332

120

TODO: Some kind of `__repr__` method, but a good one

333

:todo: Some kind of `__repr__` method, but a good one

121

334

probably means knowing the branch and revision number,

122

335

or at least passing a description to the constructor.

123

336

"""

124

337

125

def __init__(self, weave_store, inv, revision_id):

126

self._weave_store = weave_store

338

def __init__(self, store, inv):

339

self._store = store

127

340

self._inventory = inv

128

self._revision_id = revision_id

129

130

def get_weave(self, file_id):

131

return self._weave_store.get_weave(file_id)

132

133

134

def get_file_lines(self, file_id):

341

342

def get_file(self, file_id):

135

343

ie = self._inventory[file_id]

136

weave = self.get_weave(file_id)

137

return weave.get(ie.text_version)

138

139

140

def get_file_text(self, file_id):

141

return ''.join(self.get_file_lines(file_id))

142

143

144

def get_file(self, file_id):

145

return StringIO(self.get_file_text(file_id))

344

f = self._store[ie.text_id]

345

mutter(" get fileid{%s} from %r" % (file_id, self))

346

self._check_retrieved(ie, f)

347

return f

146

348

147

349

def get_file_size(self, file_id):

148

350

return self._inventory[file_id].text_size

149

351

150

352

def get_file_sha1(self, file_id):

151

353

ie = self._inventory[file_id]

152

if ie.kind == "file":

153

return ie.text_sha1

354

return ie.text_sha1

154

355

155

356

def has_filename(self, filename):

156

357

return bool(self.inventory.path2id(filename))

172

373

if False: # just to make it a generator

173

374

yield None

174

375

175

def __contains__(self, file_id):

176

return file_id in self._inventory

177

178

def get_file_sha1(self, file_id):

179

assert self._inventory[file_id].kind == "root_directory"

180

return None

181

182

183

376

184

377

185

378

######################################################################

237

430

238

431

239

432

240

def find_renames(old_inv, new_inv):
    """Yield (old_path, new_path) for each file id whose path changed.

    Only ids present in both inventories are considered; ids missing
    from `new_inv` are skipped.
    """
    for fid in old_inv:
        if fid in new_inv:
            before = old_inv.id2path(fid)
            after = new_inv.id2path(fid)
            if before != after:
                yield (before, after)

248

249

250

251

######################################################################

252

# export

253

254

def dir_exporter(tree, dest, root):
    """Export this tree to a new directory.

    `dest` should not exist, and will be created holding the
    contents of this tree.

    `root` is accepted so that all exporters share one signature; it is
    not used by this exporter.

    Raises BzrError for an inventory entry that is neither a directory
    nor a file.

    TODO: To handle subdirectories we need to create the
           directories first.

    :note: If the export fails, the destination directory will be
           left in a partially written state.
    """
    import os
    os.mkdir(dest)
    mutter('export version %r' % tree)
    inv = tree.inventory
    for dp, ie in inv.iter_entries():
        kind = ie.kind
        fullpath = appendpath(dest, dp)
        if kind == 'directory':
            os.mkdir(fullpath)
        elif kind == 'file':
            # Close both ends explicitly so the data is flushed and the
            # handles are released even if the copy fails part-way.
            inf = tree.get_file(ie.file_id)
            try:
                outf = open(fullpath, 'wb')
                try:
                    pumpfile(inf, outf)
                finally:
                    outf.close()
            finally:
                inf.close()
        else:
            raise BzrError("don't know how to export {%s} of kind %r" % (ie.file_id, kind))
        mutter("  export {%s} kind %s to %s" % (ie.file_id, kind, fullpath))

280

exporters['dir'] = dir_exporter

281

282

try:

283

import tarfile

284

except ImportError:

285

pass

286

else:

287

def get_root_name(dest):
    """Get just the root name for a tarball.

    The directory part of `dest` is dropped and a recognised tar suffix
    is stripped.  If no recognised suffix is present the base name is
    returned unchanged, so callers always get a string.

    >>> get_root_name('mytar.tar')
    'mytar'
    >>> get_root_name('mytar.tar.bz2')
    'mytar'
    >>> get_root_name('tar.tar.tar.tgz')
    'tar.tar.tar'
    >>> get_root_name('bzr-0.0.5.tar.gz')
    'bzr-0.0.5'
    >>> get_root_name('a/long/path/mytar.tgz')
    'mytar'
    >>> get_root_name('../parent/../dir/other.tbz2')
    'other'
    >>> get_root_name('some/dir/archive.zip')
    'archive.zip'
    """
    endings = ['.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2']
    dest = os.path.basename(dest)
    for end in endings:
        if dest.endswith(end):
            return dest[:-len(end)]
    # No known suffix: fall back to the bare name instead of returning None.
    return dest

308

309

def tar_exporter(tree, dest, root, compression=None):

310

"""Export this tree to a new tar file.

311

312

`dest` will be created holding the contents of this tree; if it

313

already exists, it will be clobbered, like with "tar -c".

314

"""

315

from time import time

316

now = time()

317

compression = str(compression or '')

318

if root is None:

319

root = get_root_name(dest)

320

try:

321

ball = tarfile.open(dest, 'w:' + compression)

322

except tarfile.CompressionError, e:

323

raise BzrError(str(e))

324

mutter('export version %r' % tree)

325

inv = tree.inventory

326

for dp, ie in inv.iter_entries():

327

mutter(" export {%s} kind %s to %s" % (ie.file_id, ie.kind, dest))

328

item = tarfile.TarInfo(os.path.join(root, dp))

329

# TODO: would be cool to actually set it to the timestamp of the

330

# revision it was last changed

331

item.mtime = now

332

if ie.kind == 'directory':

333

item.type = tarfile.DIRTYPE

334

fileobj = None

335

item.name += '/'

336

item.size = 0

337

item.mode = 0755

338

elif ie.kind == 'file':

339

item.type = tarfile.REGTYPE

340

fileobj = tree.get_file(ie.file_id)

341

item.size = _find_file_size(fileobj)

342

item.mode = 0644

343

else:

344

raise BzrError("don't know how to export {%s} of kind %r" %

345

(ie.file_id, ie.kind))

346

347

ball.addfile(item, fileobj)

348

ball.close()

349

exporters['tar'] = tar_exporter

350

351

def tgz_exporter(tree, dest, root):
    """Export `tree` to `dest` via tar_exporter with 'gz' compression."""
    method = 'gz'
    tar_exporter(tree, dest, root, compression=method)

353

exporters['tgz'] = tgz_exporter

354

355

def tbz_exporter(tree, dest, root):
    """Export `tree` to `dest` via tar_exporter with 'bz2' compression."""
    method = 'bz2'
    tar_exporter(tree, dest, root, compression=method)

357

exporters['tbz2'] = tbz_exporter

358

359

360

def _find_file_size(fileobj):

361

offset = fileobj.tell()

362

try:

363

fileobj.seek(0, 2)

364

size = fileobj.tell()

365

except TypeError:

366

# gzip doesn't accept second argument to seek()

367

fileobj.seek(0)

368

size = 0

369

while True:

370

nread = len(fileobj.read())

371

if nread == 0:

372

break

373

size += nread

374

fileobj.seek(offset)

375

return size

Older »