~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tree.py

Committer: Martin Pool
Date: 2005-09-30 05:15:03 UTC
mto: (1185.14.2)
mto: This revision was merged to the branch mainline in revision 1396.
Revision ID: mbp@sourcefrog.net-20050930051503-9c049325215ddd1c

- fix up Branch.open_downlevel for Transport

files added:
HACKING

Makefile

NEWS.developers

bzr-man.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/clone.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/add-bzr-to-baz

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/revfile-annotation.txt

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/symlink-support.patch

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/remotebranch.py

bzrlib/tests.py

doc/faq.txt

doc/quickref.txt

test.sh

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

contrib/bash/bzr => contrib/bash/bzr.simple

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/mdiff.py

bzrlib/osutils.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/xml.py

contrib/zsh/_bzr

doc/formats.txt

doc/index.txt

doc/revfile.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py

testbzr

Show diffs side-by-side

added added

removed removed

bzrlib/tree.py

"""Tree classes, representing directory at point in time.

"""

from sets import Set

import os.path, os, fnmatch

from osutils import pumpfile, compare_files, filesize, quotefn, sha_file, \

joinpath, splitpath, appendpath, isdir, isfile, file_kind, fingerprint_file

import errno

from stat import S_ISREG, S_ISDIR, ST_MODE, ST_SIZE

from inventory import Inventory

from trace import mutter, note

from errors import bailout

import branch

import os

from cStringIO import StringIO

import bzrlib

class Tree:

from bzrlib.trace import mutter, note

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.inventory import Inventory

from bzrlib.osutils import pumpfile, appendpath, fingerprint_file

exporters = {}

class Tree(object):

"""Abstract file tree.

There are several subclasses:

def has_id(self, file_id):

return self.inventory.has_id(file_id)

def id_set(self):

"""Return set of all ids in this tree."""

return self.inventory.id_set()

__contains__ = has_id

def __iter__(self):

return iter(self.inventory)

def id2path(self, file_id):

return self.inventory.id2path(file_id)

def _get_inventory(self):

return self._inventory

def get_file_by_path(self, path):

return self.get_file(self._inventory.path2id(path))

inventory = property(_get_inventory,

doc="Inventory of this Tree")

def _check_retrieved(self, ie, f):

if not __debug__:

return

fp = fingerprint_file(f)

f.seek(0)

if ie.text_size != None:

if ie.text_size != fp['size']:

bailout("mismatched size for file %r in %r" % (ie.file_id, self._store),

raise BzrError("mismatched size for file %r in %r" % (ie.file_id, self._store),

["inventory expects %d bytes" % ie.text_size,

"file is actually %d bytes" % fp['size'],

"store is probably damaged/corrupt"])

if ie.text_sha1 != fp['sha1']:

bailout("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),

raise BzrError("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),

["inventory expects %s" % ie.text_sha1,

"file is actually %s" % fp['sha1'],

"store is probably damaged/corrupt"])

def print_file(self, fileid):

"""Print file with id `fileid` to stdout."""

def print_file(self, file_id):

"""Print file with id `file_id` to stdout."""

100

import sys

pumpfile(self.get_file(fileid), sys.stdout)

100

101

def export(self, dest):

102

"""Export this tree to a new directory.

103

104

`dest` should not exist, and will be created holding the

105

contents of this tree.

106

107

TODO: To handle subdirectories we need to create the

108

directories first.

109

110

:note: If the export fails, the destination directory will be

111

left in a half-assed state.

112

"""

113

os.mkdir(dest)

114

mutter('export version %r' % self)

115

inv = self.inventory

116

for dp, ie in inv.iter_entries():

117

kind = ie.kind

118

fullpath = appendpath(dest, dp)

119

if kind == 'directory':

120

os.mkdir(fullpath)

121

elif kind == 'file':

122

pumpfile(self.get_file(ie.file_id), file(fullpath, 'wb'))

123

else:

124

bailout("don't know how to export {%s} of kind %r" % (fid, kind))

125

mutter(" export {%s} kind %s to %s" % (ie.file_id, kind, fullpath))

126

127

128

129

class WorkingTree(Tree):

130

"""Working copy tree.

131

132

The inventory is held in the `Branch` working-inventory, and the

133

files are in a directory on disk.

134

135

It is possible for a `WorkingTree` to have a filename which is

136

not listed in the Inventory and vice versa.

137

"""

138

def __init__(self, basedir, inv):

139

self._inventory = inv

140

self.basedir = basedir

141

self.path2id = inv.path2id

142

143

def __repr__(self):

144

return "<%s of %s>" % (self.__class__.__name__,

145

self.basedir)

146

147

def abspath(self, filename):

148

return os.path.join(self.basedir, filename)

149

150

def has_filename(self, filename):

151

return os.path.exists(self.abspath(filename))

152

153

def get_file(self, file_id):

154

return self.get_file_byname(self.id2path(file_id))

155

156

def get_file_byname(self, filename):

157

return file(self.abspath(filename), 'rb')

158

159

def _get_store_filename(self, file_id):

160

## XXX: badly named; this isn't in the store at all

161

return self.abspath(self.id2path(file_id))

162

163

def has_id(self, file_id):

164

# files that have been deleted are excluded

165

if not self.inventory.has_id(file_id):

166

return False

167

return os.access(self.abspath(self.inventory.id2path(file_id)), os.F_OK)

168

169

def get_file_size(self, file_id):

170

return os.stat(self._get_store_filename(file_id))[ST_SIZE]

171

172

def get_file_sha1(self, file_id):

173

f = self.get_file(file_id)

174

return sha_file(f)

175

176

177

def file_class(self, filename):

178

if self.path2id(filename):

179

return 'V'

180

elif self.is_ignored(filename):

181

return 'I'

182

else:

183

return '?'

184

185

186

def list_files(self):

187

"""Recursively list all files as (path, class, kind, id).

188

189

Lists, but does not descend into unversioned directories.

190

191

This does not include files that have been deleted in this

192

tree.

193

194

Skips the control directory.

195

"""

196

inv = self.inventory

197

198

def descend(from_dir_relpath, from_dir_id, dp):

199

ls = os.listdir(dp)

200

ls.sort()

201

for f in ls:

202

## TODO: If we find a subdirectory with its own .bzr

203

## directory, then that is a separate tree and we

204

## should exclude it.

205

if bzrlib.BZRDIR == f:

206

continue

207

208

# path within tree

209

fp = appendpath(from_dir_relpath, f)

210

211

# absolute path

212

fap = appendpath(dp, f)

213

214

f_ie = inv.get_child(from_dir_id, f)

215

if f_ie:

216

c = 'V'

217

elif self.is_ignored(fp):

218

c = 'I'

219

else:

220

c = '?'

221

222

fk = file_kind(fap)

223

224

if f_ie:

225

if f_ie.kind != fk:

226

bailout("file %r entered as kind %r id %r, now of kind %r"

227

% (fap, f_ie.kind, f_ie.file_id, fk))

228

229

yield fp, c, fk, (f_ie and f_ie.file_id)

230

231

if fk != 'directory':

232

continue

233

234

if c != 'V':

235

# don't descend unversioned directories

236

continue

237

238

for ff in descend(fp, f_ie.file_id, fap):

239

yield ff

240

241

for f in descend('', inv.root.file_id, self.basedir):

242

yield f

243

244

245

246

def unknowns(self):

247

for subp in self.extras():

248

if not self.is_ignored(subp):

249

yield subp

250

251

252

def extras(self):

253

"""Yield all unknown files in this WorkingTree.

254

255

If there are any unknown directories then only the directory is

256

returned, not all its children. But if there are unknown files

257

under a versioned subdirectory, they are returned.

258

259

Currently returned depth-first, sorted by name within directories.

260

"""

261

## TODO: Work from given directory downwards

262

263

for path, dir_entry in self.inventory.directories():

264

mutter("search for unknowns in %r" % path)

265

dirabs = self.abspath(path)

266

if not isdir(dirabs):

267

# e.g. directory deleted

268

continue

269

270

fl = []

271

for subf in os.listdir(dirabs):

272

if (subf != '.bzr'

273

and (subf not in dir_entry.children)):

274

fl.append(subf)

275

276

fl.sort()

277

for subf in fl:

278

subp = appendpath(path, subf)

279

yield subp

280

281

282

def ignored_files(self):

283

"""Yield list of PATH, IGNORE_PATTERN"""

284

for subp in self.extras():

285

pat = self.is_ignored(subp)

286

if pat != None:

287

yield subp, pat

288

289

290

def get_ignore_list(self):

291

"""Return list of ignore patterns.

292

293

Cached in the Tree object after the first call.

294

"""

295

if hasattr(self, '_ignorelist'):

296

return self._ignorelist

297

298

l = bzrlib.DEFAULT_IGNORE[:]

299

if self.has_filename(bzrlib.IGNORE_FILENAME):

300

f = self.get_file_byname(bzrlib.IGNORE_FILENAME)

301

l.extend([line.rstrip("\n\r") for line in f.readlines()])

302

self._ignorelist = l

303

return l

304

305

306

def is_ignored(self, filename):

307

r"""Check whether the filename matches an ignore pattern.

308

309

Patterns containing '/' or '\' need to match the whole path;

310

others match against only the last component.

311

312

If the file is ignored, returns the pattern which caused it to

313

be ignored, otherwise None. So this can simply be used as a

314

boolean if desired."""

315

316

# TODO: Use '**' to match directories, and other extended

317

# globbing stuff from cvs/rsync.

318

319

# XXX: fnmatch is actually not quite what we want: it's only

320

# approximately the same as real Unix fnmatch, and doesn't

321

# treat dotfiles correctly and allows * to match /.

322

# Eventually it should be replaced with something more

323

# accurate.

324

325

for pat in self.get_ignore_list():

326

if '/' in pat or '\\' in pat:

327

328

# as a special case, you can put ./ at the start of a

329

# pattern; this is good to match in the top-level

330

# only;

331

332

if (pat[:2] == './') or (pat[:2] == '.\\'):

333

newpat = pat[2:]

334

else:

335

newpat = pat

336

if fnmatch.fnmatchcase(filename, newpat):

337

return pat

338

else:

339

if fnmatch.fnmatchcase(splitpath(filename)[-1], pat):

340

return pat

341

return None

342

343

344

345

101

sys.stdout.write(self.get_file_text(file_id))

102

103

104

def export(self, dest, format='dir', root=None):

105

"""Export this tree."""

106

try:

107

exporter = exporters[format]

108

except KeyError:

109

from bzrlib.errors import BzrCommandError

110

raise BzrCommandError("export format %r not supported" % format)

111

exporter(self, dest, root)

112

113

346

114

347

115

class RevisionTree(Tree):

348

116

"""Tree viewing a previous revision.

354

122

or at least passing a description to the constructor.

355

123

"""

356

124

357

def __init__(self, store, inv):

358

self._store = store

125

def __init__(self, weave_store, inv, revision_id):

126

self._weave_store = weave_store

359

127

self._inventory = inv

128

self._revision_id = revision_id

129

130

def get_weave(self, file_id):

131

return self._weave_store.get_weave(file_id)

132

133

134

def get_file_lines(self, file_id):

135

ie = self._inventory[file_id]

136

weave = self.get_weave(file_id)

137

return weave.get(ie.text_version)

138

139

140

def get_file_text(self, file_id):

141

return ''.join(self.get_file_lines(file_id))

142

360

143

361

144

def get_file(self, file_id):

362

ie = self._inventory[file_id]

363

f = self._store[ie.text_id]

364

mutter(" get fileid{%s} from %r" % (file_id, self))

365

self._check_retrieved(ie, f)

366

return f

145

return StringIO(self.get_file_text(file_id))

367

146

368

147

def get_file_size(self, file_id):

369

148

return self._inventory[file_id].text_size

370

149

371

150

def get_file_sha1(self, file_id):

372

151

ie = self._inventory[file_id]

373

return ie.text_sha1

152

if ie.kind == "file":

153

return ie.text_sha1

374

154

375

155

def has_filename(self, filename):

376

156

return bool(self.inventory.path2id(filename))

392

172

if False: # just to make it a generator

393

173

yield None

394

174

175

def __contains__(self, file_id):

176

return file_id in self._inventory

177

178

def get_file_sha1(self, file_id):

179

assert self._inventory[file_id].kind == "root_directory"

180

return None

181

182

395

183

396

184

397

185

######################################################################

458

246

if old_name != new_name:

459

247

yield (old_name, new_name)

460

248

249

250

251

######################################################################

252

# export

253

254

def dir_exporter(tree, dest, root):
    """Export this tree to a new directory.

    `dest` should not exist, and will be created holding the
    contents of this tree.

    `root` is not used by this exporter; it is part of the signature
    because exporters are invoked as ``exporter(tree, dest, root)``.

    Raises BzrError for an inventory entry whose kind is neither
    'directory' nor 'file'.

    TODO: To handle subdirectories we need to create the
    directories first.

    :note: If the export fails, the destination directory will be
           left in a partially written state.
    """
    import os
    os.mkdir(dest)
    mutter('export version %r' % tree)
    inv = tree.inventory
    for dp, ie in inv.iter_entries():
        kind = ie.kind
        fullpath = appendpath(dest, dp)
        if kind == 'directory':
            os.mkdir(fullpath)
        elif kind == 'file':
            # Fetch the source before creating the destination, so a failed
            # lookup does not leave an empty file behind.
            source = tree.get_file(ie.file_id)
            # Close the output explicitly instead of relying on garbage
            # collection to flush and release the handle.
            target = open(fullpath, 'wb')
            try:
                pumpfile(source, target)
            finally:
                target.close()
        else:
            raise BzrError("don't know how to export {%s} of kind %r" % (ie.file_id, kind))
        mutter(" export {%s} kind %s to %s" % (ie.file_id, kind, fullpath))

280

exporters['dir'] = dir_exporter

281

282

try:

283

import tarfile

284

except ImportError:

285

pass

286

else:

287

def get_root_name(dest):
    """Get just the root name for a tarball.

    The directory part of `dest` is dropped and a recognised archive
    suffix ('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2') is stripped.
    A name carrying none of those suffixes is returned with only its
    directory part removed, so the result is always a string that can be
    used as a path component.

    >>> get_root_name('mytar.tar')
    'mytar'
    >>> get_root_name('mytar.tar.bz2')
    'mytar'
    >>> get_root_name('tar.tar.tar.tgz')
    'tar.tar.tar'
    >>> get_root_name('bzr-0.0.5.tar.gz')
    'bzr-0.0.5'
    >>> get_root_name('a/long/path/mytar.tgz')
    'mytar'
    >>> get_root_name('../parent/../dir/other.tbz2')
    'other'
    >>> get_root_name('some/dir/archive.zip')
    'archive.zip'
    """
    endings = ['.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2']
    dest = os.path.basename(dest)
    for end in endings:
        if dest.endswith(end):
            return dest[:-len(end)]
    # No known suffix: fall back to the bare file name rather than
    # implicitly returning None.
    return dest

308

309

def tar_exporter(tree, dest, root, compression=None):
    """Export this tree to a new tar file.

    `dest` will be created holding the contents of this tree; if it
    already exists, it will be clobbered, like with "tar -c".

    `root` is the leading path component given to every archive member;
    when it is None it is derived from `dest` with get_root_name().

    `compression` is appended to the tarfile mode string 'w:'; None or an
    empty value leaves the mode as plain 'w:'.

    Raises BzrError if tarfile rejects the compression mode, or if an
    inventory entry is neither a 'directory' nor a 'file'.
    """
    import sys
    from time import time
    now = time()
    compression = str(compression or '')
    if root is None:
        root = get_root_name(dest)
    try:
        ball = tarfile.open(dest, 'w:' + compression)
    except tarfile.CompressionError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the statement parses on old and new Pythons.
        raise BzrError(str(sys.exc_info()[1]))
    mutter('export version %r' % tree)
    inv = tree.inventory
    # Permission bits: rwxr-xr-x for directories, rw-r--r-- for files.
    dir_mode = int('755', 8)
    file_mode = int('644', 8)
    try:
        for dp, ie in inv.iter_entries():
            mutter(" export {%s} kind %s to %s" % (ie.file_id, ie.kind, dest))
            item = tarfile.TarInfo(os.path.join(root, dp))
            # TODO: would be cool to actually set it to the timestamp of the
            # revision it was last changed
            item.mtime = now
            if ie.kind == 'directory':
                item.type = tarfile.DIRTYPE
                fileobj = None
                item.name += '/'
                item.size = 0
                item.mode = dir_mode
            elif ie.kind == 'file':
                item.type = tarfile.REGTYPE
                fileobj = tree.get_file(ie.file_id)
                item.size = _find_file_size(fileobj)
                item.mode = file_mode
            else:
                raise BzrError("don't know how to export {%s} of kind %r" %
                               (ie.file_id, ie.kind))

            ball.addfile(item, fileobj)
    finally:
        # Always release the archive handle, including when an entry
        # fails part-way through the export.
        ball.close()

349

exporters['tar'] = tar_exporter

350

351

def tgz_exporter(tree, dest, root):
    """Export `tree` to `dest` via tar_exporter with compression 'gz'."""
    tar_exporter(tree, dest, root, 'gz')

353

exporters['tgz'] = tgz_exporter

354

355

def tbz_exporter(tree, dest, root):
    """Export `tree` to `dest` via tar_exporter with compression 'bz2'."""
    tar_exporter(tree, dest, root, 'bz2')

357

exporters['tbz2'] = tbz_exporter

358

359

360

def _find_file_size(fileobj):

361

offset = fileobj.tell()

362

try:

363

fileobj.seek(0, 2)

364

size = fileobj.tell()

365

except TypeError:

366

# gzip doesn't accept second argument to seek()

367

fileobj.seek(0)

368

size = 0

369

while True:

370

nread = len(fileobj.read())

371

if nread == 0:

372

break

373

size += nread

374

fileobj.seek(offset)

375

return size

Older »