~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: mbp at sourcefrog
Date: 2005-03-09 04:08:15 UTC
Revision ID: mbp@sourcefrog.net-20050309040815-13242001617e4a06

import from baz patch-364

files added:
bzrlib/tests.py

files removed:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

NEWS.developers

TODO

build-api

bzr-man.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/clone.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revfile.py

bzrlib/revisionspec.py

bzrlib/selftest

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/textinv.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

setup.py

testbzr

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
bzrlib/commands.py => bzr.py

bzrlib/store/__init__.py => bzrlib/store.py

files modified:
README

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

#! /usr/bin/env python

# -*- coding: UTF-8 -*-

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Maybe also keep the full path of the entry, and the children?

# But those depend on its position within a particular inventory, and

# it would be nice not to need to hold the backpointer here.

# TODO: Perhaps split InventoryEntry into subclasses for files,

# directories, etc etc.

# This should really be an id randomly assigned when the tree is

# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import os.path

import re

import sys

import types

import bzrlib

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.osutils import quotefn, splitpath, joinpath, appendpath, sha_strings

from bzrlib.trace import mutter

from bzrlib.errors import NotVersionedError

class InventoryEntry(object):

"""Inventories map files to their name in a revision."""

__author__ = "Martin Pool <mbp@canonical.com>"

import sys, os.path, types

from sets import Set

from xml import XMLMixin

from ElementTree import ElementTree, Element

from errors import bailout

from osutils import uuid, quotefn, splitpath, joinpath, appendpath

from trace import mutter

class InventoryEntry(XMLMixin):

"""Description of a versioned file.

An InventoryEntry has the following fields, which are also

present in the XML inventory-entry element:

file_id

name

(within the parent directory)

kind

'directory' or 'file' or 'symlink'

parent_id

file_id of the parent directory, or ROOT_ID

revision

the revision_id in which this variation of this file was

introduced.

executable

Indicates that this file should be executable on systems

that support it.

text_sha1

sha-1 of the text of the file

text_size

size in bytes of the text of the file

(reading a version 4 tree created a text_id field.)

* *file_id*

* *name*: (only the basename within the directory, must not

contain slashes)

* *kind*: "directory" or "file"

* *directory_id*: (if absent/null means the branch root directory)

* *text_sha1*: only for files

* *text_size*: in bytes, only for files

* *text_id*: identifier for the text version, only for files

InventoryEntries can also exist inside a WorkingTree

inventory, in which case they are not yet bound to a

particular revision of the file. In that case the text_sha1,

text_size and text_id are absent.

>>> i = Inventory()

>>> i.path2id('')

'TREE_ROOT'

>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))

InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')

>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))

InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')

>>> i.add(InventoryEntry('123', 'src', kind='directory'))

>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))

>>> for j in i.iter_entries():

... print j

...

('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))

('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))

('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))

>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))

>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))

Traceback (most recent call last):

...

BzrError: inventory already contains entry with id {2323}

>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))

InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')

>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))

InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')

BzrError: ('inventory already contains entry with id {2323}', [])

>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))

>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))

>>> i.path2id('src/wibble')

'2325'

>>> '2325' in i

100

True

101

>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))

102

InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')

>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))

103

>>> i['2326']

104

InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')

105

>>> for path, entry in i.iter_entries():

106

... print path.replace('\\\\', '/') # for win32 os.sep

107

... assert i.path2id(path)

>>> for j in i.iter_entries():

... print j[0]

... assert i.path2id(j[0])

108

...

109

src

110

src/bye.c

111

src/hello.c

112

src/wibble

113

src/wibble/wibble.c

114

>>> i.id2path('2326').replace('\\\\', '/')

>>> i.id2path('2326')

115

'src/wibble/wibble.c'

:todo: Maybe also keep the full path of the entry, and the children?

But those depend on its position within a particular inventory, and

it would be nice not to need to hold the backpointer here.

116

"""

117

118

__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',

119

'text_id', 'parent_id', 'children', 'executable',

120

'revision', 'symlink_target']

121

122

def _add_text_to_weave(self, new_lines, parents, weave_store):

123

weave_store.add_text(self.file_id, self.revision, new_lines, parents)

124

125

def __init__(self, file_id, name, kind, parent_id, text_id=None):

def __init__(self, file_id, name, kind='file', text_id=None,

parent_id=None):

126

"""Create an InventoryEntry

127

128

The filename must be a single component, relative to the

129

parent directory; it cannot be a whole path or relative name.

130

131

>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)

>>> e = InventoryEntry('123', 'hello.c')

132

100

>>> e.name

133

101

'hello.c'

134

102

>>> e.file_id

135

103

'123'

136

>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)

104

>>> e = InventoryEntry('123', 'src/hello.c')

137

105

Traceback (most recent call last):

138

BzrCheckError: InventoryEntry name 'src/hello.c' is invalid

106

BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])

139

107

"""

140

assert isinstance(name, basestring), name

141

if '/' in name or '\\' in name:

142

raise BzrCheckError('InventoryEntry name %r is invalid' % name)

143

144

self.executable = False

145

self.revision = None

146

self.text_sha1 = None

147

self.text_size = None

108

109

if len(splitpath(name)) != 1:

110

bailout('InventoryEntry name is not a simple filename: %r'

111

% name)

112

148

113

self.file_id = file_id

149

114

self.name = name

115

assert kind in ['file', 'directory']

150

116

self.kind = kind

151

117

self.text_id = text_id

152

118

self.parent_id = parent_id

153

self.symlink_target = None

154

if kind == 'directory':

155

self.children = {}

156

elif kind == 'file':

157

pass

158

elif kind == 'symlink':

159

pass

160

else:

161

raise BzrError("unhandled entry kind %r" % kind)

162

163

def read_symlink_target(self, path):

164

if self.kind == 'symlink':

165

try:

166

self.symlink_target = os.readlink(path)

167

except OSError,e:

168

raise BzrError("os.readlink error, %s" % e)

169

170

def sorted_children(self):

171

l = self.children.items()

172

l.sort()

173

return l

174

175

def check(self, checker, rev_id, inv, tree):

176

if self.parent_id != None:

177

if not inv.has_id(self.parent_id):

178

raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'

179

% (self.parent_id, rev_id))

180

if self.kind == 'file':

181

revision = self.revision

182

t = (self.file_id, revision)

183

if t in checker.checked_texts:

184

prev_sha = checker.checked_texts[t]

185

if prev_sha != self.text_sha1:

186

raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %

187

(self.file_id, rev_id))

188

else:

189

checker.repeated_text_cnt += 1

190

return

191

mutter('check version {%s} of {%s}', rev_id, self.file_id)

192

file_lines = tree.get_file_lines(self.file_id)

193

checker.checked_text_cnt += 1

194

if self.text_size != sum(map(len, file_lines)):

195

raise BzrCheckError('text {%s} wrong size' % self.text_id)

196

if self.text_sha1 != sha_strings(file_lines):

197

raise BzrCheckError('text {%s} wrong sha1' % self.text_id)

198

checker.checked_texts[t] = self.text_sha1

199

elif self.kind == 'directory':

200

if self.text_sha1 != None or self.text_size != None or self.text_id != None:

201

raise BzrCheckError('directory {%s} has text in revision {%s}'

202

% (self.file_id, rev_id))

203

elif self.kind == 'root_directory':

204

pass

205

elif self.kind == 'symlink':

206

if self.text_sha1 != None or self.text_size != None or self.text_id != None:

207

raise BzrCheckError('symlink {%s} has text in revision {%s}'

208

% (self.file_id, rev_id))

209

if self.symlink_target == None:

210

raise BzrCheckError('symlink {%s} has no target in revision {%s}'

211

% (self.file_id, rev_id))

212

else:

213

raise BzrCheckError('unknown entry kind %r in revision {%s}' %

214

(self.kind, rev_id))

119

self.text_sha1 = None

120

self.text_size = None

215

121

216

122

217

123

def copy(self):

218

124

other = InventoryEntry(self.file_id, self.name, self.kind,

219

self.parent_id)

220

other.executable = self.executable

221

other.text_id = self.text_id

125

self.text_id, self.parent_id)

222

126

other.text_sha1 = self.text_sha1

223

127

other.text_size = self.text_size

224

other.symlink_target = self.symlink_target

225

other.revision = self.revision

226

# note that children are *not* copied; they're pulled across when

227

# others are added

228

128

return other

229

129

230

def _get_snapshot_change(self, previous_entries):

231

if len(previous_entries) > 1:

232

return 'merged'

233

elif len(previous_entries) == 0:

234

return 'added'

235

else:

236

return 'modified/renamed/reparented'

237

130

238

131

def __repr__(self):

239

132

return ("%s(%r, %r, kind=%r, parent_id=%r)"

243

136

self.kind,

244

137

self.parent_id))

245

138

246

def snapshot(self, revision, path, previous_entries, work_tree,

247

weave_store):

248

"""Make a snapshot of this entry.

139

140

def to_element(self):

141

"""Convert to XML element"""

142

e = Element('entry')

143

144

e.set('name', self.name)

145

e.set('file_id', self.file_id)

146

e.set('kind', self.kind)

147

148

if self.text_size is not None:

149

e.set('text_size', '%d' % self.text_size)

150

151

for f in ['text_id', 'text_sha1', 'parent_id']:

152

v = getattr(self, f)

153

if v is not None:

154

e.set(f, v)

155

156

e.tail = '\n'

157

158

return e

159

160

161

def from_element(cls, elt):

162

assert elt.tag == 'entry'

163

self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))

164

self.text_id = elt.get('text_id')

165

self.text_sha1 = elt.get('text_sha1')

166

self.parent_id = elt.get('parent_id')

249

167

250

This means that all its fields are populated, that it has its

251

text stored in the text store or weave.

252

"""

253

mutter('new parents of %s are %r', path, previous_entries)

254

self._read_tree_state(path, work_tree)

255

if len(previous_entries) == 1:

256

# cannot be unchanged unless there is only one parent file rev.

257

parent_ie = previous_entries.values()[0]

258

if self._unchanged(path, parent_ie, work_tree):

259

mutter("found unchanged entry")

260

self.revision = parent_ie.revision

261

return "unchanged"

262

mutter('new revision for {%s}', self.file_id)

263

self.revision = revision

264

change = self._get_snapshot_change(previous_entries)

265

if self.kind != 'file':

266

return change

267

self._snapshot_text(previous_entries, work_tree, weave_store)

268

return change

269

270

def _snapshot_text(self, file_parents, work_tree, weave_store):

271

mutter('storing file {%s} in revision {%s}',

272

self.file_id, self.revision)

273

# special case to avoid diffing on renames or

274

# reparenting

275

if (len(file_parents) == 1

276

and self.text_sha1 == file_parents.values()[0].text_sha1

277

and self.text_size == file_parents.values()[0].text_size):

278

previous_ie = file_parents.values()[0]

279

weave_store.add_identical_text(

280

self.file_id, previous_ie.revision,

281

self.revision, file_parents)

282

else:

283

new_lines = work_tree.get_file(self.file_id).readlines()

284

self._add_text_to_weave(new_lines, file_parents, weave_store)

285

self.text_sha1 = sha_strings(new_lines)

286

self.text_size = sum(map(len, new_lines))

287

288

def __eq__(self, other):

168

## mutter("read inventoryentry: %r" % (elt.attrib))

169

170

v = elt.get('text_size')

171

self.text_size = v and int(v)

172

173

return self

174

175

176

from_element = classmethod(from_element)

177

178

def __cmp__(self, other):

179

if self is other:

180

return 0

289

181

if not isinstance(other, InventoryEntry):

290

182

return NotImplemented

291

183

292

return ((self.file_id == other.file_id)

293

and (self.name == other.name)

294

and (other.symlink_target == self.symlink_target)

295

and (self.text_sha1 == other.text_sha1)

296

and (self.text_size == other.text_size)

297

and (self.text_id == other.text_id)

298

and (self.parent_id == other.parent_id)

299

and (self.kind == other.kind)

300

and (self.revision == other.revision)

301

and (self.executable == other.executable)

302

)

303

304

def __ne__(self, other):

305

return not (self == other)

306

307

def __hash__(self):

308

raise ValueError('not hashable')

309

310

def _unchanged(self, path, previous_ie, work_tree):

311

compatible = True

312

# different inv parent

313

if previous_ie.parent_id != self.parent_id:

314

compatible = False

315

# renamed

316

elif previous_ie.name != self.name:

317

compatible = False

318

if self.kind == 'symlink':

319

if self.symlink_target != previous_ie.symlink_target:

320

compatible = False

321

if self.kind == 'file':

322

if self.text_sha1 != previous_ie.text_sha1:

323

compatible = False

324

else:

325

# FIXME: 20050930 probe for the text size when getting sha1

326

# in _read_tree_state

327

self.text_size = previous_ie.text_size

328

return compatible

329

330

def _read_tree_state(self, path, work_tree):

331

if self.kind == 'symlink':

332

self.read_symlink_target(work_tree.abspath(path))

333

if self.kind == 'file':

334

self.text_sha1 = work_tree.get_file_sha1(self.file_id)

335

self.executable = work_tree.is_executable(self.file_id)

336

337

338

class RootEntry(InventoryEntry):

339

def __init__(self, file_id):

340

self.file_id = file_id

341

self.children = {}

342

self.kind = 'root_directory'

343

self.parent_id = None

344

self.name = ''

345

346

def __eq__(self, other):

347

if not isinstance(other, RootEntry):

348

return NotImplemented

349

350

return (self.file_id == other.file_id) \

351

and (self.children == other.children)

352

353

354

355

class Inventory(object):

184

return cmp(self.file_id, other.file_id) \

185

or cmp(self.name, other.name) \

186

or cmp(self.text_sha1, other.text_sha1) \

187

or cmp(self.text_size, other.text_size) \

188

or cmp(self.text_id, other.text_id) \

189

or cmp(self.parent_id, other.parent_id) \

190

or cmp(self.kind, other.kind)

191

192

193

194

class Inventory(XMLMixin):

356

195

"""Inventory of versioned files in a tree.

357

196

358

This describes which file_id is present at each point in the tree,

359

and possibly the SHA-1 or other information about the file.

360

Entries can be looked up either by path or by file_id.

197

An Inventory acts like a set of InventoryEntry items. You can

198

also look files up by their file_id or name.

199

200

May be read from and written to a metadata file in a tree. To

201

manipulate the inventory (for example to add a file), it is read

202

in, modified, and then written back out.

361

203

362

204

The inventory represents a typical unix file tree, with

363

205

directories containing files and subdirectories. We never store

367

209

returned quickly.

368

210

369

211

InventoryEntry objects must not be modified after they are

370

inserted, other than through the Inventory API.

212

inserted.

371

213

372

214

>>> inv = Inventory()

373

>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))

374

InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT')

215

>>> inv.write_xml(sys.stdout)

216

217

</inventory>

218

>>> inv.add(InventoryEntry('123-123', 'hello.c'))

375

219

>>> inv['123-123'].name

376

220

'hello.c'

221

>>> for file_id in inv: print file_id

222

...

223

123-123

377

224

378

225

May be treated as an iterator or set to look up file ids:

379

226

386

233

387

234

>>> [x[0] for x in inv.iter_entries()]

388

235

['hello.c']

389

>>> inv = Inventory('TREE_ROOT-12345678-12345678')

390

>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))

391

InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')

236

237

>>> inv.write_xml(sys.stdout)

238

239

240

</inventory>

241

392

242

"""

393

def __init__(self, root_id=ROOT_ID):

243

244

## TODO: Clear up handling of files in subdirectories; we probably

245

## do want to be able to just look them up by name but this

246

## probably means gradually walking down the path, looking up as we go.

247

248

## TODO: Make sure only canonical filenames are stored.

249

250

## TODO: Do something sensible about the possible collisions on

251

## case-losing filesystems. Perhaps we should just always forbid

252

## such collisions.

253

254

## _tree should probably just be stored as

255

## InventoryEntry._children on each directory.

256

257

def __init__(self):

394

258

"""Create or read an inventory.

395

259

396

260

If a working directory is specified, the inventory is read

397

261

from there. If the file is specified, read from that. If not,

398

262

the inventory is created empty.

399

400

The inventory is created with a default root directory, with

401

an id of root_id (ROOT_ID by default).

402

263

"""

403

# We are letting Branch.initialize() create a unique inventory

404

# root id, rather than generating a random one here.

405

#if root_id is None:

406

# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')

407

self.root = RootEntry(root_id)

408

self._byid = {self.root.file_id: self.root}

409

410

411

def copy(self):

412

other = Inventory(self.root.file_id)

413

# copy recursively so we know directories will be added before

414

# their children. There are more efficient ways than this...

415

for path, entry in self.iter_entries():

416

if entry == self.root:

417

continue

418

other.add(entry.copy())

419

return other

264

self._byid = dict()

265

266

# _tree is indexed by parent_id; at each level a map from name

267

# to ie. The None entry is the root.

268

self._tree = {None: {}}

420

269

421

270

422

271

def __iter__(self):

428

277

return len(self._byid)

429

278

430

279

431

def iter_entries(self, from_dir=None):

280

def iter_entries(self, parent_id=None):

432

281

"""Return (path, entry) pairs, in order by name."""

433

if from_dir == None:

434

assert self.root

435

from_dir = self.root

436

elif isinstance(from_dir, basestring):

437

from_dir = self._byid[from_dir]

438

439

kids = from_dir.children.items()

282

kids = self._tree[parent_id].items()

440

283

kids.sort()

441

284

for name, ie in kids:

442

285

yield name, ie

443

286

if ie.kind == 'directory':

444

for cn, cie in self.iter_entries(from_dir=ie.file_id):

445

yield os.path.join(name, cn), cie

446

447

448

def entries(self):

449

"""Return list of (path, ie) for all entries except the root.

450

451

This may be faster than iter_entries.

452

"""

453

accum = []

454

def descend(dir_ie, dir_path):

455

kids = dir_ie.children.items()

456

kids.sort()

457

for name, ie in kids:

458

child_path = os.path.join(dir_path, name)

459

accum.append((child_path, ie))

460

if ie.kind == 'directory':

461

descend(ie, child_path)

462

463

descend(self.root, '')

464

return accum

465

466

467

def directories(self):

468

"""Return (path, entry) pairs for all directories, including the root.

469

"""

470

accum = []

471

def descend(parent_ie, parent_path):

472

accum.append((parent_path, parent_ie))

473

474

kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']

475

kids.sort()

476

477

for name, child_ie in kids:

478

child_path = os.path.join(parent_path, name)

479

descend(child_ie, child_path)

480

descend(self.root, '')

481

return accum

287

for cn, cie in self.iter_entries(parent_id=ie.file_id):

288

yield joinpath([name, cn]), cie

289

290

291

def directories(self, include_root=True):

292

"""Return (path, entry) pairs for all directories.

293

"""

294

if include_root:

295

yield '', None

296

for path, entry in self.iter_entries():

297

if entry.kind == 'directory':

298

yield path, entry

482

299

483

300

484

301

302

def children(self, parent_id):

303

"""Return entries that are direct children of parent_id."""

304

return self._tree[parent_id]

305

306

307

308

# TODO: return all paths and entries

309

310

485

311

def __contains__(self, file_id):

486

312

"""True if this inventory contains a file with the given id.

487

313

488

314

>>> inv = Inventory()

489

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

490

InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')

315

>>> inv.add(InventoryEntry('123', 'foo.c'))

491

316

>>> '123' in inv

492

317

True

493

318

>>> '456' in inv

500

325

"""Return the entry for given file_id.

501

326

502

327

>>> inv = Inventory()

503

>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))

504

InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')

328

>>> inv.add(InventoryEntry('123123', 'hello.c'))

505

329

>>> inv['123123'].name

506

330

'hello.c'

507

331

"""

508

try:

509

return self._byid[file_id]

510

except KeyError:

511

if file_id == None:

512

raise BzrError("can't look up file_id None")

513

else:

514

raise BzrError("file_id {%s} not in inventory" % file_id)

515

516

517

def get_file_kind(self, file_id):

518

return self._byid[file_id].kind

519

520

def get_child(self, parent_id, filename):

521

return self[parent_id].children.get(filename)

332

return self._byid[file_id]

522

333

523

334

524

335

def add(self, entry):

525

336

"""Add entry to inventory.

526

337

527

338

To add a file to a branch ready to be committed, use Branch.add,

528

which calls this.

529

530

Returns the new entry object.

531

"""

532

if entry.file_id in self._byid:

533

raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)

534

535

if entry.parent_id == ROOT_ID or entry.parent_id is None:

536

entry.parent_id = self.root.file_id

537

538

try:

539

parent = self._byid[entry.parent_id]

540

except KeyError:

541

raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)

542

543

if parent.children.has_key(entry.name):

544

raise BzrError("%s is already versioned" %

545

appendpath(self.id2path(parent.file_id), entry.name))

339

which calls this."""

340

if entry.file_id in self:

341

bailout("inventory already contains entry with id {%s}" % entry.file_id)

342

343

if entry.parent_id != None:

344

if entry.parent_id not in self:

345

bailout("parent_id %s of new entry not found in inventory"

346

% entry.parent_id)

347

348

if self._tree[entry.parent_id].has_key(entry.name):

349

bailout("%s is already versioned"

350

% appendpath(self.id2path(entry.parent_id), entry.name))

546

351

547

352

self._byid[entry.file_id] = entry

548

parent.children[entry.name] = entry

549

return entry

550

551

552

def add_path(self, relpath, kind, file_id=None):

553

"""Add entry from a path.

554

555

The immediate parent must already be versioned.

556

557

Returns the new entry object."""

558

from bzrlib.branch import gen_file_id

559

560

parts = bzrlib.osutils.splitpath(relpath)

561

if len(parts) == 0:

562

raise BzrError("cannot re-add root of inventory")

563

564

if file_id == None:

565

file_id = gen_file_id(relpath)

566

567

parent_path = parts[:-1]

568

parent_id = self.path2id(parent_path)

569

if parent_id == None:

570

raise NotVersionedError(parent_path)

571

572

ie = InventoryEntry(file_id, parts[-1],

573

kind=kind, parent_id=parent_id)

574

return self.add(ie)

353

self._tree[entry.parent_id][entry.name] = entry

354

355

if entry.kind == 'directory':

356

self._tree[entry.file_id] = {}

575

357

576

358

577

359

def __delitem__(self, file_id):

578

360

"""Remove entry by id.

579

361

580

362

>>> inv = Inventory()

581

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

582

InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')

363

>>> inv.add(InventoryEntry('123', 'foo.c'))

583

364

>>> '123' in inv

584

365

True

585

366

>>> del inv['123']

588

369

"""

589

370

ie = self[file_id]

590

371

591

assert self[ie.parent_id].children[ie.name] == ie

372

assert self._tree[ie.parent_id][ie.name] == ie

592

373

593

374

# TODO: Test deleting all children; maybe hoist to a separate

594

375

# deltree method?

595

376

if ie.kind == 'directory':

596

for cie in ie.children.values():

377

for cie in self._tree[file_id].values():

597

378

del self[cie.file_id]

598

del ie.children

379

del self._tree[file_id]

599

380

600

381

del self._byid[file_id]

601

del self[ie.parent_id].children[ie.name]

602

603

604

def __eq__(self, other):

382

del self._tree[ie.parent_id][ie.name]

383

384

385

def id_set(self):

386

return Set(self._byid)

387

388

389

def to_element(self):

390

"""Convert to XML Element"""

391

e = Element('inventory')

392

e.text = '\n'

393

for path, ie in self.iter_entries():

394

e.append(ie.to_element())

395

return e

396

397

398

def from_element(cls, elt):
    """Build an inventory from an XML ``inventory`` element.

    A new instance of ``cls`` is created, and each child of ``elt`` is
    converted with ``InventoryEntry.from_element`` and added to it in
    document order.

    >>> inv = Inventory()
    >>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))
    >>> elt = inv.to_element()
    >>> inv2 = Inventory.from_element(elt)
    >>> inv2 == inv
    True
    """
    assert elt.tag == 'inventory'
    inventory = cls()
    for child in elt:
        entry = InventoryEntry.from_element(child)
        inventory.add(entry)
    return inventory

# Rebind as a classmethod so it can be called on the class itself.
from_element = classmethod(from_element)

415

416

417

def __cmp__(self, other):

605

418

"""Compare two sets by comparing their contents.

606

419

607

420

>>> i1 = Inventory()

608

421

>>> i2 = Inventory()

609

422

>>> i1 == i2

610

423

True

611

>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

612

InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')

424

>>> i1.add(InventoryEntry('123', 'foo'))

613

425

>>> i1 == i2

614

426

False

615

>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

616

InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')

427

>>> i2.add(InventoryEntry('123', 'foo'))

617

428

>>> i1 == i2

618

429

True

619

430

"""

431

if self is other:

432

return 0

433

620

434

if not isinstance(other, Inventory):

621

435

return NotImplemented

622

436

623

if len(self._byid) != len(other._byid):

624

# shortcut: obviously not the same

625

return False

626

627

return self._byid == other._byid

628

629

630

def __ne__(self, other):
    """Return the logical inverse of ``__eq__``.

    ``__eq__`` may return ``NotImplemented`` when it does not know how to
    compare against ``other``.  That sentinel must be passed through
    unchanged: negating it would yield ``False``, i.e. claim the two
    objects are equal.
    """
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        return outcome
    return not outcome

632

633

634

def __hash__(self):
    """Refuse to produce a hash value.

    Always raises ``ValueError``; instances cannot be used as
    dictionary keys or set members.
    """
    reason = 'not hashable'
    raise ValueError(reason)

636

637

638

def get_idpath(self, file_id):

639

"""Return a list of file_ids for the path to an entry.

640

641

The list contains one element for each directory followed by

642

the id of the file itself. So the length of the returned list

643

is equal to the depth of the file in the tree, counting the

644

root directory as depth 1.

645

"""

437

if self.id_set() ^ other.id_set():

438

return 1

439

440

for file_id in self._byid:

441

c = cmp(self[file_id], other[file_id])

442

if c: return c

443

444

return 0

445

446

447

def id2path(self, file_id):

448

"""Return as a list the path to file_id."""

646

449

p = []

647

450

while file_id != None:

648

try:

649

ie = self._byid[file_id]

650

except KeyError:

651

raise BzrError("file_id {%s} not found in inventory" % file_id)

652

p.insert(0, ie.file_id)

451

ie = self[file_id]

452

p = [ie.name] + p

653

453

file_id = ie.parent_id

654

return p

655

656

657

def id2path(self, file_id):

658

"""Return as a list the path to file_id."""

659

660

# get all names, skipping root

661

p = [self._byid[fid].name for fid in self.get_idpath(file_id)[1:]]

662

return os.sep.join(p)

454

return joinpath(p)

663

455

664

456

665

457

671

463

672

464

This returns the entry of the last component in the path,

673

465

which may be either a file or a directory.

674

675

Returns None iff the path is not found.

676

466

"""

677

if isinstance(name, types.StringTypes):

678

name = splitpath(name)

679

680

mutter("lookup path %r" % name)

681

682

parent = self.root

683

for f in name:

467

assert isinstance(name, types.StringTypes)

468

469

parent_id = None

470

for f in splitpath(name):

684

471

try:

685

cie = parent.children[f]

472

cie = self._tree[parent_id][f]

686

473

assert cie.name == f

687

assert cie.parent_id == parent.file_id

688

parent = cie

474

parent_id = cie.file_id

689

475

except KeyError:

690

476

# or raise an error?

691

477

return None

692

478

693

return parent.file_id

479

return parent_id

480

481

482

def get_child(self, parent_id, child_name):
    """Return the entry called ``child_name`` directly under ``parent_id``.

    The lookup goes through ``self._tree[parent_id]``; the result is
    whatever that mapping's ``get`` returns, which is ``None`` when no
    child has that name.
    """
    siblings = self._tree[parent_id]
    return siblings.get(child_name)

694

484

695

485

696

486

def has_filename(self, names):

698

488

699

489

700

490

def has_id(self, file_id):

491

assert isinstance(file_id, str)

701

492

return self._byid.has_key(file_id)

702

493

703

494

704

def rename(self, file_id, new_parent_id, new_name):
    """Move a file within the inventory.

    This can change either the name, or the parent, or both.

    This does not move the working file.

    Raises BzrError if ``new_name`` is rejected by ``is_valid_name``, if
    the new parent already has a child called ``new_name``, or if
    ``file_id`` appears on the id-path of the new parent (which would put
    a directory inside itself).
    """
    if not is_valid_name(new_name):
        raise BzrError("not an acceptable filename: %r" % new_name)

    target_dir = self._byid[new_parent_id]
    if new_name in target_dir.children:
        raise BzrError("%r already exists in %r"
                       % (new_name, self.id2path(new_parent_id)))

    # The destination's ancestry must not include the entry being moved.
    if file_id in self.get_idpath(new_parent_id):
        raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
                       % (self.id2path(file_id), self.id2path(new_parent_id)))

    entry = self._byid[file_id]
    source_dir = self._byid[entry.parent_id]

    # TODO: Don't leave things messed up if this fails

    # Unlink from the old parent, then link under the new one.
    del source_dir.children[entry.name]
    target_dir.children[new_name] = entry

    # Finally update the entry's own view of where it lives.
    entry.name = new_name
    entry.parent_id = new_parent_id

732

733

734

735

736

# Compiled pattern for is_valid_name(); built on first use.
_NAME_RE = None


def is_valid_name(name):
    """Return True if ``name`` is acceptable as a single file name.

    A name is accepted when it is non-empty and contains neither a
    forward slash nor a backslash.
    """
    global _NAME_RE
    # Identity test: None is a singleton, so ``is`` is the right comparison.
    if _NAME_RE is None:
        # Compile lazily and cache at module level for later calls.
        _NAME_RE = re.compile(r'^[^/\\]+$')

    return bool(_NAME_RE.match(name))

495

496

if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the
    # docstrings of the ``inventory`` module.
    import doctest
    import inventory
    doctest.testmod(inventory)

Older »