~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: Martin Pool
Date: 2005-09-13 05:22:41 UTC
Revision ID: mbp@sourcefrog.net-20050913052241-52dbd8e8ced620f6

- better BZR_DEBUG trace output

files added:
HACKING

Makefile

bzr-man.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/intset.py

bzrlib/lock.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/selftest

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/weavestore.py

bzrlib/xml5.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/split-join-files.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/testweave.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/statcache.py

bzrlib/tests.py

files renamed:
elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

contrib/bash/bzr => contrib/bash/bzr.simple

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/add-bzr-to-baz

doc/formats.txt

doc/index.txt

doc/tagging.txt

doc/todo-from-arch.txt

setup.py

testbzr

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Maybe also keep the full path of the entry, and the children?

# But those depend on its position within a particular inventory, and

# it would be nice not to need to hold the backpointer here.

# TODO: Perhaps split InventoryEntry into subclasses for files,

# directories, etc etc.

# This should really be an id randomly assigned when the tree is

# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import sys, os.path, types, re

from sets import Set

try:

from cElementTree import Element, ElementTree, SubElement

except ImportError:

from elementtree.ElementTree import Element, ElementTree, SubElement

from xml import XMLMixin

from errors import bailout, BzrError, BzrCheckError

import bzrlib

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath

from bzrlib.trace import mutter

from bzrlib.errors import NotVersionedError

class InventoryEntry(XMLMixin):

class InventoryEntry(object):

"""Description of a versioned file.

An InventoryEntry has the following fields, which are also

present in the XML inventory-entry element:

* *file_id*

* *name*: (only the basename within the directory, must not

contain slashes)

* *kind*: "directory" or "file"

* *directory_id*: (if absent/null means the branch root directory)

* *text_sha1*: only for files

* *text_size*: in bytes, only for files

* *text_id*: identifier for the text version, only for files

InventoryEntries can also exist inside a WorkingTree

inventory, in which case they are not yet bound to a

particular revision of the file. In that case the text_sha1,

text_size and text_id are absent.

file_id

name

(within the parent directory)

kind

'directory' or 'file'

parent_id

file_id of the parent directory, or ROOT_ID

entry_version

the revision_id in which the name or parent of this file was

last changed

text_sha1

sha-1 of the text of the file

text_size

size in bytes of the text of the file

text_version

the revision_id in which the text of this file was introduced

(reading a version 4 tree created a text_id field.)

>>> i = Inventory()

>>> i.path2id('')

'TREE_ROOT'

>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))

InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')

>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))

InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')

>>> for j in i.iter_entries():

... print j

...

>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))

Traceback (most recent call last):

...

BzrError: ('inventory already contains entry with id {2323}', [])

BzrError: inventory already contains entry with id {2323}

>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))

InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')

>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))

InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')

>>> i.path2id('src/wibble')

'2325'

>>> '2325' in i

True

>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))

InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')

>>> i['2326']

100

InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')

101

>>> for j in i.iter_entries():

109

src/wibble/wibble.c

110

>>> i.id2path('2326')

111

'src/wibble/wibble.c'

TODO: Maybe also keep the full path of the entry, and the children?

But those depend on its position within a particular inventory, and

it would be nice not to need to hold the backpointer here.

112

"""

# TODO: split InventoryEntry into subclasses for files,

100

# directories, etc etc.

101

102

text_sha1 = None

103

text_size = None

104

113

114

__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',

115

'text_id', 'parent_id', 'children',

116

'text_version', 'entry_version', ]

117

118

105

119

def __init__(self, file_id, name, kind, parent_id, text_id=None):

106

120

"""Create an InventoryEntry

107

121

117

131

Traceback (most recent call last):

118

132

BzrCheckError: InventoryEntry name 'src/hello.c' is invalid

119

133

"""

134

assert isinstance(name, basestring), name

120

135

if '/' in name or '\\' in name:

121

136

raise BzrCheckError('InventoryEntry name %r is invalid' % name)

122

137

138

self.text_version = None

139

self.entry_version = None

140

self.text_sha1 = None

141

self.text_size = None

123

142

self.file_id = file_id

124

143

self.name = name

125

144

self.kind = kind

142

161

143

162

def copy(self):
    """Return a new InventoryEntry carrying the same metadata as this one.

    The copy is built from this entry's file_id, name, kind and
    parent_id, and then receives text_id, text_sha1, text_size,
    text_version and entry_version.

    entry_version is carried over as well: __eq__ compares it, so a
    copy that dropped it would not compare equal to its source.
    """
    other = InventoryEntry(self.file_id, self.name, self.kind,
                           self.parent_id)
    other.text_id = self.text_id
    other.text_sha1 = self.text_sha1
    other.text_size = self.text_size
    other.text_version = self.text_version
    other.entry_version = self.entry_version
    # note that children are *not* copied; they're pulled across when
    # others are added
    return other

149

172

150

173

157

180

self.parent_id))

158

181

159

182

160

def to_element(self):

161

"""Convert to XML element"""

162

e = Element('entry')

163

164

e.set('name', self.name)

165

e.set('file_id', self.file_id)

166

e.set('kind', self.kind)

167

168

if self.text_size != None:

169

e.set('text_size', '%d' % self.text_size)

170

171

for f in ['text_id', 'text_sha1']:

172

v = getattr(self, f)

173

if v != None:

174

e.set(f, v)

175

176

# to be conservative, we don't externalize the root pointers

177

# for now, leaving them as null in the xml form. in a future

178

# version it will be implied by nested elements.

179

if self.parent_id != ROOT_ID:

180

assert isinstance(self.parent_id, basestring)

181

e.set('parent_id', self.parent_id)

182

183

e.tail = '\n'

184

185

return e

186

187

188

def from_element(cls, elt):

189

assert elt.tag == 'entry'

190

191

## original format inventories don't have a parent_id for

192

## nodes in the root directory, but it's cleaner to use one

193

## internally.

194

parent_id = elt.get('parent_id')

195

if parent_id == None:

196

parent_id = ROOT_ID

197

198

self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)

199

self.text_id = elt.get('text_id')

200

self.text_sha1 = elt.get('text_sha1')

201

202

## mutter("read inventoryentry: %r" % (elt.attrib))

203

204

v = elt.get('text_size')

205

self.text_size = v and int(v)

206

207

return self

208

209

210

from_element = classmethod(from_element)

211

212

def __cmp__(self, other):

213

if self is other:

214

return 0

183

def __eq__(self, other):

215

184

if not isinstance(other, InventoryEntry):

216

185

return NotImplemented

217

186

218

return cmp(self.file_id, other.file_id) \

219

or cmp(self.name, other.name) \

220

or cmp(self.text_sha1, other.text_sha1) \

221

or cmp(self.text_size, other.text_size) \

222

or cmp(self.text_id, other.text_id) \

223

or cmp(self.parent_id, other.parent_id) \

224

or cmp(self.kind, other.kind)

187

return (self.file_id == other.file_id) \

188

and (self.name == other.name) \

189

and (self.text_sha1 == other.text_sha1) \

190

and (self.text_size == other.text_size) \

191

and (self.text_id == other.text_id) \

192

and (self.parent_id == other.parent_id) \

193

and (self.kind == other.kind) \

194

and (self.text_version == other.text_version) \

195

and (self.entry_version == other.entry_version)

196

197

198

def __ne__(self, other):

199

return not (self == other)

200

201

def __hash__(self):

202

raise ValueError('not hashable')

225

203

226

204

227

205

233

211

self.parent_id = None

234

212

self.name = ''

235

213

236

def __cmp__(self, other):

237

if self is other:

238

return 0

214

def __eq__(self, other):

239

215

if not isinstance(other, RootEntry):

240

216

return NotImplemented

241

return cmp(self.file_id, other.file_id) \

242

or cmp(self.children, other.children)

243

244

245

246

class Inventory(XMLMixin):

217

218

return (self.file_id == other.file_id) \

219

and (self.children == other.children)

220

221

222

223

class Inventory(object):

247

224

"""Inventory of versioned files in a tree.

248

225

249

226

This describes which file_id is present at each point in the tree,

261

238

inserted, other than through the Inventory API.

262

239

263

240

>>> inv = Inventory()

264

>>> inv.write_xml(sys.stdout)

265

266

</inventory>

267

241

>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))

242

InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT')

268

243

>>> inv['123-123'].name

269

244

'hello.c'

270

245

279

254

280

255

>>> [x[0] for x in inv.iter_entries()]

281

256

['hello.c']

282

283

>>> inv.write_xml(sys.stdout)

284

285

286

</inventory>

287

257

>>> inv = Inventory('TREE_ROOT-12345678-12345678')

258

>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))

259

InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')

288

260

"""

289

def __init__(self):

261

def __init__(self, root_id=ROOT_ID):

290

262

"""Create or read an inventory.

291

263

292

264

If a working directory is specified, the inventory is read

296

268

The inventory is created with a default root directory, with

297

269

an id of None.

298

270

"""

299

self.root = RootEntry(ROOT_ID)

271

# We are letting Branch(init=True) create a unique inventory

272

# root id. Rather than generating a random one here.

273

#if root_id is None:

274

# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')

275

self.root = RootEntry(root_id)

300

276

self._byid = {self.root.file_id: self.root}

301

277

302

278

279

def copy(self):
    """Return a new Inventory holding copies of this one's entries.

    The new inventory is created with the same root file_id.  Every
    (path, entry) pair produced by iter_entries() is copied with
    entry.copy() and added to it; an entry equal to the root is left
    out, since the new inventory already has its own root.
    """
    duplicate = Inventory(self.root.file_id)
    # Walk via iter_entries() so that directories are added before
    # their children.  There are more efficient ways than this...
    for _path, ie in self.iter_entries():
        if not (ie == self.root):
            duplicate.add(ie.copy())
    return duplicate

288

289

303

290

def __iter__(self):

304

291

return iter(self._byid)

305

292

324

311

if ie.kind == 'directory':

325

312

for cn, cie in self.iter_entries(from_dir=ie.file_id):

326

313

yield os.path.join(name, cn), cie

327

314

315

316

def entries(self):
    """Return a list of (path, ie) for every entry except the root.

    Children of each directory are visited in sorted name order, and
    a directory's own pair is listed before the pairs of its contents.

    This may be faster than iter_entries.
    """
    result = []

    def walk(directory, prefix):
        # (name, entry) pairs of this directory, ordered by name
        children = list(directory.children.items())
        children.sort()
        for child_name, child in children:
            path = os.path.join(prefix, child_name)
            result.append((path, child))
            if child.kind == 'directory':
                walk(child, path)

    walk(self.root, '')
    return result

328

333

329

334

330

335

def directories(self):
    """Return (path, entry) pairs for all directories, including the root.

    The root is listed first with the path ''.  Below each directory,
    its sub-directories follow in sorted name order, each one ahead of
    its own sub-directories.  Entries whose kind is not 'directory'
    are ignored.
    """
    found = []

    def visit(dir_ie, dir_path):
        found.append((dir_path, dir_ie))

        # collect the sub-directories of this directory, ordered by name
        subdirs = []
        for child in dir_ie.children.values():
            if child.kind == 'directory':
                subdirs.append((child.name, child))
        subdirs.sort()

        for subdir_name, subdir in subdirs:
            visit(subdir, os.path.join(dir_path, subdir_name))

    visit(self.root, '')
    return found

350

351

352

355

356

>>> inv = Inventory()

357

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

358

InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')

358

359

>>> '123' in inv

359

360

True

360

361

>>> '456' in inv

368

369

370

>>> inv = Inventory()

370

371

>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))

372

InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')

371

373

>>> inv['123123'].name

372

374

'hello.c'

373

375

"""

391

393

"""Add entry to inventory.

392

394

393

395

To add a file to a branch ready to be committed, use Branch.add,

394

which calls this."""

396

which calls this.

397

398

Returns the new entry object.

399

"""

395

400

if entry.file_id in self._byid:

396

bailout("inventory already contains entry with id {%s}" % entry.file_id)

401

raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)

402

403

if entry.parent_id == ROOT_ID or entry.parent_id is None:

404

entry.parent_id = self.root.file_id

397

405

398

406

try:

399

407

parent = self._byid[entry.parent_id]

400

408

except KeyError:

401

bailout("parent_id {%s} not in inventory" % entry.parent_id)

409

raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)

402

410

403

411

if parent.children.has_key(entry.name):

404

bailout("%s is already versioned" %

412

raise BzrError("%s is already versioned" %

405

413

appendpath(self.id2path(parent.file_id), entry.name))

406

414

407

415

self._byid[entry.file_id] = entry

408

416

parent.children[entry.name] = entry

417

return entry

409

418

410

419

411

420

def add_path(self, relpath, kind, file_id=None):
    """Add entry from a path.

    The immediate parent must already be versioned.

    relpath is split with bzrlib.osutils.splitpath; its last component
    becomes the entry name and the leading components are looked up
    with path2id to find the parent.  When file_id is None a new id is
    generated from relpath with gen_file_id.

    Raises BzrError if relpath splits into no components (i.e. it names
    the inventory root), and NotVersionedError if path2id returns None
    for the parent path.

    Returns the new entry object, as returned by add().
    """
    # imported at call time rather than at module level
    # NOTE(review): presumably to avoid a circular import -- confirm
    from bzrlib.branch import gen_file_id

    parts = bzrlib.osutils.splitpath(relpath)
    if not parts:
        raise BzrError("cannot re-add root of inventory")

    if file_id is None:
        file_id = gen_file_id(relpath)

    parent_path = parts[:-1]
    parent_id = self.path2id(parent_path)
    if parent_id is None:
        raise NotVersionedError(parent_path)

    ie = InventoryEntry(file_id, parts[-1],
                        kind=kind, parent_id=parent_id)
    return self.add(ie)

431

447

432

448

>>> inv = Inventory()

433

449

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

450

InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')

434

451

>>> '123' in inv

435

452

True

436

453

>>> del inv['123']

452

469

del self[ie.parent_id].children[ie.name]

453

470

454

471

455

def id_set(self):

456

return Set(self._byid)

457

458

459

def to_element(self):

460

"""Convert to XML Element"""

461

e = Element('inventory')

462

e.text = '\n'

463

for path, ie in self.iter_entries():

464

e.append(ie.to_element())

465

return e

466

467

468

def from_element(cls, elt):

469

"""Construct from XML Element

470

471

>>> inv = Inventory()

472

>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))

473

>>> elt = inv.to_element()

474

>>> inv2 = Inventory.from_element(elt)

475

>>> inv2 == inv

476

True

477

"""

478

assert elt.tag == 'inventory'

479

o = cls()

480

for e in elt:

481

o.add(InventoryEntry.from_element(e))

482

return o

483

484

from_element = classmethod(from_element)

485

486

487

def __cmp__(self, other):

472

def __eq__(self, other):

488

473

"""Compare two sets by comparing their contents.

489

474

490

475

>>> i1 = Inventory()

492

477

>>> i1 == i2

493

478

True

494

479

>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

480

InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')

495

481

>>> i1 == i2

496

482

False

497

483

>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

484

InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')

498

485

>>> i1 == i2

499

486

True

500

487

"""

501

if self is other:

502

return 0

503

504

488

if not isinstance(other, Inventory):

505

489

return NotImplemented

506

490

507

if self.id_set() ^ other.id_set():

508

return 1

509

510

for file_id in self._byid:

511

c = cmp(self[file_id], other[file_id])

512

if c: return c

513

514

return 0

491

if len(self._byid) != len(other._byid):

492

# shortcut: obviously not the same

493

return False

494

495

return self._byid == other._byid

496

497

498

def __ne__(self, other):

499

return not (self == other)

500

501

502

def __hash__(self):

503

raise ValueError('not hashable')

515

504

516

505

517

506

def get_idpath(self, file_id):

527

516

try:

528

517

ie = self._byid[file_id]

529

518

except KeyError:

530

bailout("file_id {%s} not found in inventory" % file_id)

519

raise BzrError("file_id {%s} not found in inventory" % file_id)

531

520

p.insert(0, ie.file_id)

532

521

file_id = ie.parent_id

533

522

return p

537

526

"""Return as a list the path to file_id."""

538

527

539

528

# get all names, skipping root

540

p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]

529

p = [self._byid[fid].name for fid in self.get_idpath(file_id)[1:]]

541

530

return os.sep.join(p)

542

531

543

532

587

576

588

577

This does not move the working file."""

589

578

if not is_valid_name(new_name):

590

bailout("not an acceptable filename: %r" % new_name)

579

raise BzrError("not an acceptable filename: %r" % new_name)

591

580

592

581

new_parent = self._byid[new_parent_id]

593

582

if new_name in new_parent.children:

594

bailout("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

583

raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

595

584

596

585

new_parent_idpath = self.get_idpath(new_parent_id)

597

586

if file_id in new_parent_idpath:

598

bailout("cannot move directory %r into a subdirectory of itself, %r"

587

raise BzrError("cannot move directory %r into a subdirectory of itself, %r"

599

588

% (self.id2path(file_id), self.id2path(new_parent_id)))

600

589

601

590

file_ie = self._byid[file_id]

612

601

613

602

614

603

615

_NAME_RE = re.compile(r'^[^/\\]+$')

604

# Compiled pattern used by is_valid_name; built on first use.
_NAME_RE = None


def is_valid_name(name):
    """Return True if name is non-empty and has no '/' or '\\' in it.

    The pattern is compiled the first time this function is called
    and then kept in the module-level _NAME_RE for later calls.
    """
    global _NAME_RE
    if _NAME_RE is None:
        _NAME_RE = re.compile(r'^[^/\\]+$')

    return bool(_NAME_RE.match(name))

Older »