~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: mbp at sourcefrog
Date: 2005-03-29 08:01:30 UTC
Revision ID: mbp@sourcefrog.net-20050329080130-89b78af279b196b3

match ignore patterns like ./config.h

files added:
bzrlib/tests.py

doc/faq.txt

doc/quickref.txt

doc/roadmap.txt

doc/testing.txt

doc/work-order.txt

files removed:
.rsyncexclude

TODO

bzr-man.py

bzrlib/atomicfile.py

bzrlib/changeset.py

bzrlib/commit.py

bzrlib/help.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/patch.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/selftest.py

bzrlib/statcache.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/upgrade.py

bzrlib/whitebox.py

bzrlib/workingtree.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/quotes.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/switch-in-branch.txt

notes/new-inventory-sample.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files modified:
.bzrignore

NEWS

README

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/store.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

doc/Makefile

doc/bitkeeper.txt

doc/formats.txt

doc/index.txt

doc/interrupted.txt

doc/merge.txt

doc/python.txt

doc/random.txt

doc/svk.txt

doc/tagging.txt

doc/todo-from-arch.txt

elementtree/ElementTree.py

notes/performance.txt

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# This should really be an id randomly assigned when the tree is

# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import sys, os.path, types, re

"""Inventories map files to their name in a revision."""

# TODO: Maybe store inventory_id in the file? Not really needed.

__author__ = "Martin Pool <mbp@canonical.com>"

import sys, os.path, types

from sets import Set

try:

from cElementTree import Element, ElementTree, SubElement

except ImportError:

from elementtree.ElementTree import Element, ElementTree, SubElement

from bzrlib.xml import XMLMixin

from bzrlib.errors import BzrError, BzrCheckError

from xml import XMLMixin

from errors import bailout

import bzrlib

from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath

>>> i = Inventory()

>>> i.path2id('')

'TREE_ROOT'

>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))

>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))

>>> i.add(InventoryEntry('123', 'src', kind='directory'))

>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))

>>> for j in i.iter_entries():

... print j

...

('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))

('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))

('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))

>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))

>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))

Traceback (most recent call last):

...

BzrError: inventory already contains entry with id {2323}

>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))

>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))

BzrError: ('inventory already contains entry with id {2323}', [])

>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))

>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))

>>> i.path2id('src/wibble')

'2325'

>>> '2325' in i

True

>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))

>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))

>>> i['2326']

InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')

>>> for j in i.iter_entries():

>>> i.id2path('2326')

'src/wibble/wibble.c'

TODO: Maybe also keep the full path of the entry, and the children?

:todo: Maybe also keep the full path of the entry, and the children?

But those depend on its position within a particular inventory, and

it would be nice not to need to hold the backpointer here.

"""

# TODO: split InventoryEntry into subclasses for files,

# directories, etc etc.

100

101

text_sha1 = None

102

text_size = None

103

104

def __init__(self, file_id, name, kind, parent_id, text_id=None):

def __init__(self, file_id, name, kind='file', text_id=None,

parent_id=None):

105

100

"""Create an InventoryEntry

106

101

107

102

The filename must be a single component, relative to the

108

103

parent directory; it cannot be a whole path or relative name.

109

104

110

>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)

105

>>> e = InventoryEntry('123', 'hello.c')

111

106

>>> e.name

112

107

'hello.c'

113

108

>>> e.file_id

114

109

'123'

115

>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)

110

>>> e = InventoryEntry('123', 'src/hello.c')

116

111

Traceback (most recent call last):

117

BzrCheckError: InventoryEntry name 'src/hello.c' is invalid

112

BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])

118

113

"""

119

if '/' in name or '\\' in name:

120

raise BzrCheckError('InventoryEntry name %r is invalid' % name)

114

115

if len(splitpath(name)) != 1:

116

bailout('InventoryEntry name is not a simple filename: %r'

117

% name)

121

118

122

119

self.file_id = file_id

123

120

self.name = name

121

assert kind in ['file', 'directory']

124

122

self.kind = kind

125

123

self.text_id = text_id

126

124

self.parent_id = parent_id

125

self.text_sha1 = None

126

self.text_size = None

127

if kind == 'directory':

128

self.children = {}

129

elif kind == 'file':

130

pass

131

else:

132

raise BzrError("unhandled entry kind %r" % kind)

133

134

135

136

def sorted_children(self):

137

l = self.children.items()

138

l.sort()

139

return l

140

129

141

130

142

131

def copy(self):

143

132

other = InventoryEntry(self.file_id, self.name, self.kind,

144

self.parent_id, text_id=self.text_id)

133

self.text_id, self.parent_id)

145

134

other.text_sha1 = self.text_sha1

146

135

other.text_size = self.text_size

147

# note that children are *not* copied; they're pulled across when

148

# others are added

149

136

return other

150

137

151

138

166

153

e.set('file_id', self.file_id)

167

154

e.set('kind', self.kind)

168

155

169

if self.text_size != None:

156

if self.text_size is not None:

170

157

e.set('text_size', '%d' % self.text_size)

171

158

172

for f in ['text_id', 'text_sha1']:

159

for f in ['text_id', 'text_sha1', 'parent_id']:

173

160

v = getattr(self, f)

174

if v != None:

161

if v is not None:

175

162

e.set(f, v)

176

163

177

# to be conservative, we don't externalize the root pointers

178

# for now, leaving them as null in the xml form. in a future

179

# version it will be implied by nested elements.

180

if self.parent_id != ROOT_ID:

181

assert isinstance(self.parent_id, basestring)

182

e.set('parent_id', self.parent_id)

183

184

164

e.tail = '\n'

185

165

186

166

return e

188

168

189

169

def from_element(cls, elt):

190

170

assert elt.tag == 'entry'

191

192

## original format inventories don't have a parent_id for

193

## nodes in the root directory, but it's cleaner to use one

194

## internally.

195

parent_id = elt.get('parent_id')

196

if parent_id == None:

197

parent_id = ROOT_ID

198

199

self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)

171

self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))

200

172

self.text_id = elt.get('text_id')

201

173

self.text_sha1 = elt.get('text_sha1')

174

self.parent_id = elt.get('parent_id')

202

175

203

176

## mutter("read inventoryentry: %r" % (elt.attrib))

204

177

210

183

211

184

from_element = classmethod(from_element)

212

185

213

def __eq__(self, other):

186

def __cmp__(self, other):

187

if self is other:

188

return 0

214

189

if not isinstance(other, InventoryEntry):

215

190

return NotImplemented

216

191

217

return (self.file_id == other.file_id) \

218

and (self.name == other.name) \

219

and (self.text_sha1 == other.text_sha1) \

220

and (self.text_size == other.text_size) \

221

and (self.text_id == other.text_id) \

222

and (self.parent_id == other.parent_id) \

223

and (self.kind == other.kind)

224

225

226

def __ne__(self, other):

227

return not (self == other)

228

229

def __hash__(self):

230

raise ValueError('not hashable')

231

232

233

234

class RootEntry(InventoryEntry):

235

def __init__(self, file_id):

236

self.file_id = file_id

237

self.children = {}

238

self.kind = 'root_directory'

239

self.parent_id = None

240

self.name = ''

241

242

def __eq__(self, other):

243

if not isinstance(other, RootEntry):

244

return NotImplemented

245

246

return (self.file_id == other.file_id) \

247

and (self.children == other.children)

192

return cmp(self.file_id, other.file_id) \

193

or cmp(self.name, other.name) \

194

or cmp(self.text_sha1, other.text_sha1) \

195

or cmp(self.text_size, other.text_size) \

196

or cmp(self.text_id, other.text_id) \

197

or cmp(self.parent_id, other.parent_id) \

198

or cmp(self.kind, other.kind)

248

199

249

200

250

201

251

202

class Inventory(XMLMixin):

252

203

"""Inventory of versioned files in a tree.

253

204

254

This describes which file_id is present at each point in the tree,

255

and possibly the SHA-1 or other information about the file.

256

Entries can be looked up either by path or by file_id.

205

An Inventory acts like a set of InventoryEntry items. You can

206

also look files up by their file_id or name.

207

208

May be read from and written to a metadata file in a tree. To

209

manipulate the inventory (for example to add a file), it is read

210

in, modified, and then written back out.

257

211

258

212

The inventory represents a typical unix file tree, with

259

213

directories containing files and subdirectories. We never store

263

217

returned quickly.

264

218

265

219

InventoryEntry objects must not be modified after they are

266

inserted, other than through the Inventory API.

220

inserted.

267

221

268

222

>>> inv = Inventory()

269

223

>>> inv.write_xml(sys.stdout)

270

224

271

225

</inventory>

272

>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))

226

>>> inv.add(InventoryEntry('123-123', 'hello.c'))

273

227

>>> inv['123-123'].name

274

228

'hello.c'

275

229

291

245

</inventory>

292

246

293

247

"""

248

249

## TODO: Make sure only canonical filenames are stored.

250

251

## TODO: Do something sensible about the possible collisions on

252

## case-losing filesystems. Perhaps we should just always forbid

253

## such collisions.

254

255

## TODO: No special cases for root, rather just give it a file id

256

## like everything else.

257

258

## TODO: Probably change XML serialization to use nesting

259

294

260

def __init__(self):

295

261

"""Create or read an inventory.

296

262

297

263

If a working directory is specified, the inventory is read

298

264

from there. If the file is specified, read from that. If not,

299

265

the inventory is created empty.

300

301

The inventory is created with a default root directory, with

302

an id of None.

303

266

"""

304

self.root = RootEntry(ROOT_ID)

305

self._byid = {self.root.file_id: self.root}

267

self._root = InventoryEntry(None, '', kind='directory')

268

self._byid = {None: self._root}

306

269

307

270

308

271

def __iter__(self):

314

277

return len(self._byid)

315

278

316

279

317

def iter_entries(self, from_dir=None):

280

def iter_entries(self, parent_id=None):

318

281

"""Return (path, entry) pairs, in order by name."""

319

if from_dir == None:

320

assert self.root

321

from_dir = self.root

322

elif isinstance(from_dir, basestring):

323

from_dir = self._byid[from_dir]

324

325

kids = from_dir.children.items()

282

kids = self[parent_id].children.items()

326

283

kids.sort()

327

284

for name, ie in kids:

328

285

yield name, ie

329

286

if ie.kind == 'directory':

330

for cn, cie in self.iter_entries(from_dir=ie.file_id):

331

yield os.path.join(name, cn), cie

332

333

334

def entries(self):

335

"""Return list of (path, ie) for all entries except the root.

336

337

This may be faster than iter_entries.

338

"""

339

accum = []

340

def descend(dir_ie, dir_path):

341

kids = dir_ie.children.items()

342

kids.sort()

343

for name, ie in kids:

344

child_path = os.path.join(dir_path, name)

345

accum.append((child_path, ie))

346

if ie.kind == 'directory':

347

descend(ie, child_path)

348

349

descend(self.root, '')

350

return accum

287

for cn, cie in self.iter_entries(parent_id=ie.file_id):

288

yield joinpath([name, cn]), cie

351

289

352

290

353

291

def directories(self):

354

"""Return (path, entry) pairs for all directories, including the root.

292

"""Return (path, entry) pairs for all directories.

355

293

"""

356

accum = []

357

def descend(parent_ie, parent_path):

358

accum.append((parent_path, parent_ie))

359

360

kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']

361

kids.sort()

362

363

for name, child_ie in kids:

364

child_path = os.path.join(parent_path, name)

365

descend(child_ie, child_path)

366

descend(self.root, '')

367

return accum

294

yield '', self._root

295

for path, entry in self.iter_entries():

296

if entry.kind == 'directory':

297

yield path, entry

368

298

369

299

370

300

372

302

"""True if this entry contains a file with given id.

373

303

374

304

>>> inv = Inventory()

375

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

305

>>> inv.add(InventoryEntry('123', 'foo.c'))

376

306

>>> '123' in inv

377

307

True

378

308

>>> '456' in inv

385

315

"""Return the entry for given file_id.

386

316

387

317

>>> inv = Inventory()

388

>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))

318

>>> inv.add(InventoryEntry('123123', 'hello.c'))

389

319

>>> inv['123123'].name

390

320

'hello.c'

391

321

"""

392

try:

393

return self._byid[file_id]

394

except KeyError:

395

if file_id == None:

396

raise BzrError("can't look up file_id None")

397

else:

398

raise BzrError("file_id {%s} not in inventory" % file_id)

399

400

401

def get_file_kind(self, file_id):

402

return self._byid[file_id].kind

322

return self._byid[file_id]

323

403

324

404

325

def get_child(self, parent_id, filename):

405

return self[parent_id].children.get(filename)

326

if parent_id == None:

327

return self._root.children.get(filename)

328

else:

329

return self[parent_id].children.get(filename)

406

330

407

331

408

332

def add(self, entry):

411

335

To add a file to a branch ready to be committed, use Branch.add,

412

336

which calls this."""

413

337

if entry.file_id in self._byid:

414

raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)

338

bailout("inventory already contains entry with id {%s}" % entry.file_id)

415

339

416

try:

417

parent = self._byid[entry.parent_id]

418

except KeyError:

419

raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)

340

parent = self._byid[entry.parent_id]

341

if parent.kind != 'directory':

342

bailout("attempt to add under non-directory {%s}" % parent.file_id)

420

343

421

344

if parent.children.has_key(entry.name):

422

raise BzrError("%s is already versioned" %

345

bailout("%s is already versioned" %

423

346

appendpath(self.id2path(parent.file_id), entry.name))

424

347

425

348

self._byid[entry.file_id] = entry

432

355

The immediate parent must already be versioned"""

433

356

parts = bzrlib.osutils.splitpath(relpath)

434

357

if len(parts) == 0:

435

raise BzrError("cannot re-add root of inventory")

358

bailout("cannot re-add root of inventory")

436

359

437

if file_id == None:

360

if file_id is None:

438

361

file_id = bzrlib.branch.gen_file_id(relpath)

439

362

440

363

parent_id = self.path2id(parts[:-1])

441

assert parent_id != None

442

364

ie = InventoryEntry(file_id, parts[-1],

443

365

kind=kind, parent_id=parent_id)

444

366

return self.add(ie)

448

370

"""Remove entry by id.

449

371

450

372

>>> inv = Inventory()

451

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

373

>>> inv.add(InventoryEntry('123', 'foo.c'))

452

374

>>> '123' in inv

453

375

True

454

376

>>> del inv['123']

470

392

del self[ie.parent_id].children[ie.name]

471

393

472

394

395

def id_set(self):

396

return Set(self._byid)

397

398

473

399

def to_element(self):

474

400

"""Convert to XML Element"""

475

401

e = Element('inventory')

483

409

"""Construct from XML Element

484

410

485

411

>>> inv = Inventory()

486

>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))

412

>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))

487

413

>>> elt = inv.to_element()

488

414

>>> inv2 = Inventory.from_element(elt)

489

415

>>> inv2 == inv

498

424

from_element = classmethod(from_element)

499

425

500

426

501

def __eq__(self, other):

427

def __cmp__(self, other):

502

428

"""Compare two sets by comparing their contents.

503

429

504

430

>>> i1 = Inventory()

505

431

>>> i2 = Inventory()

506

432

>>> i1 == i2

507

433

True

508

>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

434

>>> i1.add(InventoryEntry('123', 'foo'))

509

435

>>> i1 == i2

510

436

False

511

>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

437

>>> i2.add(InventoryEntry('123', 'foo'))

512

438

>>> i1 == i2

513

439

True

514

440

"""

441

if self is other:

442

return 0

443

515

444

if not isinstance(other, Inventory):

516

445

return NotImplemented

517

446

518

if len(self._byid) != len(other._byid):

519

# shortcut: obviously not the same

520

return False

521

522

return self._byid == other._byid

523

524

525

def __ne__(self, other):

526

return not (self == other)

527

528

529

def __hash__(self):

530

raise ValueError('not hashable')

531

532

533

534

def get_idpath(self, file_id):

535

"""Return a list of file_ids for the path to an entry.

536

537

The list contains one element for each directory followed by

538

the id of the file itself. So the length of the returned list

539

is equal to the depth of the file in the tree, counting the

540

root directory as depth 1.

541

"""

447

if self.id_set() ^ other.id_set():

448

return 1

449

450

for file_id in self._byid:

451

c = cmp(self[file_id], other[file_id])

452

if c: return c

453

454

return 0

455

456

457

def id2path(self, file_id):

458

"""Return as a list the path to file_id."""

542

459

p = []

543

460

while file_id != None:

544

try:

545

ie = self._byid[file_id]

546

except KeyError:

547

raise BzrError("file_id {%s} not found in inventory" % file_id)

548

p.insert(0, ie.file_id)

461

ie = self[file_id]

462

p = [ie.name] + p

549

463

file_id = ie.parent_id

550

return p

551

552

553

def id2path(self, file_id):

554

"""Return as a list the path to file_id."""

555

556

# get all names, skipping root

557

p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]

558

return os.sep.join(p)

464

return joinpath(p)

559

465

560

466

561

467

567

473

568

474

This returns the entry of the last component in the path,

569

475

which may be either a file or a directory.

570

571

Returns None iff the path is not found.

572

476

"""

573

477

if isinstance(name, types.StringTypes):

574

478

name = splitpath(name)

575

479

576

mutter("lookup path %r" % name)

577

578

parent = self.root

480

parent = self[None]

579

481

for f in name:

580

482

try:

581

483

cie = parent.children[f]

582

484

assert cie.name == f

583

assert cie.parent_id == parent.file_id

584

485

parent = cie

585

486

except KeyError:

586

487

# or raise an error?

594

495

595

496

596

497

def has_id(self, file_id):

498

assert isinstance(file_id, str)

597

499

return self._byid.has_key(file_id)

598

500

599

501

600

def rename(self, file_id, new_parent_id, new_name):

601

"""Move a file within the inventory.

602

603

This can change either the name, or the parent, or both.

604

605

This does not move the working file."""

606

if not is_valid_name(new_name):

607

raise BzrError("not an acceptable filename: %r" % new_name)

608

609

new_parent = self._byid[new_parent_id]

610

if new_name in new_parent.children:

611

raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

612

613

new_parent_idpath = self.get_idpath(new_parent_id)

614

if file_id in new_parent_idpath:

615

raise BzrError("cannot move directory %r into a subdirectory of itself, %r"

616

% (self.id2path(file_id), self.id2path(new_parent_id)))

617

618

file_ie = self._byid[file_id]

619

old_parent = self._byid[file_ie.parent_id]

620

621

# TODO: Don't leave things messed up if this fails

622

623

del old_parent.children[file_ie.name]

624

new_parent.children[new_name] = file_ie

625

626

file_ie.name = new_name

627

file_ie.parent_id = new_parent_id

628

629

630

631

632

_NAME_RE = re.compile(r'^[^/\\]+$')

633

634

def is_valid_name(name):

635

return bool(_NAME_RE.match(name))

502

503

504

505

if __name__ == '__main__':

506

import doctest, inventory

507

doctest.testmod(inventory)

Older »