~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: Martin Pool
Date: 2005-06-20 04:33:23 UTC
Revision ID: mbp@sourcefrog.net-20050620043323-1f681c2b5b15d026

- write into store using AtomicFile

files added:
.rsyncexclude

TODO

bzr-man.py

bzrlib/atomicfile.py

bzrlib/changeset.py

bzrlib/commit.py

bzrlib/help.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/patch.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/selftest.py

bzrlib/statcache.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/upgrade.py

bzrlib/whitebox.py

bzrlib/workingtree.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/quotes.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/switch-in-branch.txt

notes/new-inventory-sample.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
bzrlib/tests.py

doc/faq.txt

doc/quickref.txt

doc/roadmap.txt

doc/testing.txt

doc/work-order.txt

files modified:
.bzrignore

NEWS

README

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/store.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

doc/Makefile

doc/bitkeeper.txt

doc/formats.txt

doc/index.txt

doc/interrupted.txt

doc/merge.txt

doc/python.txt

doc/random.txt

doc/svk.txt

doc/tagging.txt

doc/todo-from-arch.txt

elementtree/ElementTree.py

notes/performance.txt

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Inventories map files to their name in a revision."""

# TODO: Maybe store inventory_id in the file? Not really needed.

__author__ = "Martin Pool <mbp@canonical.com>"

import sys, os.path, types

from sets import Set

# This should really be an id randomly assigned when the tree is

# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import sys, os.path, types, re

try:

from cElementTree import Element, ElementTree, SubElement

except ImportError:

from elementtree.ElementTree import Element, ElementTree, SubElement

from xml import XMLMixin

from errors import bailout

from bzrlib.xml import XMLMixin

from bzrlib.errors import BzrError, BzrCheckError

import bzrlib

from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath

>>> i = Inventory()

>>> i.path2id('')

>>> i.add(InventoryEntry('123', 'src', kind='directory'))

>>> i.add(InventoryEntry('2323', 'hello.c', parent_id='123'))

'TREE_ROOT'

>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))

>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))

>>> for j in i.iter_entries():

... print j

...

('src', InventoryEntry('123', 'src', kind='directory', parent_id=None))

('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))

('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))

>>> i.add(InventoryEntry('2323', 'bye.c', parent_id='123'))

>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))

Traceback (most recent call last):

...

BzrError: ('inventory already contains entry with id {2323}', [])

>>> i.add(InventoryEntry('2324', 'bye.c', parent_id='123'))

>>> i.add(InventoryEntry('2325', 'wibble', parent_id='123', kind='directory'))

BzrError: inventory already contains entry with id {2323}

>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))

>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))

>>> i.path2id('src/wibble')

'2325'

>>> '2325' in i

True

>>> i.add(InventoryEntry('2326', 'wibble.c', parent_id='2325'))

>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))

>>> i['2326']

InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')

>>> for j in i.iter_entries():

>>> i.id2path('2326')

'src/wibble/wibble.c'

:todo: Maybe also keep the full path of the entry, and the children?

TODO: Maybe also keep the full path of the entry, and the children?

But those depend on its position within a particular inventory, and

it would be nice not to need to hold the backpointer here.

"""

def __init__(self, file_id, name, kind='file', text_id=None,

parent_id=None):

# TODO: split InventoryEntry into subclasses for files,

# directories, etc etc.

100

101

text_sha1 = None

102

text_size = None

103

104

def __init__(self, file_id, name, kind, parent_id, text_id=None):

100

105

"""Create an InventoryEntry

101

106

102

107

The filename must be a single component, relative to the

103

108

parent directory; it cannot be a whole path or relative name.

104

109

105

>>> e = InventoryEntry('123', 'hello.c')

110

>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)

106

111

>>> e.name

107

112

'hello.c'

108

113

>>> e.file_id

109

114

'123'

110

>>> e = InventoryEntry('123', 'src/hello.c')

115

>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)

111

116

Traceback (most recent call last):

112

BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])

117

BzrCheckError: InventoryEntry name 'src/hello.c' is invalid

113

118

"""

114

115

if len(splitpath(name)) != 1:

116

bailout('InventoryEntry name is not a simple filename: %r'

117

% name)

119

if '/' in name or '\\' in name:

120

raise BzrCheckError('InventoryEntry name %r is invalid' % name)

118

121

119

122

self.file_id = file_id

120

123

self.name = name

121

assert kind in ['file', 'directory']

122

124

self.kind = kind

123

125

self.text_id = text_id

124

126

self.parent_id = parent_id

125

self.text_sha1 = None

126

self.text_size = None

127

if kind == 'directory':

128

self.children = {}

129

elif kind == 'file':

130

pass

131

else:

132

raise BzrError("unhandled entry kind %r" % kind)

133

134

135

136

def sorted_children(self):

137

l = self.children.items()

138

l.sort()

139

return l

129

140

130

141

131

142

def copy(self):

132

143

other = InventoryEntry(self.file_id, self.name, self.kind,

133

self.text_id, self.parent_id)

144

self.parent_id, text_id=self.text_id)

134

145

other.text_sha1 = self.text_sha1

135

146

other.text_size = self.text_size

147

# note that children are *not* copied; they're pulled across when

148

# others are added

136

149

return other

137

150

138

151

153

166

e.set('file_id', self.file_id)

154

167

e.set('kind', self.kind)

155

168

156

if self.text_size is not None:

169

if self.text_size != None:

157

170

e.set('text_size', '%d' % self.text_size)

158

171

159

for f in ['text_id', 'text_sha1', 'parent_id']:

172

for f in ['text_id', 'text_sha1']:

160

173

v = getattr(self, f)

161

if v is not None:

174

if v != None:

162

175

e.set(f, v)

163

176

177

# to be conservative, we don't externalize the root pointers

178

# for now, leaving them as null in the xml form. in a future

179

# version it will be implied by nested elements.

180

if self.parent_id != ROOT_ID:

181

assert isinstance(self.parent_id, basestring)

182

e.set('parent_id', self.parent_id)

183

164

184

e.tail = '\n'

165

185

166

186

return e

168

188

169

189

def from_element(cls, elt):

170

190

assert elt.tag == 'entry'

171

self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'))

191

192

## original format inventories don't have a parent_id for

193

## nodes in the root directory, but it's cleaner to use one

194

## internally.

195

parent_id = elt.get('parent_id')

196

if parent_id == None:

197

parent_id = ROOT_ID

198

199

self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)

172

200

self.text_id = elt.get('text_id')

173

201

self.text_sha1 = elt.get('text_sha1')

174

self.parent_id = elt.get('parent_id')

175

202

176

203

## mutter("read inventoryentry: %r" % (elt.attrib))

177

204

183

210

184

211

from_element = classmethod(from_element)

185

212

186

def __cmp__(self, other):

187

if self is other:

188

return 0

213

def __eq__(self, other):

189

214

if not isinstance(other, InventoryEntry):

190

215

return NotImplemented

191

216

192

return cmp(self.file_id, other.file_id) \

193

or cmp(self.name, other.name) \

194

or cmp(self.text_sha1, other.text_sha1) \

195

or cmp(self.text_size, other.text_size) \

196

or cmp(self.text_id, other.text_id) \

197

or cmp(self.parent_id, other.parent_id) \

198

or cmp(self.kind, other.kind)

217

return (self.file_id == other.file_id) \

218

and (self.name == other.name) \

219

and (self.text_sha1 == other.text_sha1) \

220

and (self.text_size == other.text_size) \

221

and (self.text_id == other.text_id) \

222

and (self.parent_id == other.parent_id) \

223

and (self.kind == other.kind)

224

225

226

def __ne__(self, other):

227

return not (self == other)

228

229

def __hash__(self):

230

raise ValueError('not hashable')

231

232

233

234

class RootEntry(InventoryEntry):
    """Inventory entry representing the root directory of a tree.

    The root has an empty name, no parent, and the kind
    'root_directory'.  Like a directory entry it carries a ``children``
    dictionary mapping child names to their entries.
    """

    def __init__(self, file_id):
        """Create the root entry with the given file_id.

        The attributes are assigned directly rather than through
        InventoryEntry.__init__, which accepts only the kinds 'file'
        and 'directory'.
        """
        self.file_id = file_id
        self.children = {}
        self.kind = 'root_directory'
        self.parent_id = None
        self.name = ''
        # InventoryEntry.__eq__ reads text_id from both operands; without
        # this attribute, comparing an ordinary entry against a root
        # raised AttributeError instead of returning False.
        self.text_id = None

    def __eq__(self, other):
        """Two roots are equal when their ids and children are equal.

        Returns NotImplemented for anything that is not a RootEntry.
        """
        if not isinstance(other, RootEntry):
            return NotImplemented

        return (self.file_id == other.file_id) \
               and (self.children == other.children)

199

248

200

249

201

250

202

251

class Inventory(XMLMixin):

203

252

"""Inventory of versioned files in a tree.

204

253

205

An Inventory acts like a set of InventoryEntry items. You can

206

also look files up by their file_id or name.

207

208

May be read from and written to a metadata file in a tree. To

209

manipulate the inventory (for example to add a file), it is read

210

in, modified, and then written back out.

254

This describes which file_id is present at each point in the tree,

255

and possibly the SHA-1 or other information about the file.

256

Entries can be looked up either by path or by file_id.

211

257

212

258

The inventory represents a typical unix file tree, with

213

259

directories containing files and subdirectories. We never store

217

263

returned quickly.

218

264

219

265

InventoryEntry objects must not be modified after they are

220

inserted.

266

inserted, other than through the Inventory API.

221

267

222

268

>>> inv = Inventory()

223

269

>>> inv.write_xml(sys.stdout)

224

270

225

271

</inventory>

226

>>> inv.add(InventoryEntry('123-123', 'hello.c'))

272

>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))

227

273

>>> inv['123-123'].name

228

274

'hello.c'

229

275

245

291

</inventory>

246

292

247

293

"""

248

249

## TODO: Make sure only canonical filenames are stored.

250

251

## TODO: Do something sensible about the possible collisions on

252

## case-losing filesystems. Perhaps we should just always forbid

253

## such collisions.

254

255

## TODO: No special cases for root, rather just give it a file id

256

## like everything else.

257

258

## TODO: Probably change XML serialization to use nesting

259

260

294

def __init__(self):

261

295

"""Create or read an inventory.

262

296

263

297

If a working directory is specified, the inventory is read

264

298

from there. If the file is specified, read from that. If not,

265

299

the inventory is created empty.

300

301

The inventory is created with a default root directory, with

302

an id of None.

266

303

"""

267

self._root = InventoryEntry(None, '', kind='directory')

268

self._byid = {None: self._root}

304

self.root = RootEntry(ROOT_ID)

305

self._byid = {self.root.file_id: self.root}

269

306

270

307

271

308

def __iter__(self):

277

314

return len(self._byid)

278

315

279

316

280

def iter_entries(self, parent_id=None):

317

def iter_entries(self, from_dir=None):

281

318

"""Return (path, entry) pairs, in order by name."""

282

kids = self[parent_id].children.items()

319

if from_dir == None:

320

assert self.root

321

from_dir = self.root

322

elif isinstance(from_dir, basestring):

323

from_dir = self._byid[from_dir]

324

325

kids = from_dir.children.items()

283

326

kids.sort()

284

327

for name, ie in kids:

285

328

yield name, ie

286

329

if ie.kind == 'directory':

287

for cn, cie in self.iter_entries(parent_id=ie.file_id):

288

yield joinpath([name, cn]), cie

330

for cn, cie in self.iter_entries(from_dir=ie.file_id):

331

yield os.path.join(name, cn), cie

332

333

334

def entries(self):
    """Return list of (path, ie) for all entries except the root.

    Entries are listed depth-first, with the children of each
    directory in order by name; a directory is listed immediately
    before its own contents.  Paths are built with os.path.join and
    are relative to the root.

    This may be faster than iter_entries.
    """
    accum = []

    def descend(dir_ie, dir_path):
        # Copy the items into a real list before sorting, so this does
        # not depend on items() itself returning a sortable list.
        kids = list(dir_ie.children.items())
        kids.sort()
        for name, ie in kids:
            child_path = os.path.join(dir_path, name)
            accum.append((child_path, ie))
            # Only plain directories are descended into; the root is
            # handled by the initial call below.
            if ie.kind == 'directory':
                descend(ie, child_path)

    descend(self.root, '')
    return accum

289

351

290

352

291

353

def directories(self):

292

"""Return (path, entry) pairs for all directories.

354

"""Return (path, entry) pairs for all directories, including the root.

293

355

"""

294

yield '', self._root

295

for path, entry in self.iter_entries():

296

if entry.kind == 'directory':

297

yield path, entry

356

accum = []

357

def descend(parent_ie, parent_path):

358

accum.append((parent_path, parent_ie))

359

360

kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']

361

kids.sort()

362

363

for name, child_ie in kids:

364

child_path = os.path.join(parent_path, name)

365

descend(child_ie, child_path)

366

descend(self.root, '')

367

return accum

298

368

299

369

300

370

302

372

"""True if this entry contains a file with given id.

303

373

304

374

>>> inv = Inventory()

305

>>> inv.add(InventoryEntry('123', 'foo.c'))

375

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

306

376

>>> '123' in inv

307

377

True

308

378

>>> '456' in inv

315

385

"""Return the entry for given file_id.

316

386

317

387

>>> inv = Inventory()

318

>>> inv.add(InventoryEntry('123123', 'hello.c'))

388

>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))

319

389

>>> inv['123123'].name

320

390

'hello.c'

321

391

"""

322

return self._byid[file_id]

323

392

try:

393

return self._byid[file_id]

394

except KeyError:

395

if file_id == None:

396

raise BzrError("can't look up file_id None")

397

else:

398

raise BzrError("file_id {%s} not in inventory" % file_id)

399

400

401

def get_file_kind(self, file_id):

402

return self._byid[file_id].kind

324

403

325

404

def get_child(self, parent_id, filename):

326

if parent_id == None:

327

return self._root.children.get(filename)

328

else:

329

return self[parent_id].children.get(filename)

405

return self[parent_id].children.get(filename)

330

406

331

407

332

408

def add(self, entry):

335

411

To add a file to a branch ready to be committed, use Branch.add,

336

412

which calls this."""

337

413

if entry.file_id in self._byid:

338

bailout("inventory already contains entry with id {%s}" % entry.file_id)

414

raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)

339

415

340

parent = self._byid[entry.parent_id]

341

if parent.kind != 'directory':

342

bailout("attempt to add under non-directory {%s}" % parent.file_id)

416

try:

417

parent = self._byid[entry.parent_id]

418

except KeyError:

419

raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)

343

420

344

421

if parent.children.has_key(entry.name):

345

bailout("%s is already versioned" %

422

raise BzrError("%s is already versioned" %

346

423

appendpath(self.id2path(parent.file_id), entry.name))

347

424

348

425

self._byid[entry.file_id] = entry

355

432

The immediate parent must already be versioned"""

356

433

parts = bzrlib.osutils.splitpath(relpath)

357

434

if len(parts) == 0:

358

bailout("cannot re-add root of inventory")

435

raise BzrError("cannot re-add root of inventory")

359

436

360

if file_id is None:

437

if file_id == None:

361

438

file_id = bzrlib.branch.gen_file_id(relpath)

362

439

363

440

parent_id = self.path2id(parts[:-1])

441

assert parent_id != None

364

442

ie = InventoryEntry(file_id, parts[-1],

365

443

kind=kind, parent_id=parent_id)

366

444

return self.add(ie)

370

448

"""Remove entry by id.

371

449

372

450

>>> inv = Inventory()

373

>>> inv.add(InventoryEntry('123', 'foo.c'))

451

>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))

374

452

>>> '123' in inv

375

453

True

376

454

>>> del inv['123']

392

470

del self[ie.parent_id].children[ie.name]

393

471

394

472

395

def id_set(self):

396

return Set(self._byid)

397

398

399

473

def to_element(self):

400

474

"""Convert to XML Element"""

401

475

e = Element('inventory')

409

483

"""Construct from XML Element

410

484

411

485

>>> inv = Inventory()

412

>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c'))

486

>>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))

413

487

>>> elt = inv.to_element()

414

488

>>> inv2 = Inventory.from_element(elt)

415

489

>>> inv2 == inv

424

498

from_element = classmethod(from_element)

425

499

426

500

427

def __cmp__(self, other):

501

def __eq__(self, other):

428

502

"""Compare two sets by comparing their contents.

429

503

430

504

>>> i1 = Inventory()

431

505

>>> i2 = Inventory()

432

506

>>> i1 == i2

433

507

True

434

>>> i1.add(InventoryEntry('123', 'foo'))

508

>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

435

509

>>> i1 == i2

436

510

False

437

>>> i2.add(InventoryEntry('123', 'foo'))

511

>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))

438

512

>>> i1 == i2

439

513

True

440

514

"""

441

if self is other:

442

return 0

443

444

515

if not isinstance(other, Inventory):

445

516

return NotImplemented

446

517

447

if self.id_set() ^ other.id_set():

448

return 1

449

450

for file_id in self._byid:

451

c = cmp(self[file_id], other[file_id])

452

if c: return c

453

454

return 0

455

456

457

def id2path(self, file_id):

458

"""Return as a list the path to file_id."""

518

if len(self._byid) != len(other._byid):

519

# shortcut: obviously not the same

520

return False

521

522

return self._byid == other._byid

523

524

525

def __ne__(self, other):

526

return not (self == other)

527

528

529

def __hash__(self):

530

raise ValueError('not hashable')

531

532

533

534

def get_idpath(self, file_id):

535

"""Return a list of file_ids for the path to an entry.

536

537

The list contains one element for each directory followed by

538

the id of the file itself. So the length of the returned list

539

is equal to the depth of the file in the tree, counting the

540

root directory as depth 1.

541

"""

459

542

p = []

460

543

while file_id != None:

461

ie = self[file_id]

462

p = [ie.name] + p

544

try:

545

ie = self._byid[file_id]

546

except KeyError:

547

raise BzrError("file_id {%s} not found in inventory" % file_id)

548

p.insert(0, ie.file_id)

463

549

file_id = ie.parent_id

464

return joinpath(p)

550

return p

551

552

553

def id2path(self, file_id):
    """Return the path of file_id as a string, relative to the root.

    The path is made of the names of each entry from the top of the
    tree down to file_id, joined with os.sep.  The root itself
    contributes nothing, so the path of the root is the empty string.
    """
    # get_idpath lists the root first; skip it so that its empty name
    # does not become a leading path component.
    names = [self[fid].name for fid in self.get_idpath(file_id)[1:]]
    return os.sep.join(names)

465

559

466

560

467

561

473

567

474

568

This returns the entry of the last component in the path,

475

569

which may be either a file or a directory.

570

571

Returns None iff the path is not found.

476

572

"""

477

573

if isinstance(name, types.StringTypes):

478

574

name = splitpath(name)

479

575

480

parent = self[None]

576

mutter("lookup path %r" % name)

577

578

parent = self.root

481

579

for f in name:

482

580

try:

483

581

cie = parent.children[f]

484

582

assert cie.name == f

583

assert cie.parent_id == parent.file_id

485

584

parent = cie

486

585

except KeyError:

487

586

# or raise an error?

495

594

496

595

497

596

def has_id(self, file_id):

498

assert isinstance(file_id, str)

499

597

return self._byid.has_key(file_id)

500

598

501

599

502

503

504

505

if __name__ == '__main__':

506

import doctest, inventory

507

doctest.testmod(inventory)

600

def rename(self, file_id, new_parent_id, new_name):
    """Move a file within the inventory.

    This can change either the name, or the parent, or both.

    This does not move the working file.

    file_id -- id of the entry to move.
    new_parent_id -- id of the entry that will contain it.
    new_name -- name the entry will have inside the new parent.

    Raises BzrError if new_name is not an acceptable filename, if the
    new parent cannot hold children, if the new parent already has a
    child called new_name, or if the entry would end up inside itself.
    An id missing from the inventory raises KeyError from the direct
    lookups.
    """
    if not is_valid_name(new_name):
        raise BzrError("not an acceptable filename: %r" % new_name)

    new_parent = self._byid[new_parent_id]

    # Entries that are not directories have no children mapping; report
    # that as a BzrError rather than failing with AttributeError below.
    siblings = getattr(new_parent, 'children', None)
    if siblings is None:
        raise BzrError("cannot move into non-directory {%s}" % new_parent_id)

    if new_name in siblings:
        raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

    # If file_id appears on the id path of the destination, the
    # destination is the entry itself or something beneath it.
    new_parent_idpath = self.get_idpath(new_parent_id)
    if file_id in new_parent_idpath:
        raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
                % (self.id2path(file_id), self.id2path(new_parent_id)))

    file_ie = self._byid[file_id]
    old_parent = self._byid[file_ie.parent_id]

    # TODO: Don't leave things messed up if this fails

    # All lookups are done above; from here on only the two children
    # maps and the entry itself are modified.
    del old_parent.children[file_ie.name]
    siblings[new_name] = file_ie

    file_ie.name = new_name
    file_ie.parent_id = new_parent_id

628

629

630

631

632

# Matches a non-empty string containing neither '/' nor '\'.
_NAME_RE = re.compile(r'^[^/\\]+$')


def is_valid_name(name):
    """Return True if name is non-empty and has no '/' or '\\' in it."""
    matched = _NAME_RE.match(name)
    return matched is not None

Older »