~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: mbp at sourcefrog
Date: 2005-04-07 02:40:18 UTC
Revision ID: mbp@sourcefrog.net-20050407024018-cf7130ea991f4ebc0c353ed2

more notes on svk

files added:
.bzrignore

NEWS

README

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/store.py

bzrlib/tests.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/faq.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quickref.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revision-syntax.txt

doc/roadmap.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/tagging.txt

doc/taxonomy.txt

doc/testing.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/work-order.txt

doc/workflow.txt

doc/yaml.txt

elementtree

elementtree/ElementTree.py

elementtree/__init__.py

notes

notes/new-inventory-sample.xml

notes/performance.txt

setup.py

test.sh

files removed:
.bzrignore

testsweet.py

testweave.py

tryconvert.py

weave.py

weavefile.py

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Inventories map files to their name in a revision."""

# TODO: Maybe store inventory_id in the file?  Not really needed.

__author__ = "Martin Pool <mbp@canonical.com>"

# File id given to the root directory of an inventory; entries that live
# directly in the root use it as their parent_id.
#
# This should really be an id randomly assigned when the tree is
# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import sys, os.path, types, re

from sets import Set

try:

from cElementTree import Element, ElementTree, SubElement

except ImportError:

from elementtree.ElementTree import Element, ElementTree, SubElement

from xml import XMLMixin

from errors import bailout, BzrError

import bzrlib

from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath

from bzrlib.trace import mutter

class InventoryEntry(XMLMixin):
    """Description of a versioned file.

    An InventoryEntry has the following fields, which are also
    present in the XML inventory-entry element:

    * *file_id*
    * *name*: (only the basename within the directory, must not
      contain slashes)
    * *kind*: "directory" or "file"
    * *parent_id*: file_id of the containing directory.  Entries that
      live directly in the branch root use ROOT_ID; in the XML form the
      attribute is left out for them.
    * *text_sha1*: only for files
    * *text_size*: in bytes, only for files
    * *text_id*: identifier for the text version, only for files

    Directory entries additionally carry a ``children`` dict mapping
    basename to entry; file entries have no such attribute.

    InventoryEntries can also exist inside a WorkingTree
    inventory, in which case they are not yet bound to a
    particular revision of the file.  In that case the text_sha1,
    text_size and text_id are absent.

    >>> i = Inventory()
    >>> i.path2id('')
    'TREE_ROOT'
    >>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
    >>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
    >>> for j in i.iter_entries():
    ...   print j
    ...
    ('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
    ('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
    >>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
    Traceback (most recent call last):
    ...
    BzrError: ('inventory already contains entry with id {2323}', [])
    >>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
    >>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
    >>> i.path2id('src/wibble')
    '2325'
    >>> '2325' in i
    True
    >>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
    >>> i['2326']
    InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
    >>> for j in i.iter_entries():
    ...     print j[0]
    ...     assert i.path2id(j[0])
    ...
    src
    src/bye.c
    src/hello.c
    src/wibble
    src/wibble/wibble.c
    >>> i.id2path('2326')
    'src/wibble/wibble.c'

    :todo: Maybe also keep the full path of the entry, and the children?
           But those depend on its position within a particular inventory, and
           it would be nice not to need to hold the backpointer here.
    """

    # TODO: split InventoryEntry into subclasses for files,
    # directories, etc etc.

    def __init__(self, file_id, name, kind, parent_id, text_id=None):
        """Create an InventoryEntry

        The filename must be a single component, relative to the
        parent directory; it cannot be a whole path or relative name.

        kind must be 'file' or 'directory'.  text_sha1 and text_size
        start out as None and are filled in by the caller when known.

        >>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
        >>> e.name
        'hello.c'
        >>> e.file_id
        '123'
        >>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
        Traceback (most recent call last):
        BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
        """
        if len(splitpath(name)) != 1:
            bailout('InventoryEntry name is not a simple filename: %r'
                    % name)

        assert kind in ['file', 'directory']

        self.file_id = file_id
        self.name = name
        self.kind = kind
        self.text_id = text_id
        self.parent_id = parent_id
        self.text_sha1 = None
        self.text_size = None
        # Only directories get a children dict, keyed by basename.
        if kind == 'directory':
            self.children = {}

    def sorted_children(self):
        """Return this directory's (name, entry) pairs sorted by name.

        Only meaningful for directory entries; file entries have no
        children attribute.
        """
        pairs = self.children.items()
        pairs.sort()
        return pairs

    def copy(self):
        """Return a new entry with the same id, name, kind, parent and text.

        The children dict is not carried over: a copied directory entry
        starts with the empty dict made by the constructor.
        """
        other = InventoryEntry(self.file_id, self.name, self.kind,
                               self.parent_id, text_id=self.text_id)
        other.text_sha1 = self.text_sha1
        other.text_size = self.text_size
        return other

    def __repr__(self):
        return ("%s(%r, %r, kind=%r, parent_id=%r)"
                % (self.__class__.__name__,
                   self.file_id,
                   self.name,
                   self.kind,
                   self.parent_id))

    def to_element(self):
        """Convert to XML element"""
        e = Element('entry')

        e.set('name', self.name)
        e.set('file_id', self.file_id)
        e.set('kind', self.kind)

        if self.text_size is not None:
            e.set('text_size', '%d' % self.text_size)

        # Optional text attributes are written only when they are set.
        for f in ['text_id', 'text_sha1']:
            v = getattr(self, f)
            if v is not None:
                e.set(f, v)

        # to be conservative, we don't externalize the root pointers
        # for now, leaving them as null in the xml form.  in a future
        # version it will be implied by nested elements.
        if self.parent_id != ROOT_ID:
            assert isinstance(self.parent_id, basestring)
            e.set('parent_id', self.parent_id)

        e.tail = '\n'

        return e

    def from_element(cls, elt):
        """Construct an entry from an XML 'entry' element.

        A missing parent_id attribute is read as ROOT_ID, mirroring
        the way to_element leaves it out for root-level entries.
        """
        assert elt.tag == 'entry'

        ## original format inventories don't have a parent_id for
        ## nodes in the root directory, but it's cleaner to use one
        ## internally.
        parent_id = elt.get('parent_id')
        if parent_id is None:
            parent_id = ROOT_ID

        entry = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'),
                    parent_id)
        entry.text_id = elt.get('text_id')
        entry.text_sha1 = elt.get('text_sha1')

        ## mutter("read inventoryentry: %r" % (elt.attrib))

        # A missing (or empty) text_size attribute is stored as read;
        # anything else is converted to an integer.
        v = elt.get('text_size')
        if v:
            v = int(v)
        entry.text_size = v

        return entry

    from_element = classmethod(from_element)

    def __cmp__(self, other):
        """Order entries field by field; zero means all fields match.

        The children of a directory entry take no part in the comparison.
        """
        if self is other:
            return 0
        if not isinstance(other, InventoryEntry):
            return NotImplemented

        return cmp(self.file_id, other.file_id) \
               or cmp(self.name, other.name) \
               or cmp(self.text_sha1, other.text_sha1) \
               or cmp(self.text_size, other.text_size) \
               or cmp(self.text_id, other.text_id) \
               or cmp(self.parent_id, other.parent_id) \
               or cmp(self.kind, other.kind)

230

231

232

233

class RootEntry(InventoryEntry):
    """Entry for the root directory of an inventory.

    It has an empty name, no parent, and kind 'root_directory'.
    """

    def __init__(self, file_id):
        # InventoryEntry.__init__ is not called here, so text_id,
        # text_sha1 and text_size are never set on a root entry.
        self.name = ''
        self.kind = 'root_directory'
        self.file_id = file_id
        self.parent_id = None
        self.children = {}

    def __cmp__(self, other):
        """Compare root entries by file_id, then by their children dicts."""
        if self is other:
            return 0
        if isinstance(other, RootEntry):
            by_id = cmp(self.file_id, other.file_id)
            if by_id:
                return by_id
            return cmp(self.children, other.children)
        return NotImplemented

248

249

250

251

class Inventory(XMLMixin):
    """Inventory of versioned files in a tree.

    An Inventory acts like a set of InventoryEntry items.  You can
    also look files up by their file_id or name.

    May be read from and written to a metadata file in a tree.  To
    manipulate the inventory (for example to add a file), it is read
    in, modified, and then written back out.

    The inventory represents a typical unix file tree, with
    directories containing files and subdirectories.  We never store
    the full path to a file, because renaming a directory implicitly
    moves all of its contents.  This class internally maintains a
    lookup tree that allows the children under a directory to be
    returned quickly.

    InventoryEntry objects must not be modified after they are
    inserted, other than through the Inventory API.

    >>> inv = Inventory()
    >>> inv.write_xml(sys.stdout)
    <inventory>
    </inventory>
    >>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
    >>> inv['123-123'].name
    'hello.c'

    May be treated as an iterator or set to look up file ids:

    >>> bool(inv.path2id('hello.c'))
    True
    >>> '123-123' in inv
    True

    May also look up by name:

    >>> [x[0] for x in inv.iter_entries()]
    ['hello.c']

    >>> inv.write_xml(sys.stdout)
    <inventory>
    <entry file_id="123-123" kind="file" name="hello.c" />
    </inventory>

    """

    ## TODO: Make sure only canonical filenames are stored.

    ## TODO: Do something sensible about the possible collisions on
    ## case-losing filesystems.  Perhaps we should just always forbid
    ## such collisions.

    ## TODO: No special cases for root, rather just give it a file id
    ## like everything else.

    ## TODO: Probably change XML serialization to use nesting

    def __init__(self):
        """Create an empty inventory.

        The inventory is created with a default root directory, whose
        file id is ROOT_ID.
        """
        self.root = RootEntry(ROOT_ID)
        # Every entry, the root included, indexed by file_id.
        self._byid = {self.root.file_id: self.root}

    def __iter__(self):
        """Iterate over the file ids held, the root's id included."""
        return iter(self._byid)

    def __len__(self):
        """Returns number of entries, counting the root entry."""
        return len(self._byid)

    def iter_entries(self, from_dir=None):
        """Return (path, entry) pairs, in order by name.

        from_dir may be a directory entry or the file_id of one; when
        omitted the walk starts at the root.  The starting directory
        itself is not yielded, and paths are relative to it.
        """
        if from_dir is None:
            assert self.root
            from_dir = self.root
        elif isinstance(from_dir, basestring):
            from_dir = self._byid[from_dir]

        kids = from_dir.children.items()
        kids.sort()
        for name, ie in kids:
            yield name, ie
            if ie.kind == 'directory':
                for cn, cie in self.iter_entries(from_dir=ie.file_id):
                    yield '/'.join((name, cn)), cie

    def directories(self):
        """Return (path, entry) pairs for all directories.

        The root entry comes first, followed by each directory before
        its own subdirectories, siblings in name order.
        """
        def descend(parent_ie):
            parent_name = parent_ie.name
            yield parent_name, parent_ie

            # directory children in sorted order
            dn = [(ie.name, ie)
                  for ie in parent_ie.children.itervalues()
                  if ie.kind == 'directory']
            dn.sort()

            for name, child_ie in dn:
                for sub_name, sub_ie in descend(child_ie):
                    yield appendpath(parent_name, sub_name), sub_ie

        for name, ie in descend(self.root):
            yield name, ie

    def __contains__(self, file_id):
        """True if this entry contains a file with given id.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
        >>> '123' in inv
        True
        >>> '456' in inv
        False
        """
        return file_id in self._byid

    def __getitem__(self, file_id):
        """Return the entry for given file_id.

        Raises BzrError if file_id is None or is not in the inventory.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
        >>> inv['123123'].name
        'hello.c'
        """
        if file_id is None:
            raise BzrError("can't look up file_id None")

        try:
            return self._byid[file_id]
        except KeyError:
            raise BzrError("file_id {%s} not in inventory" % file_id)

    def get_child(self, parent_id, filename):
        """Return the entry called filename in directory parent_id, or None."""
        return self[parent_id].children.get(filename)

    def add(self, entry):
        """Add entry to inventory.

        Bails out if the entry's file_id is already present, if its
        parent_id is not in the inventory, or if the parent already
        has a child of the same name.

        To add a file to a branch ready to be committed, use Branch.add,
        which calls this."""
        if entry.file_id in self._byid:
            bailout("inventory already contains entry with id {%s}" % entry.file_id)

        try:
            parent = self._byid[entry.parent_id]
        except KeyError:
            bailout("parent_id {%s} not in inventory" % entry.parent_id)

        if entry.name in parent.children:
            bailout("%s is already versioned" %
                    appendpath(self.id2path(parent.file_id), entry.name))

        self._byid[entry.file_id] = entry
        parent.children[entry.name] = entry

    def add_path(self, relpath, kind, file_id=None):
        """Add entry from a path.

        When file_id is not given a fresh one is generated from relpath.

        The immediate parent must already be versioned"""
        parts = splitpath(relpath)
        if len(parts) == 0:
            bailout("cannot re-add root of inventory")

        if file_id is None:
            # NOTE(review): relies on bzrlib.branch having been imported
            # elsewhere; this module only does "import bzrlib".
            file_id = bzrlib.branch.gen_file_id(relpath)

        parent_id = self.path2id(parts[:-1])
        # Report an unversioned parent as a normal error instead of an
        # assertion, which would vanish when running with -O.
        if parent_id is None:
            bailout("parent directory of %r is not versioned" % relpath)
        ie = InventoryEntry(file_id, parts[-1],
                            kind=kind, parent_id=parent_id)
        return self.add(ie)

    def __delitem__(self, file_id):
        """Remove entry by id.

        Removing a directory also removes everything below it.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
        >>> '123' in inv
        True
        >>> del inv['123']
        >>> '123' in inv
        False
        """
        ie = self[file_id]

        assert self[ie.parent_id].children[ie.name] == ie

        # TODO: Test deleting all children; maybe hoist to a separate
        # deltree method?
        if ie.kind == 'directory':
            # values() is a list copy, so the recursive deletes may
            # safely shrink ie.children while we loop.
            for cie in ie.children.values():
                del self[cie.file_id]
            del ie.children

        del self._byid[file_id]
        del self[ie.parent_id].children[ie.name]

    def id_set(self):
        """Return the Set of all file ids held, the root's id included."""
        return Set(self._byid)

    def to_element(self):
        """Convert to XML Element"""
        e = Element('inventory')
        e.text = '\n'
        # iter_entries yields a directory before its contents, so a
        # reader can add the elements in document order.
        for path, ie in self.iter_entries():
            e.append(ie.to_element())
        return e

    def from_element(cls, elt):
        """Construct from XML Element

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))
        >>> elt = inv.to_element()
        >>> inv2 = Inventory.from_element(elt)
        >>> inv2 == inv
        True
        """
        assert elt.tag == 'inventory'
        o = cls()
        for e in elt:
            o.add(InventoryEntry.from_element(e))
        return o

    from_element = classmethod(from_element)

    def __cmp__(self, other):
        """Compare two sets by comparing their contents.

        >>> i1 = Inventory()
        >>> i2 = Inventory()
        >>> i1 == i2
        True
        >>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
        >>> i1 == i2
        False
        >>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
        >>> i1 == i2
        True
        """
        if self is other:
            return 0

        if not isinstance(other, Inventory):
            return NotImplemented

        # Different sets of ids: unequal, reported as 1 whichever side
        # has the extra ids.
        if self.id_set() ^ other.id_set():
            return 1

        for file_id in self._byid:
            c = cmp(self[file_id], other[file_id])
            if c: return c

        return 0

    def get_idpath(self, file_id):
        """Return a list of file_ids for the path to an entry.

        The list contains one element for each directory followed by
        the id of the file itself.  So the length of the returned list
        is equal to the depth of the file in the tree, counting the
        root directory as depth 1.
        """
        p = []
        # Walk up through parent_id links, then flip into root-first order.
        while file_id is not None:
            try:
                ie = self._byid[file_id]
            except KeyError:
                bailout("file_id {%s} not found in inventory" % file_id)
            p.append(ie.file_id)
            file_id = ie.parent_id
        p.reverse()
        return p

    def id2path(self, file_id):
        """Return the path to file_id as a '/'-separated string.

        The root itself maps to the empty string.
        """
        # get all names, skipping root
        p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]
        return '/'.join(p)

    def path2id(self, name):
        """Walk down through directories to return the id of the last component.

        name may be either a list of path components, or a single
        string, in which case it is automatically split.

        This returns the file_id of the last component in the path,
        which may be either a file or a directory.

        Returns None iff the path is not found.
        """
        if isinstance(name, types.StringTypes):
            name = splitpath(name)

        mutter("lookup path %r" % name)

        parent = self.root
        for f in name:
            # Only directories carry a children dict; a path that tries
            # to descend through a plain file cannot exist.
            children = getattr(parent, 'children', None)
            if children is None:
                return None
            try:
                cie = children[f]
            except KeyError:
                # or raise an error?
                return None
            assert cie.name == f
            assert cie.parent_id == parent.file_id
            parent = cie

        return parent.file_id

    def has_filename(self, names):
        """True if the path (a string or list of components) is versioned."""
        return bool(self.path2id(names))

    def has_id(self, file_id):
        """True if file_id is in the inventory."""
        return file_id in self._byid

    def rename(self, file_id, new_parent_id, new_name):
        """Move a file within the inventory.

        This can change either the name, or the parent, or both.

        Bails out if new_name is not an acceptable filename, if the
        new parent already has a child of that name, or if the move
        would put a directory inside itself.

        This does not move the working file."""
        if not is_valid_name(new_name):
            bailout("not an acceptable filename: %r" % new_name)

        new_parent = self._byid[new_parent_id]
        if new_name in new_parent.children:
            bailout("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

        new_parent_idpath = self.get_idpath(new_parent_id)
        if file_id in new_parent_idpath:
            bailout("cannot move directory %r into a subdirectory of itself, %r"
                    % (self.id2path(file_id), self.id2path(new_parent_id)))

        file_ie = self._byid[file_id]
        old_parent = self._byid[file_ie.parent_id]

        # TODO: Don't leave things messed up if this fails

        del old_parent.children[file_ie.name]
        new_parent.children[new_name] = file_ie

        file_ie.name = new_name
        file_ie.parent_id = new_parent_id

# One or more characters, none of them a forward slash or a backslash.
_NAME_RE = re.compile(r'^[^/\\]+$')


def is_valid_name(name):
    """Return True if name is non-empty and holds no slash or backslash."""
    found = _NAME_RE.match(name)
    return found is not None

Older »