~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: Martin Pool
Date: 2005-06-06 05:55:19 UTC
Revision ID: mbp@sourcefrog.net-20050606055519-2fa201b47cefec08

- fix permissions on exported tar/zip files

files added:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest.py

bzrlib/statcache.py

bzrlib/status.py

bzrlib/store.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/whitebox.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/fortune

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

elementtree

elementtree/ElementTree.py

elementtree/__init__.py

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/symlink-support.patch

setup.py

testbzr

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
.bzrignore

knit.py

testknit.py

testsweet.py

woolyweave.py

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# This should really be an id randomly assigned when the tree is

# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import sys, os.path, types, re

try:

from cElementTree import Element, ElementTree, SubElement

except ImportError:

from elementtree.ElementTree import Element, ElementTree, SubElement

from xml import XMLMixin

from errors import bailout, BzrError, BzrCheckError

import bzrlib

from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath

from bzrlib.trace import mutter

class InventoryEntry(XMLMixin):
    """Description of a single versioned file or directory.

    Each entry carries these fields, which are mirrored as attributes
    of the XML ``entry`` element:

    * *file_id*
    * *name*: basename within the containing directory; it must not
      contain slashes
    * *kind*: "directory" or "file"
    * *parent_id*: id of the containing directory (left out of the XML
      form when the parent is the tree root)
    * *text_sha1*: only for files
    * *text_size*: in bytes, only for files
    * *text_id*: identifier for the text version, only for files

    Entries may also live in a WorkingTree inventory, where they are
    not yet bound to a particular revision of the file.  In that case
    text_sha1, text_size and text_id are absent.

    >>> i = Inventory()
    >>> i.path2id('')
    'TREE_ROOT'
    >>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
    >>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
    >>> for j in i.iter_entries():
    ...   print j
    ...
    ('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
    ('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
    >>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
    Traceback (most recent call last):
    ...
    BzrError: ('inventory already contains entry with id {2323}', [])
    >>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
    >>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
    >>> i.path2id('src/wibble')
    '2325'
    >>> '2325' in i
    True
    >>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
    >>> i['2326']
    InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
    >>> for j in i.iter_entries():
    ...     print j[0]
    ...     assert i.path2id(j[0])
    ...
    src
    src/bye.c
    src/hello.c
    src/wibble
    src/wibble/wibble.c
    >>> i.id2path('2326')
    'src/wibble/wibble.c'

    TODO: Maybe also keep the full path of the entry, and the children?
           But those depend on its position within a particular inventory, and
           it would be nice not to need to hold the backpointer here.
    """

    # TODO: split InventoryEntry into subclasses for files,
    # directories, etc etc.

    # Class-level defaults, so entries that are not bound to a text
    # still answer for these attributes.
    text_sha1 = None
    text_size = None

    def __init__(self, file_id, name, kind, parent_id, text_id=None):
        """Create an InventoryEntry

        The name must be one path component relative to the parent
        directory; a whole path or relative name is rejected.

        >>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
        >>> e.name
        'hello.c'
        >>> e.file_id
        '123'
        >>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
        Traceback (most recent call last):
        BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
        """
        # Either flavour of path separator disqualifies the name.
        for separator in ('/', '\\'):
            if separator in name:
                raise BzrCheckError('InventoryEntry name %r is invalid' % name)

        self.file_id = file_id
        self.name = name
        self.kind = kind
        self.text_id = text_id
        self.parent_id = parent_id

        if kind not in ('directory', 'file'):
            raise BzrError("unhandled entry kind %r" % kind)
        if kind == 'directory':
            # Only directories get a name -> entry mapping of children.
            self.children = {}

    def sorted_children(self):
        """Return the (name, entry) pairs of the children, ordered by name."""
        return sorted(self.children.items())

    def copy(self):
        """Return a new entry with the same fields, but without children."""
        duplicate = InventoryEntry(
            self.file_id,
            self.name,
            self.kind,
            self.parent_id,
            text_id=self.text_id,
        )
        duplicate.text_size = self.text_size
        duplicate.text_sha1 = self.text_sha1
        # note that children are *not* copied; they're pulled across when
        # others are added
        return duplicate

    def __repr__(self):
        fields = (self.__class__.__name__,
                  self.file_id,
                  self.name,
                  self.kind,
                  self.parent_id)
        return "%s(%r, %r, kind=%r, parent_id=%r)" % fields

    def to_element(self):
        """Convert to XML element"""
        elt = Element('entry')

        # Attributes that every entry carries.
        for attr in ('name', 'file_id', 'kind'):
            elt.set(attr, getattr(self, attr))

        size = self.text_size
        if size is not None:
            elt.set('text_size', '%d' % size)

        # Optional text attributes are written only when present.
        for attr in ('text_id', 'text_sha1'):
            value = getattr(self, attr)
            if value is not None:
                elt.set(attr, value)

        # to be conservative, we don't externalize the root pointers
        # for now, leaving them as null in the xml form.  in a future
        # version it will be implied by nested elements.
        parent = self.parent_id
        if parent != ROOT_ID:
            assert isinstance(parent, basestring)
            elt.set('parent_id', parent)

        elt.tail = '\n'
        return elt

    def from_element(cls, elt):
        """Build an entry from an XML ``entry`` element."""
        assert elt.tag == 'entry'

        ## original format inventories don't have a parent_id for
        ## nodes in the root directory, but it's cleaner to use one
        ## internally.
        parent_id = elt.get('parent_id')
        if parent_id is None:
            parent_id = ROOT_ID

        entry = cls(elt.get('file_id'),
                    elt.get('name'),
                    elt.get('kind'),
                    parent_id)
        entry.text_id = elt.get('text_id')
        entry.text_sha1 = elt.get('text_sha1')

        ## mutter("read inventoryentry: %r" % (elt.attrib))

        # A missing (or empty) size attribute is stored as found;
        # anything else is converted to an integer.
        raw_size = elt.get('text_size')
        if raw_size:
            entry.text_size = int(raw_size)
        else:
            entry.text_size = raw_size

        return entry

    from_element = classmethod(from_element)

    def __eq__(self, other):
        if not isinstance(other, InventoryEntry):
            return NotImplemented

        # Compare field by field, stopping at the first difference.
        for attr in ('file_id', 'name', 'text_sha1', 'text_size',
                     'text_id', 'parent_id', 'kind'):
            if not (getattr(self, attr) == getattr(other, attr)):
                return False
        return True

    def __ne__(self, other):
        same = (self == other)
        return not same

    def __hash__(self):
        raise ValueError('not hashable')

231

232

233

234

class RootEntry(InventoryEntry):
    """Entry standing for the root directory of an inventory.

    It has an empty name, no parent, the kind 'root_directory', and
    starts with an empty mapping of children.
    """

    def __init__(self, file_id):
        # The base-class constructor is deliberately not called: it
        # rejects any kind other than 'directory' or 'file'.
        self.name = ''
        self.kind = 'root_directory'
        self.parent_id = None
        self.file_id = file_id
        self.children = {}

    def __eq__(self, other):
        """Two roots are equal when their ids and their children match."""
        if isinstance(other, RootEntry):
            if self.file_id == other.file_id:
                return self.children == other.children
            return False
        return NotImplemented

248

249

250

251

class Inventory(XMLMixin):
    """Inventory of versioned files in a tree.

    This describes which file_id is present at each point in the tree,
    and possibly the SHA-1 or other information about the file.
    Entries can be looked up either by path or by file_id.

    The inventory represents a typical unix file tree, with
    directories containing files and subdirectories.  We never store
    the full path to a file, because renaming a directory implicitly
    moves all of its contents.  This class internally maintains a
    lookup tree that allows the children under a directory to be
    returned quickly.

    InventoryEntry objects must not be modified after they are
    inserted, other than through the Inventory API.

    >>> inv = Inventory()
    >>> inv.write_xml(sys.stdout)
    <inventory>
    </inventory>
    >>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
    >>> inv['123-123'].name
    'hello.c'

    May be treated as an iterator or set to look up file ids:

    >>> bool(inv.path2id('hello.c'))
    True
    >>> '123-123' in inv
    True

    May also look up by name:

    >>> [x[0] for x in inv.iter_entries()]
    ['hello.c']

    >>> inv.write_xml(sys.stdout)
    <inventory>
    <entry file_id="123-123" kind="file" name="hello.c" />
    </inventory>

    """
    def __init__(self):
        """Create an empty inventory.

        The inventory is created with a default root directory whose
        file id is ROOT_ID; that root is registered in the by-id index
        like any other entry.
        """
        self.root = RootEntry(ROOT_ID)
        self._byid = {self.root.file_id: self.root}

    def __iter__(self):
        """Iterate over the file ids in the inventory (root included)."""
        return iter(self._byid)

    def __len__(self):
        """Returns number of entries.

        The root entry is held in the same index, so it is counted.
        """
        return len(self._byid)

    def iter_entries(self, from_dir=None):
        """Return (path, entry) pairs, in order by name.

        from_dir may be omitted (start at the root), a file id, or a
        directory entry.  Paths are relative to from_dir, and the
        starting directory itself is not yielded.
        """
        if from_dir is None:
            assert self.root
            from_dir = self.root
        elif isinstance(from_dir, basestring):
            from_dir = self._byid[from_dir]

        for name, ie in sorted(from_dir.children.items()):
            yield name, ie
            if ie.kind == 'directory':
                for cn, cie in self.iter_entries(from_dir=ie.file_id):
                    yield os.path.join(name, cn), cie

    def entries(self):
        """Return list of (path, ie) for all entries except the root.

        This may be faster than iter_entries.
        """
        accum = []

        def descend(dir_ie, dir_path):
            for name, ie in sorted(dir_ie.children.items()):
                child_path = os.path.join(dir_path, name)
                accum.append((child_path, ie))
                if ie.kind == 'directory':
                    descend(ie, child_path)

        descend(self.root, '')
        return accum

    def directories(self):
        """Return (path, entry) pairs for all directories, including the root.
        """
        accum = []

        def descend(parent_ie, parent_path):
            accum.append((parent_path, parent_ie))

            kids = [(ie.name, ie) for ie in parent_ie.children.values()
                    if ie.kind == 'directory']
            kids.sort()

            for name, child_ie in kids:
                child_path = os.path.join(parent_path, name)
                descend(child_ie, child_path)

        descend(self.root, '')
        return accum

    def __contains__(self, file_id):
        """True if this entry contains a file with given id.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
        >>> '123' in inv
        True
        >>> '456' in inv
        False
        """
        return file_id in self._byid

    def __getitem__(self, file_id):
        """Return the entry for given file_id.

        Raises BzrError if the id is None or is not in the inventory.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
        >>> inv['123123'].name
        'hello.c'
        """
        try:
            return self._byid[file_id]
        except KeyError:
            if file_id is None:
                raise BzrError("can't look up file_id None")
            else:
                raise BzrError("file_id {%s} not in inventory" % file_id)

    def get_file_kind(self, file_id):
        """Return the kind of the entry with the given id.

        Unlike __getitem__, an unknown id surfaces as a plain KeyError.
        """
        return self._byid[file_id].kind

    def get_child(self, parent_id, filename):
        """Return the child entry called filename, or None if there is none."""
        return self[parent_id].children.get(filename)

    def add(self, entry):
        """Add entry to inventory.

        To add a file to a branch ready to be committed, use Branch.add,
        which calls this.

        Bails out if the id is already present, if the parent id is not
        in the inventory, if the parent cannot hold children, or if the
        parent already has a child of that name.
        """
        if entry.file_id in self._byid:
            bailout("inventory already contains entry with id {%s}" % entry.file_id)

        try:
            parent = self._byid[entry.parent_id]
        except KeyError:
            bailout("parent_id {%s} not in inventory" % entry.parent_id)

        # File entries have no ``children`` mapping; report that as a
        # normal error instead of crashing with AttributeError.
        siblings = getattr(parent, 'children', None)
        if siblings is None:
            bailout("parent_id {%s} is not a directory" % entry.parent_id)

        if entry.name in siblings:
            bailout("%s is already versioned" %
                    appendpath(self.id2path(parent.file_id), entry.name))

        self._byid[entry.file_id] = entry
        siblings[entry.name] = entry

    def add_path(self, relpath, kind, file_id=None):
        """Add entry from a path.

        The immediate parent must already be versioned; otherwise this
        bails out.  When file_id is not given a new one is generated
        from relpath.
        """
        parts = bzrlib.osutils.splitpath(relpath)
        if len(parts) == 0:
            bailout("cannot re-add root of inventory")

        if file_id is None:
            file_id = bzrlib.branch.gen_file_id(relpath)

        parent_id = self.path2id(parts[:-1])
        # An explicit check rather than an assert: asserts vanish under
        # ``python -O`` and this is validation of caller input.
        if parent_id is None:
            bailout("parent directory of %r is not versioned" % relpath)

        ie = InventoryEntry(file_id, parts[-1],
                            kind=kind, parent_id=parent_id)
        return self.add(ie)

    def __delitem__(self, file_id):
        """Remove entry by id.

        Deleting a directory also deletes everything below it.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
        >>> '123' in inv
        True
        >>> del inv['123']
        >>> '123' in inv
        False
        """
        ie = self[file_id]

        assert self[ie.parent_id].children[ie.name] == ie

        # TODO: Test deleting all children; maybe hoist to a separate
        # deltree method?
        if ie.kind == 'directory':
            # Iterate over a snapshot: each recursive delete removes the
            # child from ie.children.
            for cie in list(ie.children.values()):
                del self[cie.file_id]
            del ie.children

        del self._byid[file_id]
        del self[ie.parent_id].children[ie.name]

    def to_element(self):
        """Convert to XML Element"""
        e = Element('inventory')
        e.text = '\n'
        for path, ie in self.iter_entries():
            e.append(ie.to_element())
        return e

    def from_element(cls, elt):
        """Construct from XML Element

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))
        >>> elt = inv.to_element()
        >>> inv2 = Inventory.from_element(elt)
        >>> inv2 == inv
        True
        """
        assert elt.tag == 'inventory'
        o = cls()
        for e in elt:
            o.add(InventoryEntry.from_element(e))
        return o

    from_element = classmethod(from_element)

    def __eq__(self, other):
        """Compare two sets by comparing their contents.

        >>> i1 = Inventory()
        >>> i2 = Inventory()
        >>> i1 == i2
        True
        >>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
        >>> i1 == i2
        False
        >>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
        >>> i1 == i2
        True
        """
        if not isinstance(other, Inventory):
            return NotImplemented

        if len(self._byid) != len(other._byid):
            # shortcut: obviously not the same
            return False

        return self._byid == other._byid

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        raise ValueError('not hashable')

    def get_idpath(self, file_id):
        """Return a list of file_ids for the path to an entry.

        The list contains one element for each directory followed by
        the id of the file itself.  So the length of the returned list
        is equal to the depth of the file in the tree, counting the
        root directory as depth 1.
        """
        # Walk up through the parents, then flip the list, rather than
        # inserting at the front on every step.
        p = []
        while file_id is not None:
            try:
                ie = self._byid[file_id]
            except KeyError:
                bailout("file_id {%s} not found in inventory" % file_id)
            p.append(ie.file_id)
            file_id = ie.parent_id
        p.reverse()
        return p

    def id2path(self, file_id):
        """Return the path to file_id as a string.

        The path is relative to the root of the inventory and its
        components are joined with os.sep; the root itself gives ''.
        """
        # get all names, skipping root
        p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]
        return os.sep.join(p)

    def path2id(self, name):
        """Walk down through directories to return entry of last component.

        names may be either a list of path components, or a single
        string, in which case it is automatically split.

        This returns the id of the last component in the path,
        which may be either a file or a directory.

        Returns None iff the path is not found.
        """
        if isinstance(name, basestring):
            name = splitpath(name)

        mutter("lookup path %r" % name)

        parent = self.root
        for f in name:
            # Only directories have a ``children`` mapping.  A path that
            # tries to descend through a file is simply not found.
            children = getattr(parent, 'children', None)
            if children is None:
                return None
            try:
                cie = children[f]
            except KeyError:
                # or raise an error?
                return None
            assert cie.name == f
            assert cie.parent_id == parent.file_id
            parent = cie

        return parent.file_id

    def has_filename(self, names):
        """True if the given path (string or list of components) is found."""
        return bool(self.path2id(names))

    def has_id(self, file_id):
        """True if an entry with the given id is in the inventory."""
        return file_id in self._byid

    def rename(self, file_id, new_parent_id, new_name):
        """Move a file within the inventory.

        This can change either the name, or the parent, or both.

        This does not move the working file.

        Bails out if new_name is not an acceptable filename, if the new
        parent already has a child called new_name, or if the entry
        would be moved into itself or one of its own subdirectories.
        """
        if not is_valid_name(new_name):
            bailout("not an acceptable filename: %r" % new_name)

        new_parent = self._byid[new_parent_id]
        if new_name in new_parent.children:
            bailout("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

        new_parent_idpath = self.get_idpath(new_parent_id)
        if file_id in new_parent_idpath:
            bailout("cannot move directory %r into a subdirectory of itself, %r"
                    % (self.id2path(file_id), self.id2path(new_parent_id)))

        file_ie = self._byid[file_id]
        old_parent = self._byid[file_ie.parent_id]

        # TODO: Don't leave things messed up if this fails

        del old_parent.children[file_ie.name]
        new_parent.children[new_name] = file_ie

        file_ie.name = new_name
        file_ie.parent_id = new_parent_id

628

629

630

631

632

# A usable entry name is non-empty and free of both kinds of slash.
_NAME_RE = re.compile(r'^[^/\\]+$')


def is_valid_name(name):
    """Return True if name is acceptable as a single path component.

    The name must contain at least one character and neither a forward
    slash nor a backslash.
    """
    matched = _NAME_RE.match(name)
    return matched is not None

Older »