~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/inventory.py

Committer: Martin Pool
Date: 2005-06-30 08:40:59 UTC
mto: This revision was merged to the branch mainline in revision 852.
Revision ID: mbp@sourcefrog.net-20050630084059-d6eb6cb46972365b

Rename Weave.get_included to inclusions and getiter to get_iter

Refactor annotate() code

files added:
.bzrignore

testsweet.py

testweave.py

tryconvert.py

weave.py

weavefile.py

files removed:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/mdiff.py

bzrlib/newinventory.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/store.py

bzrlib/tests.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/xml.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/faq.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quickref.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

elementtree

elementtree/ElementTree.py

elementtree/__init__.py

notes

notes/new-inventory-sample.xml

notes/performance.txt

setup.py

test.sh

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

Show diffs side-by-side

added added

removed removed

bzrlib/inventory.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Maybe store inventory_id in the file? Not really needed.

# This should really be an id randomly assigned when the tree is

# created, but it's not for now.

ROOT_ID = "TREE_ROOT"

import sys, os.path, types, re

from sets import Set

try:

from cElementTree import Element, ElementTree, SubElement

except ImportError:

from elementtree.ElementTree import Element, ElementTree, SubElement

from xml import XMLMixin

from errors import bailout, BzrError

import bzrlib

from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath

from bzrlib.trace import mutter

class InventoryEntry(XMLMixin):
    """Description of a versioned file.

    An InventoryEntry has the following fields, which are also
    present in the XML inventory-entry element:

    * *file_id*

    * *name*: (only the basename within the directory, must not
      contain slashes)

    * *kind*: "directory" or "file"

    * *parent_id*: file_id of the containing directory.  Entries that
      sit directly in the branch root carry ROOT_ID here; in the XML
      form the attribute is simply left out for them.

    * *text_sha1*: only for files

    * *text_size*: in bytes, only for files

    * *text_id*: identifier for the text version, only for files

    Entries of kind "directory" additionally carry *children*, a dict
    mapping each child's name to its InventoryEntry.

    InventoryEntries can also exist inside a WorkingTree
    inventory, in which case they are not yet bound to a
    particular revision of the file.  In that case the text_sha1,
    text_size and text_id are absent.

    >>> i = Inventory()
    >>> i.path2id('')
    'TREE_ROOT'
    >>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
    >>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
    >>> for j in i.iter_entries():
    ...   print j
    ...
    ('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
    ('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
    >>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
    Traceback (most recent call last):
    ...
    BzrError: ('inventory already contains entry with id {2323}', [])
    >>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
    >>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
    >>> i.path2id('src/wibble')
    '2325'
    >>> '2325' in i
    True
    >>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
    >>> i['2326']
    InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
    >>> for j in i.iter_entries():
    ...     print j[0]
    ...     assert i.path2id(j[0])
    ...
    src
    src/bye.c
    src/hello.c
    src/wibble
    src/wibble/wibble.c
    >>> i.id2path('2326')
    'src/wibble/wibble.c'

    TODO: Maybe also keep the full path of the entry, and the children?
           But those depend on its position within a particular inventory, and
           it would be nice not to need to hold the backpointer here.
    """

    # TODO: split InventoryEntry into subclasses for files,
    # directories, etc etc.

    def __init__(self, file_id, name, kind, parent_id, text_id=None):
        """Create an InventoryEntry

        The filename must be a single component, relative to the
        parent directory; it cannot be a whole path or relative name.

        kind must be 'file' or 'directory'; any other value raises
        BzrError.  text_sha1 and text_size always start out as None.

        >>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
        >>> e.name
        'hello.c'
        >>> e.file_id
        '123'
        >>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
        Traceback (most recent call last):
        BzrError: ("InventoryEntry name is not a simple filename: 'src/hello.c'", [])
        """
        if len(splitpath(name)) != 1:
            bailout('InventoryEntry name is not a simple filename: %r'
                    % name)

        self.file_id = file_id
        self.name = name
        self.kind = kind
        self.text_id = text_id
        self.parent_id = parent_id
        self.text_sha1 = None
        self.text_size = None
        if kind == 'directory':
            # name -> InventoryEntry for everything directly inside
            self.children = {}
        elif kind == 'file':
            pass
        else:
            raise BzrError("unhandled entry kind %r" % kind)

    def sorted_children(self):
        """Return the (name, entry) pairs of this directory, sorted by name.

        Only meaningful for entries that have a ``children`` dict.
        """
        l = self.children.items()
        l.sort()
        return l

    def copy(self):
        """Return a new InventoryEntry carrying the same field values.

        file_id, name, kind, parent_id, text_id, text_sha1 and text_size
        are carried over.  ``children`` is not: the copy of a directory
        starts with an empty children dict.
        """
        other = InventoryEntry(self.file_id, self.name, self.kind,
                               self.parent_id, text_id=self.text_id)
        other.text_sha1 = self.text_sha1
        other.text_size = self.text_size
        return other

    def __repr__(self):
        return ("%s(%r, %r, kind=%r, parent_id=%r)"
                % (self.__class__.__name__,
                   self.file_id,
                   self.name,
                   self.kind,
                   self.parent_id))

    def to_element(self):
        """Convert to XML element"""
        e = Element('entry')

        e.set('name', self.name)
        e.set('file_id', self.file_id)
        e.set('kind', self.kind)

        # Fields that are absent (None) are left out of the element.
        if self.text_size is not None:
            e.set('text_size', '%d' % self.text_size)

        for f in ['text_id', 'text_sha1']:
            v = getattr(self, f)
            if v is not None:
                e.set(f, v)

        # to be conservative, we don't externalize the root pointers
        # for now, leaving them as null in the xml form.  in a future
        # version it will be implied by nested elements.
        if self.parent_id != ROOT_ID:
            assert isinstance(self.parent_id, basestring)
            e.set('parent_id', self.parent_id)

        e.tail = '\n'

        return e

    def from_element(cls, elt):
        """Build an entry from an XML 'entry' element (see to_element).

        A missing parent_id attribute is read as ROOT_ID.  A missing or
        empty text_size attribute leaves text_size as None; otherwise it
        is converted with int().
        """
        assert elt.tag == 'entry'

        ## original format inventories don't have a parent_id for
        ## nodes in the root directory, but it's cleaner to use one
        ## internally.
        parent_id = elt.get('parent_id')
        if parent_id is None:
            parent_id = ROOT_ID

        ie = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
        ie.text_id = elt.get('text_id')
        ie.text_sha1 = elt.get('text_sha1')

        # Never store a non-integer here: to_element formats the value
        # with '%d', which would fail on an empty string.
        v = elt.get('text_size')
        if v:
            ie.text_size = int(v)
        else:
            ie.text_size = None

        return ie

    from_element = classmethod(from_element)

    def __cmp__(self, other):
        """Order entries field by field; 0 means all compared fields match.

        Fields are compared in this order: file_id, name, text_sha1,
        text_size, text_id, parent_id, kind.  ``children`` is not
        compared.
        """
        if self is other:
            return 0
        if not isinstance(other, InventoryEntry):
            return NotImplemented

        return cmp(self.file_id, other.file_id) \
               or cmp(self.name, other.name) \
               or cmp(self.text_sha1, other.text_sha1) \
               or cmp(self.text_size, other.text_size) \
               or cmp(self.text_id, other.text_id) \
               or cmp(self.parent_id, other.parent_id) \
               or cmp(self.kind, other.kind)

229

230

231

232

class RootEntry(InventoryEntry):
    """Entry standing for the top-level directory of an inventory.

    It has kind 'root_directory', an empty name, no parent, and a
    ``children`` dict like any directory entry.
    """

    def __init__(self, file_id):
        # InventoryEntry.__init__ is not called here, so the text_id,
        # text_sha1 and text_size attributes are never set on a root
        # entry.
        self.kind = 'root_directory'
        self.name = ''
        self.parent_id = None
        self.file_id = file_id
        self.children = {}

    def __cmp__(self, other):
        """Compare root entries by file_id, then by their children dicts."""
        if self is other:
            return 0
        if not isinstance(other, RootEntry):
            return NotImplemented
        by_id = cmp(self.file_id, other.file_id)
        if by_id:
            return by_id
        return cmp(self.children, other.children)

247

248

249

250

class Inventory(XMLMixin):
    """Inventory of versioned files in a tree.

    This describes which file_id is present at each point in the tree,
    and possibly the SHA-1 or other information about the file.
    Entries can be looked up either by path or by file_id.

    The inventory represents a typical unix file tree, with
    directories containing files and subdirectories.  We never store
    the full path to a file, because renaming a directory implicitly
    moves all of its contents.  This class internally maintains a
    lookup tree that allows the children under a directory to be
    returned quickly.

    InventoryEntry objects must not be modified after they are
    inserted, other than through the Inventory API.

    >>> inv = Inventory()
    >>> inv.write_xml(sys.stdout)
    <inventory>
    </inventory>
    >>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
    >>> inv['123-123'].name
    'hello.c'

    May be treated as an iterator or set to look up file ids:

    >>> bool(inv.path2id('hello.c'))
    True
    >>> '123-123' in inv
    True

    May also look up by name:

    >>> [x[0] for x in inv.iter_entries()]
    ['hello.c']

    >>> inv.write_xml(sys.stdout)
    <inventory>
    <entry file_id="123-123" kind="file" name="hello.c" />
    </inventory>

    """

    ## TODO: Make sure only canonical filenames are stored.

    ## TODO: Do something sensible about the possible collisions on
    ## case-losing filesystems.  Perhaps we should just always forbid
    ## such collisions.

    ## TODO: No special cases for root, rather just give it a file id
    ## like everything else.

    ## TODO: Probably change XML serialization to use nesting rather
    ## than parent_id pointers.

    ## TODO: Perhaps hold the ElementTree in memory and work directly
    ## on that rather than converting into Python objects every time?

    def __init__(self):
        """Create an empty inventory.

        The inventory is created with a default root directory entry
        (a RootEntry) whose file_id is ROOT_ID.  The root is registered
        in the by-id index like any other entry.
        """
        self.root = RootEntry(ROOT_ID)
        # file_id -> entry, for every entry including the root
        self._byid = {self.root.file_id: self.root}

    def __iter__(self):
        """Iterate over all file_ids in the inventory (root included)."""
        return iter(self._byid)

    def __len__(self):
        """Returns number of entries."""
        return len(self._byid)

    def iter_entries(self, from_dir=None):
        """Return (path, entry) pairs, in order by name.

        from_dir may be None (start at the root), the file_id of a
        directory, or a directory entry.  Paths are relative to
        from_dir and built with os.path.join.  Each directory is
        yielded before its own contents.
        """
        # Identity test: entries define __cmp__, so '== None' would
        # dispatch into it when an entry object is passed.
        if from_dir is None:
            assert self.root
            from_dir = self.root
        elif isinstance(from_dir, basestring):
            from_dir = self._byid[from_dir]

        kids = from_dir.children.items()
        kids.sort()
        for name, ie in kids:
            yield name, ie
            if ie.kind == 'directory':
                for cn, cie in self.iter_entries(from_dir=ie.file_id):
                    yield os.path.join(name, cn), cie

    def directories(self):
        """Return (path, entry) pairs for all directories.

        The root comes first (under its own name, the empty string);
        below each directory the subdirectories follow in name order.
        Paths are built with appendpath.
        """
        def descend(parent_ie):
            parent_name = parent_ie.name
            yield parent_name, parent_ie

            # directory children in sorted order
            dn = []
            for ie in parent_ie.children.itervalues():
                if ie.kind == 'directory':
                    dn.append((ie.name, ie))
            dn.sort()

            for name, child_ie in dn:
                for sub_name, sub_ie in descend(child_ie):
                    yield appendpath(parent_name, sub_name), sub_ie

        for name, ie in descend(self.root):
            yield name, ie

    def __contains__(self, file_id):
        """True if this entry contains a file with given id.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
        >>> '123' in inv
        True
        >>> '456' in inv
        False
        """
        return file_id in self._byid

    def __getitem__(self, file_id):
        """Return the entry for given file_id.

        Raises BzrError if file_id is None or not in the inventory.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
        >>> inv['123123'].name
        'hello.c'
        """
        if file_id is None:
            raise BzrError("can't look up file_id None")

        try:
            return self._byid[file_id]
        except KeyError:
            raise BzrError("file_id {%s} not in inventory" % file_id)

    def get_child(self, parent_id, filename):
        """Return the entry called filename inside directory parent_id.

        Returns None when the directory has no child of that name.
        """
        return self[parent_id].children.get(filename)

    def add(self, entry):
        """Add entry to inventory.

        Bails out if the file_id is already present, if the parent is
        not in the inventory or is not a directory, or if the parent
        already has a child with the same name.

        To add a file to a branch ready to be committed, use Branch.add,
        which calls this."""
        if entry.file_id in self._byid:
            bailout("inventory already contains entry with id {%s}" % entry.file_id)

        try:
            parent = self._byid[entry.parent_id]
        except KeyError:
            bailout("parent_id {%s} not in inventory" % entry.parent_id)

        # Entries of kind 'file' have no children dict; report that
        # cleanly instead of failing with an AttributeError below.
        siblings = getattr(parent, 'children', None)
        if siblings is None:
            bailout("parent_id {%s} is not a directory" % entry.parent_id)

        if entry.name in siblings:
            bailout("%s is already versioned" %
                    appendpath(self.id2path(parent.file_id), entry.name))

        self._byid[entry.file_id] = entry
        siblings[entry.name] = entry

    def add_path(self, relpath, kind, file_id=None):
        """Add entry from a path.

        If file_id is not given, one is generated with
        bzrlib.branch.gen_file_id.

        The immediate parent must already be versioned"""
        parts = splitpath(relpath)
        if len(parts) == 0:
            bailout("cannot re-add root of inventory")

        if file_id is None:
            file_id = bzrlib.branch.gen_file_id(relpath)

        parent_id = self.path2id(parts[:-1])
        # A real check rather than an assert, so that it still happens
        # when Python runs with -O.
        if parent_id is None:
            bailout("parent directory of %r is not versioned" % relpath)
        ie = InventoryEntry(file_id, parts[-1],
                            kind=kind, parent_id=parent_id)
        return self.add(ie)

    def __delitem__(self, file_id):
        """Remove entry by id.

        Removing a directory also removes everything below it.

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
        >>> '123' in inv
        True
        >>> del inv['123']
        >>> '123' in inv
        False
        """
        ie = self[file_id]

        assert self[ie.parent_id].children[ie.name] == ie

        # TODO: Test deleting all children; maybe hoist to a separate
        # deltree method?
        if ie.kind == 'directory':
            # values() gives a separate list, so the recursive deletes
            # may shrink ie.children while we loop.
            for cie in ie.children.values():
                del self[cie.file_id]
            del ie.children

        del self._byid[file_id]
        del self[ie.parent_id].children[ie.name]

    def id_set(self):
        """Return the Set of all file_ids in the inventory."""
        return Set(self._byid)

    def to_element(self):
        """Convert to XML Element"""
        e = Element('inventory')
        e.text = '\n'
        for path, ie in self.iter_entries():
            e.append(ie.to_element())
        return e

    def from_element(cls, elt):
        """Construct from XML Element

        >>> inv = Inventory()
        >>> inv.add(InventoryEntry('foo.c-123981239', 'foo.c', 'file', ROOT_ID))
        >>> elt = inv.to_element()
        >>> inv2 = Inventory.from_element(elt)
        >>> inv2 == inv
        True
        """
        assert elt.tag == 'inventory'
        o = cls()
        for e in elt:
            o.add(InventoryEntry.from_element(e))
        return o

    from_element = classmethod(from_element)

    def __cmp__(self, other):
        """Compare two inventories by comparing their contents.

        Returns 1 whenever the two id sets differ, so the result is
        only dependable as an equality test, not as an ordering.

        >>> i1 = Inventory()
        >>> i2 = Inventory()
        >>> i1 == i2
        True
        >>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
        >>> i1 == i2
        False
        >>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
        >>> i1 == i2
        True
        """
        if self is other:
            return 0

        if not isinstance(other, Inventory):
            return NotImplemented

        if self.id_set() ^ other.id_set():
            return 1

        for file_id in self._byid:
            c = cmp(self[file_id], other[file_id])
            if c: return c

        return 0

    def get_idpath(self, file_id):
        """Return a list of file_ids for the path to an entry.

        The list contains one element for each directory followed by
        the id of the file itself.  So the length of the returned list
        is equal to the depth of the file in the tree, counting the
        root directory as depth 1.
        """
        # Walk upwards collecting ids leaf-first, then flip the list
        # once; this avoids inserting at the front on every step.
        p = []
        while file_id is not None:
            try:
                ie = self._byid[file_id]
            except KeyError:
                bailout("file_id {%s} not found in inventory" % file_id)
            p.append(ie.file_id)
            file_id = ie.parent_id
        p.reverse()
        return p

    def id2path(self, file_id):
        """Return the path to file_id as a string.

        The names of the entries from just below the root down to
        file_id are joined with os.sep; the root itself gives ''.
        """
        # get all names, skipping root
        p = [self[fid].name for fid in self.get_idpath(file_id)[1:]]
        return os.sep.join(p)

    def path2id(self, name):
        """Walk down through directories to return entry of last component.

        names may be either a list of path components, or a single
        string, in which case it is automatically split.

        This returns the file_id of the last component in the path,
        which may be either a file or a directory.

        Returns None iff the path is not found.
        """
        if isinstance(name, basestring):
            name = splitpath(name)

        mutter("lookup path %r" % name)

        parent = self.root
        for f in name:
            # A file has no children dict, so a path that tries to
            # descend through one simply does not exist.
            children = getattr(parent, 'children', None)
            if children is None:
                return None
            try:
                cie = children[f]
            except KeyError:
                # or raise an error?
                return None
            assert cie.name == f
            assert cie.parent_id == parent.file_id
            parent = cie

        return parent.file_id

    def has_filename(self, names):
        """True if the path (string or list of components) is versioned."""
        return self.path2id(names) is not None

    def has_id(self, file_id):
        """True if file_id is present in the inventory."""
        return file_id in self._byid

    def rename(self, file_id, new_parent_id, new_name):
        """Move a file within the inventory.

        This can change either the name, or the parent, or both.

        Bails out if new_name is not an acceptable filename, if the new
        parent is not a directory or already has a child called
        new_name, or if the entry would end up inside itself.

        This does not move the working file."""
        if not is_valid_name(new_name):
            bailout("not an acceptable filename: %r" % new_name)

        new_parent = self._byid[new_parent_id]
        # Entries of kind 'file' have no children dict; report that
        # cleanly instead of failing with an AttributeError below.
        new_siblings = getattr(new_parent, 'children', None)
        if new_siblings is None:
            bailout("cannot move into {%s}: not a directory" % new_parent_id)
        if new_name in new_siblings:
            bailout("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))

        new_parent_idpath = self.get_idpath(new_parent_id)
        if file_id in new_parent_idpath:
            bailout("cannot move directory %r into a subdirectory of itself, %r"
                    % (self.id2path(file_id), self.id2path(new_parent_id)))

        file_ie = self._byid[file_id]
        old_parent = self._byid[file_ie.parent_id]

        # TODO: Don't leave things messed up if this fails

        del old_parent.children[file_ie.name]
        new_siblings[new_name] = file_ie

        file_ie.name = new_name
        file_ie.parent_id = new_parent_id

628

629

630

631

632

# An acceptable name is one or more characters, none of which is a
# forward slash or a backslash.
_NAME_RE = re.compile(r'^[^/\\]+$')


def is_valid_name(name):
    """Tell whether name is usable as a single path component.

    Returns True when name is non-empty and contains neither '/' nor
    a backslash, False otherwise.
    """
    if _NAME_RE.match(name):
        return True
    return False

Older »