~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/bundle/read_bundle.py

Committer: Martin Pool
Date: 2006-06-10 23:16:19 UTC
mfrom: (1759 +trunk)
mto: This revision was merged to the branch mainline in revision 1761.
Revision ID: mbp@sourcefrog.net-20060610231619-05b997deeb005d02

[merge] bzr.dev

files added:
bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/read_bundle.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v07.py

bzrlib/patches.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_urlutils.py

bzrlib/urlutils.py

files removed:
bzrlib/util/configobj/validate.py

patience-test.py

files modified:
BRANCH.TODO

HACKING

NEWS

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patiencediff.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/sign_my_commits.py

bzrlib/store/__init__.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_store.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_xml.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/weave.py

bzrlib/workingtree.py

bzrlib/xml5.py

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/bundle/read_bundle.py

#!/usr/bin/env python

"""\

Read in a bundle stream, and process it into a BundleReader object.

"""

import base64

from cStringIO import StringIO

import os

import pprint

from bzrlib.errors import (TestamentMismatch, BzrError,

MalformedHeader, MalformedPatches, NotABundle)

from bzrlib.bundle.common import get_header, header_str

from bzrlib.inventory import (Inventory, InventoryEntry,

InventoryDirectory, InventoryFile,

InventoryLink)

from bzrlib.osutils import sha_file, sha_string

from bzrlib.revision import Revision, NULL_REVISION

from bzrlib.testament import StrictTestament

from bzrlib.trace import mutter, warning

from bzrlib.tree import Tree

from bzrlib.xml5 import serializer_v5

class RevisionInfo(object):
    """Per-revision metadata collected while a bundle is being read.

    Every field except ``revision_id`` starts out as None and is filled
    in later, once the matching header key has been parsed.
    """

    def __init__(self, revision_id):
        self.revision_id = revision_id
        self.sha1 = None
        self.committer = None
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None
        self.parent_ids = None
        self.base_id = None
        self.message = None
        self.properties = None
        self.tree_actions = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a Revision object from the collected fields.

        ``timestamp`` is converted with float(), ``timezone`` with int()
        and the ``message`` lines are joined with newlines.

        :return: The new Revision.
        :raises MalformedHeader: if a property entry is neither of the
            form ``key: value`` nor ``key:``.
        """
        rev = Revision(revision_id=self.revision_id,
                       committer=self.committer,
                       timestamp=float(self.timestamp),
                       timezone=int(self.timezone),
                       inventory_sha1=self.inventory_sha1,
                       message='\n'.join(self.message))

        if self.parent_ids:
            rev.parent_ids.extend(self.parent_ids)

        if self.properties:
            for prop in self.properties:
                key_end = prop.find(': ')
                if key_end == -1:
                    # find() signals "not found" with -1, never None.
                    # Accept a bare "key:" (empty value whose trailing
                    # space was stripped); reject anything else instead
                    # of slicing the entry at the wrong place.
                    if not prop.endswith(':'):
                        raise MalformedHeader(
                            'Revision property %r is not of the form'
                            ' "key: value"' % (prop,))
                    key = prop[:-1].encode('utf-8')
                    value = ''
                else:
                    key = prop[:key_end].encode('utf-8')
                    value = prop[key_end+2:].encode('utf-8')
                rev.properties[key] = value

        return rev

class BundleInfo(object):
    """Bundle-wide meta information.

    Holds what is needed to recreate the revision or inventory XML.
    """

    def __init__(self):
        # Bundle-level values; complete_info() copies them into revisions
        # that did not specify their own.
        self.committer = None
        self.date = None
        self.message = None
        self.timestamp = None
        self.timezone = None

        # RevisionInfo objects, in the order they were read.
        self.revisions = []

        # Real Revision objects; (re)built by complete_info().
        self.real_revisions = []

    def __str__(self):
        return pprint.pformat(vars(self))

    def complete_info(self):
        """Fill in missing per-revision values and build real_revisions.

        A revision without a timestamp gets one from its own date, or
        else the bundle-level timestamp/timezone.  A missing message or
        committer is taken from the bundle-level value when that is set.
        """
        from bzrlib.bundle.common import unpack_highres_date

        if self.date and not self.timestamp:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        completed = []
        self.real_revisions = completed
        for info in self.revisions:
            if info.timestamp is None:
                if info.date is None:
                    info.timestamp = self.timestamp
                    info.timezone = self.timezone
                else:
                    info.timestamp, info.timezone = \
                        unpack_highres_date(info.date)
            if info.message is None and self.message:
                info.message = self.message
            if info.committer is None and self.committer:
                info.committer = self.committer
            completed.append(info.as_revision())

    def get_base(self, revision):
        """Return the base revision id for revision, or None.

        An explicit base_id wins (NULL_REVISION maps to None); otherwise
        the last parent is used, and None when there are no parents.
        """
        explicit = self.get_revision_info(revision.revision_id).base_id
        if explicit is not None:
            if explicit == NULL_REVISION:
                return None
            return explicit
        if len(revision.parent_ids) == 0:
            # No base listed and no parent either, so this is probably
            # against the empty tree and the base truly is None.
            return None
        return revision.parent_ids[-1]

    def _get_target(self):
        """Return the target revision."""
        for group in (self.real_revisions, self.revisions):
            if len(group) > 0:
                return group[0].revision_id
        return None

    target = property(_get_target, doc='The target revision id')

    def get_revision(self, revision_id):
        """Return the real Revision with this id, or raise KeyError."""
        for candidate in self.real_revisions:
            if candidate.revision_id != revision_id:
                continue
            return candidate
        raise KeyError(revision_id)

    def get_revision_info(self, revision_id):
        """Return the RevisionInfo with this id, or raise KeyError."""
        for candidate in self.revisions:
            if candidate.revision_id != revision_id:
                continue
            return candidate
        raise KeyError(revision_id)

154

155

156

class BundleReader(object):

157

"""This class reads in a bundle from a file, and returns

158

a Bundle object, which can then be applied against a tree.

159

"""

160

def __init__(self, from_file):
    """Parse a complete bundle out of from_file.

    :param from_file: A file-like object (must have iterator support).
    """
    object.__init__(self)
    # A single line of look-ahead is kept here; see _next().
    self._next_line = None
    self.from_file = iter(from_file)
    self.info = BundleInfo()

    # The actual inventory ids are put in the footer so the patch is
    # easier for humans to read.  The price is that everything has to
    # be read before a proper bundle can be created.
    self._read()
    self._validate()

176

177

def _read(self):
    """Read the header, then revision sections until input runs out.

    Each section is a revision header, its patches and its footer.
    Reading stops as soon as there is no look-ahead line left.
    """
    self._read_header()
    while True:
        if self._next_line is None:
            return
        self._read_revision_header()
        if self._next_line is None:
            return
        self._read_patches()
        self._read_footer()

185

186

def _validate(self):

187

"""Make sure that the information read in makes sense

188

and passes appropriate checksums.

189

"""

190

# Fill in all the missing blanks for the revisions

191

# and generate the real_revisions list.

192

self.info.complete_info()

193

194

def _validate_revision(self, inventory, revision_id):
    """Check that a revision's testament matches the bundle's sha1.

    :param inventory: Inventory used to build the StrictTestament.
    :param revision_id: Id of the revision to check.
    :raises TestamentMismatch: if the computed sha1 differs from the
        one recorded in the bundle for this revision.
    """
    rev = self.info.get_revision(revision_id)
    rev_info = self.info.get_revision_info(revision_id)
    assert rev.revision_id == rev_info.revision_id
    assert rev.revision_id == revision_id
    sha1 = StrictTestament(rev, inventory).as_sha1()
    if sha1 != rev_info.sha1:
        raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
    # The old "given twice" check ran against a dict created empty on
    # every call, so it could never fire; it has been removed.

211

212

def _validate_references_from_repository(self, repository):
    """Validate bundle hashes against revisions the repository has.

    For each bundle revision that repository.has_revision() reports,
    the testament sha1 and the inventory sha1 recorded in the bundle
    are compared with the local values.

    :param repository: Object providing has_revision() and
        get_inventory_sha1(); also passed to
        StrictTestament.from_revision().
    :raises BzrError: on any sha1 mismatch, or when one revision id is
        recorded with two different hashes.
    """
    rev_to_sha = {}
    inv_to_sha = {}

    def add_sha(d, revision_id, sha1):
        if revision_id is None:
            if sha1 is not None:
                raise BzrError('A Null revision should always'
                               ' have a null sha1 hash')
            return
        if revision_id in d:
            # This really should have been validated already, but
            # do it again.
            if sha1 != d[revision_id]:
                raise BzrError('** Revision %r referenced with 2 different'
                               ' sha hashes %s != %s' % (revision_id,
                                                         sha1,
                                                         d[revision_id]))
        else:
            d[revision_id] = sha1

    # Revisions contained in the bundle itself; these are never
    # reported as missing below.
    checked = {}
    for rev_info in self.info.revisions:
        checked[rev_info.revision_id] = True
        add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

    # zip() is kept so that only revisions which also have a real
    # Revision counterpart contribute an inventory hash.
    for (_rev, rev_info) in zip(self.info.real_revisions,
                                self.info.revisions):
        add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)

    count = 0
    missing = {}
    for revision_id, sha1 in rev_to_sha.iteritems():
        if repository.has_revision(revision_id):
            testament = StrictTestament.from_revision(repository,
                                                      revision_id)
            local_sha1 = testament.as_sha1()
            if sha1 != local_sha1:
                raise BzrError('sha1 mismatch. For revision id {%s}'
                               ' local: %s, bundle: %s'
                               % (revision_id, local_sha1, sha1))
            count += 1
        elif revision_id not in checked:
            missing[revision_id] = sha1

    for inv_id, sha1 in inv_to_sha.iteritems():
        if repository.has_revision(inv_id):
            # Note: get_inventory_sha1() just returns the value that
            # is stored in the revision text, and that value may be out
            # of date.  So this does not validate the actual inventory
            # text, only that the string was written and read back.
            local_sha1 = repository.get_inventory_sha1(inv_id)
            if sha1 != local_sha1:
                raise BzrError('sha1 mismatch. For inventory id {%s}'
                               ' local: %s, bundle: %s'
                               % (inv_id, local_sha1, sha1))
            count += 1

    if missing:
        # I don't know if this is an error yet
        warning('Not all revision hashes could be validated.'
                ' Unable to validate %d hashes' % len(missing))
    mutter('Verified %d sha hashes for the bundle.' % count)

280

281

def _validate_inventory(self, inv, revision_id):
    """Compare the serialised inventory's sha1 with the revision's.

    On a mismatch the serialised inventory is dumped to the file
    ',,bogus-inv' in the current directory and a warning is logged; no
    exception is raised.

    :param inv: The inventory built from the BundleTree; must not be
        None.
    :param revision_id: Id of the revision the inventory belongs to.
    """
    assert inv is not None

    # Now we should have a complete inventory entry.
    s = serializer_v5.write_inventory_to_string(inv)
    sha1 = sha_string(s)
    rev = self.info.get_revision(revision_id)
    assert rev.revision_id == revision_id
    if sha1 != rev.inventory_sha1:
        # Close the dump file explicitly instead of leaking the handle.
        dump = open(',,bogus-inv', 'wb')
        try:
            dump.write(s)
        finally:
            dump.close()
        warning('Inventory sha hash mismatch for revision %s. %s'
                ' != %s' % (revision_id, sha1, rev.inventory_sha1))

298

299

def get_bundle(self, repository):
    """Return the bundle info together with the target's BundleTree.

    :param repository: Passed through to revision_tree().
    :return: (info, tree) where tree is the revision tree for
        self.info.target.
    """
    info = self.info
    tree = self.revision_tree(repository, info.target)
    return info, tree

304

305

def revision_tree(self, repository, revision_id, base=None):
    """Build and validate the BundleTree for revision_id.

    :param repository: Supplies the base tree via revision_tree() and
        is used to validate referenced revisions.
    :param revision_id: Id of a revision contained in this bundle.
    :param base: Ignored.  The base is always recomputed from the
        bundle info with get_base(); the parameter is kept so existing
        callers keep working.
    :return: The populated BundleTree.
    """
    revision = self.info.get_revision(revision_id)
    base = self.info.get_base(revision)
    assert base != revision_id
    self._validate_references_from_repository(repository)
    bundle_tree = BundleTree(repository.revision_tree(base), revision_id)
    self._update_tree(bundle_tree, revision_id)

    inv = bundle_tree.inventory
    self._validate_inventory(inv, revision_id)
    self._validate_revision(inv, revision_id)

    return bundle_tree

321

322

def _next(self):

323

"""yield the next line, but secretly

324

keep 1 extra line for peeking.

325

"""

326

for line in self.from_file:

327

last = self._next_line

328

self._next_line = line

329

if last is not None:

330

#mutter('yielding line: %r' % last)

331

yield last

332

last = self._next_line

333

self._next_line = None

334

#mutter('yielding line: %r' % last)

335

yield last

336

337

def _read_header(self):
    """Read the bzr header.

    Lines are skipped until one that looks like the bundle header is
    seen; the lines after it must then match the remaining entries
    returned by get_header().

    :raises MalformedHeader: if a header-like line does not match, or
        a line following the header is wrong.
    :raises NotABundle: if no complete header is found, including when
        the input is empty.
    """
    header = get_header()
    found = False
    for line in self._next():
        if line is None:
            # _next() yields None when there was no line to hand out
            # (empty input).  Report that as "not a bundle" rather
            # than failing on None.startswith below.
            raise NotABundle('Did not find an opening header')
        if found:
            # not all mailers will keep trailing whitespace
            if line == '#\n':
                line = '# \n'
            if (not line.startswith('# ') or not line.endswith('\n')
                    or line[2:-1].decode('utf-8') != header[0]):
                raise MalformedHeader('Found a header, but it'
                                      ' was improperly formatted')
            header.pop(0) # We read this line.
            if not header:
                break # We found everything.
        elif (line.startswith('#') and line.endswith('\n')):
            line = line[1:-1].strip().decode('utf-8')
            if line[:len(header_str)] == header_str:
                if line == header[0]:
                    found = True
                else:
                    raise MalformedHeader('Found what looks like'
                                          ' a header, but did not match')
                header.pop(0)
    else:
        # The loop ran out of lines without consuming the whole header.
        raise NotABundle('Did not find an opening header')

364

365

def _read_revision_header(self):
    """Start a new RevisionInfo and read its header entries.

    The header is terminated by a blank line (one that does not start
    with '#') or by the end of input.
    """
    self.info.revisions.append(RevisionInfo(None))
    for line in self._next():
        if line is not None and line != '\n':
            self._handle_next(line)
            continue
        break

373

374

def _read_next_entry(self, line, indent=1):
    """Parse one '#'-prefixed header line into a (key, value) pair.

    :param line: The raw line, including the leading '#' and the
        trailing newline.
    :param indent: Number of spaces expected after the '#'.
    :return: (key, value).  Spaces in the key are replaced by
        underscores.  When nothing follows the colon the value is the
        list returned by _read_many().  (None, None) for a blank line.
    :raises MalformedHeader: if the line does not start with '#' or has
        no colon.
    """
    if not line.startswith('#'):
        raise MalformedHeader('Bzr header did not start with #')
    text = line[1:-1].decode('utf-8') # Remove the '#' and '\n'
    padding = ' ' * indent
    if text[:indent] == padding:
        text = text[indent:]
    if not text:
        return None, None # Ignore blank lines

    nested = indent + 2
    sep = text.find(': ')
    if sep == -1:
        if text[-1:] != ':':
            raise MalformedHeader('While looking for key: value pairs,'
                                  ' did not find the colon %r' % (text))
        key = text[:-1]
        value = self._read_many(indent=nested)
    else:
        key = text[:sep]
        value = text[sep+2:] or self._read_many(indent=nested)

    return key.replace(' ', '_'), value

401

402

def _handle_next(self, line):
    """Store the key/value pair from line on the current RevisionInfo.

    The key must name an existing attribute of the last RevisionInfo
    whose value is still None.

    :raises MalformedHeader: for an unknown key, or a key whose
        attribute has already been set.
    """
    if line is None:
        return
    key, value = self._read_next_entry(line, indent=1)
    mutter('_handle_next %r => %r' % (key, value))
    if key is None:
        return

    target = self.info.revisions[-1]
    if not hasattr(target, key):
        # What do we do with a key we don't recognize
        raise MalformedHeader('Unknown Key: "%s"' % key)
    if getattr(target, key) is not None:
        raise MalformedHeader('Duplicated Key: %s' % key)
    setattr(target, key, value)

419

420

def _read_many(self, indent):

421

"""If a line ends with no entry, that means that it should be

422

followed with multiple lines of values.

423

424

This detects the end of the list, because it will be a line that

425

does not start properly indented.

426

"""

427

values = []

428

start = '#' + (' '*indent)

429

430

if self._next_line is None or self._next_line[:len(start)] != start:

431

return values

432

433

for line in self._next():

434

values.append(line[len(start):-1].decode('utf-8'))

435

if self._next_line is None or self._next_line[:len(start)] != start:

436

break

437

return values

438

439

def _read_one_patch(self):

440

"""Read in one patch, return the complete patch, along with

441

the next line.

442

443

:return: action, lines, do_continue

444

"""

445

#mutter('_read_one_patch: %r' % self._next_line)

446

# Peek and see if there are no patches

447

if self._next_line is None or self._next_line.startswith('#'):

448

return None, [], False

449

450

first = True

451

lines = []

452

for line in self._next():

453

if first:

454

if not line.startswith('==='):

455

raise MalformedPatches('The first line of all patches'

456

' should be a bzr meta line "==="'

457

': %r' % line)

458

action = line[4:-1].decode('utf-8')

459

elif line.startswith('... '):

460

action += line[len('... '):-1].decode('utf-8')

461

462

if (self._next_line is not None and

463

self._next_line.startswith('===')):

464

return action, lines, True

465

elif self._next_line is None or self._next_line.startswith('#'):

466

return action, lines, False

467

468

if first:

469

first = False

470

elif not line.startswith('... '):

471

lines.append(line)

472

473

return action, lines, False

474

475

def _read_patches(self):

476

do_continue = True

477

revision_actions = []

478

while do_continue:

479

action, lines, do_continue = self._read_one_patch()

480

if action is not None:

481

revision_actions.append((action, lines))

482

assert self.info.revisions[-1].tree_actions is None

483

self.info.revisions[-1].tree_actions = revision_actions

484

485

def _read_footer(self):

486

"""Read the rest of the meta information.

487

488

:param first_line: The previous step iterates past what it

489

can handle. That extra line is given here.

490

"""

491

for line in self._next():

492

self._handle_next(line)

493

if not self._next_line.startswith('#'):

494

self._next().next()

495

break

496

if self._next_line is None:

497

break

498

499

def _update_tree(self, bundle_tree, revision_id):

500

"""This fills out a BundleTree based on the information

501

that was read in.

502

503

:param bundle_tree: A BundleTree to update with the new information.

504

"""

505

506

def get_rev_id(last_changed, path, kind):

507

if last_changed is not None:

508

changed_revision_id = last_changed.decode('utf-8')

509

else:

510

changed_revision_id = revision_id

511

bundle_tree.note_last_changed(path, changed_revision_id)

512

return changed_revision_id

513

514

def extra_info(info, new_path):

515

last_changed = None

516

encoding = None

517

for info_item in info:

518

try:

519

name, value = info_item.split(':', 1)

520

except ValueError:

521

raise 'Value %r has no colon' % info_item

522

if name == 'last-changed':

523

last_changed = value

524

elif name == 'executable':

525

assert value in ('yes', 'no'), value

526

val = (value == 'yes')

527

bundle_tree.note_executable(new_path, val)

528

elif name == 'target':

529

bundle_tree.note_target(new_path, value)

530

elif name == 'encoding':

531

encoding = value

532

return last_changed, encoding

533

534

def do_patch(path, lines, encoding):

535

if encoding is not None:

536

assert encoding == 'base64'

537

patch = base64.decodestring(''.join(lines))

538

else:

539

patch = ''.join(lines)

540

bundle_tree.note_patch(path, patch)

541

542

def renamed(kind, extra, lines):

543

info = extra.split(' // ')

544

if len(info) < 2:

545

raise BzrError('renamed action lines need both a from and to'

546

': %r' % extra)

547

old_path = info[0]

548

if info[1].startswith('=> '):

549

new_path = info[1][3:]

550

else:

551

new_path = info[1]

552

553

bundle_tree.note_rename(old_path, new_path)

554

last_modified, encoding = extra_info(info[2:], new_path)

555

revision = get_rev_id(last_modified, new_path, kind)

556

if lines:

557

do_patch(new_path, lines, encoding)

558

559

def removed(kind, extra, lines):

560

info = extra.split(' // ')

561

if len(info) > 1:

562

# TODO: in the future we might allow file ids to be

563

# given for removed entries

564

raise BzrError('removed action lines should only have the path'

565

': %r' % extra)

566

path = info[0]

567

bundle_tree.note_deletion(path)

568

569

def added(kind, extra, lines):

570

info = extra.split(' // ')

571

if len(info) <= 1:

572

raise BzrError('add action lines require the path and file id'

573

': %r' % extra)

574

elif len(info) > 5:

575

raise BzrError('add action lines have fewer than 5 entries.'

576

': %r' % extra)

577

path = info[0]

578

if not info[1].startswith('file-id:'):

579

raise BzrError('The file-id should follow the path for an add'

580

': %r' % extra)

581

file_id = info[1][8:]

582

583

bundle_tree.note_id(file_id, path, kind)

584

# this will be overridden in extra_info if executable is specified.

585

bundle_tree.note_executable(path, False)

586

last_changed, encoding = extra_info(info[2:], path)

587

revision = get_rev_id(last_changed, path, kind)

588

if kind == 'directory':

589

return

590

do_patch(path, lines, encoding)

591

592

def modified(kind, extra, lines):

593

info = extra.split(' // ')

594

if len(info) < 1:

595

raise BzrError('modified action lines have at least'

596

'the path in them: %r' % extra)

597

path = info[0]

598

599

last_modified, encoding = extra_info(info[1:], path)

600

revision = get_rev_id(last_modified, path, kind)

601

if lines:

602

do_patch(path, lines, encoding)

603

604

valid_actions = {

605

'renamed':renamed,

606

'removed':removed,

607

'added':added,

608

'modified':modified

609

}

610

for action_line, lines in \

611

self.info.get_revision_info(revision_id).tree_actions:

612

first = action_line.find(' ')

613

if first == -1:

614

raise BzrError('Bogus action line'

615

' (no opening space): %r' % action_line)

616

second = action_line.find(' ', first+1)

617

if second == -1:

618

raise BzrError('Bogus action line'

619

' (missing second space): %r' % action_line)

620

action = action_line[:first]

621

kind = action_line[first+1:second]

622

if kind not in ('file', 'directory', 'symlink'):

623

raise BzrError('Bogus action line'

624

' (invalid object kind %r): %r' % (kind, action_line))

625

extra = action_line[second+1:]

626

627

if action not in valid_actions:

628

raise BzrError('Bogus action line'

629

' (unrecognized action): %r' % action_line)

630

valid_actions[action](kind, extra, lines)

631

632

633

class BundleTree(Tree):

634

def __init__(self, base_tree, revision_id):

635

self.base_tree = base_tree

636

self._renamed = {} # Mapping from old_path => new_path

637

self._renamed_r = {} # new_path => old_path

638

self._new_id = {} # new_path => new_id

639

self._new_id_r = {} # new_id => new_path

640

self._kinds = {} # new_id => kind

641

self._last_changed = {} # new_id => revision_id

642

self._executable = {} # new_id => executable value

643

self.patches = {}

644

self._targets = {} # new path => new symlink target

645

self.deleted = []

646

self.contents_by_id = True

647

self.revision_id = revision_id

648

self._inventory = None

649

650

def __str__(self):

651

return pprint.pformat(self.__dict__)

652

653

def note_rename(self, old_path, new_path):

654

"""A file/directory has been renamed from old_path => new_path"""

655

assert not self._renamed.has_key(new_path)

656

assert not self._renamed_r.has_key(old_path)

657

self._renamed[new_path] = old_path

658

self._renamed_r[old_path] = new_path

659

660

def note_id(self, new_id, new_path, kind='file'):

661

"""Files that don't exist in base need a new id."""

662

self._new_id[new_path] = new_id

663

self._new_id_r[new_id] = new_path

664

self._kinds[new_id] = kind

665

666

def note_last_changed(self, file_id, revision_id):

667

if (self._last_changed.has_key(file_id)

668

and self._last_changed[file_id] != revision_id):

669

raise BzrError('Mismatched last-changed revision for file_id {%s}'

670

': %s != %s' % (file_id,

671

self._last_changed[file_id],

672

revision_id))

673

self._last_changed[file_id] = revision_id

674

675

def note_patch(self, new_path, patch):

676

"""There is a patch for a given filename."""

677

self.patches[new_path] = patch

678

679

def note_target(self, new_path, target):

680

"""The symlink at the new path has the given target"""

681

self._targets[new_path] = target

682

683

def note_deletion(self, old_path):

684

"""The file at old_path has been deleted."""

685

self.deleted.append(old_path)

686

687

def note_executable(self, new_path, executable):

688

self._executable[new_path] = executable

689

690

def old_path(self, new_path):

691

"""Get the old_path (path in the base_tree) for the file at new_path"""

692

assert new_path[:1] not in ('\\', '/')

693

old_path = self._renamed.get(new_path)

694

if old_path is not None:

695

return old_path

696

dirname,basename = os.path.split(new_path)

697

# dirname is not '' doesn't work, because

698

# dirname may be a unicode entry, and is

699

# requires the objects to be identical

700

if dirname != '':

701

old_dir = self.old_path(dirname)

702

if old_dir is None:

703

old_path = None

704

else:

705

old_path = os.path.join(old_dir, basename)

706

else:

707

old_path = new_path

708

#If the new path wasn't in renamed, the old one shouldn't be in

709

#renamed_r

710

if self._renamed_r.has_key(old_path):

711

return None

712

return old_path

713

714

def new_path(self, old_path):

715

"""Get the new_path (path in the target_tree) for the file at old_path

716

in the base tree.

717

"""

718

assert old_path[:1] not in ('\\', '/')

719

new_path = self._renamed_r.get(old_path)

720

if new_path is not None:

721

return new_path

722

if self._renamed.has_key(new_path):

723

return None

724

dirname,basename = os.path.split(old_path)

725

if dirname != '':

726

new_dir = self.new_path(dirname)

727

if new_dir is None:

728

new_path = None

729

else:

730

new_path = os.path.join(new_dir, basename)

731

else:

732

new_path = old_path

733

#If the old path wasn't in renamed, the new one shouldn't be in

734

#renamed_r

735

if self._renamed.has_key(new_path):

736

return None

737

return new_path

738

739

def path2id(self, path):

740

"""Return the id of the file present at path in the target tree."""

741

file_id = self._new_id.get(path)

742

if file_id is not None:

743

return file_id

744

old_path = self.old_path(path)

745

if old_path is None:

746

return None

747

if old_path in self.deleted:

748

return None

749

if hasattr(self.base_tree, 'path2id'):

750

return self.base_tree.path2id(old_path)

751

else:

752

return self.base_tree.inventory.path2id(old_path)

753

754

def id2path(self, file_id):

755

"""Return the new path in the target tree of the file with id file_id"""

756

path = self._new_id_r.get(file_id)

757

if path is not None:

758

return path

759

old_path = self.base_tree.id2path(file_id)

760

if old_path is None:

761

return None

762

if old_path in self.deleted:

763

return None

764

return self.new_path(old_path)

765

766

def old_contents_id(self, file_id):

767

"""Return the id in the base_tree for the given file_id.

768

Return None if the file did not exist in base.

769

"""

770

if self.contents_by_id:

771

if self.base_tree.has_id(file_id):

772

return file_id

773

else:

774

return None

775

new_path = self.id2path(file_id)

776

return self.base_tree.path2id(new_path)

777

778

def get_file(self, file_id):
    """Return a file-like object containing the new contents of the
    file given by file_id.

    Without a patch for the file, the base tree's file object is
    returned (or an empty StringIO for a directory that is not in the
    base).  With a patch, the result of patched_file() is returned.

    TODO: It might be nice if this actually generated an entry
          in the text-store, so that the file contents would
          then be cached.
    """
    base_id = self.old_contents_id(file_id)
    patch_original = None
    if base_id is not None:
        patch_original = self.base_tree.get_file(base_id)

    file_patch = self.patches.get(self.id2path(file_id))
    if file_patch is not None:
        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)

    if patch_original is None and self.get_kind(file_id) == 'directory':
        return StringIO()
    assert patch_original is not None, "None: %s" % file_id
    return patch_original

802

803

def get_symlink_target(self, file_id):
    """Return the symlink target recorded for file_id.

    The file's new path is looked up in self._targets; when there is no
    entry for it, the answer comes from base_tree.get_symlink_target().
    """
    link_path = self.id2path(file_id)
    try:
        target = self._targets[link_path]
    except KeyError:
        target = self.base_tree.get_symlink_target(file_id)
    return target

809

810

def get_kind(self, file_id):
    """Return the kind of file_id.

    A kind stored in self._kinds is preferred; otherwise the kind of the
    matching entry in base_tree.inventory is used.
    """
    if file_id not in self._kinds:
        return self.base_tree.inventory[file_id].kind
    return self._kinds[file_id]

814

815

def is_executable(self, file_id):
    """Return the executable flag for file_id.

    The flag stored in self._executable under the file's new path is
    preferred; otherwise the flag of the matching entry in
    base_tree.inventory is used.
    """
    new_path = self.id2path(file_id)
    if new_path not in self._executable:
        return self.base_tree.inventory[file_id].executable
    return self._executable[new_path]

821

822

def get_last_changed(self, file_id):
    """Return the revision in which file_id was last changed.

    The value stored in self._last_changed under the file's new path is
    preferred; otherwise the revision of the matching entry in
    base_tree.inventory is used.
    """
    new_path = self.id2path(file_id)
    return (self._last_changed[new_path]
            if new_path in self._last_changed
            else self.base_tree.inventory[file_id].revision)

827

828

def get_size_and_sha1(self, file_id):
    """Return a (size, sha1) pair for the given file id.

    (None, None) is returned when the id has no path in the target tree.
    If there is no patch for the file, its contents are the same as in
    base_tree, so the values are taken from the base inventory entry
    rather than by re-reading the file (the size is converted with int()
    unless it is None).  Otherwise the patched text is read and measured.
    """
    new_path = self.id2path(file_id)
    if new_path is None:
        return None, None
    if new_path in self.patches:
        text = self.get_file(file_id).read()
        return len(text), sha_string(text)
    # No patch for this entry: reuse what the base inventory recorded.
    base_entry = self.base_tree.inventory[file_id]
    size = base_entry.text_size
    if size is not None:
        size = int(size)
    return size, base_entry.text_sha1

846

847

def _get_inventory(self):
    """Build up the inventory for the BundleTree.

    Every (path, file_id) pair reported by sorted_path_id(), apart from
    the inventory root, is turned into an InventoryDirectory,
    InventoryFile or InventoryLink and added to a fresh Inventory.

    This needs to be called before ever accessing self.inventory.

    :return: the newly built Inventory.
    :raises BzrError: if an entry has a kind other than 'directory',
        'file' or 'symlink', or if a file entry ends up with a
        text_size of None.
    """
    from os.path import dirname, basename

    assert self.base_tree is not None
    base_inv = self.base_tree.inventory
    root_id = base_inv.root.file_id
    try:
        # New inventories have a unique root_id
        inv = Inventory(root_id, self.revision_id)
    except TypeError:
        # Presumably an Inventory class with a different constructor
        # signature; retry with revision_id passed by keyword only.
        inv = Inventory(revision_id=self.revision_id)

    def add_entry(file_id):
        """Create the inventory entry for file_id and add it to inv."""
        path = self.id2path(file_id)
        if path is None:
            # The id has no path in the target tree; nothing to add.
            return
        parent_path = dirname(path)
        if parent_path == u'':
            parent_id = root_id
        else:
            parent_id = self.path2id(parent_path)

        kind = self.get_kind(file_id)
        revision_id = self.get_last_changed(file_id)

        name = basename(path)
        if kind == 'directory':
            ie = InventoryDirectory(file_id, name, parent_id)
        elif kind == 'file':
            ie = InventoryFile(file_id, name, parent_id)
            ie.executable = self.is_executable(file_id)
        elif kind == 'symlink':
            ie = InventoryLink(file_id, name, parent_id)
            ie.symlink_target = self.get_symlink_target(file_id)
        else:
            # Without this branch ``ie`` would be unbound below and the
            # failure would surface as an opaque UnboundLocalError.
            raise BzrError('Unknown kind %r for file_id %r'
                           % (kind, file_id))
        ie.revision = revision_id

        if kind == 'file':
            ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
            if ie.text_size is None:
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
        else:
            # Directories and symlinks carry no text.
            ie.text_size, ie.text_sha1 = None, None
        inv.add(ie)

    for path, file_id in self.sorted_path_id():
        if file_id == inv.root.file_id:
            # The root entry already exists in the new inventory.
            continue
        add_entry(file_id)

    return inv

902

903

# The inherited ``inventory`` property has to be redefined here.  A
# property() object does not dispatch through overriding: it keeps
# calling the getter function it was given at the moment it was created,
# so the parent's property would only ever call the parent's
# _get_inventory.
inventory = property(fget=_get_inventory)

909

910

def __iter__(self):
    """Yield the file_id of each entry from self.inventory.iter_entries()."""
    for _path, ie in self.inventory.iter_entries():
        yield ie.file_id

913

914

def sorted_path_id(self):
    """Return a sorted list of (path, file_id) pairs for the target tree.

    The list starts from the items of self._new_id and is extended with
    every id in base_tree for which id2path() returns a path; ids whose
    path is None are left out.
    """
    pairs = list(self._new_id.iteritems())
    for base_id in self.base_tree:
        new_path = self.id2path(base_id)
        if new_path is not None:
            pairs.append((new_path, base_id))
    pairs.sort()
    return pairs

925

926

927

def patched_file(file_patch, original):
    """Return a file-like object holding the patched version of a text.

    :param file_patch: the patch text; an empty string produces an empty
        file-like object without consulting original.
    :param original: the base text handed to iter_patched().
    """
    from bzrlib.patches import iter_patched
    from bzrlib.iterablefile import IterableFile
    if file_patch == "":
        patched_lines = ()
    else:
        patched_lines = iter_patched(original, file_patch.splitlines(True))
    return IterableFile(patched_lines)

Older »