~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/bundle/bundle_data.py

Committer: Robert Collins
Date: 2006-08-08 23:19:29 UTC
mfrom: (1884 +trunk)
mto: This revision was merged to the branch mainline in revision 1912.
Revision ID: robertc@robertcollins.net-20060808231929-4e3e298190214b3a

current status

files added:
bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/ignores.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/response.py

profile_imports.py

files removed:
foo bar

files renamed:
bzrlib/bundle/read_bundle.py => bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v07.py => bzrlib/bundle/serializer/v08.py

files modified:
.bzrignore

HACKING

Makefile

NEWS

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export/tar_exporter.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/fakenfs.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

doc/README.1st

doc/configuration.txt

doc/setting_up_email.txt

doc/tutorial.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/bundle/bundle_data.py

#!/usr/bin/env python

"""\

Read in a bundle stream, and process it into a BundleReader object.

"""

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Read in a bundle stream, and process it into a BundleReader object."""

import base64

from cStringIO import StringIO

import os

import pprint

import bzrlib.errors

from bzrlib.errors import (TestamentMismatch, BzrError,

MalformedHeader, MalformedPatches, NotABundle)

from bzrlib.bundle.common import get_header, header_str

from bzrlib.inventory import (Inventory, InventoryEntry,

InventoryDirectory, InventoryFile,

InventoryLink)

from bzrlib.osutils import sha_file, sha_string

from bzrlib.osutils import sha_file, sha_string, pathjoin

from bzrlib.revision import Revision, NULL_REVISION

from bzrlib.testament import StrictTestament

from bzrlib.trace import mutter, warning

import bzrlib.transport

from bzrlib.tree import Tree

import bzrlib.urlutils

from bzrlib.xml5 import serializer_v5

109

split up, based on the assumptions that can be made

110

when information is missing.

111

"""

from bzrlib.bundle.common import unpack_highres_date

112

from bzrlib.bundle.serializer import unpack_highres_date

113

# Put in all of the guessable information.

114

if not self.timestamp and self.date:

100

115

self.timestamp, self.timezone = unpack_highres_date(self.date)

152

167

return r

153

168

raise KeyError(revision_id)

154

169

155

156

class BundleReader(object):

157

"""This class reads in a bundle from a file, and returns

158

a Bundle object, which can then be applied against a tree.

159

"""

160

def __init__(self, from_file):

161

"""Read in the bundle from the file.

162

163

:param from_file: A file-like object (must have iterator support).

164

"""

165

object.__init__(self)

166

self.from_file = iter(from_file)

167

self._next_line = None

168

169

self.info = BundleInfo()

170

# We put the actual inventory ids in the footer, so that the patch

171

# is easier to read for humans.

172

# Unfortunately, that means we need to read everything before we

173

# can create a proper bundle.

174

self._read()

175

self._validate()

176

177

def _read(self):

178

self._read_header()

179

while self._next_line is not None:

180

self._read_revision_header()

181

if self._next_line is None:

182

break

183

self._read_patches()

184

self._read_footer()

185

186

def _validate(self):

187

"""Make sure that the information read in makes sense

188

and passes appropriate checksums.

189

"""

190

# Fill in all the missing blanks for the revisions

191

# and generate the real_revisions list.

192

self.info.complete_info()

193

194

def _validate_revision(self, inventory, revision_id):

195

"""Make sure all revision entries match their checksum."""

196

197

# This is a mapping from each revision id to its sha hash

198

rev_to_sha1 = {}

199

200

rev = self.info.get_revision(revision_id)

201

rev_info = self.info.get_revision_info(revision_id)

202

assert rev.revision_id == rev_info.revision_id

203

assert rev.revision_id == revision_id

204

sha1 = StrictTestament(rev, inventory).as_sha1()

205

if sha1 != rev_info.sha1:

206

raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)

207

if rev_to_sha1.has_key(rev.revision_id):

208

raise BzrError('Revision {%s} given twice in the list'

209

% (rev.revision_id))

210

rev_to_sha1[rev.revision_id] = sha1

170

def revision_tree(self, repository, revision_id, base=None):

171

revision = self.get_revision(revision_id)

172

base = self.get_base(revision)

173

assert base != revision_id

174

self._validate_references_from_repository(repository)

175

revision_info = self.get_revision_info(revision_id)

176

inventory_revision_id = revision_id

177

bundle_tree = BundleTree(repository.revision_tree(base),

178

inventory_revision_id)

179

self._update_tree(bundle_tree, revision_id)

180

181

inv = bundle_tree.inventory

182

self._validate_inventory(inv, revision_id)

183

self._validate_revision(inv, revision_id)

184

185

return bundle_tree

211

186

212

187

def _validate_references_from_repository(self, repository):

213

188

"""Now that we have a repository which should have some of the

235

210

# All of the contained revisions were checked

236

211

# in _validate_revisions

237

212

checked = {}

238

for rev_info in self.info.revisions:

213

for rev_info in self.revisions:

239

214

checked[rev_info.revision_id] = True

240

215

add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

241

216

242

for (rev, rev_info) in zip(self.info.real_revisions, self.info.revisions):

217

for (rev, rev_info) in zip(self.real_revisions, self.revisions):

243

218

add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)

244

219

245

220

count = 0

289

264

s = serializer_v5.write_inventory_to_string(inv)

290

265

sha1 = sha_string(s)

291

266

# Target revision is the last entry in the real_revisions list

292

rev = self.info.get_revision(revision_id)

267

rev = self.get_revision(revision_id)

293

268

assert rev.revision_id == revision_id

294

269

if sha1 != rev.inventory_sha1:

295

270

open(',,bogus-inv', 'wb').write(s)

296

271

warning('Inventory sha hash mismatch for revision %s. %s'

297

272

' != %s' % (revision_id, sha1, rev.inventory_sha1))

298

273

299

def get_bundle(self, repository):

300

"""Return the meta information, and a Bundle tree which can

301

be used to populate the local stores and working tree, respectively.

302

"""

303

return self.info, self.revision_tree(repository, self.info.target)

304

305

def revision_tree(self, repository, revision_id, base=None):

306

revision = self.info.get_revision(revision_id)

307

base = self.info.get_base(revision)

308

assert base != revision_id

309

self._validate_references_from_repository(repository)

310

revision_info = self.info.get_revision_info(revision_id)

311

inventory_revision_id = revision_id

312

bundle_tree = BundleTree(repository.revision_tree(base),

313

inventory_revision_id)

314

self._update_tree(bundle_tree, revision_id)

315

316

inv = bundle_tree.inventory

317

self._validate_inventory(inv, revision_id)

318

self._validate_revision(inv, revision_id)

319

320

return bundle_tree

321

322

def _next(self):

323

"""yield the next line, but secretly

324

keep 1 extra line for peeking.

325

"""

326

for line in self.from_file:

327

last = self._next_line

328

self._next_line = line

329

if last is not None:

330

#mutter('yielding line: %r' % last)

331

yield last

332

last = self._next_line

333

self._next_line = None

334

#mutter('yielding line: %r' % last)

335

yield last

336

337

def _read_header(self):

338

"""Read the bzr header"""

339

header = get_header()

340

found = False

341

for line in self._next():

342

if found:

343

# not all mailers will keep trailing whitespace

344

if line == '#\n':

345

line = '# \n'

346

if (not line.startswith('# ') or not line.endswith('\n')

347

or line[2:-1].decode('utf-8') != header[0]):

348

raise MalformedHeader('Found a header, but it'

349

' was improperly formatted')

350

header.pop(0) # We read this line.

351

if not header:

352

break # We found everything.

353

elif (line.startswith('#') and line.endswith('\n')):

354

line = line[1:-1].strip().decode('utf-8')

355

if line[:len(header_str)] == header_str:

356

if line == header[0]:

357

found = True

358

else:

359

raise MalformedHeader('Found what looks like'

360

' a header, but did not match')

361

header.pop(0)

362

else:

363

raise NotABundle('Did not find an opening header')

364

365

def _read_revision_header(self):

366

self.info.revisions.append(RevisionInfo(None))

367

for line in self._next():

368

# The bzr header is terminated with a blank line

369

# which does not start with '#'

370

if line is None or line == '\n':

371

break

372

self._handle_next(line)

373

374

def _read_next_entry(self, line, indent=1):

375

"""Read in a key-value pair

376

"""

377

if not line.startswith('#'):

378

raise MalformedHeader('Bzr header did not start with #')

379

line = line[1:-1].decode('utf-8') # Remove the '#' and '\n'

380

if line[:indent] == ' '*indent:

381

line = line[indent:]

382

if not line:

383

return None, None# Ignore blank lines

384

385

loc = line.find(': ')

386

if loc != -1:

387

key = line[:loc]

388

value = line[loc+2:]

389

if not value:

390

value = self._read_many(indent=indent+2)

391

elif line[-1:] == ':':

392

key = line[:-1]

393

value = self._read_many(indent=indent+2)

394

else:

395

raise MalformedHeader('While looking for key: value pairs,'

396

' did not find the colon %r' % (line))

397

398

key = key.replace(' ', '_')

399

#mutter('found %s: %s' % (key, value))

400

return key, value

401

402

def _handle_next(self, line):

403

if line is None:

404

return

405

key, value = self._read_next_entry(line, indent=1)

406

mutter('_handle_next %r => %r' % (key, value))

407

if key is None:

408

return

409

410

revision_info = self.info.revisions[-1]

411

if hasattr(revision_info, key):

412

if getattr(revision_info, key) is None:

413

setattr(revision_info, key, value)

414

else:

415

raise MalformedHeader('Duplicated Key: %s' % key)

416

else:

417

# What do we do with a key we don't recognize

418

raise MalformedHeader('Unknown Key: "%s"' % key)

419

420

def _read_many(self, indent):

421

"""If a line ends with no entry, that means that it should be

422

followed with multiple lines of values.

423

424

This detects the end of the list, because it will be a line that

425

does not start properly indented.

426

"""

427

values = []

428

start = '#' + (' '*indent)

429

430

if self._next_line is None or self._next_line[:len(start)] != start:

431

return values

432

433

for line in self._next():

434

values.append(line[len(start):-1].decode('utf-8'))

435

if self._next_line is None or self._next_line[:len(start)] != start:

436

break

437

return values

438

439

def _read_one_patch(self):

440

"""Read in one patch, return the complete patch, along with

441

the next line.

442

443

:return: action, lines, do_continue

444

"""

445

#mutter('_read_one_patch: %r' % self._next_line)

446

# Peek and see if there are no patches

447

if self._next_line is None or self._next_line.startswith('#'):

448

return None, [], False

449

450

first = True

451

lines = []

452

for line in self._next():

453

if first:

454

if not line.startswith('==='):

455

raise MalformedPatches('The first line of all patches'

456

' should be a bzr meta line "==="'

457

': %r' % line)

458

action = line[4:-1].decode('utf-8')

459

elif line.startswith('... '):

460

action += line[len('... '):-1].decode('utf-8')

461

462

if (self._next_line is not None and

463

self._next_line.startswith('===')):

464

return action, lines, True

465

elif self._next_line is None or self._next_line.startswith('#'):

466

return action, lines, False

467

468

if first:

469

first = False

470

elif not line.startswith('... '):

471

lines.append(line)

472

473

return action, lines, False

474

475

def _read_patches(self):

476

do_continue = True

477

revision_actions = []

478

while do_continue:

479

action, lines, do_continue = self._read_one_patch()

480

if action is not None:

481

revision_actions.append((action, lines))

482

assert self.info.revisions[-1].tree_actions is None

483

self.info.revisions[-1].tree_actions = revision_actions

484

485

def _read_footer(self):

486

"""Read the rest of the meta information.

487

488

:param first_line: The previous step iterates past what it

489

can handle. That extra line is given here.

490

"""

491

for line in self._next():

492

self._handle_next(line)

493

if not self._next_line.startswith('#'):

494

self._next().next()

495

break

496

if self._next_line is None:

497

break

274

def _validate_revision(self, inventory, revision_id):

275

"""Make sure all revision entries match their checksum."""

276

277

# This is a mapping from each revision id to its sha hash

278

rev_to_sha1 = {}

279

280

rev = self.get_revision(revision_id)

281

rev_info = self.get_revision_info(revision_id)

282

assert rev.revision_id == rev_info.revision_id

283

assert rev.revision_id == revision_id

284

sha1 = StrictTestament(rev, inventory).as_sha1()

285

if sha1 != rev_info.sha1:

286

raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)

287

if rev_to_sha1.has_key(rev.revision_id):

288

raise BzrError('Revision {%s} given twice in the list'

289

% (rev.revision_id))

290

rev_to_sha1[rev.revision_id] = sha1

498

291

499

292

def _update_tree(self, bundle_tree, revision_id):

500

293

"""This fills out a BundleTree based on the information

608

401

'modified':modified

609

402

}

610

403

for action_line, lines in \

611

self.info.get_revision_info(revision_id).tree_actions:

404

self.get_revision_info(revision_id).tree_actions:

612

405

first = action_line.find(' ')

613

406

if first == -1:

614

407

raise BzrError('Bogus action line'

702

495

if old_dir is None:

703

496

old_path = None

704

497

else:

705

old_path = os.path.join(old_dir, basename)

498

old_path = pathjoin(old_dir, basename)

706

499

else:

707

500

old_path = new_path

708

501

#If the new path wasn't in renamed, the old one shouldn't be in

727

520

if new_dir is None:

728

521

new_path = None

729

522

else:

730

new_path = os.path.join(new_dir, basename)

523

new_path = pathjoin(new_dir, basename)

731

524

else:

732

525

new_path = old_path

733

526

#If the old path wasn't in renamed, the new one shouldn't be in

930

723

from bzrlib.iterablefile import IterableFile

931

724

if file_patch == "":

932

725

return IterableFile(())

933

return IterableFile(iter_patched(original, file_patch.splitlines(True)))

726

# string.splitlines(True) also splits on '\r', but the iter_patched code

727

# only expects to iterate over '\n' style lines

728

return IterableFile(iter_patched(original,

729

StringIO(file_patch).readlines()))

Older »